In [1]:
import numpy as np
import pandas as pd

In [28]:
# Seed NumPy's legacy global RNG so the random table below is reproducible.
np.random.seed(0)

# 3 rows (a-c) x 5 "priority level" columns of integers drawn from [0, 9).
df = pd.DataFrame(
    np.random.randint(0, 9, size=(3, 5)),
    index=list("abc"),
    columns=[f"priority level {level}" for level in range(1, 6)],
)
df

Unnamed: 0,priority level 1,priority level 2,priority level 3,priority level 4,priority level 5
a,5,0,3,3,7
b,3,5,2,4,7
c,6,8,8,1,6


In [None]:
# Bracket indexing (df[...]) looks up *column* names, so df["labels= a, b"]
# raises KeyError. Rows are selected by their index labels with .loc instead.
df.loc[["a", "b"]]

In [30]:
# Single brackets with one column label return that column as a Series.
df["priority level 1"]

a    5
b    3
c    6
Name: priority level 1, dtype: int32

In [31]:
# Confirm that the single-bracket selection is a pandas Series.
type(df["priority level 1"])

pandas.core.series.Series

In [27]:
# Length of one column, i.e. the number of rows in df.
len(df["priority level 2"])

3

In [32]:
# Double brackets (a list of labels) return a DataFrame, even for one column.
df[["priority level 3"]]

Unnamed: 0,priority level 3
a,3
b,2
c,8


In [33]:
# Confirm that the list-based selection is a DataFrame rather than a Series.
type(df[["priority level 3"]])

pandas.core.frame.DataFrame

In [34]:
# A list with several labels selects just those columns as a DataFrame.
df[["priority level 1", "priority level 3"]]

Unnamed: 0,priority level 1,priority level 3
a,5,3
b,3,2
c,6,8


## Filtering columns

### Why do we need `df.filter` when a column can otherwise be selected by simply calling `df["coltitle"]`?

In [71]:
# Re-seed the legacy global RNG so the sample values are reproducible.
np.random.seed(1)

# 4 rows (A-D) x 5 named columns of standard-normal draws.
df = pd.DataFrame(
    np.random.randn(4, 5),
    index=list("ABCD"),
    columns=["product qnt", "sold to", "happy customers", "returns", "retention rate"],
)
df

Unnamed: 0,product qnt,sold to,happy customers,returns,retention rate
A,1.624345,-0.611756,-0.528172,-1.072969,0.865408
B,-2.301539,1.744812,-0.761207,0.319039,-0.24937
C,1.462108,-2.060141,-0.322417,-0.384054,1.133769
D,-1.099891,-0.172428,-0.877858,0.042214,0.582815


In [72]:
# Keep only the listed column; on a DataFrame, filter() matches column labels by default.
df.filter(items=["product qnt"])

Unnamed: 0,product qnt
A,1.624345
B,-2.301539
C,1.462108
D,-1.099891


In [73]:
# Keep a subset of columns by label; the axis is spelled out by name for clarity.
df.filter(
    items=["product qnt", "sold to", "happy customers"],
    axis="columns",
)

Unnamed: 0,product qnt,sold to,happy customers
A,1.624345,-0.611756,-0.528172
B,-2.301539,1.744812,-0.761207
C,1.462108,-2.060141,-0.322417
D,-1.099891,-0.172428,-0.877858


## Adding values within a DataFrame

In [56]:
# Re-seed the legacy global RNG so the sample values are reproducible.
np.random.seed(1)

# 5 customers x 4 metrics of standard-normal draws.
df = pd.DataFrame(
    np.random.randn(5, 4),
    index=[f"Cus {number}" for number in range(1, 6)],
    columns=["cost", "profit", "referrals", "discounts"],
)
df

Unnamed: 0,cost,profit,referrals,discounts
Cus 1,1.624345,-0.611756,-0.528172,-1.072969
Cus 2,0.865408,-2.301539,1.744812,-0.761207
Cus 3,0.319039,-0.24937,1.462108,-2.060141
Cus 4,-0.322417,-0.384054,1.133769,-1.099891
Cus 5,-0.172428,-0.877858,0.042214,0.582815


In [57]:
# Assigning to a new label adds a column; the arithmetic is element-wise per row.
df["total"] = (
    df["profit"]
    + df["referrals"]
    - df["discounts"]
)
df

Unnamed: 0,cost,profit,referrals,discounts,total
Cus 1,1.624345,-0.611756,-0.528172,-1.072969,-0.06696
Cus 2,0.865408,-2.301539,1.744812,-0.761207,0.20448
Cus 3,0.319039,-0.24937,1.462108,-2.060141,3.272878
Cus 4,-0.322417,-0.384054,1.133769,-1.099891,1.849606
Cus 5,-0.172428,-0.877858,0.042214,0.582815,-1.41846


In [59]:
# (Re)assign "total" as the row-wise sum of cost and referrals.
df["total"] = df["cost"] + df["referrals"]
df

Unnamed: 0,cost,profit,referrals,discounts,total
Cus 1,1.624345,-0.611756,-0.528172,-1.072969,1.096174
Cus 2,0.865408,-2.301539,1.744812,-0.761207,2.610219
Cus 3,0.319039,-0.24937,1.462108,-2.060141,1.781147
Cus 4,-0.322417,-0.384054,1.133769,-1.099891,0.811352
Cus 5,-0.172428,-0.877858,0.042214,0.582815,-0.130214


In [60]:
# (Re)assign "total"; multiplication binds tighter than addition, so the
# product profit * referrals is computed first and then added to cost.
df["total"] = df["cost"] + (df["profit"] * df["referrals"])
df

Unnamed: 0,cost,profit,referrals,discounts,total
Cus 1,1.624345,-0.611756,-0.528172,-1.072969,1.947458
Cus 2,0.865408,-2.301539,1.744812,-0.761207,-3.150344
Cus 3,0.319039,-0.24937,1.462108,-2.060141,-0.045567
Cus 4,-0.322417,-0.384054,1.133769,-1.099891,-0.757846
Cus 5,-0.172428,-0.877858,0.042214,0.582815,-0.209486


## Dropping/Deleting

In [76]:
# Re-seed the legacy global RNG so the sample values are reproducible.
np.random.seed(1)

# 3 rows (A-C) x 4 quarterly columns of uniform draws from [0, 1).
df = pd.DataFrame(
    np.random.rand(3, 4),
    index=list("ABC"),
    columns=[f"Q{quarter}" for quarter in range(1, 5)],
)
df

Unnamed: 0,Q1,Q2,Q3,Q4
A,0.417022,0.720324,0.000114,0.302333
B,0.146756,0.092339,0.18626,0.345561
C,0.396767,0.538817,0.419195,0.68522


In [None]:
# drop() returns a new DataFrame without "Q2"; df itself is not modified.
df.drop(labels=["Q2"], axis=1)  # equivalent to df.drop(columns=["Q2"])

Unnamed: 0,Q1,Q3,Q4
A,0.417022,0.000114,0.302333
B,0.146756,0.18626,0.345561
C,0.396767,0.419195,0.68522


In [90]:
# Reassign the result to make the removal of "Q4" permanent. columns= already
# targets the column axis, so no axis argument is needed alongside it.
df = df.drop(columns=["Q4"])
df


Unnamed: 0,Q1,Q2,Q3
A,0.417022,0.720324,0.000114
B,0.146756,0.092339,0.18626
C,0.396767,0.538817,0.419195


 - Second method of removing a column: pass `inplace=True`. Remember, this alters the DataFrame itself, so running the cell a second time produces an error like the one below, because the column has already been removed.

In [96]:
# inplace=True mutates df directly instead of returning a new DataFrame, so
# this cell is not idempotent: the first run removes "Q1", and any re-run finds
# no "Q1" left to drop. Catch and display that KeyError so the demonstration
# does not halt the notebook.
try:
    df.drop(labels=["Q1"], axis=1, inplace=True)
except KeyError as err:
    print(f"KeyError: {err}")
df

KeyError: "['Q1'] not found in axis"