In [2]:
import numpy as np
import pandas as pd

In [3]:
from numpy.random import randn

In [4]:
# Seed NumPy's global random generator so the randn draws below are the same on every run.
np.random.seed(101)

In [5]:
# 5x4 frame of randn draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    randn(5, 4),
    index="A B C D E".split(),
    columns="W X Y Z".split(),
)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [6]:
# A single column label in brackets returns that column as a Series.
df["W"]

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [7]:
# Same bracket selection for column X; the result is again a Series indexed by the row labels.
df["X"]

A    0.628133
B   -0.319318
C    0.740122
D   -0.758872
E    1.978757
Name: X, dtype: float64

In [8]:
# A list of column labels returns a DataFrame containing just those columns.
df[["W","Y"]]

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077
C,-2.018168,0.528813
D,0.188695,-0.933237
E,0.190794,2.605967


In [9]:
# Assigning to a label that does not exist yet adds a column; here it is the element-wise sum W + Y.
df["new"] = df["W"] +df["Y"]
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [10]:
df.drop("new",axis=1,inplace=True) # axis=1 looks for the label among the columns; axis=0 would look in the index (rows)

# inplace=True modifies df itself; without it, drop returns a new frame and df still has the "new" column



In [11]:
# Display df to confirm the "new" column is gone.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [12]:
# axis=0 looks up "E" in the index, so the row is removed; inplace=True mutates df directly.
# Note: re-running this cell raises KeyError because "E" is no longer in the index.
df.drop("E",axis=0,inplace=True)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [13]:
# .loc selects by index label; a single row label returns that row as a Series.
df.loc["C"]

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [14]:
# .iloc selects by integer position; position 2 is row C here, so this matches df.loc["C"].
df.iloc[2]

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [15]:
# .loc[row_label, column_label] returns the single value at that intersection.
df.loc["B","Z"]

0.6059653494949336

In [16]:
# Lists of labels on both axes return the sub-frame of rows A, C and columns W, X.
df.loc[["A","C"],["W","X"]]

Unnamed: 0,W,X
A,2.70685,0.628133
C,-2.018168,0.740122


In [17]:
# Indexing with a boolean frame keeps values where the condition is True and puts NaN elsewhere.
df[df>0]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057


In [18]:
# Indexing with a boolean Series keeps only the rows where it is True (here, rows with W > 0).
resultDf = df[df["W"]>0]

In [19]:
# Rows whose Z value is negative (only row C in this frame).
df[df["Z"]<0]

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


In [20]:
# Show the W > 0 subset saved in resultDf.
resultDf

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


In [21]:
# Column X of the filtered frame, returned as a Series.
resultDf["X"]

A    0.628133
B   -0.319318
D   -0.758872
Name: X, dtype: float64

In [22]:
# Keep rows where W is positive and, in the same call, only columns X and Z.
# A single .loc[rows, cols] does the selection in one step instead of chaining two [] lookups
# through an intermediate frame.
df.loc[df["W"] > 0, ["X", "Z"]]

Unnamed: 0,X,Z
A,0.628133,0.503826
B,-0.319318,0.605965
D,-0.758872,0.955057


In [23]:
# Python's `and` needs a single True/False from each operand, and a boolean Series has no
# single truth value, so this expression raises ValueError (use `&` for element-wise AND).
# The error is caught and printed so the notebook still runs top to bottom.
try:
    df[(df["W"]>0) and (df["Y"]<0)]
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [24]:
# `&` combines the two boolean Series element-wise. Each comparison needs its own parentheses
# because & binds more tightly than > and <.
df[(df["W"]>0) & (df["Y"]<0)]

Unnamed: 0,W,X,Y,Z
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


In [25]:
# Show the current frame (rows A-D, columns W-Z) before the index examples.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [26]:
# Returns a new frame with the old labels moved into an "index" column and a fresh 0..n-1 index.
# df itself is unchanged unless inplace=True is passed or the result is assigned back.
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057


In [27]:
# df still has its A-D index: reset_index returned a new frame rather than modifying df.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [28]:
# State codes, one per remaining row of df, to be used as replacement index labels.
newIndex = ["PE", "BA", "CE", "SE"]
newIndex

['PE', 'BA', 'CE', 'SE']

In [29]:
# Store the list as a new "state" column; it has one entry per row (4 values for 4 rows).
df["state"] = newIndex
df

Unnamed: 0,W,X,Y,Z,state
A,2.70685,0.628133,0.907969,0.503826,PE
B,0.651118,-0.319318,-0.848077,0.605965,BA
C,-2.018168,0.740122,0.528813,-0.589001,CE
D,0.188695,-0.758872,-0.933237,0.955057,SE


In [30]:
# Returns a new frame that uses the "state" column as its index.
# Pass inplace=True (or assign the result back) to change df itself.
df.set_index("state")

Unnamed: 0_level_0,W,X,Y,Z
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
PE,2.70685,0.628133,0.907969,0.503826
BA,0.651118,-0.319318,-0.848077,0.605965
CE,-2.018168,0.740122,0.528813,-0.589001
SE,0.188695,-0.758872,-0.933237,0.955057


In [31]:
# Index Levels
# Two parallel label lists: the outer group of each row and its number within that group.
outside = ["G1"] * 3 + ["G2"] * 3
inside = [1, 2, 3] * 2
# Pair the labels row by row and build the two-level index from those tuples.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [32]:
# Rebind df to a new 6x2 frame of randn draws whose rows are labelled by the two-level hier_index.
df = pd.DataFrame(
    randn(6, 2),
    index=hier_index,
    columns=list("AB"),
)
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [37]:
# Look up one value in the two-level index: the tuple ("G1", 2) names the outer and inner
# row labels, and "A" names the column. A single .loc call replaces the chained
# .loc["G1"].loc[2, "A"], which built an intermediate sub-frame first.
df.loc[("G1", 2), "A"]

-1.7060859307350775

In [38]:
# Name the two index levels so they can be referred to by name (e.g. xs(..., level="Nums")).
df.index.names = ["Groups","Nums"]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Nums,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [39]:
# Cross-section: the rows whose "Nums" level equals 3, taken from every group.
# The selected level is dropped, so the result is indexed by "Groups" only.
df.xs(3,level="Nums")

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-0.134841,0.390528
G2,0.638787,0.329646
