# DataFrames_2

In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

In [3]:
np.random.seed(101) # Fix the seed so that the same "random" numbers are generated on every run

In [4]:
# Build a 5x4 frame of random draws: rows labelled A-E, columns labelled X, Y, Z, T.
df = pd.DataFrame(
    data=randn(5, 4),
    index=["A", "B", "C", "D", "E"],
    columns=["X", "Y", "Z", "T"],
)

In [5]:
# Display the frame: rows A-E, columns X, Y, Z, T
df

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## Conditional Selection

In [6]:
# Element-wise comparison: the result is a same-shaped frame of True/False values
df > 0 

Unnamed: 0,X,Y,Z,T
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True
E,True,True,True,True


In [7]:
# Boolean mask of df: True where a value is strictly positive (method form of `df > 0`)
booldf = df.gt(0)

In [8]:
# Keep the values where the mask is True; every other cell becomes NaN
df.where(booldf)

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [9]:
# One-liner form: build the mask and apply it without storing it in a variable
df[df.gt(0)]

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [10]:
# Boolean Series for column X: True on the rows whose X value is strictly positive
df['X'].gt(0)

A     True
B     True
C    False
D     True
E     True
Name: X, dtype: bool

In [11]:
# Row filter: keep only the rows whose X value is positive (all columns are retained)
df.loc[df['X'] > 0]

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [12]:
# Row filter: keep only the rows whose T value is negative
df[df['T'].lt(0)]

Unnamed: 0,X,Y,Z,T
C,-2.018168,0.740122,0.528813,-0.589001


In [13]:
# Store the filtered rows (X > 0) under a name so they can be reused, then display them
result_df = df.loc[df['X'].gt(0)]
result_df

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [14]:
# Pull the Z column (a Series) out of the filtered frame
result_df.loc[:, 'Z']

A    0.907969
B   -0.848077
D   -0.933237
E    2.605967
Name: Z, dtype: float64

In [15]:
# Filter rows (X > 0) and pick column Z in a single .loc call instead of chaining
# two [] lookups. Chained indexing builds an intermediate frame and is unreliable
# if the same expression is ever used on the left-hand side of an assignment.
df.loc[df['X'] > 0, 'Z']

A    0.907969
B   -0.848077
D   -0.933237
E    2.605967
Name: Z, dtype: float64

In [16]:
# Filter rows (X > 0) and select several columns at once by passing a list of names.
# A single .loc call avoids the intermediate frame created by chained [] indexing.
df.loc[df['X'] > 0, ['Z', 'T']]

Unnamed: 0,Z,T
A,0.907969,0.503826
B,-0.848077,0.605965
D,-0.933237,0.955057
E,2.605967,0.683509


In [17]:
# Multiple conditions: combine boolean Series with & (element-wise AND).
# Each comparison needs its own parentheses because & binds more tightly than >.
df[(df['X']>0) & (df['Z']>1)]

# The Python keyword "and" cannot be used here: it tries to reduce each whole Series
# to a single True/False value, and pandas raises
# "ValueError: The truth value of a Series is ambiguous."

Unnamed: 0,X,Y,Z,T
E,0.190794,1.978757,2.605967,0.683509


In [18]:
# Element-wise OR: keep the rows where at least one of the two conditions holds.
df[(df['X']>0) | (df['Z']>1)]

# The Python keyword "or" cannot be used here: it tries to reduce each whole Series
# to a single True/False value, and pandas raises
# "ValueError: The truth value of a Series is ambiguous."

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## More Index Details

In [19]:
# Current state of df (index A-E) before the index examples
df

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [20]:
# Move the index labels into an ordinary column named "index" and number the rows 0..4.
# This returns a new DataFrame; df itself is left untouched.

df.reset_index()

Unnamed: 0,index,X,Y,Z,T
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


In [21]:
df # unchanged: reset_index() returned a new frame; pass inplace=True to modify df itself

Unnamed: 0,X,Y,Z,T
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [22]:
# Five two-letter labels, one per row of df
new_index = ['AB', 'CD', 'EF', 'GH', 'IJ']

In [23]:
# Check the list of two-letter labels
new_index

['AB', 'CD', 'EF', 'GH', 'IJ']

In [24]:
# Add the labels as a new column (one label per row, in order); this modifies df in place
df["Double_Letters"] = new_index

In [25]:
# df now carries Double_Letters as its last column
df

Unnamed: 0,X,Y,Z,T,Double_Letters
A,2.70685,0.628133,0.907969,0.503826,AB
B,0.651118,-0.319318,-0.848077,0.605965,CD
C,-2.018168,0.740122,0.528813,-0.589001,EF
D,0.188695,-0.758872,-0.933237,0.955057,GH
E,0.190794,1.978757,2.605967,0.683509,IJ


In [26]:
# Use the Double_Letters column as the row index. The column is consumed (it no longer
# appears among the data columns) and a new frame is returned; df is not modified here.
df.set_index("Double_Letters")

Unnamed: 0_level_0,X,Y,Z,T
Double_Letters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AB,2.70685,0.628133,0.907969,0.503826
CD,0.651118,-0.319318,-0.848077,0.605965
EF,-2.018168,0.740122,0.528813,-0.589001
GH,0.188695,-0.758872,-0.933237,0.955057
IJ,0.190794,1.978757,2.605967,0.683509


In [27]:
# df still has its A-E index and the Double_Letters column: set_index() returned a new frame
df

Unnamed: 0,X,Y,Z,T,Double_Letters
A,2.70685,0.628133,0.907969,0.503826,AB
B,0.651118,-0.319318,-0.848077,0.605965,CD
C,-2.018168,0.740122,0.528813,-0.589001,EF
D,0.188695,-0.758872,-0.933237,0.955057,GH
E,0.190794,1.978757,2.605967,0.683509,IJ


In [28]:
# inplace=True makes set_index() modify df itself instead of returning a new frame.
# Caveat: once this has run, Double_Letters is the index and no longer a column, so
# running this cell a second time on the same kernel raises a KeyError.
df.set_index("Double_Letters", inplace = True)

# Alternative that avoids inplace: df = df.set_index("Double_Letters")

In [29]:
# df is now indexed by Double_Letters; the original A-E labels have been replaced
df

Unnamed: 0_level_0,X,Y,Z,T
Double_Letters,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AB,2.70685,0.628133,0.907969,0.503826
CD,0.651118,-0.319318,-0.848077,0.605965
EF,-2.018168,0.740122,0.528813,-0.589001
GH,0.188695,-0.758872,-0.933237,0.955057
IJ,0.190794,1.978757,2.605967,0.683509
