In [4]:
#Indexing and selecting data
from io import StringIO

import numpy as np
import pandas as pd
# Seed the legacy global RNG first so this random frame is identical on every
# Restart & Run All (the same seed value is used by the later cells).
np.random.seed(5)
# 5 rows x 3 columns of standard-normal floats, columns labelled A, B, C.
df = pd.DataFrame(np.random.randn(5, 3), columns=list('ABC'))
df

Unnamed: 0,A,B,C
0,0.488942,0.356326,0.380714
1,-0.268047,1.402934,-1.521396
2,-1.211578,-0.510327,0.547816
3,-0.300512,1.090424,0.944262
4,-0.428688,-2.701438,-1.748396


In [5]:
# Select one column using a single label, 'A'.
# The result is displayed as a one-dimensional Series named A, not a DataFrame.
df['A']

0    0.488942
1   -0.268047
2   -1.211578
3   -0.300512
4   -0.428688
Name: A, dtype: float64

In [6]:
# Select multiple columns by passing a list of labels, ['A', 'C'].
# The result is a DataFrame containing only those columns, in the order listed.
df[['A', 'C']]

Unnamed: 0,A,C
0,0.488942,0.380714
1,-0.268047,-1.521396
2,-1.211578,0.547816
3,-0.300512,0.944262
4,-0.428688,-1.748396


In [7]:
# Small 3x2 integer frame with string row labels a, b, c; the columns get the
# default integer labels 0 and 1.
df = pd.DataFrame(
    data=[[11, 22], [33, 44], [55, 66]],
    index=['a', 'b', 'c'],
)
df

Unnamed: 0,0,1
a,11,22
b,33,44
c,55,66


In [8]:
# Position-based row selection: position 0 is the row labelled 'a'.
# A single row comes back as a Series keyed by the column labels.
df.iloc[0] 

0    11
1    22
Name: a, dtype: int64

In [9]:
# Position 1 is the second row (label 'b'), again returned as a Series.
df.iloc[1] 

0    33
1    44
Name: b, dtype: int64

In [10]:
# Positional slice: the first two rows ('a' and 'b'); the stop position 2 is excluded.
df.iloc[:2] 

Unnamed: 0,0,1
a,11,22
b,33,44


In [11]:
# A plain slice applied to the DataFrame acts on rows; a step of -1 returns
# them in reverse order (c, b, a).
df[::-1]

Unnamed: 0,0,1
c,55,66
b,33,44
a,11,22


In [12]:
# Row location can be combined with column location:
# ':' keeps every row, 1 picks the column at position 1 (returned as a Series).
df.iloc[:, 1] 

a    22
b    44
c    66
Name: 1, dtype: int64

In [14]:
# Fix the seed so the random integers below are the same on every run.
np.random.seed(5)
# 5x5 frame of integers in [0, 100) with row labels R0..R4 and columns A..E.
df = pd.DataFrame(
    data=np.random.randint(100, size=(5, 5)),
    index=[f"R{row}" for row in range(5)],
    columns=['A', 'B', 'C', 'D', 'E'],
)
df

Unnamed: 0,A,B,C,D,E
R0,99,78,61,16,73
R1,8,62,27,30,80
R2,7,76,15,53,80
R3,27,44,77,75,65
R4,47,30,84,86,18


In [15]:
# Label-based row slice with .loc: both end labels are included,
# so this returns rows R0, R1 and R2.
df.loc['R0':'R2']

Unnamed: 0,A,B,C,D,E
R0,99,78,61,16,73
R1,8,62,27,30,80
R2,7,76,15,53,80


In [16]:
# Position-based row slice with .iloc: the stop position is excluded,
# so this returns only rows R0 and R1.
df.iloc[0:2] 

Unnamed: 0,A,B,C,D,E
R0,99,78,61,16,73
R1,8,62,27,30,80


In [17]:
# All rows, columns labelled 'C' through 'E' (the end label 'E' is included).
df.loc[:, 'C':'E']

Unnamed: 0,C,D,E
R0,61,16,73
R1,27,30,80
R2,15,53,80
R3,77,75,65
R4,84,86,18


In [26]:
# Boolean indexing
# Rebuild the seeded 5x5 integer frame so this section starts from a known state.
np.random.seed(5)
df = pd.DataFrame(np.random.randint(100, size=(5, 5)), columns=list("ABCDE"),
                  index=["R" + str(i) for i in range(5)])
# End on the bare expression (not print) so the frame gets the notebook's rich
# table display, like the other cells that show a DataFrame.
df


     A   B   C   D   E
R0  99  78  61  16  73
R1   8  62  27  30  80
R2   7  76  15  53  80
R3  27  44  77  75  65
R4  47  30  84  86  18


In [27]:
# Element-wise comparison gives a boolean Series aligned with df's index:
# True for the rows whose A value is greater than 10.
mask = df['A'] > 10
# Bare last expression instead of print(): the notebook displays it directly.
mask

R0     True
R1    False
R2    False
R3     True
R4     True
Name: A, dtype: bool


In [28]:
# Indexing with the boolean Series keeps only the rows where mask is True.
# Shown as a bare expression (not print) to get the rich table display.
df[mask]

     A   B   C   D   E
R0  99  78  61  16  73
R3  27  44  77  75  65
R4  47  30  84  86  18


In [31]:
# .loc accepts the boolean mask for rows together with a column label:
# column C for just the rows where mask is True.
df.loc[mask, 'C']


R0    61
R3    77
R4    84
Name: C, dtype: int32


In [33]:
# 5x6 frame of random digits (0-9); three column names start with 'a' so the
# label-filtering examples that follow have something to match.
df = pd.DataFrame(
    data=np.random.randint(0, 10, size=(5, 6)),
    columns=['a10', 'a20', 'a25', 'b', 'c', 'd'],
)
df

Unnamed: 0,a10,a20,a25,b,c,d
0,1,9,5,7,0,9
1,6,0,5,2,8,6
2,8,0,5,2,0,7
3,7,6,0,0,8,5
4,5,9,6,4,5,2


In [34]:
# Keep only the columns whose label contains the substring 'a'
# (here: a10, a20, a25).
df.filter(like='a')

Unnamed: 0,a10,a20,a25
0,1,9,5
1,6,0,5
2,8,0,5
3,7,6,0
4,5,9,6


In [35]:
# Keep the columns whose label matches the regular expression; here that is b, c and d.
# NOTE(review): the pattern is not anchored, so it would presumably also match
# longer labels that merely contain b, c or d - confirm if the column set grows.
df.filter(regex='(b|c|d)')

Unnamed: 0,b,c,d
0,7,0,9
1,2,8,6
2,2,0,7
3,0,8,5
4,4,5,2


In [36]:
# Same selection via a boolean column mask: str.contains('^a') flags labels that
# start with 'a', and ~ inverts the mask so only the other columns (b, c, d) remain.
df.loc[:, ~df.columns.str.contains('^a')]


Unnamed: 0,b,c,d
0,7,0,9
1,2,8,6
2,2,0,7
3,0,8,5
4,4,5,2


In [37]:
# 10x3 frame of random digits (0-9) used by the query() examples below.
df = pd.DataFrame(np.random.randint(0, 10, size=(10, 3)), columns=list('ABC'))
# End on the bare expression (not print) for the notebook's rich table display.
df


   A  B  C
0  8  8  1
1  6  3  4
2  1  8  0
3  2  2  4
4  1  6  3
5  4  3  1
6  4  2  3
7  4  9  4
8  0  6  6
9  9  2  9


In [38]:
# Filter rows with an expression string: keep rows where A > 2 and B < 5.
# Column names are used directly inside the expression.
df.query('A > 2 and B < 5')

Unnamed: 0,A,B,C
1,6,3,4
5,4,3,1
6,4,2,3
9,9,2,9


In [40]:
# Values to look for in column B in the query() cells that follow.
# NOTE(review): neither 1 nor 7 occurs in column B of the frame printed above,
# so both queries that use this list return empty frames - pick values that are
# present in B if the examples are meant to show matching rows.
B_filter = [1,7]
B_filter

[1, 7]

In [44]:
# '@' makes the expression refer to the Python variable B_filter; in query
# syntax, comparing a column to a list with == is a membership test.
# The result is empty here because no row has B equal to 1 or 7.
df.query('B == @B_filter')

Unnamed: 0,A,B,C


In [42]:
# Membership test written with `in` inside the query expression, again using
# '@' to reach the Python variable. Like the == form, it yields an empty frame here.
df.query('@B_filter in B')

Unnamed: 0,A,B,C


In [51]:
# Parse a JSON array of records into a DataFrame (one row per object).
# The text is wrapped in StringIO because passing a literal JSON string straight
# to read_json is deprecated in recent pandas releases; a file-like object is
# accepted by old and new versions alike.
pd.read_json(StringIO('[{"A": 1, "B": 2}, {"A": 3, "B": 4}]'))

Unnamed: 0,A,B
0,1,2
1,3,4


In [53]:
# One column per value kind (integers, floats, strings, booleans) so the
# conversion examples below show how mixed types are handled.
df = pd.DataFrame(
    data={
        'A': [1, 2, 3],
        'B': [1.0, 2.0, 3.0],
        'C': ['a', 'b', 'c'],
        'D': [True, False, True],
    }
)
df

Unnamed: 0,A,B,C,D
0,1,1.0,a,True
1,2,2.0,b,False
2,3,3.0,c,True


In [54]:
 # Convert a single column (a Series) to a plain Python list.
 df['A'].tolist()

[1, 2, 3]

In [55]:
# A DataFrame, unlike a Series, has no .tolist() method. Catch the resulting
# AttributeError and print it so the point is still demonstrated without
# stopping a "Run All" at this cell.
try:
    df.tolist()
except AttributeError as err:
    print(f"AttributeError: {err}")
# Working equivalent: go through the underlying array to get a list of rows.
df.values.tolist()

AttributeError: 'DataFrame' object has no attribute 'tolist'

In [56]:
# .values on a column exposes the underlying NumPy array (floats for column B).
df['B'].values

array([1., 2., 3.])

In [57]:
# On the whole frame .values gives a 2-D array with one row per record.
# The columns hold different types, so the resulting array has dtype=object.
df.values

array([[1, 1.0, 'a', True],
       [2, 2.0, 'b', False],
       [3, 3.0, 'c', True]], dtype=object)

In [58]:
# Series.to_dict() maps each index label to the value stored at that label.
df['C'].to_dict()

{0: 'a', 1: 'b', 2: 'c'}

In [59]:
# With default arguments DataFrame.to_dict() nests per column:
# {column label -> {index label -> value}}.
df.to_dict()

{'A': {0: 1, 1: 2, 2: 3},
 'B': {0: 1.0, 1: 2.0, 2: 3.0},
 'C': {0: 'a', 1: 'b', 2: 'c'},
 'D': {0: True, 1: False, 2: True}}

In [65]:
# Map values
# U holds numeric ids (with repeats) and L a language code for each row.
data = dict(
    U=[111, 112, 112, 113, 113, 113, 114],
    L=['en', 'en', 'es', 'es', 'ja', 'zh', 'es'],
)
df = pd.DataFrame(data)
df

Unnamed: 0,U,L
0,111,en
1,112,en
2,112,es
3,113,es
4,113,ja
5,113,zh
6,114,es


In [62]:
# Lookup table: one language code per U value (insertion order kept as written).
d = {
    112: 'en',
    113: 'es',
    114: 'es',
    111: 'en',
}
d

{112: 'en', 113: 'es', 114: 'es', 111: 'en'}

In [67]:
# Build column S by looking up every U value in the dict d (Series.map with a dict).
# Note: this assigns into df in place, so df now carries a column that the
# frame displayed earlier did not have.
df['S'] = df['U'].map(d)
df

Unnamed: 0,U,L,S
0,111,en,en
1,112,en,en
2,112,es,en
3,113,es,es
4,113,ja,es
5,113,zh,es
6,114,es,es


In [69]:
# Merging two DataFrames
# The two frames share only the column 'y'; 'a' exists only on the left and
# 'd' only on the right, which makes the join types easy to tell apart.
df1 = pd.DataFrame(
    data={
        'x': [1, 2, 3],
        'y': ['a', 'b', 'c'],
    }
)
df2 = pd.DataFrame(
    data={
        'y': ['b', 'c', 'd'],
        'z': [4, 5, 6],
    }
)
df1

Unnamed: 0,x,y
0,1,a
1,2,b
2,3,c


In [70]:
# Show the right-hand frame for comparison; 'y' is the only column it shares with df1.
df2

Unnamed: 0,y,z
0,b,4
1,c,5
2,d,6


In [71]:
# Only the y values present in both frames ('b' and 'c') appear in the result.
df1.merge(df2) # by default, it does an inner join on the common column(s)


Unnamed: 0,x,y,z
0,2,b,4
1,3,c,5


In [73]:
# Same inner join, this time naming the key column explicitly on each side.
merged_inner = df1.merge(right=df2, left_on='y', right_on='y')
merged_inner

Unnamed: 0,x,y,z
0,2,b,4
1,3,c,5


In [74]:
 # Outer join: keys from both frames are kept ('a', 'b', 'c', 'd'); where one
 # side has no match its cell is left missing, as the output shows for z on 'a'
 # and x on 'd'.
 df1.merge(df2, how='outer')

Unnamed: 0,x,y,z
0,1.0,a,
1,2.0,b,4.0
2,3.0,c,5.0
3,,d,6.0


In [75]:
 # Left join: every row of df1 is kept; z is missing for 'a', which has no
 # match in df2.
 df1.merge(df2, how='left')

Unnamed: 0,x,y,z
0,1,a,
1,2,b,4.0
2,3,c,5.0


In [76]:
 # Right join: every row of df2 is kept; x is missing for 'd', which has no
 # match in df1.
 df1.merge(df2, how='right')

Unnamed: 0,x,y,z
0,2.0,b,4
1,3.0,c,5
2,,d,6


In [78]:
#Merging / concatenating / joining multiple data frames (horizontally and
#vertically)

# Third of three same-shaped frames (values in the 200s) used in the concat examples.
df3 = pd.DataFrame(
    data={
        'col1': [211, 212, 213],
        'col2': [221, 222, 223],
    }
)
df3

Unnamed: 0,col1,col2
0,211,221
1,212,222
2,213,223


In [79]:
# First frame for the concat examples (values in the 10s and 20s).
# This rebinds df1, replacing the frame used in the merge examples.
df1 = pd.DataFrame(
    data={
        'col1': [11, 12, 13],
        'col2': [21, 22, 23],
    }
)
df1

Unnamed: 0,col1,col2
0,11,21
1,12,22
2,13,23


In [80]:
# Second frame for the concat examples (values in the 100s).
# This rebinds df2, replacing the frame used in the merge examples.
df2 = pd.DataFrame(
    data={
        'col1': [111, 112, 113],
        'col2': [121, 122, 123],
    }
)
df2

Unnamed: 0,col1,col2
0,111,121
1,112,122
2,113,123


In [83]:
# Stack the three frames on top of each other (row-wise).
# ignore_index=True gives the result a fresh 0..8 row index, as seen in the output.
pd.concat([df1,df2,df3], ignore_index=True) #vertically


Unnamed: 0,col1,col2
0,11,21
1,12,22
2,13,23
3,111,121
4,112,122
5,113,123
6,211,221
7,212,222
8,213,223


In [84]:
# Place the three frames side by side (column-wise), matched up by row index.
# NOTE(review): all three frames use the labels col1/col2, so the result has
# repeated column labels (the rendered output tells them apart with .1/.2
# suffixes) - consider passing keys=[...] if distinct labels are needed.
pd.concat([df1,df2,df3], axis=1) #horizontally

Unnamed: 0,col1,col2,col1.1,col2.1,col1.2,col2.2
0,11,21,111,121,211,221
1,12,22,112,122,212,222
2,13,23,113,123,213,223
