# Pandas

In [1]:
import numpy as np

In [2]:
import pandas as pd

In [3]:
# Shared sample inputs: the raw values as a plain list and as an ndarray,
# plus the index labels that later cells attach to them.
my_data = [10, 20, 30]
arr = np.array(my_data)
labels = ["a", "b", "c"]

In [4]:
# Label -> value mapping; the keys become the index when passed to pd.Series.
d = dict(a=10, b=20, c=30)

In [7]:
pd.Series(data = my_data)

0    10
1    20
2    30
dtype: int64

In [8]:
pd.Series(data=my_data,index = labels)

a    10
b    20
c    30
dtype: int64

In [10]:
# An ndarray works as data just like a list; keywords make the argument roles explicit.
pd.Series(data=arr, index=labels)

a    10
b    20
c    30
dtype: int64

In [11]:
pd.Series(d)

a    10
b    20
c    30
dtype: int64

In [12]:
# Country-labelled Series; values and index are passed by keyword.
ser1 = pd.Series(
    data=[1, 2, 3, 4],
    index=["USA", "Germany", "USSR", "Japan"],
)

In [13]:
ser1

USA        1
Germany    2
USSR       3
Japan      4
dtype: int64

In [14]:
# Second country-labelled Series; its labels only partly overlap with ser1.
ser2 = pd.Series(
    data=[1, 2, 5, 4],
    index=["USA", "India", "Germany", "Japan"],
)

In [15]:
ser2

USA        1
India      2
Germany    5
Japan      4
dtype: int64

In [16]:
ser1['USA']

1

In [17]:
ser2['Germany']

5

In [18]:
# Addition aligns on index labels; a label present in only one Series yields NaN,
# which is why the result is float64.
ser1.add(ser2)

Germany    7.0
India      NaN
Japan      8.0
USA        2.0
USSR       NaN
dtype: float64

# DataFrames

In [20]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [21]:
np.random.seed(101)

In [27]:
# 5x4 frame of draws from randn, with rows labelled A-E and columns W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list("ABCDE"),
    columns=list("WXYZ"),
)

In [28]:
df

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [29]:
df['W']

A   -1.467514
B    0.392489
C    0.666319
D    0.641806
E   -1.972605
Name: W, dtype: float64

In [30]:
type(df)

pandas.core.frame.DataFrame

In [31]:
# Attribute-style access returns the same column as df['W'].
# It only works when the column name is a valid Python identifier that does not
# clash with an existing DataFrame attribute or method, so df['W'] is the safer default.
df.W

A   -1.467514
B    0.392489
C    0.666319
D    0.641806
E   -1.972605
Name: W, dtype: float64

In [32]:
df[['W','Z']]

Unnamed: 0,W,Z
A,-1.467514,0.485809
B,0.392489,1.54199
C,0.666319,1.407338
D,0.641806,1.028293
E,-1.972605,-1.223082


In [35]:
# Add a derived column holding the element-wise sum of W and Y (this mutates df).
df['new'] = df['W'].add(df['Y'])

In [36]:
df

Unnamed: 0,W,X,Y,Z,new
A,-1.467514,-0.494095,-0.162535,0.485809,-1.630049
B,0.392489,0.221491,-0.855196,1.54199,-0.462707
C,0.666319,-0.538235,-0.568581,1.407338,0.097738
D,0.641806,-0.9051,-0.391157,1.028293,0.250649
E,-1.972605,-0.866885,0.720788,-1.223082,-1.251818


In [37]:
# Returns a new frame without the 'new' column; df itself is left untouched.
df.drop(columns='new')

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [39]:
df

Unnamed: 0,W,X,Y,Z,new
A,-1.467514,-0.494095,-0.162535,0.485809,-1.630049
B,0.392489,0.221491,-0.855196,1.54199,-0.462707
C,0.666319,-0.538235,-0.568581,1.407338,0.097738
D,0.641806,-0.9051,-0.391157,1.028293,0.250649
E,-1.972605,-0.866885,0.720788,-1.223082,-1.251818


In [40]:
# inplace=True removes the 'new' column from df itself instead of returning a new frame.
# Note: this cell is not idempotent - re-running it raises KeyError because the
# column is already gone.
df.drop('new',axis=1,inplace = True)

In [41]:
df

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [44]:
# Drop the row labelled 'E'; the result is a new frame and df keeps all five rows.
df.drop(index='E')

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293


In [45]:
df.shape

(5, 4)

In [46]:
df

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [47]:
# Select a row by its index label with .loc

In [48]:
df.loc['A']

W   -1.467514
X   -0.494095
Y   -0.162535
Z    0.485809
Name: A, dtype: float64

In [49]:
# Select a row by its integer position with .iloc

In [52]:
df.iloc[0]

W   -1.467514
X   -0.494095
Y   -0.162535
Z    0.485809
Name: A, dtype: float64

In [53]:
df.loc['B','Y']

-0.8551960407780934

In [54]:
df.loc[['A','B'],['W','Y']]

Unnamed: 0,W,Y
A,-1.467514,-0.162535
B,0.392489,-0.855196


## Conditional Selection

In [55]:
# Element-wise boolean mask: True wherever the value is strictly greater than 0.
bool_df = df.gt(0)

In [56]:
df[bool_df]

Unnamed: 0,W,X,Y,Z
A,,,,0.485809
B,0.392489,0.221491,,1.54199
C,0.666319,,,1.407338
D,0.641806,,,1.028293
E,,,0.720788,


In [57]:
df

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [58]:
df['W']>0

A    False
B     True
C     True
D     True
E    False
Name: W, dtype: bool

In [60]:
df[df['W']>0]

Unnamed: 0,W,X,Y,Z
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293


In [61]:
# Column X for the rows where W is positive.
# A single .loc[row_mask, column] call replaces the chained df[mask]['X'] lookup:
# it avoids building an intermediate frame and is the form that also works for
# assignment (chained indexing would write to a temporary copy).
df.loc[df['W'] > 0, 'X']

B    0.221491
C   -0.538235
D   -0.905100
Name: X, dtype: float64

In [63]:
# The same kind of selection built step by step:
# choose the columns of interest, build the row mask, filter, then project.
mycols = ['Y', 'X']
boolser = df['W'].gt(0)
result = df.loc[boolser]
result.loc[:, mycols]

Unnamed: 0,Y,X
B,-0.855196,0.221491
C,-0.568581,-0.538235
D,-0.391157,-0.9051


## Multiple Conditions

In [69]:
# Combine row masks with & (element-wise AND); Python's `and` cannot be applied
# to Series. Keeps the rows where W > 0 and Z > 1.
df.loc[df['W'].gt(0) & df['Z'].gt(1)]

Unnamed: 0,W,X,Y,Z
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293


In [71]:
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,-1.467514,-0.494095,-0.162535,0.485809
1,B,0.392489,0.221491,-0.855196,1.54199
2,C,0.666319,-0.538235,-0.568581,1.407338
3,D,0.641806,-0.9051,-0.391157,1.028293
4,E,-1.972605,-0.866885,0.720788,-1.223082


In [72]:
# Candidate index labels, one per row of the five-row frame.
# NOTE(review): 'WR' is not a US state abbreviation - possibly meant 'WY' or 'WA';
# left unchanged because the recorded outputs below show it.
newind = ['CA', 'NY', 'WR', 'OR', 'CO']

In [73]:
newind

['CA', 'NY', 'WR', 'OR', 'CO']

In [74]:
df['States'] = newind

In [75]:
df

Unnamed: 0,W,X,Y,Z,States
A,-1.467514,-0.494095,-0.162535,0.485809,CA
B,0.392489,0.221491,-0.855196,1.54199,NY
C,0.666319,-0.538235,-0.568581,1.407338,WR
D,0.641806,-0.9051,-0.391157,1.028293,OR
E,-1.972605,-0.866885,0.720788,-1.223082,CO


In [76]:
df.set_index('States')

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,-1.467514,-0.494095,-0.162535,0.485809
NY,0.392489,0.221491,-0.855196,1.54199
WR,0.666319,-0.538235,-0.568581,1.407338
OR,0.641806,-0.9051,-0.391157,1.028293
CO,-1.972605,-0.866885,0.720788,-1.223082


In [77]:
df

Unnamed: 0,W,X,Y,Z,States
A,-1.467514,-0.494095,-0.162535,0.485809,CA
B,0.392489,0.221491,-0.855196,1.54199,NY
C,0.666319,-0.538235,-0.568581,1.407338,WR
D,0.641806,-0.9051,-0.391157,1.028293,OR
E,-1.972605,-0.866885,0.720788,-1.223082,CO
