In [1]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [3]:
import numpy as np
import pandas as pd

## Series

In [5]:
myList = [1, 2, 3, 4]
myLabels = ['w', 'x', 'y', 'z']
myArr = np.array(myList)
myDict = {'a':1, 'b':2, 'c':3, 'd':4}

In [6]:
pd.Series(myList)

0    1
1    2
2    3
3    4
dtype: int64

In [8]:
mySer = pd.Series(data=myList, index=myLabels)
mySer

w    1
x    2
y    3
z    4
dtype: int64

In [9]:
mySer[2]

3

In [10]:
mySer['y']

3

In [11]:
pd.Series(myArr)

0    1
1    2
2    3
3    4
dtype: int64

In [12]:
pd.Series(myDict)

a    1
b    2
c    3
d    4
dtype: int64

## DataFrame

In [16]:
df = pd.DataFrame(np.random.randint(1, 100, (5, 6)), 
             index=['A', 'B', 'C', 'D', 'E'],
            columns='U V W X Y Z'.split())

df

Unnamed: 0,U,V,W,X,Y,Z
A,53,33,33,52,45,24
B,14,68,2,64,58,78
C,9,70,19,19,25,40
D,66,60,22,85,31,76
E,35,94,15,16,23,98


In [23]:
df['V'] #Grab a column from the dataframe

A    33
B    68
C    70
D    60
E    94
Name: V, dtype: int64

In [24]:
df['V']['A']

33

In [25]:
df['V'][0]

33

In [20]:
df[['V', 'X']] #Grab multiple columns from the dataframe

Unnamed: 0,V,X
A,33,52
B,68,64
C,70,19
D,60,85
E,94,16


In [26]:
df['New'] = df['V'] + df['X'] #Creating a new column

In [27]:
df

Unnamed: 0,U,V,W,X,Y,Z,New
A,53,33,33,52,45,24,85
B,14,68,2,64,58,78,132
C,9,70,19,19,25,40,89
D,66,60,22,85,31,76,145
E,35,94,15,16,23,98,110


In [28]:
df['New'] = 100 #Overwriting an existing column

In [29]:
df

Unnamed: 0,U,V,W,X,Y,Z,New
A,53,33,33,52,45,24,100
B,14,68,2,64,58,78,100
C,9,70,19,19,25,40,100
D,66,60,22,85,31,76,100
E,35,94,15,16,23,98,100


In [31]:
df.drop('New', axis=1) #Temporarily drop a column or row

Unnamed: 0,U,V,W,X,Y,Z
A,53,33,33,52,45,24
B,14,68,2,64,58,78
C,9,70,19,19,25,40
D,66,60,22,85,31,76
E,35,94,15,16,23,98


In [32]:
df

Unnamed: 0,U,V,W,X,Y,Z,New
A,53,33,33,52,45,24,100
B,14,68,2,64,58,78,100
C,9,70,19,19,25,40,100
D,66,60,22,85,31,76,100
E,35,94,15,16,23,98,100


In [33]:
df.drop('New', axis=1, inplace=True) #Permanently drop a row or column

In [34]:
df

Unnamed: 0,U,V,W,X,Y,Z
A,53,33,33,52,45,24
B,14,68,2,64,58,78
C,9,70,19,19,25,40
D,66,60,22,85,31,76
E,35,94,15,16,23,98


In [35]:
df.shape

(5, 6)

In [36]:
df['U']

A    53
B    14
C     9
D    66
E    35
Name: U, dtype: int64

In [37]:
df.loc['B'] #To fetch a row

U    14
V    68
W     2
X    64
Y    58
Z    78
Name: B, dtype: int64

In [39]:
df.iloc[1] #To fetch a row by its index position

U    14
V    68
W     2
X    64
Y    58
Z    78
Name: B, dtype: int64

## Conditional Selection

In [40]:
df

Unnamed: 0,U,V,W,X,Y,Z
A,53,33,33,52,45,24
B,14,68,2,64,58,78
C,9,70,19,19,25,40
D,66,60,22,85,31,76
E,35,94,15,16,23,98


In [42]:
df['X'] %2 == 0

A     True
B     True
C    False
D    False
E     True
Name: X, dtype: bool

In [43]:
df[df['X'] %2 == 0]

Unnamed: 0,U,V,W,X,Y,Z
A,53,33,33,52,45,24
B,14,68,2,64,58,78
E,35,94,15,16,23,98


In [44]:
df[df['X'] %2 == 0]['Z']

A    24
B    78
E    98
Name: Z, dtype: int64

## Importing a CSV File

In [46]:
heart = pd.read_csv('heart.csv')
heart

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0
