In [1]:
import pandas as pd
import numpy as np 

# Series and DataFrames
- Subsetting
- Arithmetic, mapping
- Missing Data
- Sorting and Ranking
- Hierarchical Indexing

In [2]:
# Index labels reused by the Series examples below.
labels = 'a b c d'.split()

In [3]:
# Plain Python list of four scores, used as Series data in later cells.
# NOTE(review): binding the name `list` hides Python's built-in `list` type for
# every later cell; a descriptive name would avoid that, but the Series cells
# below read this name, so a rename has to be done across all of them at once.
list = [50,60,70,47]

In [6]:
# The same four scores as a 1-D NumPy array, echoed to stdout.
array = np.array((50, 60, 70, 47))
print(array)

[50 60 70 47]


In [8]:
# Label -> score mapping, used later to build a Series straight from a dict.
# NOTE(review): binding the name `dict` hides Python's built-in `dict` type for
# every later cell; a rename must also update the `pd.Series(dict)` cell below.
dict = {'a':50, 'b':60, 'c':70, 'd':47}
dict

{'a': 50, 'b': 60, 'c': 70, 'd': 47}

In [9]:
# Data only: pandas supplies a default 0..n-1 integer index.
# (`list` here is the notebook's own variable, not the built-in type.)
pd.Series(list)

0    50
1    60
2    70
3    47
dtype: int64

In [10]:
# Keyword form: the values come from `list`, the index from `labels`.
pd.Series(index=labels, data=list)

a    50
b    60
c    70
d    47
dtype: int64

In [11]:
# Same Series as the keyword form: the first argument is the data,
# the second is the index.
pd.Series(list, index=labels)

a    50
b    60
c    70
d    47
dtype: int64

In [12]:
# A NumPy array works as Series data too; the Series keeps the array's dtype.
pd.Series(data=array)

0    50
1    60
2    70
3    47
dtype: int32

In [14]:
# NumPy array as data, with the custom labels as index.
pd.Series(data=array, index=labels)

a    50
b    60
c    70
d    47
dtype: int32

In [15]:
# From a mapping: the keys become the index and the values become the data.
# (`dict` here is the notebook's own variable, not the built-in type.)
pd.Series(data=dict)

a    50
b    60
c    70
d    47
dtype: int64

# DataFrames and Indexing

In [17]:
# Member counts per martial art for the first club, keyed by discipline name.
club1 = pd.Series({'TaeKwondo': 11, 'Karate': 12, 'Kungfu': 16, 'jujitsu': 4})

In [18]:
# Echo the Series to check its labels, values and dtype.
club1

TaeKwondo    11
Karate       12
Kungfu       16
jujitsu       4
dtype: int64

In [19]:
# Label-based lookup of a single value.
club1.loc['TaeKwondo']

11

In [20]:
# Positional lookup of the first value. `club1` has a string index, so a bare
# integer key (`club1[0]`) only works through the deprecated positional
# fallback of Series.__getitem__; `.iloc` states the intent explicitly.
club1.iloc[0]

11

In [22]:
# Second club: it shares three disciplines with club1 and adds 'Judo'.
club2 = pd.Series({'TaeKwondo': 1, 'Karate': 2, 'Judo': 5, 'jujitsu': 4})
club2

TaeKwondo    1
Karate       2
Judo         5
jujitsu      4
dtype: int64

In [23]:
# Element-wise addition aligned on index labels. A label present in only one
# club ('Kungfu', 'Judo') yields NaN, which makes the result float64.
club1.add(club2)

Judo          NaN
Karate       14.0
Kungfu        NaN
TaeKwondo    12.0
jujitsu       8.0
dtype: float64

In [24]:
from numpy.random import randn

In [25]:
# Build a 10x5 frame of standard-normal draws, rows x1..x10, columns points1..points5.
# A dedicated, seeded generator makes the frame identical on every
# Restart & Run All; the unseeded `randn` call produced new numbers on every
# run, so outputs recorded before this change will not match.
rng = np.random.default_rng(42)
df = pd.DataFrame(
    rng.standard_normal((10, 5)),
    index=[f'x{row}' for row in range(1, 11)],
    columns=[f'points{col}' for col in range(1, 6)],
)

In [26]:
# Display the freshly built 10x5 frame.
df

Unnamed: 0,points1,points2,points3,points4,points5
x1,-0.305457,-1.262025,-0.3862,0.393911,-1.130036
x2,0.008069,-1.431712,0.450144,-0.819828,0.450915
x3,-1.98381,-1.10697,1.132953,0.317977,0.496579
x4,1.208876,0.198858,-0.198328,0.476241,0.303265
x5,-2.097059,1.130494,0.270889,2.410209,-0.918399
x6,1.372714,-1.118173,0.528556,-0.116042,0.187865
x7,0.435378,-0.781918,-1.25711,-0.691294,-0.969927
x8,-0.320544,0.250852,0.603797,-1.049329,-0.942046
x9,1.768448,0.247238,-0.513339,0.352558,-0.779641
x10,-0.287796,1.396154,1.136272,0.1003,-0.348978


In [27]:
# A single column comes back as a Series named after the column.
df.loc[:, 'points3']

x1    -0.386200
x2     0.450144
x3     1.132953
x4    -0.198328
x5     0.270889
x6     0.528556
x7    -1.257110
x8     0.603797
x9    -0.513339
x10    1.136272
Name: points3, dtype: float64

In [28]:
# A list of column names returns a DataFrame with just those columns.
df.loc[:, ['points2', 'points3']]

Unnamed: 0,points2,points3
x1,-1.262025,-0.3862
x2,-1.431712,0.450144
x3,-1.10697,1.132953
x4,0.198858,-0.198328
x5,1.130494,0.270889
x6,-1.118173,0.528556
x7,-0.781918,-1.25711
x8,0.250852,0.603797
x9,0.247238,-0.513339
x10,1.396154,1.136272


In [34]:
# Derived column: row-wise sum of points1 and points2.
# NOTE(review): this cell's execution count (34) is higher than the next
# cell's (30), so the notebook was run out of order; re-run top to bottom.
df['points6'] = df['points1'].add(df['points2'])

In [30]:
# Display the frame again to confirm the new 'points6' column.
df

Unnamed: 0,points1,points2,points3,points4,points5,points6
x1,-0.305457,-1.262025,-0.3862,0.393911,-1.130036,-1.567482
x2,0.008069,-1.431712,0.450144,-0.819828,0.450915,-1.423643
x3,-1.98381,-1.10697,1.132953,0.317977,0.496579,-3.090779
x4,1.208876,0.198858,-0.198328,0.476241,0.303265,1.407733
x5,-2.097059,1.130494,0.270889,2.410209,-0.918399,-0.966565
x6,1.372714,-1.118173,0.528556,-0.116042,0.187865,0.254541
x7,0.435378,-0.781918,-1.25711,-0.691294,-0.969927,-0.346539
x8,-0.320544,0.250852,0.603797,-1.049329,-0.942046,-0.069692
x9,1.768448,0.247238,-0.513339,0.352558,-0.779641,2.015686
x10,-0.287796,1.396154,1.136272,0.1003,-0.348978,1.108358


In [36]:
# Remove the 'points4' column. Reassigning (instead of inplace=True) together
# with errors='ignore' keeps the cell re-runnable: the in-place version raised
# KeyError on a second run because the column was already gone.
# NOTE(review): the recorded output below also lacks 'points5', which no
# visible cell drops; it was presumably removed in a cell that has since
# been deleted. Confirm whether 'points5' should be dropped here as well.
df = df.drop(columns='points4', errors='ignore')
df

Unnamed: 0,points1,points2,points3,points6
x1,-0.305457,-1.262025,-0.3862,-1.567482
x2,0.008069,-1.431712,0.450144,-1.423643
x3,-1.98381,-1.10697,1.132953,-3.090779
x4,1.208876,0.198858,-0.198328,1.407733
x5,-2.097059,1.130494,0.270889,-0.966565
x6,1.372714,-1.118173,0.528556,0.254541
x7,0.435378,-0.781918,-1.25711,-0.346539
x8,-0.320544,0.250852,0.603797,-0.069692
x9,1.768448,0.247238,-0.513339,2.015686
x10,-0.287796,1.396154,1.136272,1.108358


In [38]:
# Drop a row by label. This returns a new frame; `df` itself still contains
# 'x8' because the result is not assigned back.
df.drop(index='x8')

Unnamed: 0,points1,points2,points3,points6
x1,-0.305457,-1.262025,-0.3862,-1.567482
x2,0.008069,-1.431712,0.450144,-1.423643
x3,-1.98381,-1.10697,1.132953,-3.090779
x4,1.208876,0.198858,-0.198328,1.407733
x5,-2.097059,1.130494,0.270889,-0.966565
x6,1.372714,-1.118173,0.528556,0.254541
x7,0.435378,-0.781918,-1.25711,-0.346539
x9,1.768448,0.247238,-0.513339,2.015686
x10,-0.287796,1.396154,1.136272,1.108358


In [39]:
# Label-based row lookup: the row comes back as a Series indexed by column name.
df.loc['x4', :]

points1    1.208876
points2    0.198858
points3   -0.198328
points6    1.407733
Name: x4, dtype: float64

In [40]:
# Position-based row lookup: position 2 is the third row ('x3').
df.iloc[2, :]

points1   -1.983810
points2   -1.106970
points3    1.132953
points6   -3.090779
Name: x3, dtype: float64

In [41]:
# Single cell addressed by row label and column label; returns a scalar.
df.at['x3', 'points1']

-1.9838096156916

In [42]:
# Sub-frame picked by explicit row labels and column labels.
df.loc[
    ['x2', 'x3'],
    ['points1', 'points2'],
]

Unnamed: 0,points1,points2
x2,0.008069,-1.431712
x3,-1.98381,-1.10697


### Conditional Selection

Similar to NumPy, we can make conditional selections using bracket notation: a boolean mask inside `df[...]` keeps only the entries where the condition holds.

In [47]:
# Element-wise comparison: a boolean frame with the same shape as `df`.
df.gt(0.5)

Unnamed: 0,points1,points2,points3,points6
x1,False,False,False,False
x2,False,False,False,False
x3,False,False,True,False
x4,True,False,False,True
x5,False,True,False,False
x6,True,False,True,False
x7,False,False,False,False
x8,False,False,True,False
x9,True,False,False,True
x10,False,True,True,True


In [49]:
# Keep values where the condition holds; every other cell becomes NaN.
df.where(df > 0.5)

Unnamed: 0,points1,points2,points3,points6
x1,,,,
x2,,,,
x3,,,1.132953,
x4,1.208876,,,1.407733
x5,,1.130494,,
x6,1.372714,,0.528556,
x7,,,,
x8,,,0.603797,
x9,1.768448,,,2.015686
x10,,1.396154,1.136272,1.108358


In [50]:
# Row filter: keep only the rows whose points2 exceeds 0.5.
df.loc[df['points2'] > 0.5]

Unnamed: 0,points1,points2,points3,points6
x5,-2.097059,1.130494,0.270889,-0.966565
x10,-0.287796,1.396154,1.136272,1.108358


In [51]:
# points2 for the rows where points1 exceeds 0.5. A single .loc call replaces
# the chained `df[mask]['points2']`, which builds a throwaway intermediate
# frame and silently fails to write through if the pattern is used to assign.
df.loc[df['points1'] > 0.5, 'points2']

x4    0.198858
x6   -1.118173
x9    0.247238
Name: points2, dtype: float64

In [52]:
# points2 and points3 for the rows where points1 exceeds 0.5, selected in one
# .loc call (row mask, column list) instead of chained indexing on an
# intermediate frame.
df.loc[df['points1'] > 0.5, ['points2', 'points3']]

Unnamed: 0,points2,points3
x4,0.198858,-0.198328
x6,-1.118173,0.528556
x9,0.247238,-0.513339


In [None]:
# Rows that satisfy both conditions. The masks are combined element-wise with
# `&`; Python's `and` cannot be used on boolean Series.
df.loc[df['points1'].gt(0.5) & df['points2'].gt(0)]