# Pandas

# Series

In [1]:
import pandas as pd

In [2]:
obj1 = pd.Series([11,22,33,44])

In [3]:
obj1

0    11
1    22
2    33
3    44
dtype: int64

In [4]:
obj1.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
obj1.values

array([11, 22, 33, 44], dtype=int64)

In [6]:
obj1.value_counts

<bound method IndexOpsMixin.value_counts of 0    11
1    22
2    33
3    44
dtype: int64>

# Reindexing

In [7]:
obj2 = pd.Series([11,22,33,44], index = ['a', 'b', 'c', 'd']) # here we define index manually

In [8]:
obj2

a    11
b    22
c    33
d    44
dtype: int64

In [9]:
obj2['c']

33

In [10]:
obj2['c'] = 34

In [11]:
obj2

a    11
b    22
c    34
d    44
dtype: int64

In [12]:
del obj2['c']

In [13]:
obj2

a    11
b    22
d    44
dtype: int64

In [14]:
obj2['c'] = 34

In [15]:
obj2

a    11
b    22
d    44
c    34
dtype: int64

In [16]:
index = [1,2,3,4]

In [17]:
obj3 = pd.Series(['Saad', 'Amim', 'Ali', 'Moiz'], index = index)

In [18]:
obj3

1    Saad
2    Amim
3     Ali
4    Moiz
dtype: object

In [19]:
obj3[3]

'Ali'

In [20]:
obj3 == 'Saad'

1     True
2    False
3    False
4    False
dtype: bool

In [21]:
obj3 == 'Khalil'

1    False
2    False
3    False
4    False
dtype: bool

In [22]:
obj3[obj3 == 'Saad']

1    Saad
dtype: object

In [23]:
obj3[obj3 == 'Khalil']

Series([], dtype: object)

# Arithmetic And Data Alignment

In [24]:
obj1

0    11
1    22
2    33
3    44
dtype: int64

In [25]:
obj1+1

0    12
1    23
2    34
3    45
dtype: int64

In [26]:
obj3

1    Saad
2    Amim
3     Ali
4    Moiz
dtype: object

In [27]:
obj3*2

1    SaadSaad
2    AmimAmim
3      AliAli
4    MoizMoiz
dtype: object

In [28]:
obj1*2

0    22
1    44
2    66
3    88
dtype: int64

In [29]:
obj1-2

0     9
1    20
2    31
3    42
dtype: int64

In [30]:
obj1//2

0     5
1    11
2    16
3    22
dtype: int64

In [31]:
obj1

0    11
1    22
2    33
3    44
dtype: int64

In [32]:
obj2

a    11
b    22
d    44
c    34
dtype: int64

In [33]:
obj1+obj2

0   NaN
1   NaN
2   NaN
3   NaN
a   NaN
b   NaN
c   NaN
d   NaN
dtype: float64

In [34]:
obj4 = pd.Series([11,33,22,44], index = [0,1,2,3])

In [35]:
obj4

0    11
1    33
2    22
3    44
dtype: int64

In [36]:
obj1+obj4

0    22
1    55
2    55
3    88
dtype: int64

In [37]:
obj1-obj4

0     0
1   -11
2    11
3     0
dtype: int64

In [38]:
obj1*obj4

0     121
1     726
2     726
3    1936
dtype: int64

In [39]:
obj4//obj1

0    1
1    1
2    0
3    1
dtype: int64

In [40]:
obj5 = pd.Series([100,200,300,400,500], index = ['1h', '2h', '3h', '4h', '5h'])

In [41]:
obj5

1h    100
2h    200
3h    300
4h    400
5h    500
dtype: int64

In [42]:
'1h' in obj5

True

In [43]:
'6h' in obj5

False

Creating a series using python dictionary

In [44]:
sdata = {'Karachi':"Mazar-e-Quaid", 'Lahore':"Minar-e-Pakistan", 'Peshawar':"Bab-e-Khaybar", 'Multan':"Halwa"}
#         index   :     value        index  :      value           index   :     value        index  : value

In [45]:
s1 = pd.Series(sdata)

In [46]:
s1

Karachi        Mazar-e-Quaid
Lahore      Minar-e-Pakistan
Peshawar       Bab-e-Khaybar
Multan                 Halwa
dtype: object

In [47]:
city = ['Karachi', 'Multan', 'Lahore', 'Islamabad']

In [48]:
s2 = pd.Series(sdata, index = city)

In [49]:
s2

Karachi         Mazar-e-Quaid
Multan                  Halwa
Lahore       Minar-e-Pakistan
Islamabad                 NaN
dtype: object

In [50]:
pd.isnull(s2)

Karachi      False
Multan       False
Lahore       False
Islamabad     True
dtype: bool

In [51]:
s2[pd.isnull(s2)]

Islamabad    NaN
dtype: object

In [52]:
pd.notnull(s2)

Karachi       True
Multan        True
Lahore        True
Islamabad    False
dtype: bool

In [53]:
s2[pd.notnull(s2)]

Karachi       Mazar-e-Quaid
Multan                Halwa
Lahore     Minar-e-Pakistan
dtype: object

In [54]:
obj6 = pd.Series(['Blue', 'Green', 'White'], index = [0,2,4])

In [55]:
obj6

0     Blue
2    Green
4    White
dtype: object

In [56]:
obj6.reindex(range(6), method = 'ffill')

0     Blue
1     Blue
2    Green
3    Green
4    White
5    White
dtype: object

In [57]:
obj6.reindex(range(8), method = 'ffill')

0     Blue
1     Blue
2    Green
3    Green
4    White
5    White
6    White
7    White
dtype: object

# Data Frame

In [58]:
data = {'City':['Karachi', 'Lahore', 'Islamabad', 'Multan'],
         'Population':[4.0, 1.5, 1.0, 0.8],
         'Year':[2019, 2019, 2019, 2019]}

In [59]:
frame = pd.DataFrame(data)

In [60]:
frame

Unnamed: 0,City,Population,Year
0,Karachi,4.0,2019
1,Lahore,1.5,2019
2,Islamabad,1.0,2019
3,Multan,0.8,2019


In [61]:
frame2 = pd.DataFrame(data, columns = ['Year', 'City', 'Population'])

In [62]:
frame2

Unnamed: 0,Year,City,Population
0,2019,Karachi,4.0
1,2019,Lahore,1.5
2,2019,Islamabad,1.0
3,2019,Multan,0.8


In [63]:
frame.describe()

Unnamed: 0,Population,Year
count,4.0,4.0
mean,1.825,2019.0
std,1.479583,0.0
min,0.8,2019.0
25%,0.95,2019.0
50%,1.25,2019.0
75%,2.125,2019.0
max,4.0,2019.0


In [64]:
frame2.describe()

Unnamed: 0,Year,Population
count,4.0,4.0
mean,2019.0,1.825
std,0.0,1.479583
min,2019.0,0.8
25%,2019.0,0.95
50%,2019.0,1.25
75%,2019.0,2.125
max,2019.0,4.0


In [65]:
frame2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
Year          4 non-null int64
City          4 non-null object
Population    4 non-null float64
dtypes: float64(1), int64(1), object(1)
memory usage: 176.0+ bytes


In [66]:
frame3 = pd.DataFrame(data, columns = ['City', 'Population', 'Year', 'Location'], index = ['i', 'ii', 'iii', 'iv'])

In [67]:
frame3

Unnamed: 0,City,Population,Year,Location
i,Karachi,4.0,2019,
ii,Lahore,1.5,2019,
iii,Islamabad,1.0,2019,
iv,Multan,0.8,2019,


In [68]:
frame3['Location'] = 'Pakistan'

In [69]:
frame3

Unnamed: 0,City,Population,Year,Location
i,Karachi,4.0,2019,Pakistan
ii,Lahore,1.5,2019,Pakistan
iii,Islamabad,1.0,2019,Pakistan
iv,Multan,0.8,2019,Pakistan


In [70]:
frame3 = pd.DataFrame(data, columns = ['Serial', 'City', 'Population', 'Year', 'Location'],
                      index = ['i', 'ii', 'iii', 'iv'])

In [71]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,,Karachi,4.0,2019,
ii,,Lahore,1.5,2019,
iii,,Islamabad,1.0,2019,
iv,,Multan,0.8,2019,


In [72]:
frame3['Location'] = 'Pakistan'

In [73]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,,Karachi,4.0,2019,Pakistan
ii,,Lahore,1.5,2019,Pakistan
iii,,Islamabad,1.0,2019,Pakistan
iv,,Multan,0.8,2019,Pakistan


In [74]:
frame3['Serial'] = 1

In [75]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,Karachi,4.0,2019,Pakistan
ii,1,Lahore,1.5,2019,Pakistan
iii,1,Islamabad,1.0,2019,Pakistan
iv,1,Multan,0.8,2019,Pakistan


In [76]:
frame3['Serial'] = [1,2,3,4]

In [77]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,Karachi,4.0,2019,Pakistan
ii,2,Lahore,1.5,2019,Pakistan
iii,3,Islamabad,1.0,2019,Pakistan
iv,4,Multan,0.8,2019,Pakistan


In [78]:
frame3['Serial'] = range(1,5)

In [79]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,Karachi,4.0,2019,Pakistan
ii,2,Lahore,1.5,2019,Pakistan
iii,3,Islamabad,1.0,2019,Pakistan
iv,4,Multan,0.8,2019,Pakistan


In [80]:
frame3.columns

Index(['Serial', 'City', 'Population', 'Year', 'Location'], dtype='object')

In [81]:
frame3.Serial

i      1
ii     2
iii    3
iv     4
Name: Serial, dtype: int32

In [82]:
frame3.City

i        Karachi
ii        Lahore
iii    Islamabad
iv        Multan
Name: City, dtype: object

In [83]:
frame3['City']

i        Karachi
ii        Lahore
iii    Islamabad
iv        Multan
Name: City, dtype: object

In [84]:
frame3[['City']]

Unnamed: 0,City
i,Karachi
ii,Lahore
iii,Islamabad
iv,Multan


In [85]:
frame3[['City', 'Population']]

Unnamed: 0,City,Population
i,Karachi,4.0
ii,Lahore,1.5
iii,Islamabad,1.0
iv,Multan,0.8


In [86]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,Karachi,4.0,2019,Pakistan
ii,2,Lahore,1.5,2019,Pakistan
iii,3,Islamabad,1.0,2019,Pakistan
iv,4,Multan,0.8,2019,Pakistan


In [87]:
frame3.loc['i']

Serial               1
City           Karachi
Population           4
Year              2019
Location      Pakistan
Name: i, dtype: object

In [88]:
frame3.iloc[1]

Serial               2
City            Lahore
Population         1.5
Year              2019
Location      Pakistan
Name: ii, dtype: object

In [94]:
data2 = pd.read_csv('banknotes.csv')

In [95]:
data2

Unnamed: 0,V1,V2,V3,V4,Class
0,3.621600,8.66610,-2.807300,-0.446990,1
1,4.545900,8.16740,-2.458600,-1.462100,1
2,3.866000,-2.63830,1.924200,0.106450,1
3,3.456600,9.52280,-4.011200,-3.594400,1
4,0.329240,-4.45520,4.571800,-0.988800,1
5,4.368400,9.67180,-3.960600,-3.162500,1
6,3.591200,3.01290,0.728880,0.564210,1
7,2.092200,-6.81000,8.463600,-0.602160,1
8,3.203200,5.75880,-0.753450,-0.612510,1
9,1.535600,9.17720,-2.271800,-0.735350,1


In [96]:
data2.head()

Unnamed: 0,V1,V2,V3,V4,Class
0,3.6216,8.6661,-2.8073,-0.44699,1
1,4.5459,8.1674,-2.4586,-1.4621,1
2,3.866,-2.6383,1.9242,0.10645,1
3,3.4566,9.5228,-4.0112,-3.5944,1
4,0.32924,-4.4552,4.5718,-0.9888,1


In [97]:
data2.tail()

Unnamed: 0,V1,V2,V3,V4,Class
1367,0.40614,1.3492,-1.4501,-0.55949,2
1368,-1.3887,-4.8773,6.4774,0.34179,2
1369,-3.7503,-13.4586,17.5932,-2.7771,2
1370,-3.5637,-8.3827,12.393,-1.2823,2
1371,-2.5419,-0.65804,2.6842,1.1952,2


In [98]:
data2.head(10)

Unnamed: 0,V1,V2,V3,V4,Class
0,3.6216,8.6661,-2.8073,-0.44699,1
1,4.5459,8.1674,-2.4586,-1.4621,1
2,3.866,-2.6383,1.9242,0.10645,1
3,3.4566,9.5228,-4.0112,-3.5944,1
4,0.32924,-4.4552,4.5718,-0.9888,1
5,4.3684,9.6718,-3.9606,-3.1625,1
6,3.5912,3.0129,0.72888,0.56421,1
7,2.0922,-6.81,8.4636,-0.60216,1
8,3.2032,5.7588,-0.75345,-0.61251,1
9,1.5356,9.1772,-2.2718,-0.73535,1


In [99]:
data2.describe()

Unnamed: 0,V1,V2,V3,V4,Class
count,1372.0,1372.0,1372.0,1372.0,1372.0
mean,0.433735,1.922353,1.397627,-1.191657,1.444606
std,2.842763,5.869047,4.31003,2.101013,0.497103
min,-7.0421,-13.7731,-5.2861,-8.5482,1.0
25%,-1.773,-1.7082,-1.574975,-2.41345,1.0
50%,0.49618,2.31965,0.61663,-0.58665,1.0
75%,2.821475,6.814625,3.17925,0.39481,2.0
max,6.8248,12.9516,17.9274,2.4495,2.0


In [100]:
data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1372 entries, 0 to 1371
Data columns (total 5 columns):
V1       1372 non-null float64
V2       1372 non-null float64
V3       1372 non-null float64
V4       1372 non-null float64
Class    1372 non-null int64
dtypes: float64(4), int64(1)
memory usage: 53.7 KB


In [101]:
import numpy as np

In [102]:
frame4 = pd.DataFrame(np.arange(9).reshape(3,3), index = [1, 2, 3], columns = ['Karachi', 'Lahore', 'Multan'])

In [103]:
frame4

Unnamed: 0,Karachi,Lahore,Multan
1,0,1,2
2,3,4,5
3,6,7,8


In [104]:
frame5 = frame4.reindex([0, 1, 2, 3, 4,])

In [105]:
frame5

Unnamed: 0,Karachi,Lahore,Multan
0,,,
1,0.0,1.0,2.0
2,3.0,4.0,5.0
3,6.0,7.0,8.0
4,,,


In [106]:
cities = ('Haydarbad', 'Islamabad', 'Karachi')

In [107]:
frame5.reindex(columns = cities)

Unnamed: 0,Haydarbad,Islamabad,Karachi
0,,,
1,,,0.0
2,,,3.0
3,,,6.0
4,,,


In [108]:
val = pd.Series([-1.2, -3.3, -2.2], index = ['i', 'iii', 'iv'])

In [109]:
val

i     -1.2
iii   -3.3
iv    -2.2
dtype: float64

In [110]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,Karachi,4.0,2019,Pakistan
ii,2,Lahore,1.5,2019,Pakistan
iii,3,Islamabad,1.0,2019,Pakistan
iv,4,Multan,0.8,2019,Pakistan


In [111]:
frame3.Serial = val

In [112]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,-1.2,Karachi,4.0,2019,Pakistan
ii,,Lahore,1.5,2019,Pakistan
iii,-3.3,Islamabad,1.0,2019,Pakistan
iv,-2.2,Multan,0.8,2019,Pakistan


In [115]:
frame3.Serial['i', 'ii', 'iv'] = [1,2,3]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [116]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1.0,Karachi,4.0,2019,Pakistan
ii,2.0,Lahore,1.5,2019,Pakistan
iii,-3.3,Islamabad,1.0,2019,Pakistan
iv,3.0,Multan,0.8,2019,Pakistan


In [117]:
frame3['Eastern'] = frame3.City == 'Karachi'

In [118]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location,Eastern
i,1.0,Karachi,4.0,2019,Pakistan,True
ii,2.0,Lahore,1.5,2019,Pakistan,False
iii,-3.3,Islamabad,1.0,2019,Pakistan,False
iv,3.0,Multan,0.8,2019,Pakistan,False


In [119]:
data3 = {'Name':['Saad', 'Ali', 'Amim', 'Mushtaq', 'Umair'],
         'Marks_In_AI':[99,18,77,66,55],
         'Marks_In_Python':[98,47,76,65,54],
         'Marks_In_Numpy':[97,56,75,64,13]}

In [120]:
score = pd.DataFrame(data3)

In [121]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy
0,Saad,99,98,97
1,Ali,18,47,56
2,Amim,77,76,75
3,Mushtaq,66,65,64
4,Umair,55,54,13


In [122]:
score['Total_Marks'] = 300

In [123]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy,Total_Marks
0,Saad,99,98,97,300
1,Ali,18,47,56,300
2,Amim,77,76,75,300
3,Mushtaq,66,65,64,300
4,Umair,55,54,13,300


In [124]:
score['Obtained_Marks'] = score['Marks_In_AI']+score['Marks_In_Python']+score['Marks_In_Numpy']

In [125]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy,Total_Marks,Obtained_Marks
0,Saad,99,98,97,300,294
1,Ali,18,47,56,300,121
2,Amim,77,76,75,300,228
3,Mushtaq,66,65,64,300,195
4,Umair,55,54,13,300,122


In [126]:
score['Percentage'] = score['Obtained_Marks']/score['Total_Marks']*100

In [127]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy,Total_Marks,Obtained_Marks,Percentage
0,Saad,99,98,97,300,294,98.0
1,Ali,18,47,56,300,121,40.333333
2,Amim,77,76,75,300,228,76.0
3,Mushtaq,66,65,64,300,195,65.0
4,Umair,55,54,13,300,122,40.666667


In [128]:
score['Status'] = ['Passed' if percent>=50 else 'Failed' for percent in score['Percentage']]

In [129]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy,Total_Marks,Obtained_Marks,Percentage,Status
0,Saad,99,98,97,300,294,98.0,Passed
1,Ali,18,47,56,300,121,40.333333,Failed
2,Amim,77,76,75,300,228,76.0,Passed
3,Mushtaq,66,65,64,300,195,65.0,Passed
4,Umair,55,54,13,300,122,40.666667,Failed


In [130]:
score['Final_Status'] = ['Passed' if ai>=50 and py>=50 and np>=50 else 'Failed' 
                         for ai,py,np in zip(score.Marks_In_AI, score.Marks_In_Numpy, score.Marks_In_Python)]

In [131]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy,Total_Marks,Obtained_Marks,Percentage,Status,Final_Status
0,Saad,99,98,97,300,294,98.0,Passed,Passed
1,Ali,18,47,56,300,121,40.333333,Failed,Failed
2,Amim,77,76,75,300,228,76.0,Passed,Passed
3,Mushtaq,66,65,64,300,195,65.0,Passed,Passed
4,Umair,55,54,13,300,122,40.666667,Failed,Failed


In [132]:
pop = {'Karachi':{2000:1.5, 2010:3.0, 2019:4.0},
       'Lahore':{2000:0.5, 2011:1.0, 2019:1.5}}

In [133]:
frame6 = pd.DataFrame(pop)

In [134]:
frame6

Unnamed: 0,Karachi,Lahore
2000,1.5,0.5
2010,3.0,
2011,,1.0
2019,4.0,1.5


In [135]:
frame6.T

Unnamed: 0,2000,2010,2011,2019
Karachi,1.5,3.0,,4.0
Lahore,0.5,,1.0,1.5


In [136]:
pd.DataFrame(pop, index = [2000, 2002, 2019])

Unnamed: 0,Karachi,Lahore
2000,1.5,0.5
2002,,
2019,4.0,1.5


# Index Objects

In [137]:
obj7= pd.Series(range(3), index = ['a', 'b', 'c'])

In [138]:
obj7

a    0
b    1
c    2
dtype: int64

In [139]:
index = obj7.index

In [140]:
index

Index(['a', 'b', 'c'], dtype='object')

In [141]:
index2 = obj7.values

In [142]:
index2

array([0, 1, 2], dtype=int64)

In [143]:
index[1:]

Index(['b', 'c'], dtype='object')

In [144]:
index[:2]

Index(['a', 'b'], dtype='object')

In [145]:
index[1]

'b'

In [146]:
labels = pd.Index(np.arange(3))

In [147]:
labels

Int64Index([0, 1, 2], dtype='int64')

In [148]:
obj8 = pd.Series([1.2, 2.2, 3.3], index = labels)

In [149]:
obj8

0    1.2
1    2.2
2    3.3
dtype: float64

In [150]:
obj8.index is labels

True

In [151]:
obj8.index is obj5

False

# Dropping Entries From An Axis

In [152]:
obj9 = pd.Series(np.arange(5.), index = ['a', 'b', 'c', 'd', 'e'])

In [153]:
obj9

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [154]:
new_obj = obj9.drop('c')

In [155]:
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [156]:
new_obj2 = obj9.drop(['a', 'c'])

In [157]:
new_obj2

b    1.0
d    3.0
e    4.0
dtype: float64

In [158]:
data4 = pd.DataFrame(np.arange(16).reshape(4,4),
                     index = ['Karachi', 'Lahore', 'Multan', 'Peshawar'],
                     columns = [1,2,3,4])

In [159]:
data4

Unnamed: 0,1,2,3,4
Karachi,0,1,2,3
Lahore,4,5,6,7
Multan,8,9,10,11
Peshawar,12,13,14,15


In [160]:
data4.drop(['Lahore', 'Multan'])

Unnamed: 0,1,2,3,4
Karachi,0,1,2,3
Peshawar,12,13,14,15


In [161]:
data4.drop(2, axis = 1)

Unnamed: 0,1,3,4
Karachi,0,2,3
Lahore,4,6,7
Multan,8,10,11
Peshawar,12,14,15


OR

In [162]:
data4.drop([2, 3], axis = 'columns')

Unnamed: 0,1,4
Karachi,0,3
Lahore,4,7
Multan,8,11
Peshawar,12,15


In [163]:
data4

Unnamed: 0,1,2,3,4
Karachi,0,1,2,3
Lahore,4,5,6,7
Multan,8,9,10,11
Peshawar,12,13,14,15


In [164]:
data4.drop('Peshawar', inplace = True)

In [165]:
data4

Unnamed: 0,1,2,3,4
Karachi,0,1,2,3
Lahore,4,5,6,7
Multan,8,9,10,11


# Function Application And Mapping

In [166]:
frame7 = pd.DataFrame(np.random.randn(4, 3), columns = list('bde'), index = ['Karachi', 'Lahore', 'Islamabad', 'Quetta'])

In [167]:
frame7

Unnamed: 0,b,d,e
Karachi,1.523014,1.574411,0.129212
Lahore,0.476343,-1.572065,0.878976
Islamabad,1.169361,-0.710956,-0.520088
Quetta,-0.3134,0.640503,-0.502791


In [168]:
frame7 = np.abs(frame7)  #'abs' function remoe -ve sign

In [169]:
frame7

Unnamed: 0,b,d,e
Karachi,1.523014,1.574411,0.129212
Lahore,0.476343,1.572065,0.878976
Islamabad,1.169361,0.710956,0.520088
Quetta,0.3134,0.640503,0.502791


In [170]:
f = lambda x: x.max() - x.min()  #  by default perform operation raw wise mean column
frame7.apply(f)

b    1.209614
d    0.933908
e    0.749764
dtype: float64

In [171]:
frame7.apply(f, axis = 'columns')  #  here we change the axis fromm raw to columns means raw

Karachi      1.445199
Lahore       1.095722
Islamabad    0.649273
Quetta       0.327103
dtype: float64

# Soring And Ranking

In [172]:
obj10 = pd.Series([1,0,3,2], index = ['d', 'a', 'b', 'c'])

In [173]:
obj10

d    1
a    0
b    3
c    2
dtype: int64

In [174]:
obj10.sort_index()

a    0
b    3
c    2
d    1
dtype: int64

In [175]:
obj10.sort_values()

a    0
d    1
c    2
b    3
dtype: int64

In [176]:
frame8 = pd.DataFrame(np.arange(8).reshape(2,4), index = ['three', 'one'], columns = ['d', 'a', 'b', 'c'])

In [177]:
frame8

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [178]:
frame8.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [179]:
frame8.sort_index(axis = 1)

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [180]:
obj11 = pd.Series([4, 7, -3, 2])

In [181]:
obj11

0    4
1    7
2   -3
3    2
dtype: int64

In [182]:
obj11.sort_values()

2   -3
3    2
0    4
1    7
dtype: int64

In [183]:
obj11.sort_index()

0    4
1    7
2   -3
3    2
dtype: int64

In [184]:
frame9 = pd.DataFrame({'b':[4, 7, -3, 2], 'a':[0, 1, 0, 1]})

In [185]:
frame9

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [186]:
frame9.sort_values(by = 'b')

Unnamed: 0,b,a
2,-3,0
3,2,1
0,4,0
1,7,1


In [187]:
frame9.sort_values(by = 'a')

Unnamed: 0,b,a
0,4,0
2,-3,0
1,7,1
3,2,1


In [188]:
frame9

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [189]:
frame9.sort_values(by = ['a', 'b'])

Unnamed: 0,b,a
2,-3,0
0,4,0
3,2,1
1,7,1


In [190]:
frame9.sort_values(by = ['b', 'a'])

Unnamed: 0,b,a
2,-3,0
3,2,1
0,4,0
1,7,1


In [191]:
obj12 = pd.Series([7, -5, 7, 4, 2, 0, 4])

In [192]:
obj12

0    7
1   -5
2    7
3    4
4    2
5    0
6    4
dtype: int64

In [193]:
obj12.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [194]:
obj13 = pd.Series([2,2,4,5,7,8])

In [195]:
obj13

0    2
1    2
2    4
3    5
4    7
5    8
dtype: int64

In [196]:
obj13.rank()

0    1.5
1    1.5
2    3.0
3    4.0
4    5.0
5    6.0
dtype: float64

In [197]:
obj13.rank(method = 'first')

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
dtype: float64

In [198]:
obj12

0    7
1   -5
2    7
3    4
4    2
5    0
6    4
dtype: int64

In [199]:
obj12.rank(method = 'first')

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

# Summarizing And Computing Descriptive Statistics

In [200]:
df = pd.DataFrame([[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],
                   index = ['a', 'b', 'c', 'd'],
                   columns = ['one', 'two'])

In [201]:
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [202]:
df.sum()   # by default operation perform raw wise means column and NaN values will be neglegted

one    9.25
two   -5.80
dtype: float64

In [203]:
df.sum(axis = 'columns')  # operation perform column wise means raw and NaN values will be neglegted

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [204]:
df.sum(skipna = False)

one   NaN
two   NaN
dtype: float64

In [205]:
df.sum(axis = 'columns', skipna = False)

a     NaN
b    2.60
c     NaN
d   -0.55
dtype: float64

In [206]:
df.mean()    # by default operation perform raw wise means column and NaN values will be neglegted

one    3.083333
two   -2.900000
dtype: float64

In [207]:
df.mean(axis = 'columns')   # operation perform column wise means raw and NaN values will be neglegted

a    1.400
b    1.300
c      NaN
d   -0.275
dtype: float64

In [208]:
df.mean(skipna = False)

one   NaN
two   NaN
dtype: float64

In [209]:
df.mean(axis = 'columns', skipna = False)

a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64

In [210]:
frame10 = pd.DataFrame({'b':[1,0], 'a':[0,1]})

In [211]:
frame10

Unnamed: 0,b,a
0,1,0
1,0,1


In [212]:
frame10.corr()

Unnamed: 0,b,a
b,1.0,-1.0
a,-1.0,1.0


In [213]:
frame10.cov()

Unnamed: 0,b,a
b,0.5,-0.5
a,-0.5,0.5


In [214]:
import pandas_datareader.data as web

In [216]:
all_data = {ticker: web.get_data_yahoo(ticker)
                       for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']}

In [217]:
price = pd.DataFrame({ticker: data['Adj Close'] for ticker, data in all_data.items()})

In [218]:
volume = pd.DataFrame({ticker: data['Volume'] for ticker, data in all_data.items()})

In [219]:
all_data

{'AAPL':                   High         Low        Open       Close       Volume  \
 Date                                                                      
 2015-01-08  112.150002  108.699997  109.230003  111.889999   59364500.0   
 2015-01-09  113.250000  110.209999  112.669998  112.010002   53699500.0   
 2015-01-12  112.629997  108.800003  112.599998  109.250000   49650800.0   
 2015-01-13  112.800003  108.910004  111.430000  110.220001   67091900.0   
 2015-01-14  110.489998  108.500000  109.040001  109.800003   48956600.0   
 2015-01-15  110.059998  106.660004  110.000000  106.820000   60014000.0   
 2015-01-16  107.580002  105.199997  107.029999  105.989998   78513300.0   
 2015-01-20  108.970001  106.500000  107.839996  108.720001   49899900.0   
 2015-01-21  111.059998  108.269997  108.949997  109.550003   48575900.0   
 2015-01-22  112.470001  109.720001  110.260002  112.400002   53796400.0   
 2015-01-23  113.750000  111.529999  112.300003  112.980003   46464800.0   
 201

In [220]:
price

Unnamed: 0_level_0,AAPL,IBM,MSFT,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-08,102.806480,127.657913,42.724323,501.303680
2015-01-09,102.916725,128.213913,42.365219,494.811493
2015-01-12,100.380791,126.062386,41.835537,491.201416
2015-01-13,101.272049,126.360535,41.620083,494.821472
2015-01-14,100.886154,125.546654,41.260979,499.498627
2015-01-15,98.148056,124.555489,40.830055,500.416107
2015-01-16,97.385445,126.626427,41.512352,506.688873
2015-01-20,99.893822,126.473312,41.647015,505.512115
2015-01-21,100.656441,122.557053,41.225071,516.621643
2015-01-22,103.275063,125.216255,42.311356,532.926819


In [221]:
volume

Unnamed: 0_level_0,AAPL,IBM,MSFT,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-08,59364500.0,4236800.0,29645200.0,3353500.0
2015-01-09,53699500.0,4488300.0,23944200.0,2071300.0
2015-01-12,49650800.0,4187600.0,23651900.0,2326700.0
2015-01-13,67091900.0,4377500.0,35270600.0,2370400.0
2015-01-14,48956600.0,4690300.0,29719600.0,2235700.0
2015-01-15,60014000.0,4248400.0,32750800.0,2715800.0
2015-01-16,78513300.0,5756000.0,35695300.0,2298200.0
2015-01-20,49899900.0,8392800.0,36161900.0,2232000.0
2015-01-21,48575900.0,11897100.0,39081100.0,2268700.0
2015-01-22,53796400.0,6120100.0,35898000.0,2676900.0


In [222]:
returns = price.pct_change()

In [223]:
returns

Unnamed: 0_level_0,AAPL,IBM,MSFT,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-08,,,,
2015-01-09,0.001072,0.004355,-0.008405,-0.012951
2015-01-12,-0.024641,-0.016781,-0.012503,-0.007296
2015-01-13,0.008879,0.002365,-0.005150,0.007370
2015-01-14,-0.003810,-0.006441,-0.008628,0.009452
2015-01-15,-0.027140,-0.007895,-0.010444,0.001837
2015-01-16,-0.007770,0.016627,0.016711,0.012535
2015-01-20,0.025757,-0.001209,0.003244,-0.002322
2015-01-21,0.007634,-0.030965,-0.010131,0.021977
2015-01-22,0.026015,0.021698,0.026350,0.031561


In [224]:
returns.tail()

Unnamed: 0_level_0,AAPL,IBM,MSFT,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-12-31,0.007307,0.009261,0.000698,0.000659
2020-01-02,0.022816,0.010295,0.018516,0.0227
2020-01-03,-0.009722,-0.007975,-0.012452,-0.004907
2020-01-06,0.007968,-0.001786,0.002585,0.024657
2020-01-07,-0.004703,0.000671,-0.009118,-0.000624


In [225]:
returns['MSFT'].corr(returns['IBM'])

0.4837218908687079

In [226]:
returns['IBM'].corr(returns['MSFT'])

0.483721890868708

In [227]:
returns['IBM'].corr(returns['GOOG'])

0.4088961403859018

In [228]:
returns.MSFT.corr(returns.AAPL)

0.5726101233496661

In [229]:
returns.MSFT.cov(returns.AAPL)

0.00013102825759646604

In [230]:
returns.AAPL.cov(returns.GOOG)

0.0001233733050622297