# Pandas

# Series

In [3]:
import pandas as pd

In [4]:
obj1 = pd.Series([11,22,33,44])

In [5]:
obj1

0    11
1    22
2    33
3    44
dtype: int64

In [6]:
obj1.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
obj1.values

array([11, 22, 33, 44], dtype=int64)

In [8]:
obj1.value_counts

<bound method IndexOpsMixin.value_counts of 0    11
1    22
2    33
3    44
dtype: int64>

# Reindexing

In [9]:
obj2 = pd.Series([11,22,33,44], index = ['a', 'b', 'c', 'd']) # here we define index manually

In [10]:
obj2

a    11
b    22
c    33
d    44
dtype: int64

In [11]:
obj2['c']

33

In [12]:
obj2['c'] = 34

In [13]:
obj2

a    11
b    22
c    34
d    44
dtype: int64

In [14]:
del obj2['c']

In [15]:
obj2

a    11
b    22
d    44
dtype: int64

In [16]:
obj2['c'] = 34

In [17]:
obj2

a    11
b    22
d    44
c    34
dtype: int64

In [18]:
index = [1,2,3,4]

In [19]:
obj3 = pd.Series(['Saad', 'Amim', 'Ali', 'Moiz'], index = index)

In [20]:
obj3

1    Saad
2    Amim
3     Ali
4    Moiz
dtype: object

In [21]:
obj3[3]

'Ali'

In [22]:
obj3 == 'Saad'

1     True
2    False
3    False
4    False
dtype: bool

In [23]:
obj3 == 'Khalil'

1    False
2    False
3    False
4    False
dtype: bool

In [24]:
obj3[obj3 == 'Saad']

1    Saad
dtype: object

In [25]:
obj3[obj3 == 'Khalil']

Series([], dtype: object)

In [26]:
obj1

0    11
1    22
2    33
3    44
dtype: int64

In [27]:
obj1+1

0    12
1    23
2    34
3    45
dtype: int64

In [28]:
obj3

1    Saad
2    Amim
3     Ali
4    Moiz
dtype: object

In [29]:
obj3*2

1    SaadSaad
2    AmimAmim
3      AliAli
4    MoizMoiz
dtype: object

In [30]:
obj1*2

0    22
1    44
2    66
3    88
dtype: int64

In [31]:
obj1-2

0     9
1    20
2    31
3    42
dtype: int64

In [32]:
obj1//2

0     5
1    11
2    16
3    22
dtype: int64

In [33]:
obj4 = pd.Series([100,200,300,400,500], index = ['1h', '2h', '3h', '4h', '5h'])

In [34]:
obj4

1h    100
2h    200
3h    300
4h    400
5h    500
dtype: int64

In [35]:
'1h' in obj4

True

In [36]:
'6h' in obj4

False

Creating a series using python dictionary

In [37]:
sdata = {'Karachi':"Mazar-e-Quaid", 'Lahore':"Minar-e-Pakistan", 'Peshawar':"Bab-e-Khaybar", 'Multan':"Halwa"}
#         index   :     value        index  :      value           index   :     value        index  : value

In [38]:
s1 = pd.Series(sdata)

In [39]:
s1

Karachi        Mazar-e-Quaid
Lahore      Minar-e-Pakistan
Peshawar       Bab-e-Khaybar
Multan                 Halwa
dtype: object

In [40]:
city = ['Karachi', 'Multan', 'Lahore', 'Islamabad']

In [41]:
s2 = pd.Series(sdata, index = city)

In [42]:
s2

Karachi         Mazar-e-Quaid
Multan                  Halwa
Lahore       Minar-e-Pakistan
Islamabad                 NaN
dtype: object

In [43]:
pd.isnull(s2)

Karachi      False
Multan       False
Lahore       False
Islamabad     True
dtype: bool

In [44]:
s2[pd.isnull(s2)]

Islamabad    NaN
dtype: object

In [45]:
pd.notnull(s2)

Karachi       True
Multan        True
Lahore        True
Islamabad    False
dtype: bool

In [46]:
s2[pd.notnull(s2)]

Karachi       Mazar-e-Quaid
Multan                Halwa
Lahore     Minar-e-Pakistan
dtype: object

In [126]:
robj = pd.Series(['Blue', 'Green', 'White'], index = [0,2,4])

In [127]:
robj

0     Blue
2    Green
4    White
dtype: object

In [128]:
robj.reindex(range(6), method = 'ffill')

0     Blue
1     Blue
2    Green
3    Green
4    White
5    White
dtype: object

In [131]:
robj.reindex(range(8), method = 'ffill')

0     Blue
1     Blue
2    Green
3    Green
4    White
5    White
6    White
7    White
dtype: object

# Data Frame

In [47]:
data = {'City':['Karachi', 'Lahore', 'Islamabad', 'Multan'],
         'Population':[4.0, 1.5, 1.0, 0.8],
         'Year':[2019, 2019, 2019, 2019]}

In [48]:
frame = pd.DataFrame(data)

In [49]:
frame

Unnamed: 0,City,Population,Year
0,Karachi,4.0,2019
1,Lahore,1.5,2019
2,Islamabad,1.0,2019
3,Multan,0.8,2019


In [50]:
frame2 = pd.DataFrame(data, columns = ['Year', 'City', 'Population'])

In [51]:
frame2

Unnamed: 0,Year,City,Population
0,2019,Karachi,4.0
1,2019,Lahore,1.5
2,2019,Islamabad,1.0
3,2019,Multan,0.8


In [52]:
frame.describe()

Unnamed: 0,Population,Year
count,4.0,4.0
mean,1.825,2019.0
std,1.479583,0.0
min,0.8,2019.0
25%,0.95,2019.0
50%,1.25,2019.0
75%,2.125,2019.0
max,4.0,2019.0


In [53]:
frame2.describe()

Unnamed: 0,Year,Population
count,4.0,4.0
mean,2019.0,1.825
std,0.0,1.479583
min,2019.0,0.8
25%,2019.0,0.95
50%,2019.0,1.25
75%,2019.0,2.125
max,2019.0,4.0


In [54]:
frame2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
Year          4 non-null int64
City          4 non-null object
Population    4 non-null float64
dtypes: float64(1), int64(1), object(1)
memory usage: 176.0+ bytes


In [55]:
frame3 = pd.DataFrame(data, columns = ['City', 'Population', 'Year', 'Location'], index = ['i', 'ii', 'iii', 'iv'])

In [56]:
frame3

Unnamed: 0,City,Population,Year,Location
i,Karachi,4.0,2019,
ii,Lahore,1.5,2019,
iii,Islamabad,1.0,2019,
iv,Multan,0.8,2019,


In [57]:
frame3['Location'] = 'Pakistan'

In [58]:
frame3

Unnamed: 0,City,Population,Year,Location
i,Karachi,4.0,2019,Pakistan
ii,Lahore,1.5,2019,Pakistan
iii,Islamabad,1.0,2019,Pakistan
iv,Multan,0.8,2019,Pakistan


In [59]:
frame3 = pd.DataFrame(data, columns = ['Serial', 'City', 'Population', 'Year', 'Location'],
                      index = ['i', 'ii', 'iii', 'iv'])

In [60]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,,Karachi,4.0,2019,
ii,,Lahore,1.5,2019,
iii,,Islamabad,1.0,2019,
iv,,Multan,0.8,2019,


In [61]:
frame3['Location'] = 'Pakistan'

In [62]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,,Karachi,4.0,2019,Pakistan
ii,,Lahore,1.5,2019,Pakistan
iii,,Islamabad,1.0,2019,Pakistan
iv,,Multan,0.8,2019,Pakistan


In [63]:
frame3['Serial'] = 1

In [64]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,Karachi,4.0,2019,Pakistan
ii,1,Lahore,1.5,2019,Pakistan
iii,1,Islamabad,1.0,2019,Pakistan
iv,1,Multan,0.8,2019,Pakistan


In [65]:
frame3['Serial'] = [1,2,3,4]

In [66]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,Karachi,4.0,2019,Pakistan
ii,2,Lahore,1.5,2019,Pakistan
iii,3,Islamabad,1.0,2019,Pakistan
iv,4,Multan,0.8,2019,Pakistan


In [67]:
frame3['Serial'] = range(1,5)

In [68]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,Karachi,4.0,2019,Pakistan
ii,2,Lahore,1.5,2019,Pakistan
iii,3,Islamabad,1.0,2019,Pakistan
iv,4,Multan,0.8,2019,Pakistan


In [69]:
frame3.columns

Index(['Serial', 'City', 'Population', 'Year', 'Location'], dtype='object')

In [70]:
frame3.Serial

i      1
ii     2
iii    3
iv     4
Name: Serial, dtype: int32

In [71]:
frame3.City

i        Karachi
ii        Lahore
iii    Islamabad
iv        Multan
Name: City, dtype: object

In [72]:
frame3['City']

i        Karachi
ii        Lahore
iii    Islamabad
iv        Multan
Name: City, dtype: object

In [73]:
frame3[['City']]

Unnamed: 0,City
i,Karachi
ii,Lahore
iii,Islamabad
iv,Multan


In [74]:
frame3[['City', 'Population']]

Unnamed: 0,City,Population
i,Karachi,4.0
ii,Lahore,1.5
iii,Islamabad,1.0
iv,Multan,0.8


In [75]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,Karachi,4.0,2019,Pakistan
ii,2,Lahore,1.5,2019,Pakistan
iii,3,Islamabad,1.0,2019,Pakistan
iv,4,Multan,0.8,2019,Pakistan


In [76]:
frame3.loc['i']

Serial               1
City           Karachi
Population           4
Year              2019
Location      Pakistan
Name: i, dtype: object

In [77]:
frame3.iloc[1]

Serial               2
City            Lahore
Population         1.5
Year              2019
Location      Pakistan
Name: ii, dtype: object

In [183]:
data2 = pd.read_csv('banknotes.csv')

In [184]:
data2

Unnamed: 0,V1,V2,V3,V4,Class
0,3.621600,8.66610,-2.807300,-0.446990,1
1,4.545900,8.16740,-2.458600,-1.462100,1
2,3.866000,-2.63830,1.924200,0.106450,1
3,3.456600,9.52280,-4.011200,-3.594400,1
4,0.329240,-4.45520,4.571800,-0.988800,1
5,4.368400,9.67180,-3.960600,-3.162500,1
6,3.591200,3.01290,0.728880,0.564210,1
7,2.092200,-6.81000,8.463600,-0.602160,1
8,3.203200,5.75880,-0.753450,-0.612510,1
9,1.535600,9.17720,-2.271800,-0.735350,1


In [185]:
data2.head()

Unnamed: 0,V1,V2,V3,V4,Class
0,3.6216,8.6661,-2.8073,-0.44699,1
1,4.5459,8.1674,-2.4586,-1.4621,1
2,3.866,-2.6383,1.9242,0.10645,1
3,3.4566,9.5228,-4.0112,-3.5944,1
4,0.32924,-4.4552,4.5718,-0.9888,1


In [186]:
data2.tail()

Unnamed: 0,V1,V2,V3,V4,Class
1367,0.40614,1.3492,-1.4501,-0.55949,2
1368,-1.3887,-4.8773,6.4774,0.34179,2
1369,-3.7503,-13.4586,17.5932,-2.7771,2
1370,-3.5637,-8.3827,12.393,-1.2823,2
1371,-2.5419,-0.65804,2.6842,1.1952,2


In [187]:
data2.head(10)

Unnamed: 0,V1,V2,V3,V4,Class
0,3.6216,8.6661,-2.8073,-0.44699,1
1,4.5459,8.1674,-2.4586,-1.4621,1
2,3.866,-2.6383,1.9242,0.10645,1
3,3.4566,9.5228,-4.0112,-3.5944,1
4,0.32924,-4.4552,4.5718,-0.9888,1
5,4.3684,9.6718,-3.9606,-3.1625,1
6,3.5912,3.0129,0.72888,0.56421,1
7,2.0922,-6.81,8.4636,-0.60216,1
8,3.2032,5.7588,-0.75345,-0.61251,1
9,1.5356,9.1772,-2.2718,-0.73535,1


In [188]:
data2.describe()

Unnamed: 0,V1,V2,V3,V4,Class
count,1372.0,1372.0,1372.0,1372.0,1372.0
mean,0.433735,1.922353,1.397627,-1.191657,1.444606
std,2.842763,5.869047,4.31003,2.101013,0.497103
min,-7.0421,-13.7731,-5.2861,-8.5482,1.0
25%,-1.773,-1.7082,-1.574975,-2.41345,1.0
50%,0.49618,2.31965,0.61663,-0.58665,1.0
75%,2.821475,6.814625,3.17925,0.39481,2.0
max,6.8248,12.9516,17.9274,2.4495,2.0


In [189]:
data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1372 entries, 0 to 1371
Data columns (total 5 columns):
V1       1372 non-null float64
V2       1372 non-null float64
V3       1372 non-null float64
V4       1372 non-null float64
Class    1372 non-null int64
dtypes: float64(4), int64(1)
memory usage: 53.7 KB


In [135]:
frames = pd.DataFrame(np.arange(9).reshape(3,3), index = [1, 2, 3], columns = ['Karachi', 'Lahore', 'Multan'])

In [136]:
frames

Unnamed: 0,Karachi,Lahore,Multan
1,0,1,2
2,3,4,5
3,6,7,8


In [139]:
frames2 = frames.reindex([0, 1, 2, 3, 4,])

In [140]:
frames2

Unnamed: 0,Karachi,Lahore,Multan
0,,,
1,0.0,1.0,2.0
2,3.0,4.0,5.0
3,6.0,7.0,8.0
4,,,


In [143]:
cities = ('Haydarbad', 'Islamabad', 'Karachi')

In [144]:
frames2.reindex(columns = cities)

Unnamed: 0,Haydarbad,Islamabad,Karachi
0,,,
1,,,0.0
2,,,3.0
3,,,6.0
4,,,


In [78]:
val = pd.Series([-1.2, -3.3, -2.2], index = ['i', 'iii', 'iv'])

In [79]:
val

i     -1.2
iii   -3.3
iv    -2.2
dtype: float64

In [80]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,Karachi,4.0,2019,Pakistan
ii,2,Lahore,1.5,2019,Pakistan
iii,3,Islamabad,1.0,2019,Pakistan
iv,4,Multan,0.8,2019,Pakistan


In [81]:
frame3.Serial = val

In [82]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,-1.2,Karachi,4.0,2019,Pakistan
ii,,Lahore,1.5,2019,Pakistan
iii,-3.3,Islamabad,1.0,2019,Pakistan
iv,-2.2,Multan,0.8,2019,Pakistan


In [83]:
frame3.Serial['i', 'ii', 'iv'] = [1,2,3]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [84]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1.0,Karachi,4.0,2019,Pakistan
ii,2.0,Lahore,1.5,2019,Pakistan
iii,-3.3,Islamabad,1.0,2019,Pakistan
iv,3.0,Multan,0.8,2019,Pakistan


In [85]:
frame3['Eastern'] = frame3.City == 'Karachi'

In [86]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location,Eastern
i,1.0,Karachi,4.0,2019,Pakistan,True
ii,2.0,Lahore,1.5,2019,Pakistan,False
iii,-3.3,Islamabad,1.0,2019,Pakistan,False
iv,3.0,Multan,0.8,2019,Pakistan,False


In [87]:
data3 = {'Name':['Saad', 'Ali', 'Amim', 'Mushtaq', 'Umair'],
         'Marks_In_AI':[99,18,77,66,55],
         'Marks_In_Python':[98,47,76,65,54],
         'Marks_In_Numpy':[97,56,75,64,13]}

In [88]:
score = pd.DataFrame(data3)

In [89]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy
0,Saad,99,98,97
1,Ali,18,47,56
2,Amim,77,76,75
3,Mushtaq,66,65,64
4,Umair,55,54,13


In [90]:
score['Total_Marks'] = 300

In [91]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy,Total_Marks
0,Saad,99,98,97,300
1,Ali,18,47,56,300
2,Amim,77,76,75,300
3,Mushtaq,66,65,64,300
4,Umair,55,54,13,300


In [92]:
score['Obtained_Marks'] = score['Marks_In_AI']+score['Marks_In_Python']+score['Marks_In_Numpy']

In [93]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy,Total_Marks,Obtained_Marks
0,Saad,99,98,97,300,294
1,Ali,18,47,56,300,121
2,Amim,77,76,75,300,228
3,Mushtaq,66,65,64,300,195
4,Umair,55,54,13,300,122


In [94]:
score['Percentage'] = score['Obtained_Marks']/score['Total_Marks']*100

In [95]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy,Total_Marks,Obtained_Marks,Percentage
0,Saad,99,98,97,300,294,98.0
1,Ali,18,47,56,300,121,40.333333
2,Amim,77,76,75,300,228,76.0
3,Mushtaq,66,65,64,300,195,65.0
4,Umair,55,54,13,300,122,40.666667


In [96]:
score['Status'] = ['Passed' if percent>=50 else 'Failed' for percent in score['Percentage']]

In [97]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy,Total_Marks,Obtained_Marks,Percentage,Status
0,Saad,99,98,97,300,294,98.0,Passed
1,Ali,18,47,56,300,121,40.333333,Failed
2,Amim,77,76,75,300,228,76.0,Passed
3,Mushtaq,66,65,64,300,195,65.0,Passed
4,Umair,55,54,13,300,122,40.666667,Failed


In [98]:
score['Final_Status'] = ['Passed' if ai>=50 and py>=50 and np>=50 else 'Failed' 
                         for ai,py,np in zip(score.Marks_In_AI, score.Marks_In_Numpy, score.Marks_In_Python)]

In [99]:
score

Unnamed: 0,Name,Marks_In_AI,Marks_In_Python,Marks_In_Numpy,Total_Marks,Obtained_Marks,Percentage,Status,Final_Status
0,Saad,99,98,97,300,294,98.0,Passed,Passed
1,Ali,18,47,56,300,121,40.333333,Failed,Failed
2,Amim,77,76,75,300,228,76.0,Passed,Passed
3,Mushtaq,66,65,64,300,195,65.0,Passed,Passed
4,Umair,55,54,13,300,122,40.666667,Failed,Failed


In [103]:
pop = {'Karachi':{2000:1.5, 2010:3.0, 2019:4.0},
       'Lahore':{2000:0.5, 2011:1.0, 2019:1.5}}

In [104]:
frame4 = pd.DataFrame(pop)

In [105]:
frame4

Unnamed: 0,Karachi,Lahore
2000,1.5,0.5
2010,3.0,
2011,,1.0
2019,4.0,1.5


In [106]:
frame4.T

Unnamed: 0,2000,2010,2011,2019
Karachi,1.5,3.0,,4.0
Lahore,0.5,,1.0,1.5


In [107]:
pd.DataFrame(pop, index = [2000, 2002, 2019])

Unnamed: 0,Karachi,Lahore
2000,1.5,0.5
2002,,
2019,4.0,1.5


# Index Objects

In [108]:
obj5 = pd.Series(range(3), index = ['a', 'b', 'c'])

In [109]:
obj5

a    0
b    1
c    2
dtype: int64

In [110]:
index = obj5.index

In [111]:
index

Index(['a', 'b', 'c'], dtype='object')

In [112]:
index2 = obj5.values

In [113]:
index2

array([0, 1, 2], dtype=int64)

In [114]:
index[1:]

Index(['b', 'c'], dtype='object')

In [115]:
index[:2]

Index(['a', 'b'], dtype='object')

In [116]:
index[1]

'b'

In [119]:
labels = pd.Index(np.arange(3))

In [120]:
labels

Int64Index([0, 1, 2], dtype='int64')

In [121]:
obj6 = pd.Series([1.2, 2.2, 3.3], index = labels)

In [122]:
obj6

0    1.2
1    2.2
2    3.3
dtype: float64

In [123]:
obj6.index is labels

True

In [125]:
obj6.index is obj5

False

# Dropping Entries From An Axis

In [145]:
obj7 = pd.Series(np.arange(5.), index = ['a', 'b', 'c', 'd', 'e'])

In [146]:
obj7

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [150]:
new_obj = obj7.drop('c')

In [151]:
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [153]:
new_obj2 = obj7.drop(['a', 'c'])

In [154]:
new_obj2

b    1.0
d    3.0
e    4.0
dtype: float64