# Pandas

# Series

In [1]:
import pandas as pd

In [2]:
obj1 = pd.Series([11,22,33,44])

In [3]:
obj1

0    11
1    22
2    33
3    44
dtype: int64

In [4]:
obj1.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
obj1.values

array([11, 22, 33, 44], dtype=int64)

In [6]:
obj1.value_counts

<bound method IndexOpsMixin.value_counts of 0    11
1    22
2    33
3    44
dtype: int64>

In [7]:
obj2 = pd.Series([11,22,33,44], index = ['a', 'b', 'c', 'd']) # here we define index manually

In [8]:
obj2

a    11
b    22
c    33
d    44
dtype: int64

In [9]:
obj2['c']

33

In [10]:
obj2['c'] = 34

In [11]:
obj2

a    11
b    22
c    34
d    44
dtype: int64

In [12]:
del obj2['c']

In [13]:
obj2

a    11
b    22
d    44
dtype: int64

In [14]:
obj2['c'] = 34

In [15]:
obj2

a    11
b    22
d    44
c    34
dtype: int64

In [16]:
index = [1,2,3,4]

In [17]:
obj3 = pd.Series(['Saad', 'Amim', 'Ali', 'Moiz'], index = index)

In [18]:
obj3

1    Saad
2    Amim
3     Ali
4    Moiz
dtype: object

In [19]:
obj3[3]

'Ali'

In [20]:
obj3 == 'Saad'

1     True
2    False
3    False
4    False
dtype: bool

In [21]:
obj3 == 'Khalil'

1    False
2    False
3    False
4    False
dtype: bool

In [22]:
obj3[obj3 == 'Saad']

1    Saad
dtype: object

In [23]:
obj3[obj3 == 'Khalil']

Series([], dtype: object)

In [24]:
obj1

0    11
1    22
2    33
3    44
dtype: int64

In [25]:
obj1+1

0    12
1    23
2    34
3    45
dtype: int64

In [26]:
obj3

1    Saad
2    Amim
3     Ali
4    Moiz
dtype: object

In [27]:
obj3*2

1    SaadSaad
2    AmimAmim
3      AliAli
4    MoizMoiz
dtype: object

In [28]:
obj1*2

0    22
1    44
2    66
3    88
dtype: int64

In [30]:
obj1-2

0     9
1    20
2    31
3    42
dtype: int64

In [31]:
obj1//2

0     5
1    11
2    16
3    22
dtype: int64

In [32]:
obj4 = pd.Series([100,200,300,400,500], index = ['1h', '2h', '3h', '4h', '5h'])

In [34]:
obj4

1h    100
2h    200
3h    300
4h    400
5h    500
dtype: int64

In [35]:
'1h' in obj4

True

In [36]:
'6h' in obj4

False

Creating a series using python dictionary

In [40]:
sdata = {'Karachi':"Mazar-e-Quaid", 'Lahore':"Minar-e-Pakistan", 'Peshawar':"Bab-e-Khaybar", 'Multan':"Halwa"}
#         index   :     value        index  :      value           index   :     value        index  : value

In [41]:
s1 = pd.Series(sdata)

In [42]:
s1

Karachi        Mazar-e-Quaid
Lahore      Minar-e-Pakistan
Peshawar       Bab-e-Khaybar
Multan                 Halwa
dtype: object

In [46]:
city = ['Karachi', 'Multan', 'Lahore', 'Islamabad']

In [47]:
s2 = pd.Series(sdata, index = city)

In [48]:
s2

Karachi         Mazar-e-Quaid
Multan                  Halwa
Lahore       Minar-e-Pakistan
Islamabad                 NaN
dtype: object

In [49]:
pd.isnull(s2)

Karachi      False
Multan       False
Lahore       False
Islamabad     True
dtype: bool

In [50]:
s2[pd.isnull(s2)]

Islamabad    NaN
dtype: object

In [51]:
pd.notnull(s2)

Karachi       True
Multan        True
Lahore        True
Islamabad    False
dtype: bool

In [52]:
s2[pd.notnull(s2)]

Karachi       Mazar-e-Quaid
Multan                Halwa
Lahore     Minar-e-Pakistan
dtype: object

# Data Frame

In [54]:
data = {'City':['Karachi,', 'Lahore', 'Islamabad', 'Multan'],
         'Population':[4.0, 1.5, 1.0, 0.8],
         'Year':[2019, 2019, 2019, 2019]}

In [55]:
frame = pd.DataFrame(data)

In [56]:
frame

Unnamed: 0,City,Population,Year
0,"Karachi,",4.0,2019
1,Lahore,1.5,2019
2,Islamabad,1.0,2019
3,Multan,0.8,2019


In [57]:
frame2 = pd.DataFrame(data, columns = ['Year', 'City', 'Population'])

In [58]:
frame2

Unnamed: 0,Year,City,Population
0,2019,"Karachi,",4.0
1,2019,Lahore,1.5
2,2019,Islamabad,1.0
3,2019,Multan,0.8


In [59]:
frame.describe()

Unnamed: 0,Population,Year
count,4.0,4.0
mean,1.825,2019.0
std,1.479583,0.0
min,0.8,2019.0
25%,0.95,2019.0
50%,1.25,2019.0
75%,2.125,2019.0
max,4.0,2019.0


In [60]:
frame2.describe()

Unnamed: 0,Year,Population
count,4.0,4.0
mean,2019.0,1.825
std,0.0,1.479583
min,2019.0,0.8
25%,2019.0,0.95
50%,2019.0,1.25
75%,2019.0,2.125
max,2019.0,4.0


In [61]:
frame2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
Year          4 non-null int64
City          4 non-null object
Population    4 non-null float64
dtypes: float64(1), int64(1), object(1)
memory usage: 176.0+ bytes


In [65]:
frame3 = pd.DataFrame(data, columns = ['City', 'Population', 'Year', 'Location'], index = ['i', 'ii', 'iii', 'iv'])

In [66]:
frame3

Unnamed: 0,City,Population,Year,Location
i,"Karachi,",4.0,2019,
ii,Lahore,1.5,2019,
iii,Islamabad,1.0,2019,
iv,Multan,0.8,2019,


In [67]:
frame3['Location'] = 'Pakistan'

In [68]:
frame3

Unnamed: 0,City,Population,Year,Location
i,"Karachi,",4.0,2019,Pakistan
ii,Lahore,1.5,2019,Pakistan
iii,Islamabad,1.0,2019,Pakistan
iv,Multan,0.8,2019,Pakistan


In [69]:
frame3 = pd.DataFrame(data, columns = ['Serial', 'City', 'Population', 'Year', 'Location'],
                      index = ['i', 'ii', 'iii', 'iv'])

In [70]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,,"Karachi,",4.0,2019,
ii,,Lahore,1.5,2019,
iii,,Islamabad,1.0,2019,
iv,,Multan,0.8,2019,


In [71]:
frame3['Location'] = 'Pakistan'

In [72]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,,"Karachi,",4.0,2019,Pakistan
ii,,Lahore,1.5,2019,Pakistan
iii,,Islamabad,1.0,2019,Pakistan
iv,,Multan,0.8,2019,Pakistan


In [73]:
frame3['Serial'] = 1

In [74]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,"Karachi,",4.0,2019,Pakistan
ii,1,Lahore,1.5,2019,Pakistan
iii,1,Islamabad,1.0,2019,Pakistan
iv,1,Multan,0.8,2019,Pakistan


In [75]:
frame3['Serial'] = [1,2,3,4]

In [76]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,"Karachi,",4.0,2019,Pakistan
ii,2,Lahore,1.5,2019,Pakistan
iii,3,Islamabad,1.0,2019,Pakistan
iv,4,Multan,0.8,2019,Pakistan


In [77]:
frame3['Serial'] = range(1,5)

In [78]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,"Karachi,",4.0,2019,Pakistan
ii,2,Lahore,1.5,2019,Pakistan
iii,3,Islamabad,1.0,2019,Pakistan
iv,4,Multan,0.8,2019,Pakistan


In [79]:
frame3.columns

Index(['Serial', 'City', 'Population', 'Year', 'Location'], dtype='object')

In [80]:
frame3.Serial

i      1
ii     2
iii    3
iv     4
Name: Serial, dtype: int32

In [81]:
frame3.City

i       Karachi,
ii        Lahore
iii    Islamabad
iv        Multan
Name: City, dtype: object

In [85]:
frame3['City']

i       Karachi,
ii        Lahore
iii    Islamabad
iv        Multan
Name: City, dtype: object

In [86]:
frame3[['City']]

Unnamed: 0,City
i,"Karachi,"
ii,Lahore
iii,Islamabad
iv,Multan


In [87]:
frame3[['City', 'Population']]

Unnamed: 0,City,Population
i,"Karachi,",4.0
ii,Lahore,1.5
iii,Islamabad,1.0
iv,Multan,0.8


In [88]:
frame3

Unnamed: 0,Serial,City,Population,Year,Location
i,1,"Karachi,",4.0,2019,Pakistan
ii,2,Lahore,1.5,2019,Pakistan
iii,3,Islamabad,1.0,2019,Pakistan
iv,4,Multan,0.8,2019,Pakistan


In [89]:
frame3.loc['i']

Serial               1
City          Karachi,
Population           4
Year              2019
Location      Pakistan
Name: i, dtype: object

In [90]:
frame3.iloc[1]

Serial               2
City            Lahore
Population         1.5
Year              2019
Location      Pakistan
Name: ii, dtype: object