# Pandas
### 25-08-2022

In [1]:
import numpy as np
import pandas as pd

How to create a series?

In [2]:
data1 = pd.Series([2, 3, 4, 45, 6], index=['a', 'b', 'c', 'd', 'e'])
data1

a     2
b     3
c     4
d    45
e     6
dtype: int64

In [3]:
data2 = pd.Series([2, 3, 4, 45, 6], index=[2, 4, 6, 8, 16])
data2

2      2
4      3
6      4
8     45
16     6
dtype: int64

In [4]:
data3 = pd.Series({'a': 1, 'b':2, 'c':3})
data3

a    1
b    2
c    3
dtype: int64

In [5]:
data4 = pd.Series([2, np.nan, 4, None, 6])
data4

0    2.0
1    NaN
2    4.0
3    NaN
4    6.0
dtype: float64

How to access data.

In [6]:
data1['a']

2

In [7]:
data2[2 : 6]

6      4
8     45
16     6
dtype: int64

In [8]:
data2

2      2
4      3
6      4
8     45
16     6
dtype: int64

In [9]:
# data2[0] ERROR
data2.iloc[0] # implicit indexing

2

In [10]:
data2.loc[6] # explicit indexing

4

In [11]:
data1

a     2
b     3
c     4
d    45
e     6
dtype: int64

In [12]:
data1['j'] = 100
data1

a      2
b      3
c      4
d     45
e      6
j    100
dtype: int64

In [13]:
data1['j'] = 1000
data1

a       2
b       3
c       4
d      45
e       6
j    1000
dtype: int64

In [14]:
data1['a' : 'j' : 3]

a     2
d    45
dtype: int64

In [15]:
data1[['a', 'd', 'j']]

a       2
d      45
j    1000
dtype: int64

In [16]:
data1

a       2
b       3
c       4
d      45
e       6
j    1000
dtype: int64

In [17]:
data1[(data1 > 2) & (data1 < 7)]

b    3
c    4
e    6
dtype: int64

Create a DataFrame

In [18]:
df = pd.DataFrame(np.random.randint(0, 10, (3, 3)), index=['a', 'b', 'c'], columns=['ab', 'bc', 'cd'])
df

Unnamed: 0,ab,bc,cd
a,3,8,5
b,8,7,8
c,8,5,1


In [19]:
df.index

Index(['a', 'b', 'c'], dtype='object')

In [20]:
df.columns

Index(['ab', 'bc', 'cd'], dtype='object')

In [21]:
dictionary = {'data1':[2, 3, 4, 5], 'data2':[2, 5, 6, 4]} # dictionary
df2 = pd.DataFrame(dictionary)
df2

Unnamed: 0,data1,data2
0,2,2
1,3,5
2,4,6
3,5,4


In [22]:
df2[:3]['data1']

0    2
1    3
2    4
Name: data1, dtype: int64

In [23]:
population_dict = {'Bhopal': 38332521, 'Indore': 26448193, 'Gandhinagar': 19651127, 'Ahmedabad': 19552860, 'Goa': 2135}
population = pd.Series(population_dict)

In [24]:
area_dict = {'Bhopal': 423967, 'Indore': 695662, 'Gandhinagar': 141297, 'Ahmedabad': 170312, 'Goa': 149995}
area = pd.Series(area_dict)

In [25]:
population.iloc[0:2] # 0 to n-1

Bhopal    38332521
Indore    26448193
dtype: int64

In [26]:
population.loc['Bhopal' : 'Goa'] # 0 to n

Bhopal         38332521
Indore         26448193
Gandhinagar    19651127
Ahmedabad      19552860
Goa                2135
dtype: int64

In [27]:
df = pd.DataFrame({'population': population, 'area': area})
df

Unnamed: 0,population,area
Bhopal,38332521,423967
Indore,26448193,695662
Gandhinagar,19651127,141297
Ahmedabad,19552860,170312
Goa,2135,149995


In [28]:
df['population'] # indexing style

Bhopal         38332521
Indore         26448193
Gandhinagar    19651127
Ahmedabad      19552860
Goa                2135
Name: population, dtype: int64

In [29]:
df.area # attribute style

Bhopal         423967
Indore         695662
Gandhinagar    141297
Ahmedabad      170312
Goa            149995
Name: area, dtype: int64

In [30]:
df.population['Bhopal' : 'Ahmedabad']

Bhopal         38332521
Indore         26448193
Gandhinagar    19651127
Ahmedabad      19552860
Name: population, dtype: int64

In [31]:
df['density'] = df['population'] / df['area']
df

Unnamed: 0,population,area,density
Bhopal,38332521,423967,90.413926
Indore,26448193,695662,38.01874
Gandhinagar,19651127,141297,139.076746
Ahmedabad,19552860,170312,114.806121
Goa,2135,149995,0.014234


In [32]:
df.loc['japan', ['population', 'density']] = 2, 25810
df

Unnamed: 0,population,area,density
Bhopal,38332521.0,423967.0,90.413926
Indore,26448193.0,695662.0,38.01874
Gandhinagar,19651127.0,141297.0,139.076746
Ahmedabad,19552860.0,170312.0,114.806121
Goa,2135.0,149995.0,0.014234
japan,2.0,,25810.0


In [33]:
df.loc[(df.density > 100)]

Unnamed: 0,population,area,density
Gandhinagar,19651127.0,141297.0,139.076746
Ahmedabad,19552860.0,170312.0,114.806121
japan,2.0,,25810.0


In [34]:
df.loc[(df.density > 100) & (df.area < 1000000)]

Unnamed: 0,population,area,density
Gandhinagar,19651127.0,141297.0,139.076746
Ahmedabad,19552860.0,170312.0,114.806121


In [35]:
df[(df.density > 100)][['population', 'area']]

Unnamed: 0,population,area
Gandhinagar,19651127.0,141297.0
Ahmedabad,19552860.0,170312.0
japan,2.0,


In [36]:
df

Unnamed: 0,population,area,density
Bhopal,38332521.0,423967.0,90.413926
Indore,26448193.0,695662.0,38.01874
Gandhinagar,19651127.0,141297.0,139.076746
Ahmedabad,19552860.0,170312.0,114.806121
Goa,2135.0,149995.0,0.014234
japan,2.0,,25810.0


In [37]:
df.drop('japan')

Unnamed: 0,population,area,density
Bhopal,38332521.0,423967.0,90.413926
Indore,26448193.0,695662.0,38.01874
Gandhinagar,19651127.0,141297.0,139.076746
Ahmedabad,19552860.0,170312.0,114.806121
Goa,2135.0,149995.0,0.014234


In [38]:
df

Unnamed: 0,population,area,density
Bhopal,38332521.0,423967.0,90.413926
Indore,26448193.0,695662.0,38.01874
Gandhinagar,19651127.0,141297.0,139.076746
Ahmedabad,19552860.0,170312.0,114.806121
Goa,2135.0,149995.0,0.014234
japan,2.0,,25810.0


In [39]:
df.drop('japan', inplace=True)
df

Unnamed: 0,population,area,density
Bhopal,38332521.0,423967.0,90.413926
Indore,26448193.0,695662.0,38.01874
Gandhinagar,19651127.0,141297.0,139.076746
Ahmedabad,19552860.0,170312.0,114.806121
Goa,2135.0,149995.0,0.014234


In [43]:
df1 = df.drop('density', axis=1)

In [44]:
df1

Unnamed: 0,population,area
Bhopal,38332521.0,423967.0
Indore,26448193.0,695662.0
Gandhinagar,19651127.0,141297.0
Ahmedabad,19552860.0,170312.0
Goa,2135.0,149995.0


In [50]:
data1

a       2
b       3
c       4
d      45
e       6
j    1000
dtype: int64

In [51]:
np.exp(data1)

  result = getattr(ufunc, method)(*inputs, **kwargs)


a    7.389056e+00
b    2.008554e+01
c    5.459815e+01
d    3.493427e+19
e    4.034288e+02
j             inf
dtype: float64

In [52]:
np.power(2, data1)

a                 4
b                 8
c                16
d    35184372088832
e                64
j                 0
dtype: int64

In [53]:
population_dict = {'Bhopal123': 38332521, 'Indore': 26448193, 'Gandhinagar': 19651127, 'Ahmedabad': 19552860, 'Goa': 2135}
population = pd.Series(population_dict)
area_dict = {'Bhopal': 423967, 'Indore': 695662, 'Gandhinagar': 141297, 'Ahmedabad': 170312, 'Goa': 149995}
area = pd.Series(area_dict)

In [54]:
df2 = pd.DataFrame({'population': population, 'area':area})
df2

Unnamed: 0,population,area
Ahmedabad,19552860.0,170312.0
Bhopal,,423967.0
Bhopal123,38332521.0,
Gandhinagar,19651127.0,141297.0
Goa,2135.0,149995.0
Indore,26448193.0,695662.0


In [55]:
area.index | population.index

  area.index | population.index


Index(['Ahmedabad', 'Bhopal', 'Bhopal123', 'Gandhinagar', 'Goa', 'Indore'], dtype='object')

In [56]:
area.index & population.index

  area.index & population.index


Index(['Indore', 'Gandhinagar', 'Ahmedabad', 'Goa'], dtype='object')