# Getting Started with Pandas

In [1]:
import numpy as np
import pandas as pd

## pandas Series

In [2]:
# Build a Series from a plain list; with no labels given, pandas numbers the rows 0..3
s1 = pd.Series(data=[2, 5, -8, 6])
s1

0    2
1    5
2   -8
3    6
dtype: int64

In [3]:
# values attribute: the Series data as a NumPy array
s1.values

array([ 2,  5, -8,  6], dtype=int64)

In [4]:
# index attribute: a default RangeIndex, because s1 was created without labels
s1.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
# Same values as s1, but with explicit string labels supplied through `index`
s2 = pd.Series(
    data=[2, 5, -8, 6],
    index=['d', 'b', 'a', 'c'],
)
s2

d    2
b    5
a   -8
c    6
dtype: int64

In [7]:
# With explicit labels the index is an Index holding those labels (not a RangeIndex)
s2.index

Index(['d', 'b', 'a', 'c'], dtype='object')

In [8]:
# Accessing individual elements by their index label
s2['a']

-8

In [10]:
# Select elements at specified indexes: pass a list of labels;
# the result follows the order of the requested labels
s2[['c','a','d']]

c    6
a   -8
d    2
dtype: int64

In [11]:
# s2 is unchanged by the selections above
s2

d    2
b    5
a   -8
c    6
dtype: int64

In [12]:
# Modifying elements: assigning through a label changes s2 itself
s2['d'] = 45
s2

d    45
b     5
a    -8
c     6
dtype: int64

In [13]:
# Select values that are greater than 5 by indexing with a boolean condition
s2[s2 > 5]

d    45
c     6
dtype: int64

In [14]:
# The comparison on its own yields a boolean Series with the same labels as s2
s2 > 5

d     True
b    False
a    False
c     True
dtype: bool

In [15]:
# Create a Series object from an existing Series:
# the multiplication is applied to every element and the labels are kept

s3 = s2 * 2
s3

d    90
b    10
a   -16
c    12
dtype: int64

In [16]:
# `in` tests the index labels of the Series, not its values
print('b' in s2)

True


In [17]:
# 'e' is not one of the labels of s2, so `not in` is True
print('e' not in s2)

True


In [18]:
# s2 with its original single-letter labels
s2

d    45
b     5
a    -8
c     6
dtype: int64

In [19]:
# Modifying existing index: assign a new list of labels (one per element);
# the values stay in place and only the labels change

s2.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
s2

Bob      45
Steve     5
Jeff     -8
Ryan      6
dtype: int64

In [20]:
# Creating a Series object using a Python Dictionary:
# the dict keys become the index labels and the dict values become the data

corona_data = {
    'USA': 164800,
    'India': 1251,
    'UK': 22141,
    'Italy': 101739,
    'China': 81518,
    'Spain': 94417,
    'Germany': 67051,
}

s4 = pd.Series(data=corona_data)

s4

USA        164800
India        1251
UK          22141
Italy      101739
China       81518
Spain       94417
Germany     67051
dtype: int64

In [22]:
# Labels to look up next; 'Iran' and 'France' have no entry in corona_data
countries = [
    'India',
    'USA',
    'Iran',
    'Germany',
    'France',
]

In [23]:
# Build a Series from the dict using only the labels in countries, in that order;
# labels with no entry in corona_data come out as NaN, so the dtype becomes float64
s5 = pd.Series(data=corona_data, index=countries)
s5

India        1251.0
USA        164800.0
Iran            NaN
Germany     67051.0
France          NaN
dtype: float64

In [24]:
# Flag missing entries: True where the value is NaN
pd.isnull(s5)

India      False
USA        False
Iran        True
Germany    False
France      True
dtype: bool

In [25]:
# The same check written as a method call on the Series
s5.isnull()

India      False
USA        False
Iran        True
Germany    False
France      True
dtype: bool

In [26]:
# Combining two Series objects: first operand (integer values, seven countries)
s4

USA        164800
India        1251
UK          22141
Italy      101739
China       81518
Spain       94417
Germany     67051
dtype: int64

In [27]:
# Second operand: different labels from s4, and two NaN entries
s5

India        1251.0
USA        164800.0
Iran            NaN
Germany     67051.0
France          NaN
dtype: float64

In [28]:
# Addition matches elements by label, not by position: the result covers the labels
# of both Series, and any label missing (or NaN) on either side yields NaN
s4 + s5

China           NaN
France          NaN
Germany    134102.0
India        2502.0
Iran            NaN
Italy           NaN
Spain           NaN
UK              NaN
USA        329600.0
dtype: float64

In [29]:
# s4 is unchanged by the addition above
s4

USA        164800
India        1251
UK          22141
Italy      101739
China       81518
Spain       94417
Germany     67051
dtype: int64

In [30]:
# Give the Series and its index a name; both names show up when the Series is displayed
s4.name = 'Corona Cases'
s4.index.name = 'Country'
s4

Country
USA        164800
India        1251
UK          22141
Italy      101739
China       81518
Spain       94417
Germany     67051
Name: Corona Cases, dtype: int64

In [34]:
# Float Series holding 0.0 through 3.0, labelled 'a' through 'd'
s7 = pd.Series(
    data=np.arange(4, dtype=float),
    index=['a', 'b', 'c', 'd'],
)

s7

a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

## pandas DataFrame

In [35]:
# Creating a DataFrame using 2D ndarray: start from a 2-row, 3-column integer array
data = np.array([
    [100, 92, 83],
    [94, 85, 96],
])

data

array([[100,  92,  83],
       [ 94,  85,  96]])

In [36]:
# Confirm that data is a NumPy ndarray
print(type(data))

<class 'numpy.ndarray'>


In [37]:
# Wrap the 2D array in a DataFrame: `index` labels the rows, `columns` names the columns
df1 = pd.DataFrame(
    data,
    index=['Priyanka', 'Rahul'],
    columns=['Math', 'Physics', 'Chemistry'],
)

df1

Unnamed: 0,Math,Physics,Chemistry
Priyanka,100,92,83
Rahul,94,85,96


In [38]:
# Creating a DataFrame using a dictionary of lists:
# one key per column, each list holding that column's values

data = {
    'names': ['Rama', 'Lalitha', 'Peter', 'Shabana'],
    'Age': [20, 21, 19, 18],
}

data

{'names': ['Rama', 'Lalitha', 'Peter', 'Shabana'], 'Age': [20, 21, 19, 18]}

In [39]:
# Each dict key becomes a column; with no index given the rows are numbered 0..3
df2 = pd.DataFrame(data)

df2

Unnamed: 0,names,Age
0,Rama,20
1,Lalitha,21
2,Peter,19
3,Shabana,18


In [40]:
# Creating a DataFrame using nested dictionary:
# outer keys (101, 201, 301) each map to an inner dict of fields
# NOTE(review): ages are stored as strings ('27'), not numbers — confirm this is intended

employees = {
    101: {'name': 'Amar', 'age': '27', 'sex': 'Male'},
    201: {'name': 'Malathi', 'age': '22', 'sex': 'Female'},
    301: {'name': 'James', 'age': '37', 'sex': 'Male'},
}

employees

{101: {'name': 'Amar', 'age': '27', 'sex': 'Male'},
 201: {'name': 'Malathi', 'age': '22', 'sex': 'Female'},
 301: {'name': 'James', 'age': '37', 'sex': 'Male'}}

In [41]:
# With a nested dict, the outer keys become the columns and the inner keys become the row labels
df5 = pd.DataFrame(employees)

df5

Unnamed: 0,101,201,301
name,Amar,Malathi,James
age,27,22,37
sex,Male,Female,Male


In [42]:
# Dictionary of equal-length lists: one entry per (state, year) pair
# ('pop' is presumably population — units are not stated here)
data = {
    'state': ['Bihar', 'Bihar', 'Bihar', 'Gujarat', 'Gujarat', 'Gujarat'],
    'year': [2001, 2011, 2018, 2001, 2011, 2018],
    'pop': [8.3, 10.4, 11.9, 5.0, 6.1, 6.4],
}

data

{'state': ['Bihar', 'Bihar', 'Bihar', 'Gujarat', 'Gujarat', 'Gujarat'],
 'year': [2001, 2011, 2018, 2001, 2011, 2018],
 'pop': [8.3, 10.4, 11.9, 5.0, 6.1, 6.4]}

In [43]:
# Build the DataFrame from the dict; columns follow the dict's key order, rows are numbered 0..5
state_df = pd.DataFrame(data)
state_df

Unnamed: 0,state,year,pop
0,Bihar,2001,8.3
1,Bihar,2011,10.4
2,Bihar,2018,11.9
3,Gujarat,2001,5.0
4,Gujarat,2011,6.1
5,Gujarat,2018,6.4


In [45]:
# Show only the first 2 rows
state_df.head(2)

Unnamed: 0,state,year,pop
0,Bihar,2001,8.3
1,Bihar,2011,10.4


In [47]:
# Show only the last 3 rows
state_df.tail(3)

Unnamed: 0,state,year,pop
3,Gujarat,2001,5.0
4,Gujarat,2011,6.1
5,Gujarat,2018,6.4


In [49]:
# Choose the column order explicitly and label the rows with custom strings;
# 'area' has no entry in data, so that column starts out filled with missing values
state_df2 = pd.DataFrame(
    data,
    columns=['year', 'state', 'pop', 'area'],
    index=['one', 'two', 'three', 'four', 'five', 'six'],
)

state_df2

Unnamed: 0,year,state,pop,area
one,2001,Bihar,8.3,
two,2011,Bihar,10.4,
three,2018,Bihar,11.9,
four,2001,Gujarat,5.0,
five,2011,Gujarat,6.1,
six,2018,Gujarat,6.4,


In [50]:
# Select a single column by name; the result is a Series that keeps the row labels
state_df2['state']

one        Bihar
two        Bihar
three      Bihar
four     Gujarat
five     Gujarat
six      Gujarat
Name: state, dtype: object

In [51]:
# Attribute-style column access; gives the same kind of result as state_df2['year'].
# NOTE: this form only works when the column name is a valid Python identifier that
# does not clash with a DataFrame attribute or method — e.g. the 'pop' column must be
# read as state_df2['pop'], because state_df2.pop is the DataFrame.pop method.
state_df2.year

one      2001
two      2011
three    2018
four     2001
five     2011
six      2018
Name: year, dtype: int64

In [52]:
# Select a row by its index label with .loc; the row comes back as a Series keyed by column name
state_df2.loc['three']

year      2018
state    Bihar
pop       11.9
area       NaN
Name: three, dtype: object

In [53]:
# Assigning a single value to a column sets that value in every row
state_df2['area'] = 16.5

state_df2

Unnamed: 0,year,state,pop,area
one,2001,Bihar,8.3,16.5
two,2011,Bihar,10.4,16.5
three,2018,Bihar,11.9,16.5
four,2001,Gujarat,5.0,16.5
five,2011,Gujarat,6.1,16.5
six,2018,Gujarat,6.4,16.5


In [54]:
# Assigning an array overwrites the column row by row (six values for six rows)
state_df2['area'] = np.arange(6.)

state_df2

Unnamed: 0,year,state,pop,area
one,2001,Bihar,8.3,0.0
two,2011,Bihar,10.4,1.0
three,2018,Bihar,11.9,2.0
four,2001,Gujarat,5.0,3.0
five,2011,Gujarat,6.1,4.0
six,2018,Gujarat,6.4,5.0


In [55]:
# Values for only three of the six row labels used by state_df2
val = pd.Series({'two': -1.2, 'four': -1.5, 'five': -1.7})

In [56]:
# Assigning a Series matches on row labels rather than position:
# rows whose label is not in val end up with a missing value
state_df2['area'] = val

state_df2

Unnamed: 0,year,state,pop,area
one,2001,Bihar,8.3,
two,2011,Bihar,10.4,-1.2
three,2018,Bihar,11.9,
four,2001,Gujarat,5.0,-1.5
five,2011,Gujarat,6.1,-1.7
six,2018,Gujarat,6.4,


In [57]:
# Add a boolean column that is True for the rows whose state is 'Gujarat'
is_gujarat = state_df2['state'] == 'Gujarat'
state_df2['western'] = is_gujarat

state_df2

Unnamed: 0,year,state,pop,area,western
one,2001,Bihar,8.3,,False
two,2011,Bihar,10.4,-1.2,False
three,2018,Bihar,11.9,,False
four,2001,Gujarat,5.0,-1.5,True
five,2011,Gujarat,6.1,-1.7,True
six,2018,Gujarat,6.4,,True
