# Pandas

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a Series from a plain list: pandas supplies a default 0..n-1 index,
# and the single float (5.6) upcasts every element to float64.
s1 = pd.Series(data=[1, 2, -7, 5.6, 4])
s1

0    1.0
1    2.0
2   -7.0
3    5.6
4    4.0
dtype: float64

In [3]:
# The `values` attribute exposes the Series data as a NumPy array
s1.values

array([ 1. ,  2. , -7. ,  5.6,  4. ])

In [4]:
# The `index` attribute holds the labels; with no explicit index it is a RangeIndex
s1.index

RangeIndex(start=0, stop=5, step=1)

In [5]:
# Supply explicit string labels instead of relying on the default integer index.
s2 = pd.Series(
    data=[1, 2, 3, 4],
    index=['A', 'B', 'C', 'D'],
)
s2

A    1
B    2
C    3
D    4
dtype: int64

In [6]:
# The explicit string labels passed at construction are stored as an Index
s2.index

Index(['A', 'B', 'C', 'D'], dtype='object')

In [7]:
# The underlying data as a NumPy array (the labels are not included)
s2.values

array([1, 2, 3, 4])

In [8]:
# Look up a single element by its index label
s2['A']

1

In [10]:
# Passing a list of labels returns a Series with the elements in the requested order
s2[['B', 'C', 'A']]

B    2
C    3
A    1
dtype: int64

In [12]:
# Assign a new value to an existing label; this modifies s2 in place.
# NOTE(review): the recorded output below reports dtype: object, although s2 was
# created with dtype int64 and an int is assigned here. Execution counts 9 and 11
# are missing, so presumably an unshown cell stored a non-numeric value first —
# confirm by running Restart Kernel -> Run All.
s2['B'] = 10
s2

A     1
B    10
C     3
D     4
dtype: object

In [13]:
# Element-wise comparison: produces a boolean Series with the same index as s2
s2 > 3

A    False
B     True
C    False
D     True
dtype: bool

In [14]:
# Boolean-mask indexing: keep only the elements whose value is greater than 3
s2[s2>3]

B    10
D     4
dtype: object

In [15]:
# Arithmetic is applied element-wise and returns a new Series; s2 is not modified
s3 = s2*2
s3

A     2
B    20
C     6
D     8
dtype: object

In [16]:
# `in` on a Series tests membership among the index labels (like dict keys)
'B' in s2

True

In [18]:
# Caution: `in` looks at the index labels, not the stored values, so this only
# reports that 20 is not a label of s2. To search the data, use `20 in s2.values`.
20 in s2

False

In [19]:
# Assigning to `.index` relabels the Series in place (one new label per element);
# the values keep their positions.
s2.index = ['AA', 'BB', 'CC', 'DD']
s2

AA     1
BB    10
CC     3
DD     4
dtype: object

In [20]:
# Creating a Series object from a Python dictionary:
# the keys become the index labels and the values become the data.

corona_data = {
    'USA': 164800,
    'India': 1251,
    'UK': 22141,
    'Italy': 101739,
    'China': 81518,
    'Spain': 94417,
    'Germany': 67051,
}

s4 = pd.Series(data=corona_data)

s4

USA        164800
India        1251
UK          22141
Italy      101739
China       81518
Spain       94417
Germany     67051
dtype: int64

In [22]:
# Passing an explicit index together with a dict picks entries by key, in the
# order given; labels absent from the dict ('Iran', 'France') come out as NaN.
s5 = pd.Series(
    corona_data,
    index=['India', 'USA', 'Iran', 'Germany', 'France'],
)
s5

India        1251.0
USA        164800.0
Iran            NaN
Germany     67051.0
France          NaN
dtype: float64

In [24]:
# Top-level function form: flags the missing (NaN) entries element-wise
pd.isnull(s5)

India      False
USA        False
Iran        True
Germany    False
France      True
dtype: bool

In [25]:
# Method form of the same check; gives the same result as pd.isnull(s5)
s5.isnull()

India      False
USA        False
Iran        True
Germany    False
France      True
dtype: bool

In [26]:
# Combining two Series objects: first, re-display the left operand
s4

USA        164800
India        1251
UK          22141
Italy      101739
China       81518
Spain       94417
Germany     67051
dtype: int64

In [27]:
# Re-display s5, the other operand used in the addition
s5

India        1251.0
USA        164800.0
Iran            NaN
Germany     67051.0
France          NaN
dtype: float64

In [28]:
# Addition aligns on index labels: the result covers the union of both indexes,
# and any label that is missing or NaN on either side yields NaN.
s4 + s5

China           NaN
France          NaN
Germany    134102.0
India        2502.0
Iran            NaN
Italy           NaN
Spain           NaN
UK              NaN
USA        329600.0
dtype: float64

In [29]:
# s4 as it looks before a Series name and an index name are assigned
s4



USA        164800
India        1251
UK          22141
Italy      101739
China       81518
Spain       94417
Germany     67051
dtype: int64

In [30]:
# Label the Series itself and its index; both names appear in the Series repr
s4.name = 'Corona Cases'
s4.index.name = 'Country'

In [31]:
# The repr now shows the index name as a header line and the Series name in the footer
s4

Country
USA        164800
India        1251
UK          22141
Italy      101739
China       81518
Spain       94417
Germany     67051
Name: Corona Cases, dtype: int64

In [32]:
# Only the Series and its Index carry a `name`; `.values` is a plain NumPy array
# and rejects the attribute. The failure is the point of this cell, so catch the
# expected error and display it instead of letting it abort Restart & Run All.
try:
    s4.values.name = 'Cases'
except AttributeError as err:
    print('AttributeError:', err)

AttributeError: 'numpy.ndarray' object has no attribute 'name'

# Data Frames

In [33]:
# A 2 x 3 integer NumPy array, written one row per line.
data = np.array([
    [100, 92, 83],
    [94, 85, 96],
])
data

array([[100,  92,  83],
       [ 94,  85,  96]])

In [35]:
# Wrap the array in a DataFrame: `index` labels the rows, `columns` labels the columns.
df1 = pd.DataFrame(
    data,
    index=['Ram', 'Sita'],
    columns=['Maths', 'Science', 'Hindi'],
)
df1

Unnamed: 0,Maths,Science,Hindi
Ram,100,92,83
Sita,94,85,96
