# Pandas
#### Pandas is designed for heterogeneous, tabular data, whereas NumPy is best suited to homogeneous numerical array data
## Series
## DataFrame

In [2]:
!pip install pandas

Collecting pandas
  Downloading pandas-1.4.2-cp39-cp39-win_amd64.whl (10.5 MB)
Collecting pytz>=2020.1
  Downloading pytz-2022.1-py2.py3-none-any.whl (503 kB)
Installing collected packages: pytz, pandas
Successfully installed pandas-1.4.2 pytz-2022.1


In [3]:
import pandas as pd
import numpy as np

A Series is like a one-dimensional array, but with an associated index.

In [10]:
# Build a Series from a plain list; no index is given, so the labels default to 0..3
obj = pd.Series(data=[4, 7, -5, 3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [12]:
# Show the current index and the underlying values, then relabel the index and
# show it again.
# NOTE(review): the index assignment below mutates obj in place, so this cell is
# not idempotent. The saved output's first line already shows the letter labels,
# which suggests the cell was re-run; on a fresh kernel the first print would
# presumably show the default RangeIndex instead.
print(obj.index)
print(obj.values)
obj.index = ['a','b','c','d']
print(obj.index)
obj

Index(['a', 'b', 'c', 'd'], dtype='object')
[ 4  7 -5  3]
Index(['a', 'b', 'c', 'd'], dtype='object')


a    4
b    7
c   -5
d    3
dtype: int64

In [17]:
# Select a single value by its label
print(obj['c'])
# Filter with a boolean mask: only entries greater than 0 are kept, labels included
print(obj[obj>0])

-5
a    4
b    7
d    3
dtype: int64


In [18]:
# A second small Series; the index again defaults to 0, 1, 2
s1 = pd.Series(data=[2, 4, 6])
s1

0    2
1    4
2    6
dtype: int64

In [20]:
# Integers and strings mixed in one Series: the resulting dtype is object
s3 = pd.Series(data=[1, 2, 'a', 'b'])
print(s3.values)
s3

[1 2 'a' 'b']


0    1
1    2
2    a
3    b
dtype: object

In [21]:
# Minimal example Series used below to inspect the default index
series = pd.Series(data=[1, 2, 3])
series

0    1
1    2
2    3
dtype: int64

In [22]:
# By default the index runs from 0 through N-1, where N is the number of
# elements in the Series; a custom index can be specified instead.
# The cell displays the (index, values) pair as a tuple.
series.index , series.values

(RangeIndex(start=0, stop=3, step=1), array([1, 2, 3], dtype=int64))

In [23]:
# Replace s1's default integer index with string labels (assignment mutates s1 in place)
s1.index = ['a','b','c']
s1

a    2
b    4
c    6
dtype: int64

In [24]:
# Explicit labels plus an explicit dtype: the integers are stored as 32-bit floats
s1 = pd.Series(
    data=[2, 4, 6],
    index=['a', 'b', 'c'],
    dtype=np.float32,
)
s1

a    2.0
b    4.0
c    6.0
dtype: float32

In [25]:
# Same data and labels, but stored with the generic object dtype
s1 = pd.Series(
    data=[2, 4, 6],
    index=['a', 'b', 'c'],
    dtype='object',
)
s1

a    2
b    4
c    6
dtype: object

In [26]:
# Convert the object-dtype Series to 32-bit integers and rebind s1 to the result
s1 = s1.astype(np.int32)
s1

a    2
b    4
c    6
dtype: int32

In [27]:
# Build a new Series from the existing one, overriding the dtype to object;
# the index labels of s1 are carried over
s2 = pd.Series(s1,dtype='object')
s2

a    2
b    4
c    6
dtype: object

### Accessing Series

In [28]:
# Look up a single element by its index label
s1['a']

2

In [29]:
# Assign through a label: this changes s1 in place (the dtype stays int32)
s1['a'] = 3
s1

a    3
b    4
c    6
dtype: int32

In [30]:
# Passing a list of labels returns the elements in the requested order
s1[['b','c','a']]

b    4
c    6
a    3
dtype: int32

In [31]:
# Boolean mask: keep only the entries greater than 3
s1[s1>3]

b    4
c    6
dtype: int32

In [32]:
# Multiplication is applied element-wise and produces a new Series...
print(s1*2)
# ...while s1 itself is left unchanged
s1

a     6
b     8
c    12
dtype: int32


a    3
b    4
c    6
dtype: int32

In [33]:
# Element-wise multiplication also works on the object-dtype Series;
# the result keeps dtype object
print(s2)
s2*2

a    2
b    4
c    6
dtype: object


a     4
b     8
c    12
dtype: object

In [35]:
# A Series of strings with the default integer index; its dtype is object
s3 = pd.Series(
    data=[
        'Umesh',
        'Pranav',
        'Akshay',
        'Vishal',
        'Devraju',
    ]
)
s3

0      Umesh
1     Pranav
2     Akshay
3     Vishal
4    Devraju
dtype: object

In [36]:
# For string elements, * 2 repeats each string (element-wise)
s3*2

0        UmeshUmesh
1      PranavPranav
2      AkshayAkshay
3      VishalVishal
4    DevrajuDevraju
dtype: object

In [37]:
# Assign several elements at once: a list of labels on the left,
# a list of new values on the right
s1[['a','b','c']] = [1,2,3]
s1

a    1
b    2
c    3
dtype: int32

In [38]:
# NumPy functions apply element-wise and keep the index labels;
# the result here is float64
np.exp(s1)

a     2.718282
b     7.389056
c    20.085537
dtype: float64

In [39]:
# `in` tests membership against the index labels, much like the keys of a dict
'b' in s1

True

In [42]:
# A label that is not in the index gives False
'd' in s1

False

In [43]:
# The Index object holding s1's labels
s1.index

Index(['a', 'b', 'c'], dtype='object')

In [41]:
# Element-wise test of which index labels appear in the given list.
# np.isin supersedes np.in1d (deprecated as of NumPy 2.0) and yields the same
# boolean array for this one-dimensional input.
np.isin(s1.index, ['a', 'b'])

array([ True,  True, False])

##### A Series can be thought of as a fixed-length, ordered dict, since it maps index values to data values

In [44]:
# Plain dictionary mapping a company ticker to a number; used below as Series input
dict1 = {
    'TCS': 3000,
    'INFY': 2000,
    'HCL': 1500,
    'WIPRO': 1400,
}
dict1

{'TCS': 3000, 'INFY': 2000, 'HCL': 1500, 'WIPRO': 1400}

In [45]:
# Convert a dictionary to a Series: the keys become the index labels
# and the values become the data
s3 = pd.Series(dict1)
s3

TCS      3000
INFY     2000
HCL      1500
WIPRO    1400
dtype: int64

In [46]:
# Passing an explicit index reorders the dictionary entries to match it.
# A label with no entry in the dictionary ('ACCENTURE') gets NaN, and the
# resulting dtype is float64.
company = ['WIPRO','HCL','INFY','TCS','ACCENTURE']
s4 = pd.Series(dict1,index=company)
s4

WIPRO        1400.0
HCL          1500.0
INFY         2000.0
TCS          3000.0
ACCENTURE       NaN
dtype: float64

In [47]:
# Check for missing values with the top-level pandas function.
# NaN stands for "Not a Number"; the result is True where a value is missing.
pd.isnull(s4)

WIPRO        False
HCL          False
INFY         False
TCS          False
ACCENTURE     True
dtype: bool

In [48]:
# The opposite check: True where a value is present
pd.notnull(s4)

WIPRO         True
HCL           True
INFY          True
TCS           True
ACCENTURE    False
dtype: bool

In [49]:
# The same missing-value check is also available as a Series instance method
s4.isnull()

WIPRO        False
HCL          False
INFY         False
TCS          False
ACCENTURE     True
dtype: bool

In [50]:
# Instance-method form of the "value is present" check
s4.notnull()

WIPRO         True
HCL           True
INFY          True
TCS           True
ACCENTURE    False
dtype: bool

In [39]:
# Display s4 again for reference
s4

WIPRO        1400.0
HCL          1500.0
INFY         2000.0
TCS          3000.0
ACCENTURE       NaN
dtype: float64

In [59]:
# Work on a copy so that the entry added to s3 further down does not appear in s4
s3 = s4.copy()
s3

WIPRO        1400.0
HCL          1500.0
INFY         2000.0
TCS          3000.0
ACCENTURE       NaN
dtype: float64

##### Series automatically aligns by index in arithmetic operations

In [60]:
# Assigning to a label that does not exist yet appends a new entry to s3
s3['IBM'] = 1000
s3

WIPRO        1400.0
HCL          1500.0
INFY         2000.0
TCS          3000.0
ACCENTURE       NaN
IBM          1000.0
dtype: float64

In [61]:
# Addition aligns the operands on their index labels. The result covers the
# labels of both Series (shown sorted), and any label that is missing or NaN
# on either side ('IBM', 'ACCENTURE') comes out as NaN.
s3+s4

ACCENTURE       NaN
HCL          3000.0
IBM             NaN
INFY         4000.0
TCS          6000.0
WIPRO        2800.0
dtype: float64

In [53]:
# Swapping the operands gives the same aligned result
s4+s3

ACCENTURE       NaN
HCL          3000.0
IBM             NaN
INFY         4000.0
TCS          6000.0
WIPRO        2800.0
dtype: float64

In [62]:
# Both the Series itself and its index have a name attribute, which integrates
# with other key areas of pandas functionality
s3.name = 'share'
s3.index.name= 'company'

In [63]:
# The index name ('company') and the Series name ('share') now appear in the display
s3

company
WIPRO        1400.0
HCL          1500.0
INFY         2000.0
TCS          3000.0
ACCENTURE       NaN
IBM          1000.0
Name: share, dtype: float64

In [64]:
# Read the Series name back
s3.name

'share'

In [65]:
# A Series's index can be altered in place by assignment.
# Only the labels change: the values keep their positions, so 1400.0 (previously
# labelled 'WIPRO') is now labelled 'HCL'.
s3.index = ['HCL','INFY','TCS','Innova','IBM','Polaris']
s3

HCL        1400.0
INFY       1500.0
TCS        2000.0
Innova     3000.0
IBM           NaN
Polaris    1000.0
Name: share, dtype: float64

In [66]:
# Keys of the source dictionary, in insertion order
dict1.keys()

dict_keys(['TCS', 'INFY', 'HCL', 'WIPRO'])