# Pandas - Open-source data analysis and manipulation tool
Documentation Link: [https://pandas.pydata.org/](https://pandas.pydata.org/)

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Report the library versions this notebook was executed with.
print(f"Numpy version= {np.__version__}")
print(f"Pandas Version= {pd.__version__}")

Numpy version= 1.22.4
Pandas Version= 1.4.4


# Series

## Series from Array

In [None]:
# Seed NumPy's global RNG so the same table is drawn on every run.
np.random.seed(100)
# 5 rows x 3 columns of random integers in the half-open range [30, 100).
grades = np.random.randint(low=30, high=100, size=(5, 3))
grades

array([[38, 54, 97],
       [78, 40, 82],
       [83, 96, 44],
       [64, 54, 45],
       [90, 88, 46]])

In [None]:
type(grades)

numpy.ndarray

In [None]:
grades[:,1]

array([54, 40, 96, 54, 88])

In [None]:
# Wrap the second column of `grades` in a Series. No index is passed,
# so pandas labels the rows 0..4 by default.
pd.Series(grades[:,1])

0    54
1    40
2    96
3    54
4    88
dtype: int64

In [None]:
# Build a Series from the first column of `grades`, labelling each value
# with a student name instead of the default 0..4 positions.
Physics = pd.Series(
    data=grades[:, 0],
    index=['john', 'victoria', 'leon', 'Alex', 'Sara'],
)
Physics

john        38
victoria    78
leon        83
Alex        64
Sara        90
dtype: int64

In [None]:
type(Physics)

pandas.core.series.Series

In [None]:
# Select by integer position explicitly. Plain Physics[1] on a Series with
# string labels relies on an ambiguous positional fallback that newer pandas
# versions deprecate; .iloc always means "position".
Physics.iloc[1]

78

In [None]:
Physics['john']

38

In [None]:
Physics.index

Index(['john', 'victoria', 'leon', 'Alex', 'Sara'], dtype='object')

In [None]:
Physics.index[1]

'victoria'

## Series from Dictionary

In [None]:
# Building a Series from a dict: the keys become the index labels and
# the values become the data.
pd.Series({'John':20, 'Sara':100, 'Victoria':100})

John         20
Sara        100
Victoria    100
dtype: int64

## Why Series?

In [None]:
grades

array([[38, 54, 97],
       [78, 40, 82],
       [83, 96, 44],
       [64, 54, 45],
       [90, 88, 46]])

In [None]:
# Student names, used as the index labels for the per-subject Series.
names = [
    'john',
    'victoria',
    'leon',
    'Alex',
    'Sara',
]

In [None]:
# One Series per subject (columns 0 and 1 of `grades`), both labelled
# with the same student names.
Physics, Math = (
    pd.Series(data=grades[:, column], index=names) for column in (0, 1)
)

In [None]:
# Show both Series, one after the other.
print(Physics, Math, sep="\n")

john        38
victoria    78
leon        83
Alex        64
Sara        90
dtype: int64
john        54
victoria    40
leon        96
Alex        54
Sara        88
dtype: int64


In [None]:
# Element-wise average of the two subjects; values are matched up by
# their student-name labels.
(Physics+Math)/2

john        46.0
victoria    59.0
leon        89.5
Alex        59.0
Sara        89.0
dtype: float64

In [None]:
# Blank out one grade in each subject. The None is stored as NaN and
# the Series dtype becomes float64.
Physics['leon'] = None
Math['victoria'] = None
print(Physics, Math, sep="\n")

john        38.0
victoria    78.0
leon         NaN
Alex        64.0
Sara        90.0
dtype: float64
john        54.0
victoria     NaN
leon        96.0
Alex        54.0
Sara        88.0
dtype: float64


In [None]:
# Plain arithmetic propagates NaN: a student missing a grade in either
# subject ends up with NaN as the average.
(Physics+Math)/2

john        46.0
victoria     NaN
leon         NaN
Alex        59.0
Sara        89.0
dtype: float64

In [None]:
# fill_value=0 substitutes 0 for a missing grade before adding, so the
# result has no NaN. The sibling methods Series.sub / .mul / .div take
# fill_value the same way.
Physics.add(Math, fill_value=0)

john         92.0
victoria     78.0
leon         96.0
Alex        118.0
Sara        178.0
dtype: float64

In [None]:
# Average with a missing grade counted as 0. The sum is still divided by 2,
# so a missing grade lowers the student's average.
Physics.add(Math, fill_value=0)/2

john        46.0
victoria    39.0
leon        48.0
Alex        59.0
Sara        89.0
dtype: float64

# DataFrame

In [None]:
grades

array([[38, 54, 97],
       [78, 40, 82],
       [83, 96, 44],
       [64, 54, 45],
       [90, 88, 46]])

In [None]:
pd.DataFrame(data=grades)

Unnamed: 0,0,1,2
0,38,54,97
1,78,40,82
2,83,96,44
3,64,54,45
4,90,88,46


In [None]:
# Same table as above, but with student names as row labels and subject
# names as column labels.
pd.DataFrame(
    data=grades,
    index=['john', 'victoria', 'leon', 'Alex', 'Sara'],
    columns=['Physics', 'Math', 'Chemistry'],
)

Unnamed: 0,Physics,Math,Chemistry
john,38,54,97
victoria,78,40,82
leon,83,96,44
Alex,64,54,45
Sara,90,88,46


In [None]:
# Keep the labelled table in a variable for the rest of the notebook.
# NOTE(review): the previous cell labels these columns Physics, Math,
# Chemistry; here column 0 is labelled Math and column 1 Physics.
# Confirm which order is intended.
Class_of_2030 = pd.DataFrame(
    data=grades,
    index=['john', 'victoria', 'leon', 'Alex', 'Sara'],
    columns=['Math', 'Physics', 'Chemistry'],
)
Class_of_2030

Unnamed: 0,Math,Physics,Chemistry
john,38,54,97
victoria,78,40,82
leon,83,96,44
Alex,64,54,45
Sara,90,88,46


In [None]:
type(Class_of_2030)

pandas.core.frame.DataFrame

In [None]:
# Assign through a single .loc[row, column] call. The chained form
# Class_of_2030['Physics']['leon'] = ... writes to an intermediate object,
# triggers SettingWithCopyWarning, and is not guaranteed to update the frame.
# Storing NaN turns the integer Physics column into float64.
Class_of_2030.loc['leon', 'Physics'] = np.nan
Class_of_2030

Unnamed: 0,Math,Physics,Chemistry
john,38,54.0,97
victoria,78,40.0,82
leon,83,,44
Alex,64,54.0,45
Sara,90,88.0,46


In [None]:
type(Class_of_2030)

pandas.core.frame.DataFrame

In [None]:
Class_of_2030.columns

Index(['Math', 'Physics', 'Chemistry'], dtype='object')

In [None]:
Class_of_2030.index

Index(['john', 'victoria', 'leon', 'Alex', 'Sara'], dtype='object')

In [None]:
Class_of_2030.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, john to Sara
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Math       5 non-null      int64  
 1   Physics    4 non-null      float64
 2   Chemistry  5 non-null      int64  
dtypes: float64(1), int64(2)
memory usage: 320.0+ bytes


In [None]:
Class_of_2030.describe()

Unnamed: 0,Math,Physics,Chemistry
count,5.0,4.0,5.0
mean,70.6,59.0,62.8
std,20.562101,20.428738,24.953958
min,38.0,40.0,44.0
25%,64.0,50.5,45.0
50%,78.0,54.0,46.0
75%,83.0,62.5,82.0
max,90.0,88.0,97.0


### Selection/Slicing

In [None]:
Class_of_2030

Unnamed: 0,Math,Physics,Chemistry
john,38,54.0,97
victoria,78,40.0,82
leon,83,,44
Alex,64,54.0,45
Sara,90,88.0,46


In [None]:
Class_of_2030['Math']

john        38
victoria    78
leon        83
Alex        64
Sara        90
Name: Math, dtype: int64

In [None]:
Class_of_2030[['Math','Physics']]

Unnamed: 0,Math,Physics
john,38,54.0
victoria,78,40.0
leon,83,
Alex,64,54.0
Sara,90,88.0


In [None]:
# Pick two columns, then the rows at positions 2 and 3 (the end of the
# slice is exclusive).
Class_of_2030[['Math', 'Physics']].iloc[2:4]

Unnamed: 0,Math,Physics
leon,83,
Alex,64,54.0


In [None]:
# Select rows and columns by label in one step: .loc[row_labels, column_labels].
Class_of_2030.loc[['victoria', 'leon'], ['Math', 'Physics']]

Unnamed: 0,Math,Physics
victoria,78,40.0
leon,83,


In [None]:
# Keep only the students whose Math grade is at least 50, and show the
# Physics and Math columns for them.
Class_of_2030.loc[Class_of_2030['Math'] >= 50, ['Physics', 'Math']]

Unnamed: 0,Physics,Math
victoria,40.0,78
leon,,83
Alex,54.0,64
Sara,88.0,90


In [None]:
# Combine element-wise conditions with & (AND) or | (OR). Each condition
# needs its own parentheses because & and | bind tighter than comparisons.
Class_of_2030.loc[
    (Class_of_2030['Math'] >= 50) & (Class_of_2030['Math'] < 90),
    ['Physics', 'Math'],
]

Unnamed: 0,Physics,Math
victoria,40.0,78
leon,,83
Alex,54.0,64


### Working with the Index

In [None]:
Class_of_2030

Unnamed: 0,Math,Physics,Chemistry
john,38,54.0,97
victoria,78,40.0,82
leon,83,,44
Alex,64,54.0,45
Sara,90,88.0,46


In [None]:
Class_of_2030.index

Index(['john', 'victoria', 'leon', 'Alex', 'Sara'], dtype='object')

In [None]:
# The same cell reached two ways: by row label with .loc and by row
# position with .iloc. Plain [3] on a string-labelled Series relies on a
# deprecated positional fallback, so the position is made explicit.
print(Class_of_2030['Physics'].loc['Alex'])
print(Class_of_2030['Physics'].iloc[3])


54.0
54.0


In [None]:
# reset_index() moves the name labels into a regular column called 'index'
# and replaces the row labels with the default 0..n-1 RangeIndex.
Class_of_2030=Class_of_2030.reset_index()
Class_of_2030

Unnamed: 0,index,Math,Physics,Chemistry
0,john,38,54.0,97
1,victoria,78,40.0,82
2,leon,83,,44
3,Alex,64,54.0,45
4,Sara,90,88.0,46


In [None]:
Class_of_2030.index

RangeIndex(start=0, stop=5, step=1)

In [None]:
Class_of_2030['index']

0        john
1    victoria
2        leon
3        Alex
4        Sara
Name: index, dtype: object

In [None]:
# Expected to fail: after reset_index() the row labels are 0..4, so the
# label 'Alex' no longer exists and .loc raises KeyError.
print(Class_of_2030['Physics'].loc['Alex'])
# Not reached, because the line above raises first.
print(Class_of_2030['Physics'][3])

KeyError: ignored

In [None]:
Class_of_2030

Unnamed: 0,index,Math,Physics,Chemistry
0,john,38,54.0,97
1,victoria,78,40.0,82
2,leon,83,,44
3,Alex,64,54.0,45
4,Sara,90,88.0,46


In [None]:
Class_of_2030.columns

Index(['index', 'Math', 'Physics', 'Chemistry'], dtype='object')

In [None]:
# Rename only the 'index' column produced by reset_index(). Overwriting
# .columns with a full list silently mislabels data if the column order or
# count ever differs; rename() targets the column by its current name.
Class_of_2030 = Class_of_2030.rename(columns={'index': 'Names'})
Class_of_2030

Unnamed: 0,Names,Math,Physics,Chemistry
0,john,38,54.0,97
1,victoria,78,40.0,82
2,leon,83,,44
3,Alex,64,54.0,45
4,Sara,90,88.0,46


In [None]:
Class_of_2030.index

RangeIndex(start=0, stop=5, step=1)

### Add/Remove Records or Columns

In [None]:
Class_of_2030

Unnamed: 0,Names,Math,Physics,Chemistry
0,john,38,54.0,97
1,victoria,78,40.0,82
2,leon,83,,44
3,Alex,64,54.0,45
4,Sara,90,88.0,46


In [None]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported way to add rows. The new record has no
# Chemistry grade, so that cell becomes NaN and the column turns float.
new_student = pd.DataFrame([{'Names': 'Marc', 'Math': 70, 'Physics': 80}])
Class_of_2030 = pd.concat([Class_of_2030, new_student], ignore_index=True)
Class_of_2030

Unnamed: 0,Names,Math,Physics,Chemistry
0,john,38,54.0,97.0
1,victoria,78,40.0,82.0
2,leon,83,,44.0
3,Alex,64,54.0,45.0
4,Sara,90,88.0,46.0
5,Marc,70,80.0,


In [None]:
# Average of the three subjects. skipna=False keeps the result NaN for any
# student who is missing at least one grade.
Class_of_2030['GPA'] = (
    Class_of_2030[['Math', 'Physics', 'Chemistry']].sum(axis=1, skipna=False) / 3
)
Class_of_2030

Unnamed: 0,Names,Math,Physics,Chemistry,GPA
0,john,38,54.0,97.0,63.0
1,victoria,78,40.0,82.0,66.666667
2,leon,83,,44.0,
3,Alex,64,54.0,45.0,54.333333
4,Sara,90,88.0,46.0,74.666667
5,Marc,70,80.0,,


In [None]:
# Recompute the average with missing Physics/Chemistry grades counted as 0.
Class_of_2030['GPA'] = (
    Class_of_2030['Math']
    .add(Class_of_2030['Physics'].fillna(0))
    .add(Class_of_2030['Chemistry'].fillna(0))
    .div(3)
)
Class_of_2030

Unnamed: 0,Names,Math,Physics,Chemistry,GPA
0,john,38,54.0,97.0,63.0
1,victoria,78,40.0,82.0,66.666667
2,leon,83,,44.0,42.333333
3,Alex,64,54.0,45.0,54.333333
4,Sara,90,88.0,46.0,74.666667
5,Marc,70,80.0,,50.0


In [None]:
type((Class_of_2030['Math']+Class_of_2030['Physics']+Class_of_2030['Chemistry'])/3)

pandas.core.series.Series

In [None]:
# Same average as before (missing grades counted as 0), rounded to two
# decimal places.
Class_of_2030['GPA'] = (
    (
        Class_of_2030['Math']
        + Class_of_2030['Physics'].fillna(0)
        + Class_of_2030['Chemistry'].fillna(0)
    )
    / 3
).round(2)
Class_of_2030

Unnamed: 0,Names,Math,Physics,Chemistry,GPA
0,john,38,54.0,97.0,63.0
1,victoria,78,40.0,82.0,66.67
2,leon,83,,44.0,42.33
3,Alex,64,54.0,45.0,54.33
4,Sara,90,88.0,46.0,74.67
5,Marc,70,80.0,,50.0


In [None]:
# Remove the row whose index label is 0 and keep the result.
Class_of_2030 = Class_of_2030.drop(index=0)
Class_of_2030

Unnamed: 0,Names,Math,Physics,Chemistry,GPA
1,victoria,78,40.0,82.0,66.67
2,leon,83,,44.0,42.33
3,Alex,64,54.0,45.0,54.33
4,Sara,90,88.0,46.0,74.67
5,Marc,70,80.0,,50.0


In [None]:
# Show the frame without the Math and Chemistry columns. The result is not
# assigned back, so Class_of_2030 itself is unchanged.
Class_of_2030.drop(columns=['Math', 'Chemistry'])

Unnamed: 0,Names,Physics,GPA
1,victoria,40.0,66.67
2,leon,,42.33
3,Alex,54.0,54.33
4,Sara,88.0,74.67
5,Marc,80.0,50.0


In [None]:
Class_of_2030

Unnamed: 0,Names,Math,Physics,Chemistry,GPA
1,victoria,78,40.0,82.0,66.67
2,leon,83,,44.0,42.33
3,Alex,64,54.0,45.0,54.33
4,Sara,90,88.0,46.0,74.67
5,Marc,70,80.0,,50.0


In [None]:
Class_of_2030.index

Int64Index([1, 2, 3, 4, 5], dtype='int64')

In [None]:
# Preview the labels: the letter M followed by each row's index value.
[f'M{i}' for i in Class_of_2030.index]

['M1', 'M2', 'M3', 'M4', 'M5']

In [None]:
# Store those labels in a new 'Mnumber' column, one per row in order.
Class_of_2030['Mnumber'] = [f'M{i}' for i in Class_of_2030.index]
Class_of_2030

Unnamed: 0,Names,Math,Physics,Chemistry,GPA,Mnumber
1,victoria,78,40.0,82.0,66.67,M1
2,leon,83,,44.0,42.33,M2
3,Alex,64,54.0,45.0,54.33,M3
4,Sara,90,88.0,46.0,74.67,M4
5,Marc,70,80.0,,50.0,M5


In [None]:
# Promote the 'Mnumber' column to be the row index; it is removed from
# the regular columns.
Class_of_2030=Class_of_2030.set_index('Mnumber')
Class_of_2030

Unnamed: 0_level_0,Names,Math,Physics,Chemistry,GPA
Mnumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M1,victoria,78,40.0,82.0,66.67
M2,leon,83,,44.0,42.33
M3,Alex,64,54.0,45.0,54.33
M4,Sara,90,88.0,46.0,74.67
M5,Marc,70,80.0,,50.0


In [None]:
# Drop the row labelled 'M3'. The result is only displayed, not assigned
# back to Class_of_2030.
Class_of_2030.drop(index='M3')

Unnamed: 0_level_0,Names,Math,Physics,Chemistry,GPA
Mnumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
M1,victoria,78,40.0,82.0,66.67
M2,leon,83,,44.0,42.33
M4,Sara,90,88.0,46.0,74.67
M5,Marc,70,80.0,,50.0


In [None]:
# Expected to fail: drop() looks rows up by index label, and the labels
# are now 'M1'..'M5', so the integer 3 is not found and KeyError is raised.
Class_of_2030.drop(3)

KeyError: ignored