# PANDAS

### Day - 4

#### Pandas is a powerful, open-source Python library for data analysis, manipulation, and visualization.

#### There are many things to like about pandas: It's well-documented, has a huge amount of community support, is under active development, and plays well with other Python libraries (such as matplotlib, scikit-learn, and seaborn).

In [85]:
#import the libraries
import pandas as pd
import numpy as np

In [86]:
# Load the iris data set from a local CSV file and preview the first five rows.
iris = pd.read_csv(r"C:\Users\user\Downloads\iris.csv")
iris.head()

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [87]:
# Rename the columns to snake_case labels (assigned by position).
iris.columns = [
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
    "class",
]
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [88]:
# Count the missing values in each column.
# (`pd.isnull(iris).sum()` is an equivalent spelling.)
iris.isna().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
class           0
dtype: int64

In [89]:
# Report the frame's dimensions as (rows, columns).
print(iris.shape)

# Blank out petal_length (column position 2) for rows 10 through 29.
iris.iloc[10:30, 2] = np.nan
iris.head(20)

(150, 5)


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


In [90]:
# Replace the missing petal_length values with 1.
# The result is assigned back to the column: calling fillna(inplace=True) on
# `iris.petal_length` acts on an intermediate Series, which pandas warns about
# and which no longer updates `iris` under Copy-on-Write.
iris["petal_length"] = iris["petal_length"].fillna(1)
iris.head(20)




Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
5,5.4,3.9,1.7,0.4,setosa
6,4.6,3.4,1.4,0.3,setosa
7,5.0,3.4,1.5,0.2,setosa
8,4.4,2.9,1.4,0.2,setosa
9,4.9,3.1,1.5,0.1,setosa


In [91]:
# Remove the "class" column from the frame in place.
iris.drop(columns="class", inplace=True)
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [92]:
# Set every column of the first three rows to NaN.
iris.iloc[:3] = np.nan
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,,,,
1,,,,
2,,,,
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [93]:
# Drop, in place, every row that contains at least one missing value
# (dropna's default behaviour).
iris.dropna(inplace=True)
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
5,5.4,3.9,1.7,0.4
6,4.6,3.4,1.4,0.3
7,5.0,3.4,1.5,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [94]:
# Renumber the rows from 0 in place; drop=True discards the old index
# instead of keeping it as a new column.
iris.reset_index(inplace=True, drop=True)
iris

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,4.6,3.1,1.5,0.2
1,5.0,3.6,1.4,0.2
2,5.4,3.9,1.7,0.4
3,4.6,3.4,1.4,0.3
4,5.0,3.4,1.5,0.2
...,...,...,...,...
142,6.7,3.0,5.2,2.3
143,6.3,2.5,5.0,1.9
144,6.5,3.0,5.2,2.0
145,6.2,3.4,5.4,2.3


In [101]:
# Draw 10 random integers from 0 to 9 (repeats are possible).
random = np.random.randint(0, 10, size=10)
random

array([4, 9, 4, 0, 8, 8, 8, 9, 0, 8])

In [103]:
# Blank out petal_length for the rows whose index labels appear in `random`.
# A single .loc assignment writes into `iris` itself; the chained form
# `iris.petal_length[random] = ...` assigns through an intermediate Series,
# which raises SettingWithCopyWarning and leaves `iris` unchanged once
# pandas Copy-on-Write is active.
iris.loc[random, "petal_length"] = np.nan
iris.head(10)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,4.6,3.1,,0.2
1,5.0,3.6,1.4,0.2
2,5.4,3.9,1.7,0.4
3,4.6,3.4,1.4,0.3
4,5.0,3.4,,0.2
5,4.4,2.9,1.4,0.2
6,4.9,3.1,1.5,0.1
7,5.4,3.7,1.0,0.2
8,4.8,3.4,,0.2
9,4.8,3.0,,0.1


In [106]:
# Fill the missing petal_length values with 1.
# The result is assigned back to the column: calling fillna(inplace=True) on
# `iris.petal_length` acts on an intermediate Series, which pandas warns about
# and which no longer updates `iris` under Copy-on-Write.
iris["petal_length"] = iris["petal_length"].fillna(1)
iris.head(10)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,4.6,3.1,1.0,0.2
1,5.0,3.6,1.4,0.2
2,5.4,3.9,1.7,0.4
3,4.6,3.4,1.4,0.3
4,5.0,3.4,1.0,0.2
5,4.4,2.9,1.4,0.2
6,4.9,3.1,1.5,0.1
7,5.4,3.7,1.0,0.2
8,4.8,3.4,1.0,0.2
9,4.8,3.0,1.0,0.1
