# Pandas practice for Data Science


## Series object

In [5]:
import pandas as pd
# Build a one-dimensional labelled array (a pandas Series) from a Python list
s1 = pd.Series(data=[1, 2, 3, 4, 5, 6])
# A bare name on the last line of a cell is rendered as the cell's output
s1

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [4]:
# Inspect the Python type of s1 (the Series created in an earlier cell)
type(s1)

pandas.core.series.Series

In [10]:
# Attach explicit labels instead of the default 0..n-1 positions:
# pass them through the `index` keyword of pd.Series, one label per value
s1 = pd.Series(data=[1, 2, 3, 4, 5, 6], index=list('abcdef'))
s1

a    1
b    2
c    3
d    4
e    5
f    6
dtype: int64

In [11]:
# Another way to supply the index: build the Series from a dict.
# The dict keys become the index labels and the dict values become the data.
pd.Series({'a' :10, 'b' : 20, 'c' : 30})

a    10
b    20
c    30
dtype: int64

In [14]:
# Passing an explicit index together with a dict selects and orders the entries by label.
# A label with no matching dict key ('d') gets NaN, and the values are stored as float64.
s2 = pd.Series(dict(a=10, b=20, c=30), index=list('bcda'))
s2

b    20.0
c    30.0
d     NaN
a    10.0
dtype: float64

In [17]:
# Extracting a single value by position.
# NOTE: s3 is a plain Python list here, not a pandas Series, so this is ordinary
# list indexing; positions start at 0, so [1] is the second element.
s3 = [1,2,3,4,5,6,7,8,9]
s3[1]


2

In [20]:
# Slice from position 1 up to, but not including, position 3
s3 = [1,2,3,4,5,6,7,8,9]
s3[1:3]

[2, 3]

In [21]:
# Omitting the start begins at position 0: positions 0, 1 and 2 (position 3 is excluded)
s3 = [1,2,3,4,5,6,7,8,9]
s3[:3]

[1, 2, 3]

In [22]:
# A negative stop counts from the end: everything except the last element
s3 = [1,2,3,4,5,6,7,8,9]
s3[:-1]

[1, 2, 3, 4, 5, 6, 7, 8]

In [23]:
# Slice from position 1 up to, but not including, position -2 (the second-to-last element)
s3 = [1,2,3,4,5,6,7,8,9]
s3[1:-2]
# negative positions count from the end: -1 is the last element, -2 the one before it

[2, 3, 4, 5, 6, 7]

In [24]:
# Omitting both the start and the stop selects every element
s3 = [1,2,3,4,5,6,7,8,9]
s3[:]

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [26]:
# Slice from position -2 (the second-to-last element) through the end of the list
s3 = [1,2,3,4,5,6,7,8,9]
s3[-2:]

[8, 9]

## Arithmetic operations on Series elements

In [33]:
# Adding a scalar to a Series applies the addition to every element
s4 = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8, 9])
s4 + 2

0     3
1     4
2     5
3     6
4     7
5     8
6     9
7    10
8    11
dtype: int64

In [36]:
# Element-wise addition of two Series of the same length
s5 = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8, 9])
s6 = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8, 9])
s5 + s6

0     2
1     4
2     6
3     8
4    10
5    12
6    14
7    16
8    18
dtype: int64

In [37]:
# Element-wise subtraction: identical inputs give 0 at every position
s5 = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8, 9])
s6 = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8, 9])
s5 - s6

0    0
1    0
2    0
3    0
4    0
5    0
6    0
7    0
8    0
dtype: int64

In [38]:
# Element-wise division: the result is float64 even though both inputs are integers
s5 = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8, 9])
s6 = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8, 9])
s5 / s6

0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
5    1.0
6    1.0
7    1.0
8    1.0
dtype: float64

In [39]:
# Element-wise multiplication: identical inputs give the square of each value
s5 = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8, 9])
s6 = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8, 9])
s5 * s6

0     1
1     4
2     9
3    16
4    25
5    36
6    49
7    64
8    81
dtype: int64

In [43]:
# Element-wise comparison: the result is a boolean Series that is True
# wherever the s5 value is strictly greater than the s6 value
s5 = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8, 9])
s6 = pd.Series(data=[1, 1, 3, 4, 6, 6, 9, 8, 5])
s5 > s6

0    False
1     True
2    False
3    False
4    False
5    False
6    False
7    False
8     True
dtype: bool

## Creating 2d Data frame

In [44]:
import pandas as pd
# Create a DataFrame from a dictionary: each key becomes a column name and each list its values.
# 'name' holds ram, shyam, mohan and 'marks' holds 10, 15, 19.
pd.DataFrame({'name':['ram', 'shyam', 'mohan'], 'marks':[10, 15, 19]})

Unnamed: 0,marks,name
0,10,ram
1,15,shyam
2,19,mohan


## Builtin functions


### shape : attribute (no parentheses) holding the number of rows & columns of a data frame
### head() : displays first 5 rows of data frame
### tail() : displays last 5 rows of data frame
### describe() : displays summary statistics (count, mean, std, min, quartiles, max) of the numeric columns



In [48]:
# Load the Iris dataset from the file Iris.csv into a DataFrame
iris = pd.read_csv('Iris.csv')
# Preview the first five rows
iris.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [49]:
# Preview the last five rows
iris.tail()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica
149,150,5.9,3.0,5.1,1.8,Iris-virginica


In [52]:
iris.shape
# shape is an attribute, not a method, so it is written without (); it gives (rows, columns)

(150, 6)

In [54]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column
iris.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


### iloc[] : to extract a part of data from given data frame

In [61]:
# Extract the first 3 rows and the first 4 columns by integer position
# (row positions 0-2, column positions 0-3; the stop positions 3 and 4 are excluded).
# The CSV is read again here even though iris was already loaded in an earlier cell.
iris = pd.read_csv('Iris.csv')
iris.iloc[0:3,0:4]

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm
0,1,5.1,3.5,1.4
1,2,4.9,3.0,1.4
2,3,4.7,3.2,1.3


In [65]:
# Extract rows at positions 5-9 and columns at positions 3-4
# (the stop positions 10 and 5 are excluded)
iris.iloc[5:10,3:5]

Unnamed: 0,PetalLengthCm,PetalWidthCm
5,1.7,0.4
6,1.4,0.3
7,1.5,0.2
8,1.4,0.2
9,1.5,0.1


### loc[] : similar to iloc[], but selects by label (row index labels and column names) instead of integer position

In [68]:
# Select the rows labelled 5 through 10 and three columns by name.
# The column names are passed as a list: in .loc a tuple is the notation for a
# single MultiIndex key, so a list is the unambiguous way to request several columns.
iris.loc[5:10, ['PetalLengthCm', 'PetalWidthCm', 'Species']]
# note: iloc excludes the stop position of a row slice, while loc is label-based and
# includes the stop label (row 10 appears in this output). Compare with the iloc examples.

Unnamed: 0,PetalLengthCm,PetalWidthCm,Species
5,1.7,0.4,Iris-setosa
6,1.4,0.3,Iris-setosa
7,1.5,0.2,Iris-setosa
8,1.4,0.2,Iris-setosa
9,1.5,0.1,Iris-setosa
10,1.5,0.2,Iris-setosa


### Drop rows/columns
#### axis=0 will drop rows, axis=1 will drop columns 

In [74]:
# Drop a column by name (here Species); axis=1 targets columns.
# The result is only displayed, not assigned, so iris itself keeps the Species column.
iris.drop('Species', axis=1)


Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,1,5.1,3.5,1.4,0.2
1,2,4.9,3.0,1.4,0.2
2,3,4.7,3.2,1.3,0.2
3,4,4.6,3.1,1.5,0.2
4,5,5.0,3.6,1.4,0.2
5,6,5.4,3.9,1.7,0.4
6,7,4.6,3.4,1.4,0.3
7,8,5.0,3.4,1.5,0.2
8,9,4.4,2.9,1.4,0.2
9,10,4.9,3.1,1.5,0.1


In [78]:
# Drop rows by index label; axis=0 targets rows.
# Here the rows labelled 1, 2 and 3 are dropped; row 0 is kept.
iris.drop([1,2,3], axis=0)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
5,6,5.4,3.9,1.7,0.4,Iris-setosa
6,7,4.6,3.4,1.4,0.3,Iris-setosa
7,8,5.0,3.4,1.5,0.2,Iris-setosa
8,9,4.4,2.9,1.4,0.2,Iris-setosa
9,10,4.9,3.1,1.5,0.1,Iris-setosa
10,11,5.4,3.7,1.5,0.2,Iris-setosa
11,12,4.8,3.4,1.6,0.2,Iris-setosa
12,13,4.8,3.0,1.4,0.1,Iris-setosa


### mean, median, minimum, maximum

In [80]:
# Column-wise mean of the numeric columns.
# numeric_only=True skips the string-valued Species column; without it pandas >= 2.0
# raises a TypeError, whereas older versions silently dropped that column.
iris.mean(numeric_only=True)

Id               75.500000
SepalLengthCm     5.843333
SepalWidthCm      3.054000
PetalLengthCm     3.758667
PetalWidthCm      1.198667
dtype: float64

In [81]:
# Column-wise median of the numeric columns.
# numeric_only=True skips the string-valued Species column; without it pandas >= 2.0
# raises a TypeError, whereas older versions silently dropped that column.
iris.median(numeric_only=True)

Id               75.50
SepalLengthCm     5.80
SepalWidthCm      3.00
PetalLengthCm     4.35
PetalWidthCm      1.30
dtype: float64

In [82]:
# Column-wise minimum; the Species column holds strings, so its minimum is the
# alphabetically first name (Iris-setosa)
iris.min()

Id                         1
SepalLengthCm            4.3
SepalWidthCm               2
PetalLengthCm              1
PetalWidthCm             0.1
Species          Iris-setosa
dtype: object

In [83]:
# Column-wise maximum; the Species column holds strings, so its maximum is the
# alphabetically last name (Iris-virginica)
iris.max()

Id                          150
SepalLengthCm               7.9
SepalWidthCm                4.4
PetalLengthCm               6.9
PetalWidthCm                2.5
Species          Iris-virginica
dtype: object

In [84]:
# describe() reports the mean, min, max and 50% (median) values in a single table
iris.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


### operation on column values(reducing values etc)

In [85]:
# User-defined helper used to halve the SepalLengthCm and PetalLengthCm values
def reduce_by_half(x):
    """Return ``x`` multiplied by 0.5.

    ``x`` may be anything that supports multiplication by a float, for example
    a number or a pandas Series/column, in which case every element is halved.
    """
    halved = x * 0.5
    return halved
# Apply reduce_by_half to the two selected columns; the halved values are only
# displayed, iris itself is left unchanged
iris[['SepalLengthCm', 'PetalLengthCm']].apply(reduce_by_half)

Unnamed: 0,SepalLengthCm,PetalLengthCm
0,2.55,0.70
1,2.45,0.70
2,2.35,0.65
3,2.30,0.75
4,2.50,0.70
5,2.70,0.85
6,2.30,0.70
7,2.50,0.75
8,2.20,0.70
9,2.45,0.75


### .value_counts() : Value count 

In [86]:
# Count how many rows carry each distinct Species value
iris['Species'].value_counts()

Iris-versicolor    50
Iris-virginica     50
Iris-setosa        50
Name: Species, dtype: int64

### .sort_values() : increasing order 

In [87]:
# Show the first 10 rows in their original order, for comparison with the sorted output
iris.head(10)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
5,6,5.4,3.9,1.7,0.4,Iris-setosa
6,7,4.6,3.4,1.4,0.3,Iris-setosa
7,8,5.0,3.4,1.5,0.2,Iris-setosa
8,9,4.4,2.9,1.4,0.2,Iris-setosa
9,10,4.9,3.1,1.5,0.1,Iris-setosa


In [90]:
# Sort the rows in increasing order of their PetalWidthCm value;
# each row keeps its original index label
iris.sort_values(by='PetalWidthCm')

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
32,33,5.2,4.1,1.5,0.1,Iris-setosa
13,14,4.3,3.0,1.1,0.1,Iris-setosa
37,38,4.9,3.1,1.5,0.1,Iris-setosa
9,10,4.9,3.1,1.5,0.1,Iris-setosa
12,13,4.8,3.0,1.4,0.1,Iris-setosa
34,35,4.9,3.1,1.5,0.1,Iris-setosa
0,1,5.1,3.5,1.4,0.2,Iris-setosa
27,28,5.2,3.5,1.5,0.2,Iris-setosa
28,29,5.2,3.4,1.4,0.2,Iris-setosa
29,30,4.7,3.2,1.6,0.2,Iris-setosa
