# Pandas Tutorial

## Pandas Series

### Pandas Series Creation

In [2]:
import pandas as pd

In [3]:
# A Series built from a plain list gets a default integer index (0, 1, 2, ...).
X = pd.Series(data=[10, 20, 30, 40])
X

0    10
1    20
2    30
3    40
dtype: int64

In [4]:
# A Series is made of two aligned parts: the index (labels) and the values.
# `to_numpy()` is the accessor the pandas docs recommend over `.values`;
# for this integer Series it prints exactly the same array.
print(X.index, X.to_numpy(), sep="\n\n")

RangeIndex(start=0, stop=4, step=1)

[10 20 30 40]


In [5]:
# Look up a single element by its index label (label 0 here).
X.loc[0]

10

In [6]:
# A scalar label returns one element; a list of labels returns a sub-Series.
print(X.loc[1], X.loc[[1, 2, 3]], sep="\n\n")

20

1    20
2    30
3    40
dtype: int64


In [9]:
# Three ways to select: one label, a list of labels, and a positional slice.
# `.loc` is label-based and `.iloc` is position-based. With plain `[]` the
# integer slice 1:4 is positional (end excluded) even though X[1] is a label
# lookup, so the explicit accessors make the difference visible.
print(X.loc[1], X.loc[[1, 2, 3]], X.iloc[1:4], sep="\n\n")

20

1    20
2    30
3    40
dtype: int64

1    20
2    30
3    40
dtype: int64


### Pandas Series creation with Index Values

In [10]:
# Pass `index=` to attach custom labels instead of the default 0..n-1.
X = pd.Series(data=[10, 20, 30, 40], index=list("abcd"))
X

a    10
b    20
c    30
d    40
dtype: int64

In [11]:
# With custom labels the index is a generic Index of strings; the values are
# unchanged. `to_numpy()` is the accessor the pandas docs recommend over
# `.values` and prints the same array here.
print(X.index, X.to_numpy(), sep="\n\n")

Index(['a', 'b', 'c', 'd'], dtype='object')

[10 20 30 40]


In [13]:
# Label-based selection: a single label, a list of labels, and a label slice.
# Unlike a positional slice, a label slice INCLUDES its end point, so
# 'b':'d' returns the entries for b, c and d.
print(X.loc["b"], X.loc[["b", "c", "d"]], X.loc["b":"d"], sep="\n\n")

20

b    20
c    30
d    40
dtype: int64

b    20
c    30
d    40
dtype: int64


### Filter Operation

In [14]:
# Sample data mixing positive and negative values for the filtering examples.
X = pd.Series(
    data=[10, -10, 20, -20, 30, -30, 40, -40],
)
X

0    10
1   -10
2    20
3   -20
4    30
5   -30
6    40
7   -40
dtype: int64

In [15]:
# Comparing a Series with a scalar is element-wise and yields a boolean Series.
X < 0

0    False
1     True
2    False
3     True
4    False
5     True
6    False
7     True
dtype: bool

In [18]:
# Boolean-mask indexing: keep only the entries where the condition is True.
X.loc[X < 0]

1   -10
3   -20
5   -30
7   -40
dtype: int64

In [19]:
# The same masking pattern applied with two different thresholds.
print(
    X.loc[X < 0],
    X.loc[X < -10],
    sep="\n\n",
)

1   -10
3   -20
5   -30
7   -40
dtype: int64

3   -20
5   -30
7   -40
dtype: int64


### Mathematical Operations on Series objects

In [24]:
# Two Series whose labels only partly overlap ('b', 'c' and 'd' are shared).
X1 = pd.Series(data=[10, 20, 30, 40], index=list("abcd"))
X2 = pd.Series(data=[20, 30, 40, 50], index=list("bcde"))
# Arithmetic aligns on labels; a label present in only one operand gives NaN,
# which is why the result is float64 rather than int64.
X1 + X2

a     NaN
b    40.0
c    60.0
d    80.0
e     NaN
dtype: float64

In [23]:
# Both operands share the same index, so every label matches: no NaN appears
# and the result keeps the int64 dtype.
X1 + X1

a    20
b    40
c    60
d    80
dtype: int64

In [22]:
# Multiplication follows the same label-alignment rule as addition: only the
# shared labels 'b', 'c' and 'd' get a value; 'a' and 'e' become NaN.
X1 * X2

a       NaN
b     400.0
c     900.0
d    1600.0
e       NaN
dtype: float64

### Pandas Series Creation from a Dictionary

In [25]:
# A dict maps naturally onto a Series: keys become the index, values the data.
D = dict(a=1, b=2, c=3, d=4)
X = pd.Series(data=D)
X

a    1
b    2
c    3
d    4
dtype: int64

In [28]:
# An explicit `index=` picks the dict entries by label, in the order given.
X = pd.Series(data=D, index=list("dcba"))
X

d    4
c    3
b    2
a    1
dtype: int64

In [29]:
# Label 'e' is not a key of D, so its value is NaN and the dtype becomes float64.
X = pd.Series(data=D, index=["d", "c", "b", "a", "e"])
X

d    4.0
c    3.0
b    2.0
a    1.0
e    NaN
dtype: float64

In [30]:
# Flag the missing entries; `isna()` is the method that `isnull()` aliases.
X.isna()

d    False
c    False
b    False
a    False
e     True
dtype: bool

In [31]:
# Flag the entries that are present; `notna()` is the method that `notnull()` aliases.
X.notna()

d     True
c     True
b     True
a     True
e    False
dtype: bool

## Pandas DataFrame

### DataFrame object creation

In [33]:
# Column-oriented data: each key is a column name, each list holds that
# column's values (all lists have the same length).
D = dict(
    State=["AP", "HP", "UP", "KA"],
    Size=[10, 20, 30, 40],
    Year=[2001, 2002, 2003, 2004],
)
X = pd.DataFrame(data=D)
X

Unnamed: 0,State,Size,Year
0,AP,10,2001
1,HP,20,2002
2,UP,30,2003
3,KA,40,2004


In [34]:
# `columns=` keeps only the listed keys of D ("Year" is dropped here).
X = pd.DataFrame(data=D, columns=["State", "Size"])
X

Unnamed: 0,State,Size
0,AP,10
1,HP,20
2,UP,30
3,KA,40


In [35]:
# `columns=` also controls the column order: "Year" now comes first.
X = pd.DataFrame(data=D, columns=["Year", "State", "Size"])
X

Unnamed: 0,Year,State,Size
0,2001,AP,10
1,2002,HP,20
2,2003,UP,30
3,2004,KA,40


In [36]:
# "Population" is not a key of D, so that column is created with no data:
# every row shows a missing value.
X = pd.DataFrame(
    data=D,
    columns=["Year", "State", "Size", "Population"],
)
X

Unnamed: 0,Year,State,Size,Population
0,2001,AP,10,
1,2002,HP,20,
2,2003,UP,30,
3,2004,KA,40,
