# Pandas Tutorial - 2 

## Pandas Dataframes

### Dataframe object creation

In [2]:
import pandas as pd

In [3]:
# Column-oriented source data: every key becomes a column label and every
# list holds that column's values, one entry per row.
D = dict(
    State=["AP", "HP", "UP", "KA"],
    Size=[10, 20, 30, 40],
    Year=[2001, 2002, 2003, 2004],
)
# With no extra arguments the columns follow the dict's key order and the
# rows get the default 0..n-1 integer index.
X = pd.DataFrame(data=D)
X

Unnamed: 0,State,Size,Year
0,AP,10,2001
1,HP,20,2002
2,UP,30,2003
3,KA,40,2004


In [4]:
# `columns` chooses which dict keys are kept; "Year" is left out here.
X = pd.DataFrame(
    data=D,
    columns=["State", "Size"],
)
X

Unnamed: 0,State,Size
0,AP,10
1,HP,20
2,UP,30
3,KA,40


In [5]:
# The order of `columns` decides the column order of the frame, regardless
# of the order of the keys in D.
X = pd.DataFrame(
    data=D,
    columns=["Year", "State", "Size"],
)
X

Unnamed: 0,Year,State,Size
0,2001,AP,10
1,2002,HP,20
2,2003,UP,30
3,2004,KA,40


In [6]:
# "Population" is not a key of D, so that column is created with no values
# (shown as empty cells in the output).
X = pd.DataFrame(
    data=D,
    columns=["Year", "State", "Size", "Population"],
)
X

Unnamed: 0,Year,State,Size,Population
0,2001,AP,10,
1,2002,HP,20,
2,2003,UP,30,
3,2004,KA,40,


### Access Column Details

In [12]:
# Rebuild the four-column frame so the column-access examples in this
# section start from a known state.
X = pd.DataFrame(
    data=D,
    columns=["Year", "State", "Size", "Population"],
)
X

Unnamed: 0,Year,State,Size,Population
0,2001,AP,10,
1,2002,HP,20,
2,2003,UP,30,
3,2004,KA,40,


In [13]:
# Selecting a single column by its label returns that column as a Series.
X['Year']

0    2001
1    2002
2    2003
3    2004
Name: Year, dtype: int64

In [14]:
# Bracket access and attribute access return the same column. Attribute
# access only works when the label is a valid Python identifier that does
# not clash with an existing DataFrame attribute.
print(X['Year'], X.Year, sep="\n\n") # same result printed twice

0    2001
1    2002
2    2003
3    2004
Name: Year, dtype: int64

0    2001
1    2002
2    2003
3    2004
Name: Year, dtype: int64


In [16]:
# Indexing with a list of labels selects several columns at once; the
# printed result contains only the three requested columns.
print(X[['Year', 'State', 'Size']])

   Year State  Size
0  2001    AP    10
1  2002    HP    20
2  2003    UP    30
3  2004    KA    40


In [17]:
# The column labels of X, held in an Index object.
X.columns

Index(['Year', 'State', 'Size', 'Population'], dtype='object')

In [18]:
# Assigning a scalar fills every row of the column with that value.
X['Population'] = 10
X

Unnamed: 0,Year,State,Size,Population
0,2001,AP,10,10
1,2002,HP,20,10
2,2003,UP,30,10
3,2004,KA,40,10


In [19]:
# Assigning a list sets the column row by row, in order: one value per row.
X['Population'] = [10,100,1000,10000]
X

Unnamed: 0,Year,State,Size,Population
0,2001,AP,10,10
1,2002,HP,20,100
2,2003,UP,30,1000
3,2004,KA,40,10000


In [20]:
# Recreate X from D so the Population column is empty again.
X = pd.DataFrame(
    data=D,
    columns=["Year", "State", "Size", "Population"],
)
X

Unnamed: 0,Year,State,Size,Population
0,2001,AP,10,
1,2002,HP,20,
2,2003,UP,30,
3,2004,KA,40,


In [21]:
# Comparing a column with a scalar gives one boolean per row; assigning the
# result to a label that does not exist yet adds it as a new column.
X['Size>20'] = X['Size'].gt(20)
X

Unnamed: 0,Year,State,Size,Population,Size>20
0,2001,AP,10,,False
1,2002,HP,20,,False
2,2003,UP,30,,True
3,2004,KA,40,,True


### Update Column value, Transpose & Delete Column

In [22]:
# Same four columns, but with explicit row labels instead of the default
# 0..3 integer index.
X = pd.DataFrame(
    data=D,
    columns=["Year", "State", "Size", "Population"],
    index=["a", "b", "c", "d"],
)
X

Unnamed: 0,Year,State,Size,Population
a,2001,AP,10,
b,2002,HP,20,
c,2003,UP,30,
d,2004,KA,40,


In [23]:
# Population values carried in a Series whose labels are 'a'..'d'.
XPop = pd.Series(
    data=[100, 200, 300, 400],
    index=list("abcd"),
)
XPop

a    100
b    200
c    300
d    400
dtype: int64

In [28]:
# Fill the empty Population column from the Series. XPop carries the same
# 'a'..'d' labels as X's index, so each value lands on its matching row.
X["Population"] = XPop
X

Unnamed: 0,Year,State,Size,Population
a,2001,AP,10,100
b,2002,HP,20,200
c,2003,UP,30,300
d,2004,KA,40,400


In [29]:
X.T # same as X.transpose(): row labels become columns and column labels become rows

Unnamed: 0,a,b,c,d
Year,2001,2002,2003,2004
State,AP,HP,UP,KA
Size,10,20,30,40
Population,100,200,300,400


In [32]:
# Redisplay X: taking the transpose did not modify the original frame.
X

Unnamed: 0,Year,State,Size,Population
a,2001,AP,10,100
b,2002,HP,20,200
c,2003,UP,30,300
d,2004,KA,40,400


In [33]:
# `del` removes the column from X itself; the frame is modified in place.
del X["Population"]
X

Unnamed: 0,Year,State,Size
a,2001,AP,10
b,2002,HP,20
c,2003,UP,30
d,2004,KA,40


### Properties of Index objects
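Index labels may be repeated (duplicates are allowed), but an Index object itself is immutable: its entries cannot be reassigned in place.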

In [34]:
# Start from a frame with unique row labels 'a'..'d' and an empty
# Population column.
X = pd.DataFrame(
    data=D,
    index=["a", "b", "c", "d"],
    columns=["Year", "State", "Size", "Population"],
)
X

Unnamed: 0,Year,State,Size,Population
a,2001,AP,10,
b,2002,HP,20,
c,2003,UP,30,
d,2004,KA,40,


In [35]:
# Row labels (X.index) and column labels (X.columns) are both Index objects.
print(X.index, X.columns, sep="\n\n")

Index(['a', 'b', 'c', 'd'], dtype='object')

Index(['Year', 'State', 'Size', 'Population'], dtype='object')


In [36]:
# Row labels do not have to be unique: 'a' is used for three rows here.
X = pd.DataFrame(
    data=D,
    index=["a", "a", "a", "d"],
    columns=["Year", "State", "Size", "Population"],
)
X

Unnamed: 0,Year,State,Size,Population
a,2001,AP,10,
a,2002,HP,20,
a,2003,UP,30,
d,2004,KA,40,


In [37]:
# The repeated 'a' labels are kept as-is in the row Index.
X.index

Index(['a', 'a', 'a', 'd'], dtype='object')

In [38]:
# An Index is immutable, so item assignment raises TypeError. The error is
# the point of this cell: catch it and print it so that "Restart & Run All"
# does not stop here and the remaining cells still execute.
try:
    X.index[0] = 'b'
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Index does not support mutable operations

In [39]:
# The column labels are stored in an Index object too.
X.columns

Index(['Year', 'State', 'Size', 'Population'], dtype='object')

In [40]:
# The column Index is immutable as well, so item assignment raises
# TypeError. Catch and print the expected error so the notebook can be
# executed end to end without failing on this cell.
try:
    X.columns[0] = "Col1"
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Index does not support mutable operations