In [1]:
# import pandas
import pandas as pd

In [4]:
# Read the CSV file. The file starts with description lines above the real
# header row, so the first 3 rows are skipped.
df = pd.read_csv('nepal.csv', skiprows=3)

In [5]:
# preview the first five rows of the raw frame
df.head(n=5)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,Unnamed: 60
0,Nepal,NPL,"Agricultural machinery, tractors",AG.AGR.TRAC.NO,,180.0,190.0,200.0,215.0,225.0,...,,,,,,,,,,
1,Nepal,NPL,Fertilizer consumption (% of fertilizer produc...,AG.CON.FERT.PT.ZS,,,,,,,...,,,,,,,,,,
2,Nepal,NPL,Fertilizer consumption (kilograms per hectare ...,AG.CON.FERT.ZS,,,,,,,...,1.608929,1.364865,18.202727,25.086697,34.88371,44.069877,57.728477,,,
3,Nepal,NPL,Agricultural land (sq. km),AG.LND.AGRI.K2,,35530.0,35530.0,35630.0,35530.0,35530.0,...,41660.0,41520.0,41400.0,41260.0,41266.0,41210.0,41210.0,,,
4,Nepal,NPL,Agricultural land (% of land area),AG.LND.AGRI.ZS,,24.846154,24.846154,24.916084,24.846154,24.846154,...,29.061737,28.964074,28.880363,28.7827,28.786885,28.74782,28.74782,,,


#### Task: Select by columns
---

In [11]:
# keep every row, but only the indicator label and three year columns,
# picked explicitly by column name
df1 = df.loc[:, ['Indicator Name', '2010', '2011', '2012']]

In [12]:
# preview the first five rows of the column subset
df1.head(n=5)

Unnamed: 0,Indicator Name,2010,2011,2012
0,"Agricultural machinery, tractors",,,
1,Fertilizer consumption (% of fertilizer produc...,,,
2,Fertilizer consumption (kilograms per hectare ...,25.086697,34.88371,44.069877
3,Agricultural land (sq. km),41260.0,41266.0,41210.0
4,Agricultural land (% of land area),28.7827,28.786885,28.74782


*Selecting columns by label range and by column name separately, then merging the two selections together*

In [15]:
# Slice all the rows. `.loc` replaces the old `.ix` indexer, which was
# deprecated in pandas 0.20 and removed in pandas 1.0.
df.loc[:].head(3)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,Unnamed: 60
0,Nepal,NPL,"Agricultural machinery, tractors",AG.AGR.TRAC.NO,,180.0,190.0,200.0,215.0,225.0,...,,,,,,,,,,
1,Nepal,NPL,Fertilizer consumption (% of fertilizer produc...,AG.CON.FERT.PT.ZS,,,,,,,...,,,,,,,,,,
2,Nepal,NPL,Fertilizer consumption (kilograms per hectare ...,AG.CON.FERT.ZS,,,,,,,...,1.608929,1.364865,18.202727,25.086697,34.88371,44.069877,57.728477,,,


In [17]:
# Slice all rows together with a range of column labels. Label slices are
# inclusive on both ends, so this yields the columns '2010' through '2015'.
# `.loc` replaces the `.ix` indexer that was removed in pandas 1.0.
df.loc[:, '2010':'2015'].head(3)

Unnamed: 0,2010,2011,2012,2013,2014,2015
0,,,,,,
1,,,,,,
2,25.086697,34.88371,44.069877,57.728477,,


In [19]:
# passing a list of column names (note the double brackets) selects those
# columns and gives back a DataFrame
df[['Indicator Name']].head(n=3)

Unnamed: 0,Indicator Name
0,"Agricultural machinery, tractors"
1,Fertilizer consumption (% of fertilizer produc...
2,Fertilizer consumption (kilograms per hectare ...


In [25]:
# Combine the two selections side by side (axis=1 aligns on the row index)
# into a single dataframe. `.loc` replaces the `.ix` indexer that was removed
# in pandas 1.0.
# NOTE: the trailing .head(3) means df2 holds only the first three rows.
df2 = pd.concat([df[['Indicator Name']], df.loc[:, '2010':'2015']], axis=1).head(3)

In [26]:
# show the combined frame
df2.head(n=3)

Unnamed: 0,Indicator Name,2010,2011,2012,2013,2014,2015
0,"Agricultural machinery, tractors",,,,,,
1,Fertilizer consumption (% of fertilizer produc...,,,,,,
2,Fertilizer consumption (kilograms per hectare ...,25.086697,34.88371,44.069877,57.728477,,


In [23]:
# See how the axis value changes the dataframe: axis=0 stacks the two pieces
# on top of each other, so each piece has NaN in the columns it lacks.
# `.loc` replaces the `.ix` indexer that was removed in pandas 1.0.
# sort=True pins the sorted column order; newer pandas versions no longer
# sort the non-concatenation axis by default.
pd.concat([df[['Indicator Name']], df.loc[:, '2010':'2015']], axis=0, sort=True).head(3)

Unnamed: 0,2010,2011,2012,2013,2014,2015,Indicator Name
0,,,,,,,"Agricultural machinery, tractors"
1,,,,,,,Fertilizer consumption (% of fertilizer produc...
2,,,,,,,Fertilizer consumption (kilograms per hectare ...


**Task: Remove NaN**

In [35]:
# Drop rows containing NaN. The defaults are spelled out here: axis=0 works
# on rows and how='any' drops a row as soon as any of its columns is NaN.
df1.dropna(axis=0, how='any').head(n=3)

Unnamed: 0,Indicator Name,2010,2011,2012
2,Fertilizer consumption (kilograms per hectare ...,25.086697,34.88371,44.069877
3,Agricultural land (sq. km),41260.0,41266.0,41210.0
4,Agricultural land (% of land area),28.7827,28.786885,28.74782


In [33]:
# Drop a column only when every value in that column is NaN.
df2.dropna(axis='columns', how='all')

Unnamed: 0,Indicator Name,2010,2011,2012,2013
0,"Agricultural machinery, tractors",,,,
1,Fertilizer consumption (% of fertilizer produc...,,,,
2,Fertilizer consumption (kilograms per hectare ...,25.086697,34.88371,44.069877,57.728477


In [36]:
# df1.set_axis

In [40]:
# flip rows and columns: the original column names become the row index
df3 = df1.T

In [41]:
# preview the transposed frame
df3.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1409,1410,1411,1412,1413,1414,1415,1416,1417,1418
Indicator Name,"Agricultural machinery, tractors",Fertilizer consumption (% of fertilizer produc...,Fertilizer consumption (kilograms per hectare ...,Agricultural land (sq. km),Agricultural land (% of land area),Arable land (hectares),Arable land (hectares per person),Arable land (% of land area),Land under cereal production (hectares),Permanent cropland (% of land area),...,Account at a financial institution (% age 15+)...,"Account at a financial institution, male (% ag...","Account at a financial institution, female (% ...","Account at a financial institution, income, po...","Account at a financial institution, income, ri...",Mobile account (% age 15+) [w2],"Mobile account, male (% age 15+) [w2]","Mobile account, female (% age 15+) [w2]","Mobile account, income, poorest 40% (% ages 15...","Mobile account, income, richest 60% (% ages 15..."
2010,,,25.0867,41260,28.7827,2.18e+06,0.0811135,15.2075,3.39355e+06,1.06034,...,,,,,,,,,,
2011,,,34.8837,41266,28.7869,2.1627e+06,0.0795718,15.0869,3.46851e+06,1.17614,...,25.3086,29.5583,21.2196,14.379,32.5893,,,,,
2012,,,44.0699,41210,28.7478,2.118e+06,0.0770167,14.775,3.48453e+06,1.45099,...,,,,,,,,,,


In [43]:
# Reindex the columns with their own labels, which leaves the frame unchanged.
# `reindex(columns=...)` replaces `reindex_axis(..., axis=1)`, which was
# deprecated in pandas 0.21 and removed in pandas 1.0.
df3.reindex(columns=df3.columns)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1409,1410,1411,1412,1413,1414,1415,1416,1417,1418
Indicator Name,"Agricultural machinery, tractors",Fertilizer consumption (% of fertilizer produc...,Fertilizer consumption (kilograms per hectare ...,Agricultural land (sq. km),Agricultural land (% of land area),Arable land (hectares),Arable land (hectares per person),Arable land (% of land area),Land under cereal production (hectares),Permanent cropland (% of land area),...,Account at a financial institution (% age 15+)...,"Account at a financial institution, male (% ag...","Account at a financial institution, female (% ...","Account at a financial institution, income, po...","Account at a financial institution, income, ri...",Mobile account (% age 15+) [w2],"Mobile account, male (% age 15+) [w2]","Mobile account, female (% age 15+) [w2]","Mobile account, income, poorest 40% (% ages 15...","Mobile account, income, richest 60% (% ages 15..."
2010,,,25.0867,41260,28.7827,2.18e+06,0.0811135,15.2075,3.39355e+06,1.06034,...,,,,,,,,,,
2011,,,34.8837,41266,28.7869,2.1627e+06,0.0795718,15.0869,3.46851e+06,1.17614,...,25.3086,29.5583,21.2196,14.379,32.5893,,,,,
2012,,,44.0699,41210,28.7478,2.118e+06,0.0770167,14.775,3.48453e+06,1.45099,...,,,,,,,,,,


In [47]:
# First row (the indicator names) selected by position. The row index holds
# string labels, so the old `.ix[0, :]` fell back to positional lookup;
# `.iloc` states that explicitly and replaces the removed `.ix` indexer.
df3.iloc[0, :]

0                        Agricultural machinery, tractors
1       Fertilizer consumption (% of fertilizer produc...
2       Fertilizer consumption (kilograms per hectare ...
3                              Agricultural land (sq. km)
4                      Agricultural land (% of land area)
5                                  Arable land (hectares)
6                       Arable land (hectares per person)
7                            Arable land (% of land area)
8                 Land under cereal production (hectares)
9                     Permanent cropland (% of land area)
10      Rural land area where elevation is below 5 met...
11      Rural land area where elevation is below 5 met...
12      Urban land area where elevation is below 5 met...
13      Urban land area where elevation is below 5 met...
14      Land area where elevation is below 5 meters (%...
15                                   Forest area (sq. km)
16                           Forest area (% of land area)
17      Agricu

In [48]:
# Does not work: reindex *selects* existing column labels, it does not rename
# them. The current column labels are the integers 0..N, so none of the
# indicator names match and every cell comes back as NaN. To relabel the
# columns, assign new labels or set the index before transposing.
# `reindex(columns=...)` and `.iloc` replace the removed `reindex_axis` and
# `.ix` APIs.
df3.reindex(columns=df3.iloc[0, :])

Indicator Name,"Agricultural machinery, tractors",Fertilizer consumption (% of fertilizer production),Fertilizer consumption (kilograms per hectare of arable land),Agricultural land (sq. km),Agricultural land (% of land area),Arable land (hectares),Arable land (hectares per person),Arable land (% of land area),Land under cereal production (hectares),Permanent cropland (% of land area),...,Account at a financial institution (% age 15+) [ts],"Account at a financial institution, male (% age 15+) [ts]","Account at a financial institution, female (% age 15+) [ts]","Account at a financial institution, income, poorest 40% (% ages 15+) [ts]","Account at a financial institution, income, richest 60% (% ages 15+) [ts]",Mobile account (% age 15+) [w2],"Mobile account, male (% age 15+) [w2]","Mobile account, female (% age 15+) [w2]","Mobile account, income, poorest 40% (% ages 15+) [w2]","Mobile account, income, richest 60% (% ages 15+) [w2]"
Indicator Name,,,,,,,,,,,...,,,,,,,,,,
2010,,,,,,,,,,,...,,,,,,,,,,
2011,,,,,,,,,,,...,,,,,,,,,,
2012,,,,,,,,,,,...,,,,,,,,,,


In [50]:
# look at the un-transposed column subset again
df1.head(n=5)

Unnamed: 0,Indicator Name,2010,2011,2012
0,"Agricultural machinery, tractors",,,
1,Fertilizer consumption (% of fertilizer produc...,,,
2,Fertilizer consumption (kilograms per hectare ...,25.086697,34.88371,44.069877
3,Agricultural land (sq. km),41260.0,41266.0,41210.0
4,Agricultural land (% of land area),28.7827,28.786885,28.74782


In [56]:
# take the first three rows and use the indicator name as the row index
df5 = df1.head(3).set_index('Indicator Name')

In [57]:
# transpose so the indicator names become the column labels and the years
# become the rows
df5.T

Indicator Name,"Agricultural machinery, tractors",Fertilizer consumption (% of fertilizer production),Fertilizer consumption (kilograms per hectare of arable land)
2010,,,25.086697
2011,,,34.88371
2012,,,44.069877


In [58]:
# pd.pivot_table?