# Pandas

In [15]:
import pandas as pd
import numpy as np

# Chapter 5

## Series

In [3]:
a = pd.Series([1,2,3,4,5])

In [4]:
a

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [5]:
print(a)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [6]:
a.values

array([1, 2, 3, 4, 5])

In [8]:
a.index

RangeIndex(start=0, stop=5, step=1)

In [9]:
a = pd.Series([[6,7,8],[3,4,5],[1,2,3]])
a

0    [6, 7, 8]
1    [3, 4, 5]
2    [1, 2, 3]
dtype: object

### Series index

In [14]:
a = pd.Series([1,2,3],index=['a','b','c'])
a

a    1
b    2
c    3
dtype: int64

In [16]:
a = pd.Series([[1,2,3],[4,5,6],[4,5,6]],index=['jan','feb','mar'])
a

jan    [1, 2, 3]
feb    [4, 5, 6]
mar    [4, 5, 6]
dtype: object

In [18]:
list(a.index)

['jan', 'feb', 'mar']

### Series name

In [31]:
a = pd.Series([[1,2,3],[4,5,6]],index=['a','b'],name='data')
a

a    [1, 2, 3]
b    [4, 5, 6]
Name: data, dtype: object

### Getting Data

In [2]:
a = pd.Series([1,2,3,4,5],index=['M','T','W','T','F'],name='Daily Data')
a

M    1
T    2
W    3
T    4
F    5
Name: Daily Data, dtype: int64

In [3]:
a[1]

2

In [4]:
a.values[1]

2

In [5]:
a.index[1]

'T'

In [6]:
a['M']

1

In [7]:
a[['M','T']] # fancy indexing

M    1
T    2
T    4
Name: Daily Data, dtype: int64

In [8]:
a > 3

M    False
T    False
W    False
T     True
F     True
Name: Daily Data, dtype: bool

In [9]:
a[a>3]

T    4
F    5
Name: Daily Data, dtype: int64

In [11]:
a.index[a > 3]

Index(['T', 'F'], dtype='object')

In [12]:
a = pd.Series([[1,2,3],[4,5,6],[7,8,9]],index=['jan','feb','mar'],name='monthly Data')
a

jan    [1, 2, 3]
feb    [4, 5, 6]
mar    [7, 8, 9]
Name: monthly Data, dtype: object

In [13]:
a[1]

[4, 5, 6]

In [16]:
a[1][1]

5

In [19]:
a['feb']

[4, 5, 6]

In [25]:
a[1:3]

feb    [4, 5, 6]
mar    [7, 8, 9]
Name: monthly Data, dtype: object

In [26]:
a = pd.Series([2,2,2],[3,3,3])
a

3    2
3    2
3    2
dtype: int64

In [52]:
a = np.array([1,2,])
b = np.array(['mon','tue'])

In [53]:
c = pd.Series(a,index=b)
c

mon    1
tue    2
dtype: int64

In [59]:
obj = {'m':22,'t':44,'w':{'a':[22,33]}}
obj

{'m': 22, 't': 44, 'w': {'a': [22, 33]}}

In [60]:
a = pd.Series(obj)
a

m                 22
t                 44
w    {'a': [22, 33]}
dtype: object

### isnull

In [67]:
obj = {'a':1,'b':2,'c':3}
a = pd.Series(obj,index=['a','b','c','d'])
a

a    1.0
b    2.0
c    3.0
d    NaN
dtype: float64

In [68]:
a.isnull()

a    False
b    False
c    False
d     True
dtype: bool

In [69]:
a[a.isnull()]

d   NaN
dtype: float64

In [70]:
a[a.isnull()] = 0

In [72]:
a

a    1.0
b    2.0
c    3.0
d    0.0
dtype: float64

## Data Frames

#### Merging Series

In [2]:
a = pd.Series([11,22,33,44])
b = pd.Series([56,78,90,36])

In [3]:
obj = {'col1':a , 'col2':b} 

In [4]:
obj

{'col1': 0    11
 1    22
 2    33
 3    44
 dtype: int64, 'col2': 0    56
 1    78
 2    90
 3    36
 dtype: int64}

In [5]:
c = pd.DataFrame(obj)

In [6]:
c

Unnamed: 0,col1,col2
0,11,56
1,22,78
2,33,90
3,44,36


In [14]:
a = pd.Series([11,22,33,44],name='PK',index=[1,2,3,4])
b = pd.Series([56,78,90,36],name='UK',index=[1,2,3,4])
obj = {a.name:a , b.name:b} 
pd.DataFrame(obj)

Unnamed: 0,PK,UK
1,11,56
2,22,78
3,33,90
4,44,36


#### Creating from Obj

In [20]:
a = {
    "Year":[2001,2002,2003,2004],
    "Country":["Pakistan","India","Bangladesh","Nepal"],
    "GDP":[3,5,4,4],
    "Population in (m)":[2,12,2,1]
}

In [21]:
a

{'Year': [2001, 2002, 2003, 2004],
 'Country': ['Pakistan', 'India', 'Bangladesh', 'Nepal'],
 'GDP': [3, 5, 4, 4],
 'Population in (m)': [2, 12, 2, 1]}

In [24]:
b = pd.DataFrame(a,index=[1,2,3,4])
b

Unnamed: 0,Year,Country,GDP,Population in (m)
1,2001,Pakistan,3,2
2,2002,India,5,12
3,2003,Bangladesh,4,2
4,2004,Nepal,4,1


In [26]:
a = pd.Series([[1,2,3],[4,5,6]])
a

0    [1, 2, 3]
1    [4, 5, 6]
dtype: object

In [27]:
b = {
    'col1':a
}
b

{'col1': 0    [1, 2, 3]
 1    [4, 5, 6]
 dtype: object}

In [28]:
c = pd.DataFrame(b)
c

Unnamed: 0,col1
0,"[1, 2, 3]"
1,"[4, 5, 6]"


In [29]:
## thus Series are for single dimension and DataFrames are for multiDimension purpose

#### Extracting Data from DataFrames

In [12]:
a = pd.DataFrame({
    "Year":[2001,2002,2003,2004],
    "Country":["Pakistan","India","Bangladesh","Nepal"],
    "GDP":[3,5,4,4],
    "Population in (m)":[2,12,2,1]
},index=[1,2,3,4])


In [31]:
a

Unnamed: 0,Year,Country,GDP,Population in (m)
1,2001,Pakistan,3,2
2,2002,India,5,12
3,2003,Bangladesh,4,2
4,2004,Nepal,4,1


In [32]:
a['Country']

1      Pakistan
2         India
3    Bangladesh
4         Nepal
Name: Country, dtype: object

In [34]:
a['Country'][1]

'Pakistan'

In [39]:
a.Country[2]

'India'

In [42]:
a.index

Int64Index([1, 2, 3, 4], dtype='int64')

In [59]:
a[2:3]

Unnamed: 0,Year,Country,GDP,Population in (m)
3,2003,Bangladesh,4,2


In [62]:
a.loc[1]

Year                     2001
Country              Pakistan
GDP                         3
Population in (m)           2
Name: 1, dtype: object

In [67]:
a.loc[2]

Year                  2002
Country              India
GDP                      5
Population in (m)       12
Name: 2, dtype: object

In [13]:
a

Unnamed: 0,Year,Country,GDP,Population in (m)
1,2001,Pakistan,3,2
2,2002,India,5,12
3,2003,Bangladesh,4,2
4,2004,Nepal,4,1


In [16]:
print(a.loc[[1],['GDP']])

   GDP
1    3


In [118]:
a.loc[[2,3],['GDP','country']]

Unnamed: 0,GDP,country
2,6,US
3,3,IND


In [120]:
a.iloc[[2,3],[0,1]]

Unnamed: 0,country,GDP
2,US,6
3,IND,3


In [121]:
a

Unnamed: 0,country,GDP,Population m
0,PK,2,4
1,UK,5,7
2,US,6,9
3,IND,3,12


In [123]:
np.max(a)
np.min(a)

country         IND
GDP               2
Population m      4
dtype: object

In [124]:
np.exp(a.GDP)

0      7.389056
1    148.413159
2    403.428793
3     20.085537
Name: GDP, dtype: float64

#### multiTables using pandas

In [72]:
a = {
    'Pakistan':{
        2000:2,2001:3,2002:3,2004:2
    },
    "UK":{
        2000:6,2001:7,2002:2
    }
}
a

{'Pakistan': {2000: 2, 2001: 3, 2002: 3, 2004: 2},
 'UK': {2000: 6, 2001: 7, 2002: 2}}

In [73]:
b = pd.DataFrame(a)

In [74]:
b

Unnamed: 0,Pakistan,UK
2000,2,6.0
2001,3,7.0
2002,3,2.0
2004,2,


## Pandas Function

#### Reindex

In [5]:
a = pd.DataFrame({
    'country':['PK','UK','US','IND'],
    'GDP':[2,5,6,3],
    'Population m':[4,7,9,12]
})



In [4]:
a

Unnamed: 0,country,GDP,Population m
0,PK,2,4
1,UK,5,7
2,US,6,9
3,IND,3,12


In [5]:
a.reindex([2,3,1,0])

Unnamed: 0,country,GDP,Population m
2,US,6,9
3,IND,3,12
1,UK,5,7
0,PK,2,4


In [10]:
a.reindex(columns=['GDP','country'],index=[2,1,3])

Unnamed: 0,GDP,country
2,6,US
1,5,UK
3,3,IND


In [88]:
a.reindex([2,3,0,1],method='ffill')

Unnamed: 0,country,GDP,Population m
2,US,6,9
3,IND,3,12
0,PK,2,4
1,UK,5,7


In [17]:
a.reindex(index=[1,2,3,4,5],method='ffill')

Unnamed: 0,country,GDP,Population m
1,UK,5,7
2,US,6,9
3,IND,3,12
4,IND,3,12
5,IND,3,12


In [21]:
a.reindex(index=[1,2,3,4,5],columns=['GDP','country'])

Unnamed: 0,GDP,country
1,5.0,UK
2,6.0,US
3,3.0,IND
4,,
5,,


#### drop mehtod

In [89]:
a

Unnamed: 0,country,GDP,Population m
0,PK,2,4
1,UK,5,7
2,US,6,9
3,IND,3,12


In [102]:
a.drop(index=[1,2],columns=['GDP'])

Unnamed: 0,country,Population m
0,PK,4
3,IND,12


In [113]:
a.drop('GDP',axis=1)

Unnamed: 0,country,Population m
0,PK,4
1,UK,7
2,US,9
3,IND,12


#### Arthimatic and Data Alignment

In [128]:
a = pd.DataFrame({
    'country':['PK','UK','US','IND'],
    'GDP':[2,5,6,3],
    'Population m':[4,7,9,12]
})
a

Unnamed: 0,country,GDP,Population m
0,PK,2,4
1,UK,5,7
2,US,6,9
3,IND,3,12


In [129]:
a+a

Unnamed: 0,country,GDP,Population m
0,PKPK,4,8
1,UKUK,10,14
2,USUS,12,18
3,INDIND,6,24


In [133]:
b = pd.DataFrame({
    'country':['PK','UK','US',],
    'GDP':[2,5,6],
    'Population m':[4,7,12]
})
b

Unnamed: 0,country,GDP,Population m
0,PK,2,4
1,UK,5,7
2,US,6,12


In [134]:
a+b

Unnamed: 0,country,GDP,Population m
0,PKPK,4.0,8.0
1,UKUK,10.0,14.0
2,USUS,12.0,21.0
3,,,


In [145]:
a = pd.DataFrame({'age':[2,3,4],'hieght':[44,54,56]})
b = pd.DataFrame({'age':[2,3,4,6],'hieght':[44,54,56,45]})

In [146]:
a

Unnamed: 0,age,hieght
0,2,44
1,3,54
2,4,56


In [147]:
b

Unnamed: 0,age,hieght
0,2,44
1,3,54
2,4,56
3,6,45


In [148]:
a+b

Unnamed: 0,age,hieght
0,4.0,88.0
1,6.0,108.0
2,8.0,112.0
3,,


In [150]:
a.add(b,fill_value=0)

Unnamed: 0,age,hieght
0,4.0,88.0
1,6.0,108.0
2,8.0,112.0
3,6.0,45.0


In [162]:
c = pd.Series([1,2])
c

0    1
1    2
dtype: int64

In [163]:
a

Unnamed: 0,age,hieght
0,2,44
1,3,54
2,4,56


In [164]:
a-c

Unnamed: 0,age,hieght,0,1
0,,,,
1,,,,
2,,,,


In [165]:
a

Unnamed: 0,age,hieght
0,2,44
1,3,54
2,4,56


In [171]:
d = pd.Series([22,33,44],index=[1,2,3])
d

1    22
2    33
3    44
dtype: int64

In [172]:
a['d'] = d

In [173]:
a

Unnamed: 0,age,hieght,d
0,2,44,
1,3,54,22.0
2,4,56,33.0


In [180]:
a.drop(index=[1],columns=['d'])

Unnamed: 0,age,hieght
0,2,44
2,4,56


In [193]:
a


Unnamed: 0,age,hieght,d
0,2,44,
1,3,54,22.0
2,4,56,33.0


In [196]:
a[['d']]

Unnamed: 0,d
0,
1,22.0
2,33.0


In [197]:
a


Unnamed: 0,age,hieght,d
0,2,44,
1,3,54,22.0
2,4,56,33.0


In [22]:
a = pd.DataFrame({
    'country':['PK','UK','US','IND'],
    'GDP':[2,5,6,3],
    'Population m':[4,7,9,12]
})
a

Unnamed: 0,country,GDP,Population m
0,PK,2,4
1,UK,5,7
2,US,6,9
3,IND,3,12


In [199]:
a.sort_index(axis=0)

Unnamed: 0,country,GDP,Population m
0,PK,2,4
1,UK,5,7
2,US,6,9
3,IND,3,12


In [206]:
a.sort_values(by='country')

Unnamed: 0,country,GDP,Population m
3,IND,3,12
0,PK,2,4
1,UK,5,7
2,US,6,9


In [210]:
a.rank(method='max',axis=1)

Unnamed: 0,GDP,Population m
0,1.0,2.0
1,1.0,2.0
2,1.0,2.0
3,1.0,2.0


In [24]:
b = pd.Series([1,2],index=['GDP','Population m'])
b

GDP             1
Population m    2
dtype: int64

In [39]:
a.loc[[0,1,2,3],['GDP','Population m']]

Unnamed: 0,GDP,Population m
0,2,4
1,5,7
2,6,9
3,3,12


In [40]:
a.loc[[0,1,2,3],['GDP','Population m']] + b

Unnamed: 0,GDP,Population m
0,3,6
1,6,9
2,7,11
3,4,14


### Integer Indexing

In [27]:
a = pd.Series([1,2,3,4,5],index=['a','b','c','d','e'])
a

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [28]:
a[2
 'a']

SyntaxError: invalid syntax (<ipython-input-28-3a954256d1e7>, line 2)

In [29]:
a[-1]

5

In [30]:
a = pd.Series([1,2,3,4,5])
a

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [31]:
a[-1]

KeyError: -1

In [32]:
a.iloc[1]

2

In [33]:
a.iloc[-1]

5

In [35]:
a = pd.DataFrame({'col1':[2,3,4],'col2':['a','s','d']})
a

Unnamed: 0,col1,col2
0,2,a
1,3,s
2,4,d


In [36]:
a.iloc[[-1],[-1]]

Unnamed: 0,col2
2,d


#### Apply Mehtod

In [42]:
a = pd.DataFrame({
    "Year":[2001,2002,2003,2004],
    "Country":["Pakistan","India","Bangladesh","Nepal"],
    "GDP":[3,5,4,4],
    "Population in (m)":[2,12,2,1]
},index=[1,2,3,4])
a

Unnamed: 0,Year,Country,GDP,Population in (m)
1,2001,Pakistan,3,2
2,2002,India,5,12
3,2003,Bangladesh,4,2
4,2004,Nepal,4,1


In [72]:
f = lambda x: x.max()
f2 = lambda x: x+100

In [73]:
a.apply(f,axis='index')

GDP                   5
Population in (m)    12
dtype: int64

In [74]:
a = pd.DataFrame({
    "GDP":[3,5,4,4],
    "Population in (m)":[2,12,2,1]
},index=[1,2,3,4])
a

Unnamed: 0,GDP,Population in (m)
1,3,2
2,5,12
3,4,2
4,4,1


In [75]:
a.applymap(f2)

Unnamed: 0,GDP,Population in (m)
1,103,102
2,105,112
3,104,102
4,104,101


#### Sorting and Ranking

In [81]:
a = pd.DataFrame({
    "Year":[2001,2002,2003,2004],
    "Country":["Pakistan","India","Bangladesh","Nepal"],
    "GDP":[3,5,4,4],
    "Population in (m)":[2,12,2,1]
},index=[4,1,2,3])
a

Unnamed: 0,Year,Country,GDP,Population in (m)
4,2001,Pakistan,3,2
1,2002,India,5,12
2,2003,Bangladesh,4,2
3,2004,Nepal,4,1


In [82]:
b = pd.Series([22,33,44,55],index=['c','d','a','b'])
b

c    22
d    33
a    44
b    55
dtype: int64

In [83]:
b.sort_index()

a    44
b    55
c    22
d    33
dtype: int64

In [84]:
a.sort_index()

Unnamed: 0,Year,Country,GDP,Population in (m)
1,2002,India,5,12
2,2003,Bangladesh,4,2
3,2004,Nepal,4,1
4,2001,Pakistan,3,2


In [85]:
b.sort_values()

c    22
d    33
a    44
b    55
dtype: int64

In [86]:
a.sort_values(by='Year')

Unnamed: 0,Year,Country,GDP,Population in (m)
4,2001,Pakistan,3,2
1,2002,India,5,12
2,2003,Bangladesh,4,2
3,2004,Nepal,4,1


In [95]:
a.sort_values(by='GDP',ascending=False)

Unnamed: 0,Year,Country,GDP,Population in (m)
1,2002,India,5,12
2,2003,Bangladesh,4,2
3,2004,Nepal,4,1
4,2001,Pakistan,3,2


In [96]:
a.sort_values(by=['GDP','Population in (m)'])

Unnamed: 0,Year,Country,GDP,Population in (m)
4,2001,Pakistan,3,2
3,2004,Nepal,4,1
2,2003,Bangladesh,4,2
1,2002,India,5,12


In [97]:
a.sort_values(by=['Population in (m)','GDP'])

Unnamed: 0,Year,Country,GDP,Population in (m)
3,2004,Nepal,4,1
4,2001,Pakistan,3,2
2,2003,Bangladesh,4,2
1,2002,India,5,12


In [98]:
b

c    22
d    33
a    44
b    55
dtype: int64

In [102]:
b.rank(method='dense')

c    1.0
d    2.0
a    3.0
b    4.0
dtype: float64

In [103]:
a

Unnamed: 0,Year,Country,GDP,Population in (m)
4,2001,Pakistan,3,2
1,2002,India,5,12
2,2003,Bangladesh,4,2
3,2004,Nepal,4,1


In [107]:
a.rank(axis=0)

Unnamed: 0,Year,Country,GDP,Population in (m)
4,1.0,4.0,1.0,2.5
1,2.0,2.0,4.0,4.0
2,3.0,1.0,2.5,2.5
3,4.0,3.0,2.5,1.0


In [112]:
b.rank(method='first')

c    1.0
d    2.0
a    3.0
b    4.0
dtype: float64

### Stats Mehtods

In [3]:
a = pd.DataFrame({
    "Year":[2001,2002,2003,2004],
    "Country":["Pakistan","India","Bangladesh","Nepal"],
    "GDP":[3,5,4,4],
    "Population in (m)":[2,12,2,1]
},index=[4,1,2,3])
a

Unnamed: 0,Year,Country,GDP,Population in (m)
4,2001,Pakistan,3,2
1,2002,India,5,12
2,2003,Bangladesh,4,2
3,2004,Nepal,4,1


In [9]:
b = pd.DataFrame({
    "Year":[2001,2002,2003,2004],
    "GDP":[3,5,4,4],
    "Population in (m)":[2,12,2,1]
},index=[4,1,2,3])
b

Unnamed: 0,Year,GDP,Population in (m)
4,2001,3,2
1,2002,5,12
2,2003,4,2
3,2004,4,1


In [4]:
a.sum()

Year                                         8010
Country              PakistanIndiaBangladeshNepal
GDP                                            16
Population in (m)                              17
dtype: object

In [5]:
a.sum(axis=1)

4    2006
1    2019
2    2009
3    2009
dtype: int64

In [6]:
a.mean()

Year                 2002.50
GDP                     4.00
Population in (m)       4.25
dtype: float64

In [11]:
b.max()

Year                 2004
GDP                     5
Population in (m)      12
dtype: int64

In [12]:
b.idxmax(axis=1)

4    Year
1    Year
2    Year
3    Year
dtype: object

In [13]:
b.idxmax(axis=0)

Year                 3
GDP                  1
Population in (m)    1
dtype: int64

In [14]:
a.cumsum()

Unnamed: 0,Year,Country,GDP,Population in (m)
4,2001,Pakistan,3,2
1,4003,PakistanIndia,8,14
2,6006,PakistanIndiaBangladesh,12,16
3,8010,PakistanIndiaBangladeshNepal,16,17


In [15]:
a.describe()

Unnamed: 0,Year,GDP,Population in (m)
count,4.0,4.0,4.0
mean,2002.5,4.0,4.25
std,1.290994,0.816497,5.188127
min,2001.0,3.0,1.0
25%,2001.75,3.75,1.75
50%,2002.5,4.0,2.0
75%,2003.25,4.25,4.5
max,2004.0,5.0,12.0


### data from DataReader

In [28]:
from pandas_datareader import wb

In [29]:
matches = wb.search('gdp.*capita.*const')

In [40]:
dat = wb.download(indicator='NY.GDP.PCAP.KD', country=['US', 'CA', 'MX'], start=2005, end=2010)

In [41]:
dat

Unnamed: 0_level_0,Unnamed: 1_level_0,NY.GDP.PCAP.KD
country,year,Unnamed: 2_level_1
Canada,2010,47450.31847
Canada,2009,46542.904868
Canada,2008,48497.560167
Canada,2007,48536.539413
Canada,2006,48014.931197
Canada,2005,47257.472197
Mexico,2010,9271.398233
Mexico,2009,8947.741474
Mexico,2008,9587.636339
Mexico,2007,9622.047957


### series Mehtods

In [44]:
a = pd.Series(['a','b','c','c','d','e','e'])
a

0    a
1    b
2    c
3    c
4    d
5    e
6    e
dtype: object

In [45]:
a.unique()

array(['a', 'b', 'c', 'd', 'e'], dtype=object)

In [48]:
a.value_counts()

c    2
e    2
d    1
a    1
b    1
dtype: int64

In [50]:
a.isin(['a','e'])

0     True
1    False
2    False
3    False
4    False
5     True
6     True
dtype: bool

# Chapter 6

## reading/writing data from Textfiles

### read_csv and read_table

In [52]:
import pandas as pd

In [57]:
file1 = pd.read_csv('csvFiles/csvfile1.csv')

In [58]:
file1

Unnamed: 0,Country,GDP,Population,Per Capita Income
0,Pakistan,4,25,500
1,China,9,100,1000
2,USA,8,89,1300
3,UK,7,46,1400
4,England,8,32,1200


In [61]:
file1 = pd.read_csv('csvFiles/csvfile1.csv',header=None)
file1

Unnamed: 0,0,1,2,3
0,Country,GDP,Population,Per Capita Income
1,Pakistan,4,25,500
2,China,9,100,1000
3,USA,8,89,1300
4,UK,7,46,1400
5,England,8,32,1200


In [65]:
file1 = pd.read_csv('csvFiles/csvfile1.csv',names=['col1','col2','col3','col4','col5'])
file1

Unnamed: 0,col1,col2,col3,col4,col5
0,Country,GDP,Population,Per Capita Income,
1,Pakistan,4,25,500,
2,China,9,100,1000,
3,USA,8,89,1300,
4,UK,7,46,1400,
5,England,8,32,1200,


In [68]:
file1 = pd.read_csv('csvFiles/csvfile1.csv',index_col='Country')
file1

Unnamed: 0_level_0,GDP,Population,Per Capita Income
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Pakistan,4,25,500
China,9,100,1000
USA,8,89,1300
UK,7,46,1400
England,8,32,1200


In [73]:
file1.loc[['UK'],['GDP']]

Unnamed: 0_level_0,GDP
Country,Unnamed: 1_level_1
UK,7


In [80]:
file1 = pd.read_csv('csvFiles/csvfile2.csv',nrows=10)
file1

Unnamed: 0,Country,GDP,Population,Per Capita Income
0,Pakistan,4,25,500
1,China,9,100,1000
2,USA,8,89,1300
3,UK,7,46,1400
4,England,8,32,1200
5,Pakistan,4,25,500
6,China,9,100,1000
7,USA,8,89,1300
8,UK,7,46,1400
9,England,8,32,1200


In [81]:
file1 = pd.read_csv('csvFiles/csvfile2.csv',chunksize=10)
file1

<pandas.io.parsers.TextFileReader at 0x7f0321394c90>

In [82]:
for a in file1:
    print('this',a)

this     Country  GDP  Population  Per Capita Income
0  Pakistan    4          25                500
1     China    9         100               1000
2       USA    8          89               1300
3        UK    7          46               1400
4   England    8          32               1200
5  Pakistan    4          25                500
6     China    9         100               1000
7       USA    8          89               1300
8        UK    7          46               1400
9   England    8          32               1200
this      Country  GDP  Population  Per Capita Income
10  Pakistan    4          25                500
11     China    9         100               1000
12       USA    8          89               1300
13        UK    7          46               1400
14   England    8          32               1200
15  Pakistan    4          25                500
16     China    9         100               1000
17       USA    8          89               1300
18        UK    7    

In [87]:
file1 = pd.read_csv('csvFiles/csvfile1.csv',index_col='Country',skiprows=[5,1])
file1

Unnamed: 0_level_0,GDP,Population,Per Capita Income
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
China,9,100,1000
USA,8,89,1300
UK,7,46,1400
England,8,32,1200
