# Pandas for Data Analysis

#### Tutorial from https://www.youtube.com/watch?v=oGzU688xCUs

In [2]:
import pandas as pd

In [3]:
# Show the installed pandas version so readers know which release produced these outputs
pd.__version__

'0.23.4'

Open a file with the **read_csv** function, using the **delimiter** parameter to say how the columns are separated (a tab, `'\t'`, for this `.tsv` file)

In [4]:
# Parse the tab-separated file. The DataFrame returned by read_csv is the
# cell's last expression, so the notebook displays it.
pd.read_csv('data/gapminder.tsv', delimiter='\t')

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.853030
2,Afghanistan,Asia,1962,31.997,10267083,853.100710
3,Afghanistan,Asia,1967,34.020,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106
5,Afghanistan,Asia,1977,38.438,14880372,786.113360
6,Afghanistan,Asia,1982,39.854,12881816,978.011439
7,Afghanistan,Asia,1987,40.822,13867957,852.395945
8,Afghanistan,Asia,1992,41.674,16317921,649.341395
9,Afghanistan,Asia,1997,41.763,22227415,635.341351


Save the resulting DataFrame in a variable so it can be reused in later cells

In [5]:
# Keep the parsed DataFrame in `df`; the following cells all work on it
df = pd.read_csv('data/gapminder.tsv', delimiter='\t')

In [6]:
# Show the first rows of the DataFrame (head() returns 5 rows by default)
df.head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303
2,Afghanistan,Asia,1962,31.997,10267083,853.10071
3,Afghanistan,Asia,1967,34.02,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106


In [8]:
# Confirm that read_csv produced a pandas DataFrame
type(df)

pandas.core.frame.DataFrame

In [9]:
# Shape as a (number of rows, number of columns) tuple.
# Note that shape is an attribute, not a method, so there are no parentheses.
df.shape

(1704, 6)

In [13]:
# Summary of the DataFrame's structure: index range, column names,
# non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 6 columns):
country      1704 non-null object
continent    1704 non-null object
year         1704 non-null int64
lifeExp      1704 non-null float64
pop          1704 non-null int64
gdpPercap    1704 non-null float64
dtypes: float64(2), int64(2), object(2)
memory usage: 80.0+ KB


In [14]:
# Square brackets with a single column name select that column as a Series
df['country'].head()

0    Afghanistan
1    Afghanistan
2    Afghanistan
3    Afghanistan
4    Afghanistan
Name: country, dtype: object

In [17]:
# Store the column in its own variable.
# Attribute access (df.country) is equivalent here, but it only works when the
# column name is a valid identifier that does not clash with an existing
# DataFrame attribute or method -- e.g. df.pop is the DataFrame.pop method,
# not this dataset's 'pop' column. Bracket access always works.
country = df['country']

In [18]:
# A list of column names (note the double brackets) selects several columns
# at once and returns a DataFrame
df[['country', 'year', 'pop']].head()

Unnamed: 0,country,year,pop
0,Afghanistan,1952,8425333
1,Afghanistan,1957,9240934
2,Afghanistan,1962,10267083
3,Afghanistan,1967,11537966
4,Afghanistan,1972,13079460


In [19]:
# Column labels of the DataFrame
df.columns

Index(['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap'], dtype='object')

In [20]:
# Delete a column in place with the `del` statement.
# NOTE: this mutates `df` itself, so re-running this cell raises a KeyError
# because 'country' no longer exists.
del df['country']

In [22]:
# Another way to delete: drop() returns a new DataFrame without the column,
# which is then assigned back to `df`.
# columns= names the labels to remove from the column axis; it is equivalent
# to passing the label together with axis=1 (axis=0 would drop rows instead).
df = df.drop(columns='continent')

In [23]:
# Check that both 'country' and 'continent' are gone
df.columns

Index(['year', 'lifeExp', 'pop', 'gdpPercap'], dtype='object')

Let's reload the dataset, since the cells above removed the `country` and `continent` columns

In [25]:
# Re-read the file to restore the columns deleted in the cells above
df = pd.read_csv('data/gapminder.tsv', delimiter='\t')

Getting row data with `loc` and `iloc`

In [27]:
# loc selects rows by index label. With the default RangeIndex the labels are
# 0, 1, 2, ..., so label 0 is also the first row.
df.loc[0]

country      Afghanistan
continent           Asia
year                1952
lifeExp           28.801
pop              8425333
gdpPercap        779.445
Name: 0, dtype: object

In [30]:
# loc with two lists: row labels first, then column names
df.loc[[0,99,999], ['country', 'lifeExp', 'gdpPercap']]

Unnamed: 0,country,lifeExp,gdpPercap
0,Afghanistan,28.801,779.445314
99,Bangladesh,43.453,721.186086
999,Mongolia,51.253,1226.04113


In [28]:
# iloc selects rows by integer position (0 is the first row), regardless of
# what the index labels are
df.iloc[0]

country      Afghanistan
continent           Asia
year                1952
lifeExp           28.801
pop              8425333
gdpPercap        779.445
Name: 0, dtype: object

In [32]:
# iloc with two lists of positions: rows 0, 99 and 999, and
# columns 0 (country), 3 (lifeExp) and 5 (gdpPercap)
df.iloc[[0,99,999], [0, 3, 5]]

Unnamed: 0,country,lifeExp,gdpPercap
0,Afghanistan,28.801,779.445314
99,Bangladesh,43.453,721.186086
999,Mongolia,51.253,1226.04113


In [29]:
# Because iloc is positional, negative positions count from the end:
# -1 is the last row. (loc would instead look for a row labelled -1.)
df.iloc[-1]

country      Zimbabwe
continent      Africa
year             2007
lifeExp        43.487
pop          12311143
gdpPercap     469.709
Name: 1703, dtype: object

Mean of a column

In [33]:
# Mean life expectancy across every row of the dataset
le_mean = df['lifeExp'].mean()

In [34]:
# Display the mean computed in the previous cell
le_mean

59.47443936619713

`loc` accepts boolean conditions as well, so rows can be filtered by value

In [35]:
# Boolean indexing with loc: the comparison builds a True/False Series and loc
# keeps only the rows where it is True (":" keeps every column).
# head(10) limits the display to a short preview instead of rendering every
# matching row.
df.loc[df['lifeExp'] > le_mean, :].head(10)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
14,Albania,Europe,1962,64.820,1728137,2312.888958
15,Albania,Europe,1967,66.220,1984060,2760.196931
16,Albania,Europe,1972,67.690,2263554,3313.422188
17,Albania,Europe,1977,68.930,2509048,3533.003910
18,Albania,Europe,1982,70.420,2780097,3630.880722
19,Albania,Europe,1987,72.000,3075321,3738.932735
20,Albania,Europe,1992,71.581,3326498,2497.437901
21,Albania,Europe,1997,72.950,3428038,3193.054604
22,Albania,Europe,2002,75.651,3508512,4604.211737
23,Albania,Europe,2007,76.423,3600523,5937.029526
