# Project - Play with DataFrames

## Goal of Project
- Master pandas DataFrame

### Step 1: Import pandas
- Execute the cell below (SHIFT + ENTER)

In [1]:
import pandas as pd

### Step 2: Read the data

In [2]:
# Load the population table into a DataFrame; the path is relative to the
# notebook's working directory.
data = pd.read_csv(filepath_or_buffer='files/population.csv')

In [6]:
# Inspect the loaded frame; a bare last expression is rendered as the cell output.
data

Unnamed: 0,Country,Year,Population
0,Denmark,2000,5.3
1,Denmark,2010,5.5
2,Denmark,2020,5.8
3,Sweden,2000,8.8
4,Sweden,2010,9.3
5,Sweden,2020,10.2


### Step 3: Investigate the data types

In [5]:
# List each column's dtype to see how read_csv parsed the file.
data.dtypes

Country        object
Year            int64
Population    float64
dtype: object

### Step 4: Convert Year to Datetime
- `format='%Y'` tells pandas that each input value is a four-digit year, so `2000` becomes `2000-01-01`.

In [10]:
# Parse the Year column as dates; format='%Y' means each value is a
# four-digit year, so 2000 becomes 2000-01-01.
data['Year'] = data['Year'].pipe(pd.to_datetime, format='%Y')

In [11]:
# Re-display the frame to check the converted Year column.
data

Unnamed: 0,Country,Year,Population
0,Denmark,2000-01-01,5.3
1,Denmark,2010-01-01,5.5
2,Denmark,2020-01-01,5.8
3,Sweden,2000-01-01,8.8
4,Sweden,2010-01-01,9.3
5,Sweden,2020-01-01,10.2


In [12]:
# Re-check the dtypes: Year should now be datetime64[ns] instead of int64.
data.dtypes

Country               object
Year          datetime64[ns]
Population           float64
dtype: object

### Step 5: Convert Population from millions to absolute numbers

In [13]:
# Multiply Population by one million (e.g. 5.3 -> 5300000.0).
# Note: this overwrites the column, so re-running the cell multiplies it again.
data['Population'] = data['Population'].mul(1_000_000)

In [14]:
# Display the frame to check the rescaled Population column.
data

Unnamed: 0,Country,Year,Population
0,Denmark,2000-01-01,5300000.0
1,Denmark,2010-01-01,5500000.0
2,Denmark,2020-01-01,5800000.0
3,Sweden,2000-01-01,8800000.0
4,Sweden,2010-01-01,9300000.0
5,Sweden,2020-01-01,10200000.0


### Step 6: Calculate mean population for each country

In [17]:
# Mean population per country. Population is selected explicitly because the
# frame also holds the datetime Year column, and whether a blanket .mean()
# drops or aggregates that column depends on the pandas version.
# The double brackets keep the result a DataFrame with a single Population column.
data.groupby('Country')[['Population']].mean()

Unnamed: 0_level_0,Population
Country,Unnamed: 1_level_1
Denmark,5533333.0
Sweden,9433333.0


### Step 7: Shorten the country names

In [18]:
# Replace the literal text 'Denmark' with 'DNK'. regex=False pins plain-string
# matching, since the default value of `regex` differs between pandas versions.
data['Country'] = data['Country'].str.replace('Denmark', 'DNK', regex=False)

In [19]:
# Display the frame to check that 'Denmark' was replaced by 'DNK'.
data

Unnamed: 0,Country,Year,Population
0,DNK,2000-01-01,5300000.0
1,DNK,2010-01-01,5500000.0
2,DNK,2020-01-01,5800000.0
3,Sweden,2000-01-01,8800000.0
4,Sweden,2010-01-01,9300000.0
5,Sweden,2020-01-01,10200000.0


In [20]:
# Replace the literal text 'Sweden' with 'SWE'. regex=False pins plain-string
# matching, since the default value of `regex` differs between pandas versions.
data['Country'] = data['Country'].str.replace('Sweden', 'SWE', regex=False)

In [21]:
# Display the final frame; both country names are now three-letter codes.
data

Unnamed: 0,Country,Year,Population
0,DNK,2000-01-01,5300000.0
1,DNK,2010-01-01,5500000.0
2,DNK,2020-01-01,5800000.0
3,SWE,2000-01-01,8800000.0
4,SWE,2010-01-01,9300000.0
5,SWE,2020-01-01,10200000.0
