In [1]:
import datetime as dt
from pathlib import Path

import numpy as np
import pandas as pd
import requests

In [2]:
# Load the OECD metropolitan-area dataset (GDP by city and year).
# The CSV was downloaded from https://stats.oecd.org/Index.aspx?DataSetCode=CITIES
# and is decoded as ISO-8859-1 when read.
GDP_file = "Resources/CITIES_02022019170650160.csv"
GDP_df = pd.read_csv(filepath_or_buffer=GDP_file, encoding="ISO-8859-1")
GDP_df.head()

Unnamed: 0,METRO_ID,Metropolitan areas,VAR,Variables,TIME,Year,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,Value,Flag Codes,Flags
0,USA145,Nashville,GDP_REAL_PPP,"GDP (Millions USD, constant prices, constant P...",2001,2001,USD,US Dollar,0,Units,,,12526.0,,
1,USA145,Nashville,GDP_REAL_PPP,"GDP (Millions USD, constant prices, constant P...",2002,2002,USD,US Dollar,0,Units,,,12970.0,,
2,USA145,Nashville,GDP_REAL_PPP,"GDP (Millions USD, constant prices, constant P...",2003,2003,USD,US Dollar,0,Units,,,13390.0,,
3,USA145,Nashville,GDP_REAL_PPP,"GDP (Millions USD, constant prices, constant P...",2004,2004,USD,US Dollar,0,Units,,,14022.0,,
4,USA145,Nashville,GDP_REAL_PPP,"GDP (Millions USD, constant prices, constant P...",2005,2005,USD,US Dollar,0,Units,,,14209.0,,


In [3]:
# Keep only the 2015 and 2016 observations (both bounds inclusive).
# An explicit upper bound is used instead of an open-ended `>= 2015` so that
# any later years present in a refreshed export cannot leak into the
# per-city averages computed further down.
# Re-running this cell is safe: filtering an already-filtered frame is a no-op.
GDP_df = GDP_df.loc[GDP_df['Year'].between(2015, 2016)]
GDP_df.head()

Unnamed: 0,METRO_ID,Metropolitan areas,VAR,Variables,TIME,Year,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,Value,Flag Codes,Flags
14,USA145,Nashville,GDP_REAL_PPP,"GDP (Millions USD, constant prices, constant P...",2015,2015,USD,US Dollar,0,Units,,,19379.0,,
15,USA145,Nashville,GDP_REAL_PPP,"GDP (Millions USD, constant prices, constant P...",2016,2016,USD,US Dollar,0,Units,,,20452.0,,
30,UK560,Oxford,GDP_PC_SH_NAT,Share GDP per capita in Metropolitan area over...,2015,2015,PC,Percentage,0,Units,,,123.7,,
31,UK560,Oxford,GDP_PC_SH_NAT,Share GDP per capita in Metropolitan area over...,2016,2016,PC,Percentage,0,Units,,,123.4,,
46,UK569,Ipswich,GDP_SH_NAT,Share of GDP in Metropolitan area over the nat...,2015,2015,PC,Percentage,0,Units,,,0.5,,


In [4]:
# Build the total-GDP table: one row per metropolitan area holding the mean
# of its "Value" entries across the years remaining in GDP_df.
is_total_gdp = GDP_df['Variables'] == "GDP (Millions USD, constant prices, constant PPP, base year 2010)"
Total_GDP_df = GDP_df.loc[is_total_gdp, ['METRO_ID', 'Metropolitan areas', 'Year', 'Value']].copy()

# Average per (METRO_ID, Metropolitan areas) pair; the pair becomes the index.
GDP_average = (
    Total_GDP_df
    .groupby(['METRO_ID', 'Metropolitan areas'])["Value"]
    .mean()
    .rename("Total GDP")
)

# Single-column frame whose header states the unit of the values.
Final_GDP_df = GDP_average.to_frame("Total GDP(millions)")
Final_GDP_df.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Total GDP(millions)
METRO_ID,Metropolitan areas,Unnamed: 2_level_1
AT001,Vienna,130700.0
AT002,Graz,19246.0
AT003,Linz,29148.0
AT004,Salzburg,19432.0
AT005,Innsbruck,14331.0


In [5]:
# Build the GDP-per-capita table: one row per metropolitan area holding the
# mean of its "Value" entries across the years remaining in GDP_df.
is_percapita_gdp = GDP_df['Variables'] == "GDP per capita (USD, constant prices, constant PPP, base year 2010)"
Percapita_GDP_df = GDP_df.loc[is_percapita_gdp, ['METRO_ID', 'Metropolitan areas', 'Year', 'Value']].copy()

# Average per (METRO_ID, Metropolitan areas) pair; the pair becomes the index.
GDP_average_2 = (
    Percapita_GDP_df
    .groupby(['METRO_ID', 'Metropolitan areas'])["Value"]
    .mean()
    .rename("Percapita GDP")
)

# Single-column frame ready to be combined with the total-GDP table.
Final_Percapita_GDP_df = GDP_average_2.to_frame("Percapita GDP")
Final_Percapita_GDP_df.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Percapita GDP
METRO_ID,Metropolitan areas,Unnamed: 2_level_1
AT001,Vienna,46787.0
AT002,Graz,45376.0
AT003,Linz,46498.0
AT004,Salzburg,55594.0
AT005,Innsbruck,46502.0


In [6]:
# Combine the total-GDP and per-capita averages into a single table.
# Both frames are keyed by the (METRO_ID, Metropolitan areas) index levels;
# the inner join keeps only the cities that appear in both.
join_keys = ['METRO_ID', 'Metropolitan areas']
merge_table = Final_GDP_df.merge(Final_Percapita_GDP_df, how="inner", on=join_keys)
merge_table.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total GDP(millions),Percapita GDP
METRO_ID,Metropolitan areas,Unnamed: 2_level_1,Unnamed: 3_level_1
AT001,Vienna,130700.0,46787.0
AT002,Graz,19246.0,45376.0
AT003,Linz,29148.0,46498.0
AT004,Salzburg,19432.0,55594.0
AT005,Innsbruck,14331.0,46502.0


In [7]:
# Export the merged table to CSV. The index levels (METRO_ID, Metropolitan
# areas) are written as the leading columns, preceded by a header row.
output_path = Path("Output/GDP by city.csv")
# to_csv does not create missing directories, so make sure Output/ exists;
# otherwise this cell fails on a fresh checkout or a cleaned workspace.
output_path.parent.mkdir(parents=True, exist_ok=True)
merge_table.to_csv(output_path, index=True, header=True)