## Import dependencies

In [41]:
import os

import pandas as pd
import numpy as np

from flask import Flask, render_template, jsonify, request
from flask_sqlalchemy import SQLAlchemy

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine

import sqlite3

## Database setup

In [42]:
# Open the project's SQLite database file (path is relative to the working directory)
# and create a cursor on that connection. The cursor is not used by the cells shown here.
conn = sqlite3.connect(database="Resources/gdp_olympic.sqlite")
cursor = conn.cursor()

## Mismatched country codes

#### Load tables

In [43]:
# Read every row and column of the `wdi` table into a DataFrame
wdi = pd.read_sql_query(sql="SELECT * FROM wdi", con=conn)

In [44]:
# Read every row and column of the `winter` (winter medal winners) table into a DataFrame
winter_medals = pd.read_sql_query(sql="SELECT * FROM winter", con=conn)

#### Create relevant lists and explore

In [45]:
# Years in which winter games were held and which also appear in the WDI table:
# every fourth year from 1960 through 1992, then every fourth year from 1994 through 2014
# (the sequence steps from 1992 directly to 1994).
years = [*range(1960, 1993, 4), *range(1994, 2015, 4)]

In [46]:
# For each winter games year, count the rows of winter_medals recorded for that year.
# The resulting list is in the same order as `years`.

num_winter_medals = [
    len(winter_medals[winter_medals.year == games_year])
    for games_year in years
]

num_winter_medals

[147, 185, 199, 200, 210, 218, 222, 264, 325, 343, 447, 481, 531, 529, 612]

In [47]:
# countries1: the distinct country codes found in the wdi table
countries1 = wdi["country_code"].unique()

In [48]:
# Number of distinct country codes in the wdi table (263 when this notebook was last run)
countries1.size

263

In [49]:
# countries2: the distinct country codes found in the winter_medals table
countries2 = winter_medals["country_code"].unique()

In [50]:
# Number of distinct country codes in the winter_medals table (45 when this notebook was last run)
countries2.size

45

In [51]:
# Collect the codes that occur in countries2 (winter_medals) but are absent from countries1 (wdi).
# These unmatched codes are what throws off the counts when the two tables are compared.
problematic_countries = [code for code in countries2 if code not in countries1]

problematic_countries

['SUI',
 'GER',
 'TCH',
 'FRG',
 'NED',
 'URS',
 'EUA',
 'GDR',
 'BUL',
 'YUG',
 'EUN',
 'SLO',
 'DEN',
 'CRO',
 'LAT']

#### Map values 

In [52]:
# Mapping applied to the country_code column of the winter_medals table. It collapses
# historical IOC codes onto the single code used for that country in the rest of the
# analysis. The outer key is the column name; the inner dict maps old code -> new code.

medals_country_code_dict = {
    'country_code': {
        'TCH': 'CZE', # TCH is the IOC code for Czechoslovakia, which we track as Czech Republic
        'FRG': 'GER', # FRG represents West Germany, which for our purposes we will track as just Germany
        'GDR': 'GER', # GDR represents East Germany, which for our purposes we will track as just Germany
        'URS': 'RUS', # URS represents Soviet Union, which for our purposes we will track as just Russia
        # EUA is the IOC code for the United Team of Germany (Equipe Unifiee d'Allemagne,
        # 1956-1964). It is NOT the USA, so it must be folded into Germany.
        'EUA': 'GER',
        'EUN': 'RUS', # EUN is the Unified Team (former Soviet republics) at the 1992 games, tracked as Russia
    }
}

In [53]:
# Mapping applied to the country_code column of the wdi table. It rewrites the codes used
# there so that they match the codes coming from the winter_medals table.
# The outer key is the column name; the inner dict maps wdi code -> medal-table code.

wdi_country_code_dict = {
    'country_code': {
        # Switzerland is CHE in ISO 3166-1 alpha-3. SWZ is Eswatini (Swaziland), so mapping
        # SWZ here would relabel the wrong country's indicators as Switzerland.
        # NOTE(review): assumes the wdi table uses ISO alpha-3 codes, as its other keys below do.
        'CHE': 'SUI',
        'DEU': 'GER', # both represent Germany
        'NLD': 'NED', # both represent Netherlands
        'BGR': 'BUL', # both represent Bulgaria
        'SRB': 'YUG', # YUG is the IOC code for Yugoslavia; Serbia (SRB) is chosen as its stand-in
        'SVN': 'SLO', # both represent Slovenia
        'DNK': 'DEN', # both represent Denmark
        'HRV': 'CRO', # both represent Croatia
        'LVA': 'LAT', # both represent Latvia
    }
}

In [56]:
# Apply the two code mappings, one per table. Each dictionary is keyed by column name, so
# only values in the country_code column are rewritten. replace() returns a new DataFrame,
# which is bound back to the same variable name.
winter_medals = winter_medals.replace(to_replace=medals_country_code_dict)
wdi = wdi.replace(to_replace=wdi_country_code_dict)

In [58]:
# Write both updated tables to CSV, omitting the DataFrame index column.
# NOTE(review): the SQLite database is presumably rebuilt from these CSV files elsewhere;
# nothing in this cell writes to the database itself.
wdi.to_csv(path_or_buf="Resources/wdi.csv", index=False)
winter_medals.to_csv(path_or_buf="Resources/winter.csv", index=False)