In [1]:
import pandas as pd
from sqlalchemy import create_engine
import sqlalchemy as sa
import os

In [2]:
# Build the connection URL from its parts instead of interpolating them into a
# string: URL.create() escapes each component, so a username or password that
# contains characters such as "@", ":" or "/" cannot corrupt the URL.
# An optional ":port" suffix in TEST_DB_HOSTNAME is split out so values that
# worked with the previous string-built URL keep working.
db_host, _, db_port = os.environ['TEST_DB_HOSTNAME'].partition(':')
connection_url = sa.engine.URL.create(
    drivername="mysql+pymysql",
    username=os.environ['TEST_DB_USERNAME'],
    password=os.environ['TEST_DB_PASSWORD'],
    host=db_host,
    port=int(db_port) if db_port else None,
    database=os.environ['TEST_DB_DATABASE_NAME'],
)
db_engine = create_engine(connection_url)

# Smoke test: open one connection and release it straight away so that a bad
# host or bad credentials are reported here rather than at the first query.
try:
    with db_engine.connect() as connection:
        print("Connection to MySQL database successful!")
except Exception as e:
    print(f"Error: {e}")

Connection to MySQL database successful!


In [3]:
# Read the region column of country_regions into a DataFrame and print its
# schema summary (row count, non-null count, dtype).
df = pd.read_sql(
    sql="SELECT region FROM country_regions",
    con=db_engine,
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   region  27 non-null     object
dtypes: object(1)
memory usage: 348.0+ bytes


In [4]:
# Distinct region names in the loaded column; these are the keys that the
# region -> id mapping has to cover.
df.loc[:, 'region'].unique()

array(['Northern Europe', 'Southern Europe', 'Western Europe',
       'Central Europe', 'Eastren Europe', 'Southeastren Europe'],
      dtype=object)

In [5]:
# Short identifier for each region name.
# NOTE: the keys must match the values stored in country_regions exactly,
# including the table's own spelling "Eastren" / "Southeastren". Correcting the
# spelling here without also fixing the table would leave those rows unmapped.
region_id = {
    'Northern Europe': 'NE',
    'Southern Europe': 'SE',
    'Western Europe': 'WE',
    'Central Europe': 'CE',
    'Eastren Europe': 'EE',
    'Southeastren Europe': 'SEE',
}

df['region_id'] = df['region'].map(region_id)

# Series.map() yields NaN for any value that is not a key of the dict, so a new
# or renamed region would otherwise slip through as a silent missing id.
unmapped_regions = df.loc[df['region_id'].isna(), 'region'].unique()
assert len(unmapped_regions) == 0, f"No region id for: {list(unmapped_regions)}"

df

Unnamed: 0,region,region_id
0,Northern Europe,NE
1,Northern Europe,NE
2,Northern Europe,NE
3,Northern Europe,NE
4,Northern Europe,NE
5,Northern Europe,NE
6,Northern Europe,NE
7,Southern Europe,SE
8,Southern Europe,SE
9,Southern Europe,SE


---------------------

In [9]:
# Read each country together with its region from country_regions and print
# the resulting frame's schema summary.
df2 = pd.read_sql(
    sql="SELECT country, region FROM country_regions",
    con=db_engine,
)
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   country  27 non-null     object
 1   region   27 non-null     object
dtypes: object(2)
memory usage: 564.0+ bytes


In [10]:
# Two-letter code for each country name as it is spelled in country_regions.
country_codes = {
    "Latvia": "LV",
    "Malta": "MT",
    "Cyprus": "CY",
    "Denmark": "DK",
    "Greece": "GR",
    "Italy": "IT",
    "Belgium": "BE",
    "Czech Republic": "CZ",
    "Croatia": "HR",
    "Sweden": "SE",
    "Estonia": "EE",
    "Germany": "DE",
    "Finland": "FI",
    "Lithuania": "LT",
    "Spain": "ES",
    "Luxembourg": "LU",
    "Bulgaria": "BG",
    "Poland": "PL",
    "Romania": "RO",
    "Austria": "AT",
    "Slovakia": "SK",
    "Netherlands": "NL",
    "Ireland": "IE",
    "France": "FR",
    "Hungary": "HU",
    "Portugal": "PT",
    "Slovenia": "SI",
}

df2['CountryCode'] = df2['country'].map(country_codes)

# Series.map() yields NaN for names missing from the dict; stop here instead of
# carrying a missing code forward.
unmapped_countries = df2.loc[df2['CountryCode'].isna(), 'country'].unique()
assert len(unmapped_countries) == 0, f"No country code for: {list(unmapped_countries)}"

df2

Unnamed: 0,country,region,CountryCode
0,Denmark,Northern Europe,DK
1,Sweden,Northern Europe,SE
2,Estonia,Northern Europe,EE
3,Finland,Northern Europe,FI
4,Ireland,Northern Europe,IE
5,Lithuania,Northern Europe,LT
6,Latvia,Northern Europe,LV
7,Malta,Southern Europe,MT
8,Cyprus,Southern Europe,CY
9,Greece,Southern Europe,GR


-------------------------


In [33]:
# Read every column of country_income and print the frame's schema summary.
df3 = pd.read_sql(
    sql="SELECT * FROM country_income",
    con=db_engine,
)
df3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 6 columns):
 #   Column                              Non-Null Count  Dtype 
---  ------                              --------------  ----- 
 0   Unnamed: 0                          104 non-null    int64 
 1   country                             104 non-null    object
 2   region                              104 non-null    object
 3   Code                                104 non-null    object
 4   Year                                104 non-null    int64 
 5   World Bank's income classification  104 non-null    object
dtypes: int64(2), object(4)
memory usage: 5.0+ KB


In [34]:
# Keep only the columns used from here on. Take an explicit copy: the cells
# below add columns to df3, and assigning into a selection of another frame is
# the pattern pandas may flag with SettingWithCopyWarning.
df3 = df3[['country', 'Year', "World Bank's income classification"]].copy()

In [35]:
# Two-letter code for each country name.
# NOTE: identical to the country_codes mapping defined in the country_regions
# section of this notebook; repeated here so this section runs on its own.
country_codes = {
    "Latvia": "LV",
    "Malta": "MT",
    "Cyprus": "CY",
    "Denmark": "DK",
    "Greece": "GR",
    "Italy": "IT",
    "Belgium": "BE",
    "Czech Republic": "CZ",
    "Croatia": "HR",
    "Sweden": "SE",
    "Estonia": "EE",
    "Germany": "DE",
    "Finland": "FI",
    "Lithuania": "LT",
    "Spain": "ES",
    "Luxembourg": "LU",
    "Bulgaria": "BG",
    "Poland": "PL",
    "Romania": "RO",
    "Austria": "AT",
    "Slovakia": "SK",
    "Netherlands": "NL",
    "Ireland": "IE",
    "France": "FR",
    "Hungary": "HU",
    "Portugal": "PT",
    "Slovenia": "SI",
}

# assign() returns a new frame with the column appended, so this never writes
# into a selection of another frame (df3 was produced by a column subset).
df3 = df3.assign(country_code=df3['country'].map(country_codes))

# Series.map() yields NaN for names missing from the dict; stop here instead of
# carrying a missing code forward.
unmapped_countries = df3.loc[df3['country_code'].isna(), 'country'].unique()
assert len(unmapped_countries) == 0, f"No country code for: {list(unmapped_countries)}"

df3

Unnamed: 0,country,Year,World Bank's income classification,country_code
0,Denmark,2014,High-income countries,DK
1,Sweden,2014,High-income countries,SE
2,Estonia,2014,High-income countries,EE
3,Finland,2014,High-income countries,FI
4,Ireland,2014,High-income countries,IE
...,...,...,...,...
99,Hungary,2017,High-income countries,HU
100,Bulgaria,2017,Upper-middle-income countries,BG
101,Romania,2017,Upper-middle-income countries,RO
102,Croatia,2017,High-income countries,HR


In [37]:
# Short code for each normalised income-group name.
income_classification = {
    'Low Income': 'LIC',
    'Lower Middle Income': 'LMC',
    'Upper Middle Income': 'UMC',
    'High Income': 'HIC'
}

# Display range attached to each income-group code.
# NOTE(review): this is one fixed set of thresholds applied to every Year in
# the data; World Bank thresholds are revised annually -- confirm that a single
# set is intended for all rows.
income_ranges = {
    'LIC': '≤ $1,085',
    'LMC': '$1,086 - $4,255',
    'UMC': '$4,256 - $13,205',
    'HIC': '≥ $13,206'
}

# Labels as stored in the source column -> normalised income-group name.
classification_name_fix = {
    'High-income countries': 'High Income',
    'Low-income countries': 'Low Income',
    'Lower-middle-income countries': 'Lower Middle Income',
    'Upper-middle-income countries': 'Upper Middle Income',
}

raw_classification_col = "World Bank's income classification"

# Validate before touching df3: Series.map() yields NaN for labels missing from
# the dict, and that NaN would propagate silently through both later lookups.
classification = df3[raw_classification_col].map(classification_name_fix)
unmapped_labels = df3.loc[classification.isna(), raw_classification_col].unique()
assert len(unmapped_labels) == 0, (
    f"Unrecognised income classification(s): {list(unmapped_labels)}"
)

classification_id = classification.map(income_classification)

# Build the result in one step so a failure above leaves df3 unchanged; the raw
# label column is dropped once the three derived columns exist.
df3 = (
    df3
    .assign(
        classification=classification,
        classification_id=classification_id,
        income_range=classification_id.map(income_ranges),
    )
    .drop(columns=[raw_classification_col])
)

df3

Unnamed: 0,country,Year,country_code,classification,classification_id,income_range
0,Denmark,2014,DK,High Income,HIC,"≥ $13,206"
1,Sweden,2014,SE,High Income,HIC,"≥ $13,206"
2,Estonia,2014,EE,High Income,HIC,"≥ $13,206"
3,Finland,2014,FI,High Income,HIC,"≥ $13,206"
4,Ireland,2014,IE,High Income,HIC,"≥ $13,206"
...,...,...,...,...,...,...
99,Hungary,2017,HU,High Income,HIC,"≥ $13,206"
100,Bulgaria,2017,BG,Upper Middle Income,UMC,"$4,256 - $13,205"
101,Romania,2017,RO,Upper Middle Income,UMC,"$4,256 - $13,205"
102,Croatia,2017,HR,High Income,HIC,"≥ $13,206"
