In [27]:
# Resources
import numpy as np
import sqlite3
import pandas as pd

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy 
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from sqlalchemy import extract
from sqlalchemy import select
from sqlalchemy.orm import join
from sqlalchemy.sql import func
from sqlalchemy import or_
from sqlalchemy import inspect

In [28]:
##################################
# Database Setup
##################################

# https://docs.sqlalchemy.org/en/14/core/engines.html#sqlite
# Module 9 Challenge
engine = create_engine(r'sqlite:///../../data/LIFE_EXPECTANCY_DB.db')

# reflect an existing database into a new model
Base = automap_base()
# reflect the tables
Base.prepare(engine, reflect=True)

# Save references to each table
Happiness = Base.classes.HAPPINESS_TABLE
Expectancy = Base.classes.EXPECTANCY_TABLE

In [29]:
# Tables in database
Base.classes.keys()

['EXPECTANCY_TABLE', 'HAPPINESS_TABLE']

In [30]:
# Create our session (link) from Python to the DB
session = Session(engine)

In [31]:
inst = inspect(Expectancy)
Expectancy_columns = [c_attr.key for c_attr in inst.mapper.column_attrs]

In [32]:
# Columns of EXPECTANCY_TABLE
print(Expectancy_columns)

['ID', 'COUNTRY', 'YEAR', 'STATUS', 'EXPECTANCY', 'MORTALITY', 'INFANT_DEATH', 'ALCOHOL', 'EXPENDITURE_PERCENT', 'HEPATITUS_B', 'MEASLES', 'BMI', 'UNDER_FIVE_DEATH', 'POLIIO', 'EXPENDITURE_TOTAL', 'DIPHTHERIA', 'HIV_AIDS', 'GDP', 'POPULATION', 'THIN_1TO19_YR', 'THIN_5TO9_YR', 'INC_COMPOSITION', 'SCHOOLING']


In [33]:
inst = inspect(Happiness)
Happiness_columns = [c_attr.key for c_attr in inst.mapper.column_attrs]

In [34]:
# Columns of HAPPINESS_TABLE
print(Happiness_columns)

['ID', 'COUNTRY', 'REGION', 'HAPPINESS_RANK', 'HAPPINESS_SCORE', 'STANDARD_ERROR', 'ECONOMY', 'FAMILY', 'HEALTH', 'FREEDOM', 'TRUST', 'GENEROSITY', 'DYSTOPIA']


In [35]:
expect_results = []

# Query contents of EXPECTANCY_TABLE to list
expect_results = session.query(
    Expectancy.ID,
    Expectancy.COUNTRY,
    Expectancy.YEAR,
    Expectancy.STATUS,
    Expectancy.EXPECTANCY,
    Expectancy.MORTALITY,
    Expectancy.INFANT_DEATH,
    Expectancy.ALCOHOL,
    Expectancy.EXPENDITURE_PERCENT,
    Expectancy.HEPATITUS_B,
    Expectancy.MEASLES,
    Expectancy.BMI,
    Expectancy.UNDER_FIVE_DEATH,
    Expectancy.POLIIO,
    Expectancy.EXPENDITURE_TOTAL,
    Expectancy.DIPHTHERIA,
    Expectancy.HIV_AIDS,
    Expectancy.GDP,
    Expectancy.POPULATION,
    Expectancy.THIN_1TO19_YR,
    Expectancy.THIN_5TO9_YR,
    Expectancy.INC_COMPOSITION,
    Expectancy.SCHOOLING
    ).all()

#for temp_result in expect_results:
#    print(temp_result)

In [36]:
# Convert Expectancy list to dataframe
expect_df = pd.DataFrame(expect_results, columns=Expectancy_columns)
print(expect_df)

        ID      COUNTRY  YEAR      STATUS EXPECTANCY MORTALITY  INFANT_DEATH  \
0        1  Afghanistan  2015  Developing         65       263          62.0   
1        2  Afghanistan  2014  Developing       59.9       271          64.0   
2        3  Afghanistan  2013  Developing       59.9       268          66.0   
3        4  Afghanistan  2012  Developing       59.5       272          69.0   
4        5  Afghanistan  2011  Developing       59.2       275          71.0   
...    ...          ...   ...         ...        ...       ...           ...   
2933  2934     Zimbabwe  2004  Developing       44.3       723          27.0   
2934  2935     Zimbabwe  2003  Developing       44.5       715          26.0   
2935  2936     Zimbabwe  2002  Developing       44.8        73          25.0   
2936  2937     Zimbabwe  2001  Developing       45.3       686          25.0   
2937  2938     Zimbabwe  2000  Developing         46       665          24.0   

     ALCOHOL  EXPENDITURE_PERCENT HEPAT

In [37]:
happy_results = []

# Query contents of HAPPINESS_TABLE to list
happy_results = session.query(
    Happiness.ID,
    Happiness.COUNTRY,
    Happiness.REGION,
    Happiness.HAPPINESS_RANK,
    Happiness.HAPPINESS_SCORE,
    Happiness.STANDARD_ERROR,
    Happiness.ECONOMY,
    Happiness.FAMILY,
    Happiness.HEALTH,
    Happiness.FREEDOM,
    Happiness.TRUST,
    Happiness.GENEROSITY,
    Happiness.DYSTOPIA
    ).all()

#for temp_result in happy_results:
#    print(temp_result)

In [38]:
# Convert Happiness list to dataframe
happy_df = pd.DataFrame(happy_results, columns=Happiness_columns)
print(happy_df)

      ID      COUNTRY                           REGION  HAPPINESS_RANK  \
0      1  Switzerland                   Western Europe               1   
1      2      Iceland                   Western Europe               2   
2      3      Denmark                   Western Europe               3   
3      4       Norway                   Western Europe               4   
4      5       Canada                    North America               5   
..   ...          ...                              ...             ...   
153  154       Rwanda               Sub-Saharan Africa             154   
154  155        Benin               Sub-Saharan Africa             155   
155  156        Syria  Middle East and Northern Africa             156   
156  157      Burundi               Sub-Saharan Africa             157   
157  158         Togo               Sub-Saharan Africa             158   

     HAPPINESS_SCORE  STANDARD_ERROR  ECONOMY   FAMILY   HEALTH  FREEDOM  \
0              7.587         0.0341

In [39]:
# INNER JOIN between Expectancy & Happiness data with filter on Expectancy data to only view 2015 year.

happy_expect_join = []

happy_expect_join_columns = Expectancy_columns + Happiness_columns

happy_expect_join = session.query(
    Expectancy.ID.label("E_ID"),
    Expectancy.COUNTRY,
    Expectancy.YEAR,
    Expectancy.STATUS,
    Expectancy.EXPECTANCY,
    Expectancy.MORTALITY,
    Expectancy.INFANT_DEATH,
    Expectancy.ALCOHOL,
    Expectancy.EXPENDITURE_PERCENT,
    Expectancy.HEPATITUS_B,
    Expectancy.MEASLES,
    Expectancy.BMI,
    Expectancy.UNDER_FIVE_DEATH,
    Expectancy.POLIIO,
    Expectancy.EXPENDITURE_TOTAL,
    Expectancy.DIPHTHERIA,
    Expectancy.HIV_AIDS,
    Expectancy.GDP,
    Expectancy.POPULATION,
    Expectancy.THIN_1TO19_YR,
    Expectancy.THIN_5TO9_YR,
    Expectancy.INC_COMPOSITION,
    Expectancy.SCHOOLING,
    Happiness.ID.label('H_ID'),
    Happiness.COUNTRY,
    Happiness.REGION,
    Happiness.HAPPINESS_RANK,
    Happiness.HAPPINESS_SCORE,
    Happiness.STANDARD_ERROR,
    Happiness.ECONOMY,
    Happiness.FAMILY,
    Happiness.HEALTH,
    Happiness.FREEDOM,
    Happiness.TRUST,
    Happiness.GENEROSITY,
    Happiness.DYSTOPIA
).join(Happiness, func.lower(Expectancy.COUNTRY) == func.lower(Happiness.COUNTRY)
).filter(Expectancy.YEAR == '2015').all()

happy_expect_join_df = pd.DataFrame(happy_expect_join, columns=happy_expect_join_columns)

print(happy_expect_join_df)

       ID      COUNTRY  YEAR      STATUS  EXPECTANCY  MORTALITY  INFANT_DEATH  \
0    2522  Switzerland  2015   Developed        83.4       49.0           0.0   
1    1171      Iceland  2015   Developed        82.7       49.0           0.0   
2     738      Denmark  2015   Developed        86.0       71.0           0.0   
3    1911       Norway  2015   Developed        81.8       59.0           0.0   
4     497       Canada  2015  Developing        82.2       64.0           2.0   
..    ...          ...   ...         ...         ...        ...           ...   
130     1  Afghanistan  2015  Developing        65.0      263.0          62.0   
131  2152       Rwanda  2015  Developing        66.1      227.0          11.0   
132   273        Benin  2015  Developing        60.0      249.0          25.0   
133   417      Burundi  2015  Developing        59.6      288.0          21.0   
134  2618         Togo  2015  Developing        59.9      287.0          13.0   

    ALCOHOL  EXPENDITURE_PE

In [40]:
# Columns of the joined data between Expectancy & Happiness

for col in happy_expect_join_df.columns: 
    print(col) 

ID
COUNTRY
YEAR
STATUS
EXPECTANCY
MORTALITY
INFANT_DEATH
ALCOHOL
EXPENDITURE_PERCENT
HEPATITUS_B
MEASLES
BMI
UNDER_FIVE_DEATH
POLIIO
EXPENDITURE_TOTAL
DIPHTHERIA
HIV_AIDS
GDP
POPULATION
THIN_1TO19_YR
THIN_5TO9_YR
INC_COMPOSITION
SCHOOLING
ID
COUNTRY
REGION
HAPPINESS_RANK
HAPPINESS_SCORE
STANDARD_ERROR
ECONOMY
FAMILY
HEALTH
FREEDOM
TRUST
GENEROSITY
DYSTOPIA


In [41]:
# Check for NULL values in Happy Dataframe
for column in happy_df.columns:
    print(f'Column {column} has {happy_df[column].isnull().sum()} null values')

Column ID has 0 null values
Column COUNTRY has 0 null values
Column REGION has 0 null values
Column HAPPINESS_RANK has 0 null values
Column HAPPINESS_SCORE has 0 null values
Column STANDARD_ERROR has 0 null values
Column ECONOMY has 0 null values
Column FAMILY has 0 null values
Column HEALTH has 0 null values
Column FREEDOM has 0 null values
Column TRUST has 0 null values
Column GENEROSITY has 0 null values
Column DYSTOPIA has 0 null values


In [42]:
# Check for NULL values in Expect Dataframe
for column in expect_df.columns:
    print(f'Column {column} has {expect_df[column].isnull().sum()} null values')

Column ID has 0 null values
Column COUNTRY has 0 null values
Column YEAR has 0 null values
Column STATUS has 0 null values
Column EXPECTANCY has 0 null values
Column MORTALITY has 0 null values
Column INFANT_DEATH has 0 null values
Column ALCOHOL has 0 null values
Column EXPENDITURE_PERCENT has 0 null values
Column HEPATITUS_B has 0 null values
Column MEASLES has 0 null values
Column BMI has 0 null values
Column UNDER_FIVE_DEATH has 0 null values
Column POLIIO has 0 null values
Column EXPENDITURE_TOTAL has 0 null values
Column DIPHTHERIA has 0 null values
Column HIV_AIDS has 0 null values
Column GDP has 0 null values
Column POPULATION has 0 null values
Column THIN_1TO19_YR has 0 null values
Column THIN_5TO9_YR has 0 null values
Column INC_COMPOSITION has 0 null values
Column SCHOOLING has 0 null values


In [24]:
# Close DB Session
session.close()