# Analysis of Health Impacts and Mortality Risk of Air Pollution in Different Countries 

In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sqlalchemy import desc

In [2]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func

In [3]:
# Connect to local database
DATABASE_URI = 'postgresql://postgres:1372@localhost:5432/air_qualities'
engine = create_engine(DATABASE_URI)

In [4]:
# reflect an existing database into a new model
Base = automap_base()

# reflect the tables
Base.prepare(autoload_with=engine)

In [5]:
# View all of the classes that automap found
Base.classes.keys()

['number_of_deaths_by_risk_factor',
 'countries_codes_and_coordinates',
 'ambient_air_quality_data',
 'death_rates_from_air_pollution',
 'disease_burden_by_risk_factor',
 'outdoor_air_death_rates_by_age',
 'aq_pollution_mortality_data']

In [6]:
# Save references to each table
Countries_codes_and_coordinates = Base.classes.countries_codes_and_coordinates
Ambient_air_quality_data = Base.classes.ambient_air_quality_data
Death_rates_from_air_pollution = Base.classes.death_rates_from_air_pollution
Disease_burden_by_risk_factor = Base.classes.disease_burden_by_risk_factor
Number_of_deaths_by_risk_factor = Base.classes.number_of_deaths_by_risk_factor
Outdoor_air_death_rates_by_age = Base.classes.outdoor_air_death_rates_by_age
Aq_pollution_mortality_data = Base.classes.aq_pollution_mortality_data


In [7]:
# Create our session (link) from Python to the DB
session = Session(engine)
session

<sqlalchemy.orm.session.Session at 0x23ac6ca08e0>

In [8]:
connection = engine.connect()

# Ambient Air Quality Data


In [11]:
CCC = pd.read_sql('select * from Countries_codes_and_coordinates', connection)
CCC

Unnamed: 0,coor_id,country,alpha_2_code,alpha_3_code,numeric_code,latitude,longitude
0,LL1,Afghanistan,AF,AFG,4,33.0000,65.0
1,LL2,Albania,AL,ALB,8,41.0000,20.0
2,LL3,Algeria,DZ,DZA,12,28.0000,3.0
3,LL4,American Samoa,AS,ASM,16,-14.3333,-170.0
4,LL5,Andorra,AD,AND,20,42.5000,1.6
...,...,...,...,...,...,...,...
240,LL252,Western Sahara,EH,ESH,732,24.5000,-13.0
241,LL253,Yemen,YE,YEM,887,15.0000,48.0
242,LL254,Zambia,ZM,ZMB,894,-15.0000,30.0
243,LL255,Zimbabwe,ZW,ZWE,716,-20.0000,30.0


In [15]:
country_informaiton=session.query(Countries_codes_and_coordinates.country,
                                  (Countries_codes_and_coordinates.latitude).label("lat"),
                                  (Countries_codes_and_coordinates.longitude).label("lng")
                                 ).distinct().all()
country_informaiton

[('Tonga', -20.0, -175.0),
 ('Azerbaijan', 40.5, 47.5),
 ('Mozambique', -18.25, 35.0),
 ('Kiribati', 1.4167, 173.0),
 ('Panama', 9.0, -80.0),
 ('French Polynesia', -15.0, -140.0),
 ('American Samoa', -14.3333, -170.0),
 ('Marshall Islands', 9.0, 168.0),
 ('Tanzania, United Republic of', -6.0, 35.0),
 ('Italy', 42.8333, 12.8333),
 ('San Marino', 43.7667, 12.4167),
 ('El Salvador', 13.8333, -88.9167),
 ('Antigua and Barbuda', 17.05, -61.8),
 ('Dominican Republic', 19.0, -70.6667),
 ('Saint Pierre and Miquelon', 46.8333, -56.3333),
 ('Jamaica', 18.25, -77.5),
 ('Egypt', 27.0, 30.0),
 ('Suriname', 4.0, -56.0),
 ('Bolivia', -17.0, -65.0),
 ('United States', 38.0, -97.0),
 ('French Southern Territories', -43.0, 67.0),
 ('Cook Islands', -21.2333, -159.7667),
 ('Ukraine', 49.0, 32.0),
 ('Jordan', 31.0, 36.0),
 ('Poland', 52.0, 20.0),
 ('Anguilla', 18.25, -63.1667),
 ('Madagascar', -20.0, 47.0),
 ('Armenia', 40.0, 45.0),
 ('Malta', 35.8333, 14.5833),
 ('India', 20.0, 77.0),
 ('Latvia', 57.0, 25

In [22]:
source={}
country_source=[]
country_name =[]
for row in country_informaiton:
    country = row.country
    lat = row.lat
    lng = row.lng
    
    source = {
        'country': country,
        'lat': lat,
        'lng': lng
    }
    country_source.append(source)
    country_name.append(country)
country = {
    'name': country_name,
    'metadata': country_source
}
country

{'name': ['Tonga',
  'Azerbaijan',
  'Mozambique',
  'Kiribati',
  'Panama',
  'French Polynesia',
  'American Samoa',
  'Marshall Islands',
  'Tanzania, United Republic of',
  'Italy',
  'San Marino',
  'El Salvador',
  'Antigua and Barbuda',
  'Dominican Republic',
  'Saint Pierre and Miquelon',
  'Jamaica',
  'Egypt',
  'Suriname',
  'Bolivia',
  'United States',
  'French Southern Territories',
  'Cook Islands',
  'Ukraine',
  'Jordan',
  'Poland',
  'Anguilla',
  'Madagascar',
  'Armenia',
  'Malta',
  'India',
  'Latvia',
  'Montserrat',
  'Eritrea',
  'Netherlands',
  'Ghana',
  'Morocco',
  'Bouvet Island',
  'Holy See (Vatican City State)',
  'Equatorial Guinea',
  'Luxembourg',
  'Liberia',
  'Cambodia',
  'Belize',
  'Hong Kong',
  'Guernsey',
  'Turkey',
  'Sri Lanka',
  'Burundi',
  'British Indian Ocean Territory',
  'Trinidad and Tobago',
  'Sweden',
  'Andorra',
  'South Korea',
  'Zimbabwe',
  'Vanuatu',
  'Pitcairn',
  'Botswana',
  'Turks and Caicos Islands',
  'Aust

In [None]:
# shortest coding
country = {
    'name': [row['country'] for row in country_information],
    'metadata': [{'country': row['country'], 'lat': row['lat'], 'lng': row['lng']} for row in country_information]
}

Top countries with the highest concentartion of PM25, PM10, and NO2 in the air quality

In [9]:
Ambient_Air_Quality_Data = pd.read_sql('select * from Ambient_Air_Quality_Data', connection)
Ambient_Air_Quality_Data

Unnamed: 0,aq_id,region,iso3,country,city,year,pm25,pm10,no2
0,A1,Eastern Mediterranean Region,AFG,Afghanistan,Kabul,2019,119.77,0.00,0.00
1,A2,European Region,ALB,Albania,Durres,2015,0.00,17.65,26.63
2,A3,European Region,ALB,Albania,Durres,2016,14.32,24.56,24.78
3,A4,European Region,ALB,Albania,Elbasan,2015,0.00,0.00,23.96
4,A5,European Region,ALB,Albania,Elbasan,2016,0.00,0.00,26.26
...,...,...,...,...,...,...,...,...,...
32186,A32187,African Region,ZAF,South Africa,West Coast,2015,7.47,24.64,7.64
32187,A32188,African Region,ZAF,South Africa,West Coast,2016,8.42,33.28,7.27
32188,A32189,African Region,ZAF,South Africa,West Coast,2017,6.83,20.49,8.72
32189,A32190,African Region,ZAF,South Africa,West Coast,2018,6.10,17.99,7.15


In [10]:
unique_regin=Ambient_Air_Quality_Data["region"].unique()
unique_regin

array(['Eastern Mediterranean Region', 'European Region',
       'Region of the Americas', 'Western Pacific Region',
       'South East Asia Region', 'African Region', '0'], dtype=object)

In [11]:
# Define the date range for the past 10 years
current_year = 2019
past_years = 10
start_year = current_year - past_years

# Create the SQLAlchemy query
result = (session.query(Ambient_air_quality_data.country, Ambient_air_quality_data.year,
            func.sum(Ambient_air_quality_data.pm25).label('pm25_sum'),
            func.sum(Ambient_air_quality_data.pm10).label('pm10_sum'),
            func.sum(Ambient_air_quality_data.no2).label('no2_sum'))
          .filter(Ambient_air_quality_data.year >= start_year)
          .group_by(Ambient_air_quality_data.country, Ambient_air_quality_data.year))

# # Convert the result to a pandas DataFrame
top_100_countries = pd.read_sql(result.statement, session.bind)

# # Print the result
top_100_countries.head()

print(result.all())

[('Portugal', 2015, 137.89000000000001, 789.71, 588.71), ('Costa Rica', 2013, 36.0, 183.0, 0.0), ('Italy', 2019, 3246.8400000000024, 8385.76000000001, 8191.510000000003), ('Switzerland', 2010, 0.0, 369.00999999999993, 439.12000000000006), ('Monaco', 2018, 0.0, 11.5, 34.25), ('Switzerland', 2016, 163.58, 1379.180000000001, 2242.9900000000002), ('India', 2017, 3655.8599999999997, 28117.05, 6510.510000000001), ('Indonesia', 2016, 127.95000000000002, 0.0, 0.0), ('North Macedonia', 2019, 0.0, 104.36, 139.36), ('Netherlands', 2013, 458.46999999999997, 1225.1899999999998, 1220.4799999999998), ('Argentina', 2018, 0.0, 24.4, 16.6), ('Lithuania', 2015, 74.88, 368.84, 204.76), ('Georgia', 2015, 24.5, 47.5, 0.0), ('Italy', 2015, 3381.850000000001, 8904.210000000001, 8643.319999999998), ('Albania', 2019, 10.32, 0.0, 0.0), ('Slovakia', 2017, 514.05, 716.55, 399.58), ('Philippines', 2012, 0.0, 225.5, 0.0), ('Ukraine', 2019, 23.36, 31.19, 0.0), ('Portugal', 2013, 44.68, 447.6500000000001, 297.45000000

In [12]:
result.all()[0][0]

'Portugal'

In [13]:
top_100_countries.head()

Unnamed: 0,country,year,pm25_sum,pm10_sum,no2_sum
0,Portugal,2015,137.89,789.71,588.71
1,Costa Rica,2013,36.0,183.0,0.0
2,Italy,2019,3246.84,8385.76,8191.51
3,Switzerland,2010,0.0,369.01,439.12
4,Monaco,2018,0.0,11.5,34.25


In [14]:
result_list = []
for row in result:
    country = row.country
    year = row.year
    pm25_sum = row.pm25_sum
    pm10_sum = row.pm10_sum
    no2_sum = row.no2_sum
    
    result_list.append({
        'country': country,
        'year': year,
        'pm25_sum': pm25_sum,
        'pm10_sum': pm10_sum,
        'no2_sum': no2_sum
    })

In [15]:
result_dict = {}
for row in result:
    country = row.country
    year = row.year
    pm25_sum = row.pm25_sum
    pm10_sum = row.pm10_sum
    no2_sum = row.no2_sum
    
    if country not in result_dict:
        result_dict[country] = {}
    
    result_dict[country][year] = {
        'pm25_sum': pm25_sum,
        'pm10_sum': pm10_sum,
        'no2_sum': no2_sum
    }
    

In [16]:
result_dict

{'Portugal': {2015: {'pm25_sum': 137.89000000000001,
   'pm10_sum': 789.71,
   'no2_sum': 588.71},
  2013: {'pm25_sum': 44.68,
   'pm10_sum': 447.6500000000001,
   'no2_sum': 297.45000000000005},
  2018: {'pm25_sum': 70.72000000000001,
   'pm10_sum': 771.5499999999996,
   'no2_sum': 669.3000000000002},
  2017: {'pm25_sum': 111.71000000000001,
   'pm10_sum': 811.1199999999999,
   'no2_sum': 529.98},
  2019: {'pm25_sum': 91.65, 'pm10_sum': 644.81, 'no2_sum': 634.62},
  2010: {'pm25_sum': 120.69000000000001,
   'pm10_sum': 1039.2,
   'no2_sum': 983.2799999999999},
  2014: {'pm25_sum': 99.06, 'pm10_sum': 604.97, 'no2_sum': 454.51},
  2016: {'pm25_sum': 93.57, 'pm10_sum': 683.4100000000002, 'no2_sum': 515.05}},
 'Costa Rica': {2013: {'pm25_sum': 36.0, 'pm10_sum': 183.0, 'no2_sum': 0.0},
  2019: {'pm25_sum': 7.4, 'pm10_sum': 0.0, 'no2_sum': 0.0},
  2018: {'pm25_sum': 45.0, 'pm10_sum': 117.75, 'no2_sum': 40.71},
  2017: {'pm25_sum': 0.0, 'pm10_sum': 24.0, 'no2_sum': 0.0},
  2015: {'pm25_sum':

In [17]:
result_list

[{'country': 'Portugal',
  'year': 2015,
  'pm25_sum': 137.89000000000001,
  'pm10_sum': 789.71,
  'no2_sum': 588.71},
 {'country': 'Costa Rica',
  'year': 2013,
  'pm25_sum': 36.0,
  'pm10_sum': 183.0,
  'no2_sum': 0.0},
 {'country': 'Italy',
  'year': 2019,
  'pm25_sum': 3246.8400000000024,
  'pm10_sum': 8385.76000000001,
  'no2_sum': 8191.510000000003},
 {'country': 'Switzerland',
  'year': 2010,
  'pm25_sum': 0.0,
  'pm10_sum': 369.00999999999993,
  'no2_sum': 439.12000000000006},
 {'country': 'Monaco',
  'year': 2018,
  'pm25_sum': 0.0,
  'pm10_sum': 11.5,
  'no2_sum': 34.25},
 {'country': 'Switzerland',
  'year': 2016,
  'pm25_sum': 163.58,
  'pm10_sum': 1379.180000000001,
  'no2_sum': 2242.9900000000002},
 {'country': 'India',
  'year': 2017,
  'pm25_sum': 3655.8599999999997,
  'pm10_sum': 28117.05,
  'no2_sum': 6510.510000000001},
 {'country': 'Indonesia',
  'year': 2016,
  'pm25_sum': 127.95000000000002,
  'pm10_sum': 0.0,
  'no2_sum': 0.0},
 {'country': 'North Macedonia',
  '

Top 10 countries in 2019 with the highest concentartion of PM25, PM10, and NO2 in the air quality

In [18]:
# Define the date range for the year 2019
year = 2019

# Create the SQLAlchemy query
result = (session.query(Ambient_air_quality_data.country,
            func.sum(Ambient_air_quality_data.pm25).label('pm25_sum'),
            func.sum(Ambient_air_quality_data.pm10).label('pm10_sum'),
            func.sum(Ambient_air_quality_data.no2).label('no2_sum'))
          .filter(Ambient_air_quality_data.year == year)
          .group_by(Ambient_air_quality_data.country)
          .order_by((func.sum(Ambient_air_quality_data.pm25) +
                     func.sum(Ambient_air_quality_data.pm10) +
                     func.sum(Ambient_air_quality_data.no2)).desc())
          .limit(10))

# Convert the result to a pandas DataFrame
top_10_countries = pd.read_sql(result.statement, session.bind)

# Print the result
top_10_countries


Unnamed: 0,country,pm25_sum,pm10_sum,no2_sum
0,India,5231.42,32467.79,7350.89
1,China,37430.51,310.35,0.0
2,Italy,3246.84,8385.76,8191.51
3,Germany,1616.68,4109.01,7232.68
4,France,1280.87,4954.21,4964.15
5,Spain,843.15,4093.52,4120.49
6,Poland,1648.66,4983.4,1767.99
7,Turkey,962.13,4860.04,1997.51
8,Switzerland,580.2,1302.62,1968.48
9,United Kingdom,585.12,898.41,2319.16


# Outdoor Pollution Rates by Ages


In [19]:
Outdoor_Air_Death_Rates_by_Age = pd.read_sql('select * from Outdoor_Air_Death_Rates_by_Age', connection)
Outdoor_Air_Death_Rates_by_Age

Unnamed: 0,oad_id,country,iso3,year,under_5,age_5_to_14_years,age_70plus_years,age_15_to_49_years,age_50_to_69_years
0,DA1,Afghanistan,AFG,1990,53.513443,0.772926,258.285326,5.001706,73.454321
1,DA2,Afghanistan,AFG,1991,48.115794,0.781314,252.993731,4.524409,72.133861
2,DA3,Afghanistan,AFG,1992,44.290436,0.802953,248.153433,4.018610,70.967892
3,DA4,Afghanistan,AFG,1993,43.980623,0.811577,247.668589,3.830266,71.272964
4,DA5,Afghanistan,AFG,1994,43.744352,0.790209,250.505987,3.863836,72.554477
...,...,...,...,...,...,...,...,...,...
6835,DA6836,Zimbabwe,ZWE,2015,40.182696,0.463899,301.418735,4.219640,80.215731
6836,DA6837,Zimbabwe,ZWE,2016,38.165762,0.476214,291.492273,4.130005,77.641537
6837,DA6838,Zimbabwe,ZWE,2017,35.788257,0.467371,280.556991,3.978193,74.446368
6838,DA6839,Zimbabwe,ZWE,2018,34.269344,0.459083,273.146813,3.912856,72.576652


Top 10 countries with the highest death rates in 2019

In [20]:
# Query the Outdoor_air_death_rates_by_age table to get the top 10 countries with highest death rates in 2019 for the specified age groups
age_pollution_results = session.query(
    Outdoor_air_death_rates_by_age.country,
    Outdoor_air_death_rates_by_age.year,
    func.sum(Outdoor_air_death_rates_by_age.under_5).label('under_5'),
    func.sum(Outdoor_air_death_rates_by_age.age_5_to_14_years).label('age_5_to_14_years'),
    func.sum(Outdoor_air_death_rates_by_age.age_70plus_years).label('age_70plus_years'),
    func.sum(Outdoor_air_death_rates_by_age.age_15_to_49_years).label('age_15_to_49_years'),
    func.sum(Outdoor_air_death_rates_by_age.age_50_to_69_years).label('age_50_to_69_years')
).filter(
    Outdoor_air_death_rates_by_age.year >= 2015
).group_by(
    Outdoor_air_death_rates_by_age.country,
     Outdoor_air_death_rates_by_age.year
).order_by(
    func.sum(Outdoor_air_death_rates_by_age.under_5).desc(),
    func.sum(Outdoor_air_death_rates_by_age.age_5_to_14_years).desc(),
    func.sum(Outdoor_air_death_rates_by_age.age_70plus_years).desc(),
    func.sum(Outdoor_air_death_rates_by_age.age_15_to_49_years).desc(),
    func.sum(Outdoor_air_death_rates_by_age.age_50_to_69_years).desc()
).all()

# Convert the query results to a pandas DataFrame
df = pd.DataFrame(age_pollution_results, columns=[
    'country',"year", 'under_5', 'age_5_to_14_years', 'age_70plus_years', 'age_15_to_49_years', 'age_50_to_69_years'
])

# Display the DataFrame
age_pollution_results


[('Pakistan', 2015, 110.2231312, 1.189375447, 837.0853908, 12.70312707, 186.0205775),
 ('Nigeria', 2015, 108.8025172, 0.682083492, 525.5728484, 4.181871736, 76.24315599),
 ('Pakistan', 2016, 107.7449883, 1.176999801, 837.5480162, 12.73433672, 185.537523),
 ('Nigeria', 2016, 106.6380387, 0.66731762, 519.2813726, 4.134090625, 75.59376248),
 ('Pakistan', 2017, 104.0637316, 1.137575071, 836.6982715, 12.714846, 184.4133503),
 ('Pakistan', 2019, 101.8945619, 1.084323456, 864.855474, 13.27606122, 190.3770877),
 ('Nigeria', 2017, 100.7980145, 0.635481052, 520.15049, 4.05725005, 74.45793407),
 ('Pakistan', 2018, 99.99006297, 1.090732787, 841.6443882, 12.86680553, 185.6011983),
 ('Nigeria', 2019, 98.99414511, 0.610761588, 555.7349088, 4.124566886, 77.07022459),
 ('Nigeria', 2018, 98.37515337, 0.616614743, 533.1902365, 4.038902674, 74.73343144),
 ('India', 2015, 84.63922532, 0.867259358, 965.1944631, 11.37680241, 199.7637799),
 ('South Asia (WB)', 2015, 83.23603143, 0.925755384, 902.9575343, 10.9

In [21]:
age_pollution_results_dict = {}
for row in age_pollution_results:
    country = row.country
    year = row.year
    under5 = row.under_5
    age5to14 = row.age_5_to_14_years
    age15to49 = row.age_15_to_49_years
    age50to69 = row.age_50_to_69_years
    age70plus = row.age_70plus_years 
    
    if country not in age_pollution_results_dict:
        age_pollution_results_dict[country] = {}
    
    age_pollution_results_dict[country][year] = {
        'under5': under5,
        'age5to14': age5to14,
        'age15to49': age15to49,
        'age50to69': age50to69,
        'age70plus': age70plus
    }
age_pollution_results_dict

{'Pakistan': {2015: {'under5': 110.2231312,
   'age5to14': 1.189375447,
   'age15to49': 12.70312707,
   'age50to69': 186.0205775,
   'age70plus': 837.0853908},
  2016: {'under5': 107.7449883,
   'age5to14': 1.176999801,
   'age15to49': 12.73433672,
   'age50to69': 185.537523,
   'age70plus': 837.5480162},
  2017: {'under5': 104.0637316,
   'age5to14': 1.137575071,
   'age15to49': 12.714846,
   'age50to69': 184.4133503,
   'age70plus': 836.6982715},
  2019: {'under5': 101.8945619,
   'age5to14': 1.084323456,
   'age15to49': 13.27606122,
   'age50to69': 190.3770877,
   'age70plus': 864.855474},
  2018: {'under5': 99.99006297,
   'age5to14': 1.090732787,
   'age15to49': 12.86680553,
   'age50to69': 185.6011983,
   'age70plus': 841.6443882}},
 'Nigeria': {2015: {'under5': 108.8025172,
   'age5to14': 0.682083492,
   'age15to49': 4.181871736,
   'age50to69': 76.24315599,
   'age70plus': 525.5728484},
  2016: {'under5': 106.6380387,
   'age5to14': 0.66731762,
   'age15to49': 4.134090625,
   '

Unnamed: 0,dap_id,country,iso3,year,household_air_pollution_deaths,ambient_particulate_matter_pollution_deaths,air_pollution_deaths,ambient_ozone_pollution_deaths
0,DAP1,Afghanistan,AFG,1990,370.050474,30.822693,402.175651,6.581093
1,DAP2,Afghanistan,AFG,1991,358.978418,29.826184,390.085258,6.267613
2,DAP3,Afghanistan,AFG,1992,352.766453,29.20203,383.201196,5.926444
3,DAP4,Afghanistan,AFG,1993,357.055923,29.429702,387.704919,5.860345
4,DAP5,Afghanistan,AFG,1994,362.970439,29.813259,394.022027,6.065343
5,DAP6,Afghanistan,AFG,1995,363.232965,29.787901,394.255345,6.271907
6,DAP7,Afghanistan,AFG,1996,364.608163,29.841739,395.6426,6.226651
7,DAP8,Afghanistan,AFG,1997,367.393777,29.981738,398.577797,6.281136
8,DAP9,Afghanistan,AFG,1998,369.813696,30.105435,401.162022,6.443587
9,DAP10,Afghanistan,AFG,1999,372.246992,30.250746,403.811977,6.84348


In [13]:
four_air_pollution = session.query(
    Death_rates_from_air_pollution.country,
    Death_rates_from_air_pollution.year,
    (Death_rates_from_air_pollution.household_air_pollution_deaths).label('HAP'),
    (Death_rates_from_air_pollution.ambient_particulate_matter_pollution_deaths).label('APM'),
    (Death_rates_from_air_pollution.air_pollution_deaths).label('AP'),
    (Death_rates_from_air_pollution.ambient_ozone_pollution_deaths).label('AOP'))\
    .filter(Death_rates_from_air_pollution.year >= 2015)

four_air_pollution.all()

[('Afghanistan', 2015, 208.0648033, 54.27253393, 264.0362314, 5.145500768),
 ('Afghanistan', 2016, 200.6279496, 53.67419963, 256.3074443, 5.435587081),
 ('Afghanistan', 2017, 194.3335609, 53.32235623, 249.7658605, 5.790926921),
 ('Afghanistan', 2018, 187.2769889, 54.43239826, 243.8629854, 5.112804737),
 ('Afghanistan', 2019, 179.4553489, 56.89574057, 238.3304042, 5.04938666),
 ('African Region (WHO)', 2015, 117.0798081, 45.20457487, 163.1505625, 1.925257034),
 ('African Region (WHO)', 2016, 113.6308641, 44.57695891, 159.082762, 1.802128365),
 ('African Region (WHO)', 2017, 110.3497462, 43.84418493, 155.063919, 1.817639088),
 ('African Region (WHO)', 2018, 107.0841608, 43.49840674, 151.6418913, 2.127564093),
 ('African Region (WHO)', 2019, 103.3387132, 43.87136477, 148.2782521, 2.220023162),
 ('Albania', 2015, 21.2667667, 38.83486147, 60.79822497, 0.871049562),
 ('Albania', 2016, 20.19794403, 38.31610978, 59.1432911, 0.781784491),
 ('Albania', 2017, 19.23666919, 37.8349009, 57.62991018,

In [14]:
four_air_pollution_dict = {}
for row in four_air_pollution:
    country = row.country
    year = row.year
    HAP = row.HAP
    APM = row.APM
    AP = row.AP
    AOP = row.AOP
    
    if country not in four_air_pollution_dict:
        four_air_pollution_dict[country] = {}
    
    four_air_pollution_dict[country][year] = {
        'HAP': HAP,
        'APM': APM,
        'AP': AP,
        'AOP': AOP,
    }
four_air_pollution_dict

{'Afghanistan': {2015: {'HAP': 208.0648033,
   'APM': 54.27253393,
   'AP': 264.0362314,
   'AOP': 5.145500768},
  2016: {'HAP': 200.6279496,
   'APM': 53.67419963,
   'AP': 256.3074443,
   'AOP': 5.435587081},
  2017: {'HAP': 194.3335609,
   'APM': 53.32235623,
   'AP': 249.7658605,
   'AOP': 5.790926921},
  2018: {'HAP': 187.2769889,
   'APM': 54.43239826,
   'AP': 243.8629854,
   'AOP': 5.112804737},
  2019: {'HAP': 179.4553489,
   'APM': 56.89574057,
   'AP': 238.3304042,
   'AOP': 5.04938666}},
 'African Region (WHO)': {2015: {'HAP': 117.0798081,
   'APM': 45.20457487,
   'AP': 163.1505625,
   'AOP': 1.925257034},
  2016: {'HAP': 113.6308641,
   'APM': 44.57695891,
   'AP': 159.082762,
   'AOP': 1.802128365},
  2017: {'HAP': 110.3497462,
   'APM': 43.84418493,
   'AP': 155.063919,
   'AOP': 1.817639088},
  2018: {'HAP': 107.0841608,
   'APM': 43.49840674,
   'AP': 151.6418913,
   'AOP': 2.127564093},
  2019: {'HAP': 103.3387132,
   'APM': 43.87136477,
   'AP': 148.2782521,
   'AOP

Top 10 countries in year 2019 who have the highest value of DALYs dalys_air_pollution,
dalys_household_air_pollution_from_solid_fuels & dalys_particulate_matter_pollution

In [16]:
Disease_Burden_by_Risk_Factor = pd.read_sql('select * from Disease_Burden_by_Risk_Factor', connection)
Disease_Burden_by_Risk_Factor

Unnamed: 0,dalys_id,country,iso3,year,dalys_low_physical_activity,dalys_non_exclusive_breastfeeding,dalys_air_pollution,dalys_child_wasting,dalys_high_systolic_bp,dalys_high_fasting_glucose,...,dalys_diet_low_in_fruits,dalys_diet_high_in_sodium,dalys_drug_use,dalys_household_air_pollution_from_solid_fuels,dalys_high_ldl_cholesterol,dalys_iron_deficiency,dalys_zinc_deficiency,dalys_smoking,dalys_vitamina_deficiency,dalys_particulate_matter_pollution
0,DBR1,Afghanistan,AFG,1990,61720.058220,197049.34320,1.986290e+06,1.708694e+06,663575.4691,310177.8242,...,94406.71757,28130.50339,18453.02800,1.841529e+06,368120.25090,76413.87330,2126.187912,146352.7718,184149.10080,143037.5420
1,DBR2,Afghanistan,AFG,1991,62191.602520,222485.78620,2.069430e+06,1.779058e+06,670934.5694,320839.9349,...,96652.39774,28464.36402,20531.09958,1.919544e+06,372702.68500,83459.00932,2741.145867,148548.4481,188899.76640,148112.8005
2,DBR3,Afghanistan,AFG,1992,63325.234390,271585.19440,2.298508e+06,2.005481e+06,685869.8593,335451.5776,...,100583.27850,29182.42948,24164.95588,2.133595e+06,383044.26640,95694.18445,3732.054083,152365.4702,194698.04870,163113.8479
3,DBR4,Afghanistan,AFG,1993,64873.624040,331279.27820,2.555748e+06,2.366581e+06,705695.9436,351943.2439,...,105191.80590,30120.10482,26870.38797,2.373299e+06,395832.84610,105355.90030,5601.761233,157201.5741,214913.90300,180597.5405
4,DBR5,Afghanistan,AFG,1994,66452.067740,340745.13300,2.707120e+06,2.559192e+06,725499.9783,367981.2854,...,109624.02640,31057.42686,28280.62748,2.514445e+06,407919.38880,110864.38390,6591.110924,162079.2068,246066.04090,190782.7127
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6835,DBR6836,Zimbabwe,ZWE,2015,7196.883376,65671.53411,6.534109e+05,4.945638e+05,288229.1347,261661.6027,...,53594.84841,29747.99776,67068.71339,5.201553e+05,87253.13887,76773.61038,496.813378,254297.1727,19442.22770,132720.8334
6836,DBR6837,Zimbabwe,ZWE,2016,7424.673391,62675.10862,6.419202e+05,4.892715e+05,293491.9943,267324.8710,...,54674.74567,30316.60498,66051.17202,5.121190e+05,89293.17103,76749.55229,498.188006,257688.5603,15954.19609,128623.7526
6837,DBR6838,Zimbabwe,ZWE,2017,7655.216366,60760.78056,6.253864e+05,4.729722e+05,297975.1709,271637.4149,...,55585.37552,30801.41865,64765.92231,5.016625e+05,91114.20768,76705.13473,491.272163,260584.2492,14143.22946,122945.0690
6838,DBR6839,Zimbabwe,ZWE,2018,7862.559712,57715.00502,6.095453e+05,4.531539e+05,303252.9954,276985.7616,...,56701.62029,31371.98089,63150.79984,4.889329e+05,93196.02857,76782.61720,455.911840,263968.0567,14170.80801,120112.4537


In [17]:
# Query to filter the top 10 countries in year 2019 who have the highest value of DALYs
top_countries = session.query(Disease_burden_by_risk_factor.country,
                              Disease_burden_by_risk_factor.dalys_air_pollution,
                              Disease_burden_by_risk_factor.dalys_household_air_pollution_from_solid_fuels,
                              Disease_burden_by_risk_factor.dalys_particulate_matter_pollution)\
                        .filter(Disease_burden_by_risk_factor.year>=2015)\
                        .order_by(Disease_burden_by_risk_factor.dalys_id.desc())\
                        .limit(10)\
                        .all()

# Convert the query result to a pandas dataframe
df = pd.DataFrame(top_countries, columns=['Country', 'DALYs_Air_Pollution', 'DALYs_Household_Air_Pollution_from_Solid_Fuels', 'DALYs_Particulate_Matter_Pollution'])

df



Unnamed: 0,Country,DALYs_Air_Pollution,DALYs_Household_Air_Pollution_from_Solid_Fuels,DALYs_Particulate_Matter_Pollution
0,Cameroon,1128638.0,624292.5875,501804.3281
1,Cameroon,1132256.0,653958.7513,474727.3575
2,Cameroon,1147616.0,681523.3147,464176.2488
3,Cameroon,1186180.0,709824.5631,474073.9826
4,Cameroon,1202221.0,725129.3739,475343.555
5,Cambodia,603557.3,486219.3063,115671.7781
6,Cambodia,608844.1,497243.8969,109677.1579
7,Cambodia,615372.5,507584.0185,106753.9046
8,Cambodia,627907.4,519729.5218,107351.3713
9,Cambodia,640113.6,530768.8884,108090.2512


In [18]:
Number_of_Deaths_by_Risk_Factor = pd.read_sql('select * from Number_of_Deaths_by_Risk_Factor', connection)
Number_of_Deaths_by_Risk_Factor

Unnamed: 0,dbr_id,country,iso3,year,deaths_from_outdoor_air_pollution,deaths_from_high_systolic_blood_pressure,deaths_from_high_sodium_diet,deaths_from_low_whole_grains_diet,deaths_from_alcohol_use,deaths_from_low_fruits_diet,...,deaths_from_high_body_mass_index,deaths_from_unsafe_sanitation,deaths_from_no_access_to_handwashing_facility,deaths_from_drug_use,deaths_from_low_bone_mineral_density,deaths_from_vitamina_deficiency,deaths_from_child_stunting,deaths_from_discontinued_breastfeeding,deaths_from_non_exclusive_breastfeeding,deaths_from_iron_deficiency
0,NDB1,Afghanistan,AFG,1990,3169,25633,1045,7077,356,3185,...,9518,2798,4825,174,389,2016,7686,107,2216,564
1,NDB2,Afghanistan,AFG,1991,3222,25872,1055,7149,364,3248,...,9489,3254,5127,188,389,2056,7886,121,2501,611
2,NDB3,Afghanistan,AFG,1992,3395,26309,1075,7297,376,3351,...,9528,4042,5889,211,393,2100,8568,150,3053,700
3,NDB4,Afghanistan,AFG,1993,3623,26961,1103,7499,389,3480,...,9611,5392,7007,232,411,2316,9875,204,3726,773
4,NDB5,Afghanistan,AFG,1994,3788,27658,1134,7698,399,3610,...,9675,5418,7421,247,413,2665,11031,204,3833,812
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6835,NDB6836,Zimbabwe,ZWE,2015,2835,11483,1063,1354,4854,1820,...,5636,2879,4328,1068,407,138,730,21,734,282
6836,NDB6837,Zimbabwe,ZWE,2016,2781,11663,1082,1383,4915,1854,...,5849,2798,4295,1042,415,101,674,20,700,275
6837,NDB6838,Zimbabwe,ZWE,2017,2700,11819,1098,1409,4992,1883,...,6047,2744,4251,1007,424,82,629,19,678,267
6838,NDB6839,Zimbabwe,ZWE,2018,2669,12002,1117,1439,5044,1917,...,6248,2608,4153,969,434,85,607,17,644,261


Countries ranked by the highest count of mortality death related to pollution

In [19]:
AQ_Pollution_Mortality_Data = pd.read_sql('select * from AQ_Pollution_Mortality_Data', connection)
AQ_Pollution_Mortality_Data

Unnamed: 0,mor_id,country,iso3,death_rate_ranking,total_pollution_deaths,air_pollution_deaths,water_pollution_deaths,occupational_pollution_deaths,lead_deaths
0,M1,Afghanistan,AFG,30,44247,26054,8048,605,9539
1,M2,Albania,ALB,92,2269,1639,9,149,472
2,M3,Algeria,DZA,159,17940,13222,383,709,3626
3,M4,Andorra,AND,140,45,23,0,18,3
4,M5,Angola,AGO,49,30842,10728,18846,354,914
...,...,...,...,...,...,...,...,...,...
183,M184,Venezuela,VEN,155,14871,9197,983,1134,3557
184,M185,Vietnam,VNM,103,71365,50232,3097,9809,8227
185,M186,Yemen,YEM,50,29038,12379,8087,550,8022
186,M187,Zambia,ZMB,58,16600,7106,8691,310,492


In [9]:
# Query to get the values of pollution deaths and their rank
pollution_deaths = session.query(
                        Aq_pollution_mortality_data.country,
                        (Aq_pollution_mortality_data.total_pollution_deaths).label("TP"),
                        func.rank().over(order_by=Aq_pollution_mortality_data.total_pollution_deaths.desc()).label('TP_Rank'),
                        (Aq_pollution_mortality_data.air_pollution_deaths).label("AP"),
                        func.rank().over(order_by=Aq_pollution_mortality_data.air_pollution_deaths.desc()).label('AP_Rank'),
                        (Aq_pollution_mortality_data.water_pollution_deaths).label("WP"),
                        func.rank().over(order_by=Aq_pollution_mortality_data.water_pollution_deaths.desc()).label('WP_Rank'),
                        (Aq_pollution_mortality_data.occupational_pollution_deaths).label("OP"),
                        func.rank().over(order_by=Aq_pollution_mortality_data.occupational_pollution_deaths.desc()).label('OP_Rank'),
                        (Aq_pollution_mortality_data.lead_deaths).label("LP"),
                        func.rank().over(order_by=Aq_pollution_mortality_data.lead_deaths.desc()).label('LP_Rank')
                    )\
                    .all()

# Convert the query result to a pandas dataframe
df = pd.DataFrame(pollution_deaths, columns=['Country', 'Total_Pollution_Deaths', 'Total_Pollution_Deaths_Rank', 'Air_Pollution_Deaths', 'Air_Pollution_Deaths_Rank', 'Water_Pollution_Deaths', 'Water_Pollution_Deaths_Rank', 'Occupational_Pollution_Deaths', 'Occupational_Pollution_Deaths_Rank', 'Lead_Deaths', 'Lead_Deaths_Rank'])

df.head(10)


Unnamed: 0,Country,Total_Pollution_Deaths,Total_Pollution_Deaths_Rank,Air_Pollution_Deaths,Air_Pollution_Deaths_Rank,Water_Pollution_Deaths,Water_Pollution_Deaths_Rank,Occupational_Pollution_Deaths,Occupational_Pollution_Deaths_Rank,Lead_Deaths,Lead_Deaths_Rank
0,India,2326771,1,1240529,2,698597,1,153528,2,234117,2
1,China,1865566,2,1242987,1,9585,25,255580,1,357414,1
2,Nigeria,279318,3,114115,6,159777,2,2088,36,3338,35
3,Indonesia,232974,4,123753,4,60040,5,16331,9,32850,4
4,Pakistan,223836,5,128005,3,60213,4,8787,15,26831,6
5,Bangladesh,207922,6,122734,5,33583,7,13558,11,38048,3
6,United States of America,196930,7,107507,7,1628,59,59536,3,28260,5
7,Russian Federation,118687,8,99392,8,685,72,9634,14,8976,13
8,Ethiopia,110787,9,40614,19,63454,3,1931,38,4788,24
9,Brazil,109438,10,66245,9,7152,35,14462,10,21580,8


In [10]:
pollution_deaths_dict = {}
for row in pollution_deaths:
    country = row.country
    TP = row.TP
    TP_Rank = row.TP_Rank
    AP = row.AP
    AP_Rank = row.AP_Rank
    WP = row.WP
    WP_Rank = row.WP_Rank
    OP = row.OP
    OP_Rank = row.OP_Rank
    LP = row.LP
    LP_Rank = row.LP_Rank
    
    if country not in pollution_deaths_dict:
        pollution_deaths_dict[country] = {}
    
    pollution_deaths_dict[country] = {
        'TP': TP,
        'TP_Rank': TP_Rank,
        'AP': AP,
        'AP_Rank': AP_Rank,
        'WP': WP,
        'WP_Rank': WP_Rank,
        'OP': OP,
        'OP_Rank': OP_Rank,
        'LP': LP,
        'LP_Rank': LP_Rank,
    }
pollution_deaths_dict

{'India': {'TP': 2326771,
  'TP_Rank': 1,
  'AP': 1240529,
  'AP_Rank': 2,
  'WP': 698597,
  'WP_Rank': 1,
  'OP': 153528,
  'OP_Rank': 2,
  'LP': 234117,
  'LP_Rank': 2},
 'China': {'TP': 1865566,
  'TP_Rank': 2,
  'AP': 1242987,
  'AP_Rank': 1,
  'WP': 9585,
  'WP_Rank': 25,
  'OP': 255580,
  'OP_Rank': 1,
  'LP': 357414,
  'LP_Rank': 1},
 'Nigeria': {'TP': 279318,
  'TP_Rank': 3,
  'AP': 114115,
  'AP_Rank': 6,
  'WP': 159777,
  'WP_Rank': 2,
  'OP': 2088,
  'OP_Rank': 36,
  'LP': 3338,
  'LP_Rank': 35},
 'Indonesia': {'TP': 232974,
  'TP_Rank': 4,
  'AP': 123753,
  'AP_Rank': 4,
  'WP': 60040,
  'WP_Rank': 5,
  'OP': 16331,
  'OP_Rank': 9,
  'LP': 32850,
  'LP_Rank': 4},
 'Pakistan': {'TP': 223836,
  'TP_Rank': 5,
  'AP': 128005,
  'AP_Rank': 3,
  'WP': 60213,
  'WP_Rank': 4,
  'OP': 8787,
  'OP_Rank': 15,
  'LP': 26831,
  'LP_Rank': 6},
 'Bangladesh': {'TP': 207922,
  'TP_Rank': 6,
  'AP': 122734,
  'AP_Rank': 5,
  'WP': 33583,
  'WP_Rank': 7,
  'OP': 13558,
  'OP_Rank': 11,
  'LP