#### Filtering and analysing a summary statistic report

In [1]:
# Load the `sql` IPython extension so that the `%sql` / `%%sql` magics used in
# the following cells can execute SQL directly from this notebook.
# If this line fails, the package that provides the extension (typically
# ipython-sql -- confirm for your environment) is probably not installed in the
# kernel's environment. The connection cell that follows additionally needs the
# pymysql driver and a MySQL server listening on localhost:3306.

%load_ext sql

In [2]:
# Open a connection to the local MySQL server with the `%sql` line magic.
# The URL has the form  mysql+pymysql://user:password@host:port/database
# Replace the masked password with your own connection password and, if your
# database has a different name, replace `united_nations` as well.
# If you get an error here, check that the password and the database name are correct.
# NOTE(review): do not commit a real password in this cell; consider reading it
# from an environment variable or prompting for it instead.

%sql mysql+pymysql://root:********@localhost:3306/united_nations

'Connected: root@united_nations'

In [3]:
%%sql

SELECT *
FROM Access_to_Basic_Services
-- Quick look at the table layout. There is no ORDER BY, so which five rows
-- are returned is left to the server.
LIMIT 5;

 * mysql+pymysql://root:***@localhost:3306/united_nations
5 rows affected.


Region,Sub_region,Country_name,Time_period,Pct_managed_drinking_water_services,Pct_managed_sanitation_services,Est_population_in_millions,Est_gdp_in_billions,Land_area,Pct_unemployment
Central and Southern Asia,Central Asia,Kazakhstan,2015,94.67,98.0,17.542806,184.39,2699700.0,4.93
Central and Southern Asia,Central Asia,Kazakhstan,2016,94.67,98.0,17.794055,137.28,2699700.0,4.96
Central and Southern Asia,Central Asia,Kazakhstan,2017,95.0,98.0,18.037776,166.81,2699700.0,4.9
Central and Southern Asia,Central Asia,Kazakhstan,2018,95.0,98.0,18.276452,179.34,2699700.0,4.85
Central and Southern Asia,Central Asia,Kazakhstan,2019,95.0,98.0,18.513673,181.67,2699700.0,4.8


In [4]:
%%sql

SELECT
    Region,
    Sub_region,
    -- Spread of drinking-water coverage over every row (all countries, all years) of the group.
    MIN(Pct_managed_drinking_water_services) AS min_Pct_managed_drinking_water_services,
    MAX(Pct_managed_drinking_water_services) AS max_Pct_managed_drinking_water_services,
    AVG(Pct_managed_drinking_water_services) AS avg_Pct_managed_drinking_water_services,
    -- DISTINCT is needed because a country spans several rows (the preview shows one per Time_period).
    COUNT(DISTINCT Country_name) AS Number_of_countries,
    -- No year filter in this query, so the total adds up the GDP of every year on record.
    SUM(Est_gdp_in_billions) AS EST_total_gdp_in_billions
FROM
    united_nations.Access_to_Basic_Services
GROUP BY
    Region,
    Sub_region
ORDER BY
    EST_total_gdp_in_billions ASC;

 * mysql+pymysql://root:***@localhost:3306/united_nations
18 rows affected.


Region,Sub_region,min_Pct_managed_drinking_water_services,max_Pct_managed_drinking_water_services,avg_Pct_managed_drinking_water_services,Number_of_countries,EST_total_gdp_in_billions
Oceania,Polynesia,91.0,100.0,98.506481,9,46.76
Oceania,Micronesia,73.67,100.0,93.414634,7,46.92
Oceania,Melanesia,53.67,99.0,82.700333,5,237.87
Sub-Saharan Africa,Middle Africa,38.33,77.33,58.967059,9,837.34
Central and Southern Asia,Central Asia,80.33,100.0,93.144667,5,1670.32
Sub-Saharan Africa,Eastern Africa,46.33,100.0,70.128421,20,1944.61
Latin America and the Caribbean,Caribbean,64.0,100.0,96.005,27,2070.17
Sub-Saharan Africa,Southern Africa,73.33,92.0,82.645,5,2386.64
Northern Africa and Western Asia,Northern Africa,61.33,100.0,88.906111,6,2736.8
Sub-Saharan Africa,Western Africa,53.33,99.0,72.365686,17,3621.31


#### Filter for the year 2020.

In [5]:
%%sql

SELECT
    Region,
    Sub_region,
    -- Same summary statistics as the previous query, now computed on 2020 rows only.
    MIN(Pct_managed_drinking_water_services) AS min_Pct_managed_drinking_water_services,
    MAX(Pct_managed_drinking_water_services) AS max_Pct_managed_drinking_water_services,
    AVG(Pct_managed_drinking_water_services) AS avg_Pct_managed_drinking_water_services,
    COUNT(DISTINCT Country_name) AS Number_of_countries,
    SUM(Est_gdp_in_billions) AS EST_total_gdp_in_billions
FROM
    Access_to_Basic_Services
WHERE
    -- Restricting to a single year makes the GDP total a one-year figure per sub-region.
    Time_period = 2020
GROUP BY
    Region,
    Sub_region
ORDER BY
    EST_total_gdp_in_billions ASC;


 * mysql+pymysql://root:***@localhost:3306/united_nations
18 rows affected.


Region,Sub_region,min_Pct_managed_drinking_water_services,max_Pct_managed_drinking_water_services,avg_Pct_managed_drinking_water_services,Number_of_countries,EST_total_gdp_in_billions
Oceania,Micronesia,77.0,100.0,94.5,6,6.67
Oceania,Polynesia,92.0,100.0,98.555556,9,7.84
Oceania,Melanesia,56.67,99.0,82.934,5,40.21
Sub-Saharan Africa,Middle Africa,38.33,77.33,59.3325,8,123.22
Central and Southern Asia,Central Asia,85.0,100.0,94.134,5,239.1
Latin America and the Caribbean,Caribbean,65.0,100.0,95.910667,15,343.26
Sub-Saharan Africa,Eastern Africa,48.33,100.0,70.018824,17,359.1
Sub-Saharan Africa,Southern Africa,76.33,92.0,83.668,5,369.34
Northern Africa and Western Asia,Northern Africa,62.33,100.0,90.053333,6,386.29
Sub-Saharan Africa,Western Africa,53.33,99.0,73.607059,17,631.91


#### Focusing on countries where the percentage of managed drinking water services is below 60%.

In [6]:
%%sql

SELECT
    Region,
    Sub_region,
    -- Every aggregate in this query describes only the rows that pass the WHERE clause,
    -- that is, 2020 rows with drinking-water coverage below 60 percent.
    MIN(Pct_managed_drinking_water_services) AS min_Pct_managed_drinking_water_services,
    MAX(Pct_managed_drinking_water_services) AS max_Pct_managed_drinking_water_services,
    AVG(Pct_managed_drinking_water_services) AS avg_Pct_managed_drinking_water_services,
    COUNT(DISTINCT Country_name) AS Number_of_countries,
    SUM(Est_gdp_in_billions) AS EST_total_gdp_in_billions
FROM
    Access_to_Basic_Services
WHERE
    -- Row-level filters run before grouping, so rows at or above 60 never reach the aggregates.
    Time_period = 2020
    AND Pct_managed_drinking_water_services < 60
GROUP BY
    Region,
    Sub_region
ORDER BY
    EST_total_gdp_in_billions ASC;

 * mysql+pymysql://root:***@localhost:3306/united_nations
4 rows affected.


Region,Sub_region,min_Pct_managed_drinking_water_services,max_Pct_managed_drinking_water_services,avg_Pct_managed_drinking_water_services,Number_of_countries,EST_total_gdp_in_billions
Oceania,Melanesia,56.67,56.67,56.67,1,23.85
Sub-Saharan Africa,Western Africa,53.33,57.33,55.33,2,31.67
Sub-Saharan Africa,Middle Africa,38.33,52.67,47.75,4,66.67
Sub-Saharan Africa,Eastern Africa,48.33,58.0,54.9975,4,127.59


#### Filter for the regions and sub-regions that have fewer than four countries below the 60% threshold in 2020.

In [7]:
%%sql

SELECT
    Region,
    Sub_region,
    -- Aggregates cover only 2020 rows with drinking-water coverage below 60 percent.
    MIN(Pct_managed_drinking_water_services) AS min_Pct_managed_drinking_water_services,
    MAX(Pct_managed_drinking_water_services) AS max_Pct_managed_drinking_water_services,
    AVG(Pct_managed_drinking_water_services) AS avg_Pct_managed_drinking_water_services,
    COUNT(DISTINCT Country_name) AS Number_of_countries,
    SUM(Est_gdp_in_billions) AS EST_total_gdp_in_billions
FROM
    Access_to_Basic_Services
WHERE
    Time_period = 2020
    AND Pct_managed_drinking_water_services < 60
GROUP BY
    Region,
    Sub_region
HAVING
    -- Group-level filter, evaluated after aggregation. The count includes only the
    -- countries that passed the WHERE clause, not every country of the sub-region.
    COUNT(DISTINCT Country_name) < 4
ORDER BY
    EST_total_gdp_in_billions ASC;

 * mysql+pymysql://root:***@localhost:3306/united_nations
2 rows affected.


Region,Sub_region,min_Pct_managed_drinking_water_services,max_Pct_managed_drinking_water_services,avg_Pct_managed_drinking_water_services,Number_of_countries,EST_total_gdp_in_billions
Oceania,Melanesia,56.67,56.67,56.67,1,23.85
Sub-Saharan Africa,Western Africa,53.33,57.33,55.33,2,31.67
