In [7]:
import psycopg2
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.sql import select
import pandas as pd

In [41]:
con = create_engine('postgresql://postgres@localhost:5432/int_join_dc')

In [42]:
con.table_names()

['populations', 'countries', 'economies', 'languages', 'cities']

In [43]:
countries = pd.read_csv('countries2/countries.csv', sep=',', index_col='code')
countries.to_sql('countries', con=con, if_exists='replace')
populations = pd.read_csv('countries2/populations.csv', sep=',', index_col='pop_id')
populations.to_sql('populations', con=con, if_exists='replace')
economies = pd.read_csv('countries2/economies.csv', sep=',', index_col='econ_id')
economies.to_sql('economies', con=con, if_exists='replace')
languages = pd.read_csv('countries2/languages.csv', sep=',', index_col='lang_id')
languages.to_sql('languages', con=con, if_exists='replace')
cities = pd.read_csv('countries2/cities.csv', sep=',', index_col='name')
cities.to_sql('cities', con=con, if_exists='replace')

In [44]:
sql_result = pd.read_sql("\
SELECT c.code, local_name, region, e.year, fertility_rate, unemployment_rate \
FROM countries AS c \
INNER JOIN populations AS p \
ON c.code = p.country_code \
INNER JOIN economies AS e \
ON c.code = e.code AND e.year = p.year; \
", con)
sql_result.head()

Unnamed: 0,code,local_name,region,year,fertility_rate,unemployment_rate
0,AFG,Afganistan/Afqanestan,Southern and Central Asia,2010,5.746,
1,AFG,Afganistan/Afqanestan,Southern and Central Asia,2015,4.653,
2,AGO,Angola,Central Africa,2010,6.416,
3,AGO,Angola,Central Africa,2015,5.996,
4,ALB,Shqiperia,Southern Europe,2010,1.663,14.0


In [45]:
sql_result = pd.read_sql("\
SELECT c.code AS country_code, country_name, year, inflation_rate \
FROM countries AS c \
INNER JOIN economies AS e \
ON e.code = c.code; \
", con)
sql_result.head()

Unnamed: 0,country_code,country_name,year,inflation_rate
0,AFG,Afghanistan,2010,2.179
1,AFG,Afghanistan,2015,-1.549
2,AGO,Angola,2010,14.48
3,AGO,Angola,2015,10.287
4,ALB,Albania,2010,3.605


In [46]:
sql_result = pd.read_sql("\
SELECT c.country_name AS country, continent, l.name AS language, official \
FROM countries AS c \
INNER JOIN languages AS l \
USING(code); \
", con)
sql_result.head()

Unnamed: 0,country,continent,language,official
0,Afghanistan,Asia,Dari,True
1,Afghanistan,Asia,Pashto,True
2,Afghanistan,Asia,Turkic,False
3,Afghanistan,Asia,Other,False
4,Albania,Europe,Albanian,True


In [47]:
sql_result = pd.read_sql("\
SELECT p1.country_code, \
       p1.size AS size2010, \
       p2.size AS size2015 \
FROM populations AS p1 \
INNER JOIN populations AS p2 \
ON  p1.country_code = p2.country_code; \
", con)
sql_result.head()

Unnamed: 0,country_code,size2010,size2015
0,ABW,101597.0,103889.0
1,ABW,101597.0,101597.0
2,ABW,103889.0,103889.0
3,ABW,103889.0,101597.0
4,AFG,27962207.0,32526562.0


In [48]:
sql_result = pd.read_sql("\
SELECT country_code, size, \
    CASE WHEN size > 50000000 THEN 'large' \
        WHEN size > 1000000 THEN 'medium' \
        ELSE 'small' END \
        AS popsize_group \
FROM populations \
WHERE year = 2015; \
", con)
sql_result.head()

Unnamed: 0,country_code,size,popsize_group
0,ABW,103889.0,small
1,AFG,32526562.0,medium
2,AGO,25021974.0,medium
3,ALB,2889167.0,medium
4,AND,70473.0,small


In [49]:
sql_result = pd.read_sql("\
SELECT c.country_name AS country, local_name, l.name AS language, percent \
FROM countries AS c \
INNER JOIN languages AS l \
ON c.code = l.code \
ORDER BY country DESC; \
", con)
sql_result.head()

Unnamed: 0,country,local_name,language,percent
0,Zimbabwe,Zimbabwe,Shona,
1,Zimbabwe,Zimbabwe,Tonga,
2,Zimbabwe,Zimbabwe,Tswana,
3,Zimbabwe,Zimbabwe,Venda,
4,Zimbabwe,Zimbabwe,Xhosa,


In [52]:
sql_result = pd.read_sql("\
SELECT cities.name AS city, urbanarea_pop, countries.country_name AS country, \
       indep_year, languages.name AS language, percent \
FROM languages \
RIGHT JOIN countries \
ON countries.code = languages.code \
RIGHT JOIN cities \
ON cities.country_code = countries.code \
ORDER BY city, language; \
", con)
sql_result.head()

Unnamed: 0,city,urbanarea_pop,country,indep_year,language,percent
0,Abidjan,4765000,Cote d'Ivoire,1960.0,French,
1,Abidjan,4765000,Cote d'Ivoire,1960.0,Other,
2,Abu Dhabi,1145000,United Arab Emirates,1971.0,Arabic,
3,Abu Dhabi,1145000,United Arab Emirates,1971.0,English,
4,Abu Dhabi,1145000,United Arab Emirates,1971.0,Hindi,


In [53]:
sql_result = pd.read_sql("\
SELECT DISTINCT name \
FROM languages \
WHERE code IN ( \
    SELECT code \
    FROM countries \
    WHERE region = 'Middle East' \
) \
ORDER BY name \
", con)
sql_result.head()

Unnamed: 0,name
0,Arabic
1,Aramaic
2,Armenian
3,Azerbaijani
4,Azeri


In [54]:
sql_result = pd.read_sql("\
SELECT * \
FROM populations \
WHERE year = 2015 \
AND life_expectancy > 1.15 * ( \
    SELECT AVG(life_expectancy) \
    FROM populations \
    WHERE year = 2015 \
); \
", con)
sql_result.head()

Unnamed: 0,pop_id,country_code,year,fertility_rate,life_expectancy,size
0,21,AUS,2015,1.833,82.45122,23789752.0
1,376,CHE,2015,1.54,83.197561,8281430.0
2,356,ESP,2015,1.32,83.380488,46443994.0
3,134,FRA,2015,2.01,82.670732,66538391.0
4,170,HKG,2015,1.195,84.278049,7305700.0


In [56]:
sql_result = pd.read_sql("\
SELECT country_name AS country, \
  (SELECT COUNT(*) \
   FROM cities \
   WHERE countries.code = cities.country_code) AS cities_num \
FROM countries \
ORDER BY cities_num DESC, country \
LIMIT 9; \
", con)
sql_result.head()

Unnamed: 0,country,cities_num
0,China,36
1,India,18
2,Japan,11
3,Brazil,10
4,Pakistan,9


In [57]:
sql_result = pd.read_sql("\
SELECT local_name, subquery.lang_num \
FROM countries, \
(SELECT code, COUNT(*) lang_num \
FROM languages \
GROUP BY code) AS subquery \
WHERE countries.code = subquery.code \
ORDER BY lang_num DESC \
", con)
sql_result.head()

Unnamed: 0,local_name,lang_num
0,Zambia,19
1,YeItyop´iya,16
2,Zimbabwe,16
3,Bharat/India,14
4,Nepal,14


In [60]:
sql_result = pd.read_sql("\
SELECT country_name, continent, inflation_rate \
FROM countries \
INNER JOIN economies \
ON countries.code = economies.code \
WHERE year = 2015 \
    AND inflation_rate IN ( \
        SELECT MAX(inflation_rate) AS max_inf \
        FROM ( \
             SELECT country_name, continent, inflation_rate \
             FROM countries \
             INNER JOIN economies \
             ON countries.code = economies.code \
             WHERE year = 2015) AS subquery \
        GROUP BY continent); \
", con)
sql_result.head()

Unnamed: 0,country_name,continent,inflation_rate
0,Haiti,North America,7.524
1,Malawi,Africa,21.858
2,Nauru,Oceania,9.784
3,Ukraine,Europe,48.684
4,Venezuela,South America,121.738
