Build a graph from database data

In [None]:
import pandas as pd
from sqlalchemy import create_engine, text
from matplotlib import pyplot as plt

In [None]:
# etl: load the gapminder CSV and (re)create the `gapminder` table in Postgres

# NOTE(review): connection settings (including the password) are hard-coded;
# consider moving them to configuration outside the notebook.
pg_host = 'localhost'
pg_port = 5432
pg_user = 'postgres'
pg_pass = 'postgres'
pg_db = 'pgdb'

# Read the CSV before touching the database so a missing or malformed file
# does not leave an open connection behind.
etl = pd.read_csv('./data/csv/gapminder.csv')

engine = create_engine(f'postgresql+psycopg2://{pg_user}:{pg_pass}@{pg_host}:{pg_port}/{pg_db}')
pg_conn = engine.connect()

try:
    # index=False: the DataFrame index is only the CSV row number; storing it
    # would add a spurious `index` column that `select *` hands to later cells.
    etl.to_sql('gapminder', pg_conn, if_exists='replace', index=False)
    pg_conn.commit()
except Exception:
    # On failure nobody else will close the connection; on success it is
    # deliberately left open because the next cell reads through `pg_conn`.
    pg_conn.close()
    raise

In [None]:
# select data: read the whole `gapminder` table back into a DataFrame

try:
    data = pd.read_sql_query(sql=text('select * from gapminder'), con=pg_conn)
finally:
    # Release the connection even when the query fails.
    pg_conn.close()

print(data)

Analysis

In [None]:
# most recent year present in the data
latest = data['year'].max()
latest

In [None]:
# oldest year present in the data
earliest = data['year'].min()
earliest

In [None]:
# every distinct year that has observations
data['year'].unique()

In [None]:
# restrict the data to a single year (here: the most recent one)

year = latest
is_selected_year = data['year'] == year
sel_year = data.loc[is_selected_year]
sel_year

In [None]:
# Per-continent summaries for the selected year: one bar chart per indicator,
# each saved under ./results/ and then displayed.
continent = sel_year.groupby('continent')

# Select the column *before* aggregating. `continent.mean()` would try to
# average every column, including the string `country` column, which raises
# TypeError on pandas >= 2.0 (non-numeric columns are no longer dropped).
continent['lifeExp'].mean().plot(legend=True, title=f'Life Expectancy in {year}', kind='bar')
plt.savefig('./results/lifeExp.png')
plt.show()

# Bracket access: `pop` is also the name of a DataFrame method, so attribute
# access to this column is fragile.
continent['pop'].sum().plot(legend=True, title=f'Population in {year}', kind='bar')
plt.savefig('./results/population.png')
plt.show()

# NOTE(review): this adds up the per-capita GDP of each country in a continent,
# which is not the continent's GDP per capita; confirm that a sum (rather than
# a mean or a population-weighted mean) is what is intended.
continent['gdpPercap'].sum().plot(legend=True, title=f'GDP per Capita in {year}', kind='bar')
plt.savefig('./results/gdp.png')
plt.show()

In [None]:
# every distinct country name in the data
countries = data['country'].unique()
countries

Specific country: multiple indicators plotted together

In [None]:
# Plot life expectancy, population and GDP per capita of one country on a
# shared, unit-less axis by min-max scaling each series.
country_name = 'Afghanistan'
country = data[data.country == country_name]
if country.empty:
    # Fail early with a clear message instead of an IndexError at the title.
    raise ValueError(f'no rows found for country {country_name!r}')


def norm(x):
    """Min-max scale *x* so its minimum maps to 0 and its maximum to 1.

    A constant series (max == min) is returned as all zeros instead of the
    NaNs that 0/0 would produce, so its line is still drawn.
    """
    low = x.min()
    span = x.max() - low
    if span == 0:
        return x - low
    return (x - low) / span


plt.plot(country['year'], norm(country['lifeExp']), 'red', linestyle='-', label='life expectancy')
plt.plot(country['year'], norm(country['pop']), 'lime', linestyle='-', label='population')
plt.plot(country['year'], norm(country['gdpPercap']), 'black', linestyle='--', label='gdp per capita')
plt.legend(loc='best')
plt.title(f'{country_name}, {country.continent.unique()[0]}')
# The scaled values have no unit, so the y ticks are hidden.
plt.yticks([])

plt.show()


In [None]:
# Plot the three indicators of one country against year, each on its own
# y axis (one left, two right) so the raw values keep their scales.
country_name = 'Afghanistan'
country = data[data.country == country_name]
if country.empty:
    # Fail early with a clear message instead of an IndexError at the title.
    raise ValueError(f'no rows found for country {country_name!r}')

fig, ax1 = plt.subplots(1, 1)

# Left axis: life expectancy.
ax1.plot(country['year'], country['lifeExp'], 'red', linestyle='-', label='life expectancy')
ax1.set_xlabel('year')
ax1.set_ylabel('life expectancy', color='r')
ax1.tick_params('y', colors='r')

# First right axis: population, with its spine pushed outwards so it does not
# overlap the second right axis.
ax2 = ax1.twinx()
ax2.spines['right'].set_position(('axes', 1.15))
ax2.plot(country['year'], country['pop'], 'blue', linestyle=':', label='population')
# NOTE(review): the label says "(Mil)" but the column is plotted unscaled;
# confirm the unit of `pop` and divide by 1e6 if it holds raw head counts.
ax2.set_ylabel('population (Mil)', color='blue')
ax2.tick_params('y', colors='blue')

# Second right axis: GDP per capita.
ax3 = ax1.twinx()
ax3.plot(country['year'], country['gdpPercap'], 'black', linestyle='--', label='gdp per capita')
ax3.set_ylabel('gdp per capita', color='black')
ax3.tick_params('y', colors='black')

# Set the title before tight_layout() so the layout reserves room for it.
ax1.set_title(f'{country_name}, {country.continent.unique()[0]}')
fig.tight_layout()

plt.show()