In [1]:
import os
from getpass import getpass
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sqlalchemy import create_engine, inspect, text

In [2]:
# Read the global temperature records from the CSV shipped with the project.
csv_file = "./Resources/GlobalTemperatures.csv"
yearly_temps_df = pd.read_csv(filepath_or_buffer=csv_file)

# Preview the first five rows to check that the columns loaded as expected.
yearly_temps_df.head(5)

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1750-01-01,3.034,3.574,,,,,,
1,1750-02-01,3.083,3.702,,,,,,
2,1750-03-01,5.626,3.076,,,,,,
3,1750-04-01,8.49,2.451,,,,,,
4,1750-05-01,11.573,2.072,,,,,,


In [3]:
# Narrow the records to our scope: derive the calendar year from dt, keep only
# rows from 1900 onward, and keep only the year plus the land-and-ocean average
# temperature (renamed to avg_temp).
#
# The steps are written as one non-mutating chain: nothing is assigned into,
# or renamed in place on, a filtered slice (which can raise
# SettingWithCopyWarning), and yearly_temps_df is only rebound once the whole
# chain has succeeded.
# NOTE: this cell consumes the 'dt' column, so re-running it requires
# re-running the load cell first.
first_year_in_scope = 1900
yearly_temps_df = (
    yearly_temps_df
    .assign(year=lambda frame: pd.to_datetime(frame['dt']).dt.year)
    .loc[
        lambda frame: frame['year'] >= first_year_in_scope,
        ['year', 'LandAndOceanAverageTemperature'],
    ]
    .rename(columns={"LandAndOceanAverageTemperature": "avg_temp"})
)
yearly_temps_df.head()

Unnamed: 0,year,avg_temp
1800,1900,13.142
1801,1900,13.777
1802,1900,14.4
1803,1900,15.17
1804,1900,15.955


In [4]:
# Collapse the per-row temperatures into one mean value per year.
grouped_temps = yearly_temps_df.groupby(by=['year'])
yearly_temps_avg = grouped_temps["avg_temp"].mean()

# Turn the year-indexed series into a two-column frame (year, avg_temp)
# with a fresh positional index, then preview it.
yearly_temps_cleaned = yearly_temps_avg.to_frame(name="avg_temp").reset_index()
yearly_temps_cleaned.head()

Unnamed: 0,year,avg_temp
0,1900,15.143917
1,1901,15.073333
2,1902,14.958333
3,1903,14.836583
4,1904,14.810417


In [5]:
# Connect to the local Postgres database (disaster_db) used by this notebook.
# The password is taken from the PGPASSWORD environment variable, with an
# interactive prompt as a fallback, so no credential is stored in the notebook.
# It is URL-escaped because characters such as '@' or ':' in a password would
# otherwise break the connection URL.
db_password = os.environ.get("PGPASSWORD") or getpass("Password for postgres@localhost: ")
rds_connection_string = f"postgres:{quote_plus(db_password)}@localhost:5432/disaster_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector provides the same list of table names.
inspect(engine).get_table_names()

['disasters', 'temps']

In [7]:
# Load the yearly averages into the existing 'temps' table.
# A plain append is not idempotent: running this cell twice would insert every
# year a second time (or fail on a key constraint). To make re-runs safe, the
# years about to be loaded are deleted first, and the delete and the insert
# share one transaction so a failure leaves the table unchanged.
years_to_load = [{"year": int(year)} for year in yearly_temps_cleaned['year'].unique()]
with engine.begin() as connection:
    if years_to_load:  # nothing to delete when the frame is empty
        connection.execute(text("DELETE FROM temps WHERE year = :year"), years_to_load)
    yearly_temps_cleaned.to_sql(name='temps', con=connection, if_exists='append', index=False)

In [8]:
# Retrieve the loaded rows from the database, warmest first, to verify the load.
temps_by_warmth = pd.read_sql_query(
    sql='select * from temps order by avg_temp desc',
    con=engine,
)
temps_by_warmth.head(n=10)

Unnamed: 0,year,avg_temp
0,2015,16.058583
1,2014,15.913
2,2010,15.8955
3,2005,15.87925
4,2013,15.854417
5,2002,15.829167
6,2007,15.827333
7,2009,15.827167
8,2003,15.826583
9,1998,15.826
