In [42]:
# dependencies
import os
from datetime import timedelta

import pandas as pd
from sqlalchemy import create_engine, inspect

In [43]:
# Set up the connection to the PostgreSQL database.
# The "user:password@host:port/dbname" part is read from the
# SOLAR_DB_CONNECTION environment variable when it is set, so real
# credentials do not have to live in the notebook; the local development
# value below is only the fallback.
connection_string = os.environ.get(
    "SOLAR_DB_CONNECTION",
    "postgres:postgres@localhost:5432/solar_weather_db",
)
engine = create_engine(f'postgresql://{connection_string}')

# Confirm tables.
# Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector is the supported way to list tables.
inspect(engine).get_table_names()

['weather', 'solar_ogilvie', 'solar_minneapolis']

In [44]:
# Load every row of each database table into its own DataFrame.
mpls_df = pd.read_sql(sql='select * from solar_minneapolis', con=engine)
olg_df = pd.read_sql(sql='select * from solar_ogilvie', con=engine)
weather_df = pd.read_sql(sql='select * from weather', con=engine)

In [45]:
# Show the first five weather rows as a quick sanity check of the load.
weather_df.head(n=5)

Unnamed: 0,weather_date_time,weather_description,clouds_all,temp_f,rain_1h,snow_1h
0,2017-04-01 00:00:00,scattered clouds,40,56.678,0,0
1,2017-04-01 01:00:00,scattered clouds,40,52.25,0,0
2,2017-04-01 02:00:00,sky is clear,1,47.66,0,0
3,2017-04-01 03:00:00,sky is clear,1,42.206,0,0
4,2017-04-01 04:00:00,sky is clear,1,39.812,0,0


In [46]:
# Show the first five rows loaded from the solar_ogilvie table.
olg_df.head(n=5)

Unnamed: 0,og_date_time,power_delivered,energy_delivered,cumulative_energy
0,2019-04-21 11:45:00,14856,3714,4853
1,2019-04-21 12:00:00,14892,3723,8576
2,2019-04-21 12:15:00,15524,3881,12457
3,2019-04-21 12:30:00,11136,2784,15241
4,2019-04-21 12:45:00,12876,3219,18460


In [47]:
# Show the first five rows loaded from the solar_minneapolis table.
mpls_df.head(n=5)

Unnamed: 0,mpls_date_time,power_delivered,energy_delivered,cumulative_energy
0,2017-05-26 14:45:00,445,111,111
1,2017-05-26 15:45:00,2280,570,681
2,2017-05-26 16:45:00,4186,1047,1728
3,2017-05-26 17:45:00,4283,1071,2799
4,2017-05-26 18:45:00,4043,1011,3809


In [48]:
# 15 min timedelta
min_15 = timedelta(minutes=15)

# Add 15 minutes to each mpls date time to accommodate integration with the
# other datasets: the previewed Minneapolis readings are stamped at :45,
# while the weather rows they are joined to fall on the hour.
#
# The shift is applied to the whole column at once instead of looping over a
# Python list. The guard keeps the cell safe to re-run: once the original
# column has been replaced by 'date_time' there is nothing left to shift, so
# a second execution is a no-op rather than an AttributeError.
if "mpls_date_time" in mpls_df.columns:
    mpls_df = (
        mpls_df
        .assign(date_time=mpls_df["mpls_date_time"] + min_15)
        .drop(columns="mpls_date_time")
    )

mpls_df.head()

Unnamed: 0,power_delivered,energy_delivered,cumulative_energy,date_time
0,445,111,111,2017-05-26 15:00:00
1,2280,570,681,2017-05-26 16:00:00
2,4186,1047,1728,2017-05-26 17:00:00
3,4283,1071,2799,2017-05-26 18:00:00
4,4043,1011,3809,2017-05-26 19:00:00


In [49]:
# Look at the Ogilvie rows again; its timestamp column is still 'og_date_time' here.
olg_df.head(n=5)

Unnamed: 0,og_date_time,power_delivered,energy_delivered,cumulative_energy
0,2019-04-21 11:45:00,14856,3714,4853
1,2019-04-21 12:00:00,14892,3723,8576
2,2019-04-21 12:15:00,15524,3881,12457
3,2019-04-21 12:30:00,11136,2784,15241
4,2019-04-21 12:45:00,12876,3219,18460


In [51]:
# Give the weather and Ogilvie frames the same timestamp column name,
# 'date_time', so both can be joined on a single shared key.
weather_df.rename({"weather_date_time": "date_time"}, axis="columns", inplace=True)
olg_df.rename({"og_date_time": "date_time"}, axis="columns", inplace=True)

In [52]:
# Confirm that weather_df now exposes its timestamps under 'date_time'.
weather_df.head(n=5)

Unnamed: 0,date_time,weather_description,clouds_all,temp_f,rain_1h,snow_1h
0,2017-04-01 00:00:00,scattered clouds,40,56.678,0,0
1,2017-04-01 01:00:00,scattered clouds,40,52.25,0,0
2,2017-04-01 02:00:00,sky is clear,1,47.66,0,0
3,2017-04-01 03:00:00,sky is clear,1,42.206,0,0
4,2017-04-01 04:00:00,sky is clear,1,39.812,0,0


In [53]:
# Inner-join the Minneapolis solar readings with the weather observations on
# 'date_time'; rows whose timestamp has no match on the other side are dropped.
# merge() builds a new frame and leaves both inputs untouched, so no
# defensive copies are taken.
mpls_weather_df = mpls_df.merge(
    weather_df,
    on='date_time',
    how='inner',
    suffixes=['m_', 'w_'],
)

mpls_weather_df.head()

Unnamed: 0,power_delivered,energy_delivered,cumulative_energy,date_time,weather_description,clouds_all,temp_f,rain_1h,snow_1h
0,445,111,111,2017-05-26 15:00:00,overcast clouds,90,65.12,0,0
1,2280,570,681,2017-05-26 16:00:00,overcast clouds,90,66.758,0,0
2,4186,1047,1728,2017-05-26 17:00:00,scattered clouds,40,68.432,0,0
3,4283,1071,2799,2017-05-26 18:00:00,scattered clouds,40,71.582,0,0
4,4043,1011,3809,2017-05-26 19:00:00,scattered clouds,40,73.616,0,0


In [54]:
# Inner-join the Ogilvie solar readings with the weather observations on
# 'date_time'; rows whose timestamp has no match on the other side are dropped.
# merge() builds a new frame and leaves both inputs untouched, so no
# defensive copies are taken.
olg_weather_df = olg_df.merge(
    weather_df,
    on='date_time',
    how='inner',
    suffixes=['o_', 'w_'],
)

olg_weather_df.head()

Unnamed: 0,date_time,power_delivered,energy_delivered,cumulative_energy,weather_description,clouds_all,temp_f,rain_1h,snow_1h
0,2019-04-21 12:00:00,14892,3723,8576,light rain,1,52.718,0,0
1,2019-04-21 13:00:00,17908,4477,22937,proximity shower rain,90,52.196,1,0
2,2019-04-21 14:00:00,18784,4696,41582,moderate rain,75,53.474,1,0
3,2019-04-21 15:00:00,17464,4366,59588,light rain,40,56.768,0,0
4,2019-04-21 16:00:00,14720,3680,75476,broken clouds,75,61.466,0,0


In [56]:
# Write each merged solar + weather frame to a CSV file in the working directory.
olg_weather_df.to_csv(path_or_buf='olg_solar_weather.csv')
mpls_weather_df.to_csv(path_or_buf='mpls_solar_weather.csv')