In [4]:
import pandas as pd
import numpy as np

""" 
DSC672
Team: Steven Rummel, Ricardo De Leon II, Prabhakaran Raghavan, Sibi Augustin, Tyler Jewell
Project: Energy Consumption and Renewable Energy
Purpose: 

Import solar and wind data from the relevant source files, 
convert string-format date and time into valid datetime values,
remove unneeded columns, normalize column headers, and
export to canonical data source for further analysis.
"""

def _load_production(csv_path, source_label):
    """Read one production CSV and return it in the shared output layout.

    Steps applied to the file at ``csv_path``:

    * rename the ``Electricity_KW_HR`` column to ``KWH``;
    * parse ``Date`` as a datetime and add ``Hour`` to it as a number of
      hours, so ``Date`` carries both date and hour;
    * drop the ``Hour`` column;
    * append a ``Source`` column filled with ``source_label``.

    Parameters
    ----------
    csv_path : str
        Path to the CSV file; it must provide ``Date`` and ``Hour`` columns.
    source_label : str
        Value written to every row of the new ``Source`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame; nothing is mutated in place.
    """
    # index=str is carried over from the original code: it turns the row
    # labels into strings.
    # NOTE(review): probably unnecessary, since the concat below discards the
    # index (ignore_index=True) -- confirm nothing else relies on it.
    renamed = pd.read_csv(csv_path).rename(
        index=str, columns={"Electricity_KW_HR": "KWH"}
    )
    timestamps = pd.to_datetime(renamed['Date']) + pd.to_timedelta(renamed['Hour'], unit='h')
    return (
        renamed
        .assign(Date=timestamps)    # overwrites Date in its original position
        .drop(['Hour'], axis=1)
        .assign(Source=source_label)  # new column, appended last
    )


solar_prod = _load_production('./raw/solararray_production.csv', 'Solar')
wind_prod = _load_production('./raw/windfarm_production.csv', 'Wind')

# Stack both sources into one long frame with a fresh 0..n-1 index.
power_production = pd.concat([solar_prod, wind_prod], ignore_index=True, sort=False)

In [5]:
# Row counts of each input frame and of the combined frame.
sol, wind, allpower = (
    len(frame) for frame in (solar_prod, wind_prod, power_production)
)
expected_rows = sol + wind

# The combined frame must hold exactly the rows of both inputs.
assert expected_rows == allpower, "Dataframe concatenation failure! Data Lost! Check your logic!"

row_counts = (sol, wind, expected_rows, allpower)
print("Solar: {} Wind: {} Total: {} Power: {}".format(*row_counts))

# Save the combined frame only after the row-count check has passed.
power_production.to_pickle("./processed/power_production.pkl")

Solar: 18704 Wind: 15385 Total: 34089 Power: 34089


In [6]:
# Print each distinct value of the 'Source' column, in order of first appearance.
distinct_sources = pd.unique(power_production['Source'])
for source in distinct_sources:
    print("Source: {}".format(source))

Source: Solar
Source: Wind
