# Linear Regression Model

In [1]:
import warnings
import sys
warnings.simplefilter('ignore')

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib



In [2]:
# read in files as dfs
mpls_df = pd.read_csv('resources/mpls_solar_weather.csv')
olg_df = pd.read_csv('resources/olg_solar_weather.csv')


features = [
    'clouds_all', 'temp_f', 'pressure', 'humidity', 'wind_speed', 'wind_deg', 'hour', 'day_of_year',
    'month', 'sin_day', 'cos_day', 'sin_hour', 'cos_hour', 'sin_month', 'cos_month', 'dl_sec'
]

# parameter to predict
target = 'power_delivered'

# get input dimensions
input_dim = len(features)

In [3]:
def get_features_target(df):
    '''Takes in df and returns features and target dataframes for training and validation.'''
    X = df[features].copy()
    y = df[target].copy()
    
    return X, y

In [4]:
# dict of solar datasets to loop over
dfs_dict = {'mpls': mpls_df,
       'olg': olg_df}

# perform cross validation on datasets
for key in dfs_dict:
    # get features and target
    print(dfs_dict[key])
    X, y = get_features_target(dfs_dict[key])
    y = y.values.reshape(-1,1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    X_scaler = StandardScaler().fit(X_train)
    y_scaler = StandardScaler().fit(y_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
    y_train_scaled = y_scaler.transform(y_train)
    y_test_scaled = y_scaler.transform(y_test)
    model = LinearRegression()
    model.fit(X_train_scaled, y_train_scaled)
    predictions = model.predict(X_test_scaled)
    MSE = mean_squared_error(y_test_scaled, predictions)
    r2 = model.score(X_test_scaled, y_test_scaled)
    joblib.dump(model, f'{key}_linear.dat')
    print(f"{key}  MSE: {MSE}, R2: {r2}")

                 date_time  power_delivered  energy_delivered  \
0      2017-05-26 15:00:00              445               111   
1      2017-05-26 16:00:00             2280               570   
2      2017-05-26 17:00:00             4186              1047   
3      2017-05-26 18:00:00             4283              1071   
4      2017-05-26 19:00:00             4043              1011   
...                    ...              ...               ...   
19854  2019-09-02 14:00:00             3461               865   
19855  2019-09-02 15:00:00             3061               765   
19856  2019-09-02 16:00:00             1959               490   
19857  2019-09-02 17:00:00              692               173   
19858  2019-09-02 18:00:00              258                65   

       cumulative_energy weather_description  clouds_all  temp_f  pressure  \
0                    111        sky is clear           1  76.316      1007   
1                    681    scattered clouds          40  77.00