# Linear Regression Model

In [1]:
import warnings
import sys
warnings.simplefilter('ignore')

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib



In [2]:
# --- column configuration -------------------------------------------------
# Input columns fed to the model, grouped by kind. Order is preserved when
# the feature matrix is sliced out of each dataframe.
features = [
    # weather observations
    'clouds_all', 'temp_f', 'pressure', 'humidity', 'wind_speed', 'wind_deg',
    # raw calendar fields
    'hour', 'day_of_year', 'month',
    # sin/cos columns for day, hour and month
    'sin_day', 'cos_day', 'sin_hour', 'cos_hour', 'sin_month', 'cos_month',
    # presumably daylight length in seconds -- TODO confirm
    'dl_sec',
]

# Column the model is trained to predict.
target = 'power_delivered'

# Number of model inputs.
input_dim = len(features)

# --- data -------------------------------------------------------------------
# One solar + weather CSV per dataset, loaded into its own dataframe.
mpls_df = pd.read_csv('resources/mpls_solar_weather.csv')
olg_df = pd.read_csv('resources/olg_solar_weather.csv')

In [3]:
def get_features_target(df, feature_cols=None, target_col=None):
    '''Split a dataframe into a feature matrix and a target vector.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing every feature column and the target column.
    feature_cols : sequence of str, optional
        Names of the input columns. Defaults to the notebook-level
        ``features`` list when omitted.
    target_col : str, optional
        Name of the column to predict. Defaults to the notebook-level
        ``target`` when omitted.

    Returns
    -------
    X : pandas.DataFrame
        Copy of the feature columns, in the order given.
    y : pandas.Series
        Copy of the target column.

    Raises
    ------
    KeyError
        If any requested column is absent from ``df``.
    '''
    # Fall back to the notebook globals only when the caller gave nothing,
    # so existing one-argument calls keep working unchanged.
    if feature_cols is None:
        feature_cols = features
    if target_col is None:
        target_col = target

    # list() lets callers pass a tuple; a bare tuple would be treated by
    # pandas as a single (multi-level) key rather than several columns.
    feature_cols = list(feature_cols)

    # Fail early with every missing name listed, instead of pandas' error
    # for just the first lookup that happens to fail.
    missing = [col for col in feature_cols + [target_col] if col not in df.columns]
    if missing:
        raise KeyError(f"DataFrame is missing required column(s): {missing}")

    # Copies so later scaling/reshaping never writes back into the source df.
    X = df[feature_cols].copy()
    y = df[target_col].copy()

    return X, y

In [4]:
# Map a short dataset key to its dataframe. The key is also used as the
# filename prefix for every artifact persisted in the loop below.
dfs_dict = {'mpls': mpls_df,
       'olg': olg_df}

# Fit and evaluate one linear model per dataset on a single hold-out split.
# NOTE: this is one train/test split (train_test_split's default 75/25),
# not k-fold cross validation.
# NOTE(review): joblib is imported via `sklearn.externals`, which was removed
# in scikit-learn 0.23; on newer versions the import must be `import joblib`.
for key in dfs_dict:
    # get features and target
    X, y = get_features_target(dfs_dict[key])
    # StandardScaler needs 2-D input, so turn the target into a column vector
    y = y.values.reshape(-1,1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    # Scalers are fitted on the training split only, then applied to both
    # splits, so no statistics from the test rows leak into training.
    X_scaler = StandardScaler().fit(X_train)
    y_scaler = StandardScaler().fit(y_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)
    y_train_scaled = y_scaler.transform(y_train)
    y_test_scaled = y_scaler.transform(y_test)

    model = LinearRegression()
    model.fit(X_train_scaled, y_train_scaled)

    # Both metrics are computed on the standardized target, so MSE is in
    # units of training-set variance rather than raw power units.
    predictions = model.predict(X_test_scaled)
    MSE = mean_squared_error(y_test_scaled, predictions)
    r2 = model.score(X_test_scaled, y_test_scaled)

    joblib.dump(model, f'{key}_linear.dat')
    # The model only understands standardized inputs and emits standardized
    # outputs; persist the fitted scalers next to it so a loaded model can be
    # fed raw features and its predictions inverse-transformed. Without these
    # the saved model cannot be used once this loop moves on.
    joblib.dump(X_scaler, f'{key}_linear_X_scaler.dat')
    joblib.dump(y_scaler, f'{key}_linear_y_scaler.dat')
    print(f"{key}  MSE: {MSE}, R2: {r2}")

mpls  MSE: 0.39362499607236634, R2: 0.5966234922003943
olg  MSE: 0.251546261483711, R2: 0.7377352581532926
