In [1]:
import math
import pandas as pd
import numpy as np
from scipy import stats
from datetime import datetime
from sklearn import preprocessing
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
%matplotlib inline

# Reading data

In [2]:
train_df = pd.read_csv('data/TrainData1.csv', delimiter = ',').dropna().reset_index(drop=True)
train_df['TIMESTAMP'] = pd.to_datetime(train_df['TIMESTAMP'], format='%Y%m%d %H:%M')
train_df = train_df.set_index('TIMESTAMP')
std_dev = 3
train_df = train_df[(np.abs(stats.zscore(train_df)) < float(std_dev)).all(axis=1)]

In [3]:
pred_df = pd.read_csv('data/WeatherForecastInput1.csv', delimiter = ',')
pred_df['TIMESTAMP'] = pd.to_datetime(pred_df['TIMESTAMP'], format='%Y%m%d %H:%M')
pred_df = pred_df.set_index('TIMESTAMP')

# Model 1

In [81]:
X_train = train_df[['U10', 'V10', 'U100', 'V100']].copy()
y_train = train_df[['POWER']].copy()

X_est = pred_df[['U10', 'V10', 'U100', 'V100']].copy()

polynomial_features = PolynomialFeatures(degree=3)

X_train = pd.DataFrame(polynomial_features.fit_transform(X_train))
X_est = pd.DataFrame(polynomial_features.fit_transform(X_est))

model = LinearRegression()
scores =[]

kfold = KFold(n_splits=2, shuffle=True, random_state=42)

for i, (train, test) in enumerate(kfold.split(X_train, y_train)):
    model.fit(X_train.iloc[train,:], y_train.iloc[train,:])
    y_est = model.predict(X_train.iloc[test,:])
    error = (y_est - y_train.iloc[test,:])**2
    RMSE = math.sqrt(error.sum())/len(y_est)
    scores.append(RMSE)
print(scores)

[0.0013135597399546008, 0.0013071976253938636]


# Model 2

In [85]:
X_train = pd.DataFrame()
X_train['W10'] = np.sqrt(train_df['U10']**2 + train_df['V10']**2)
X_train['W100'] = np.sqrt(train_df['U100']**2 + train_df['V100']**2)

y_train = train_df[['POWER']].copy()

X_est = pd.DataFrame()
X_est['W10'] = np.sqrt(pred_df['U10']**2 + pred_df['V10']**2)
X_est['W100'] = np.sqrt(pred_df['U100']**2 + pred_df['V100']**2)

polynomial_features = PolynomialFeatures(degree=3)

X_train = pd.DataFrame(polynomial_features.fit_transform(X_train))
X_est = pd.DataFrame(polynomial_features.fit_transform(X_est))

model = LinearRegression()
scores =[]

kfold = KFold(n_splits=2, shuffle=True, random_state=42)

for i, (train, test) in enumerate(kfold.split(X_train, y_train)):
    model.fit(X_train.iloc[train,:], y_train.iloc[train,:])
    y_est = model.predict(X_train.iloc[test,:])
    error = (y_est - y_train.iloc[test,:])**2
    RMSE = math.sqrt(error.sum())/len(y_est)
    scores.append(RMSE)
print(scores)

[0.0012717464942937533, 0.0012607977729050545]


In [86]:
stage1 = pd.DataFrame(model2.predict(X_est2)).to_csv('stage1.csv', index = False)

# Model 3

In [41]:
X_train3 = train_df
X_train3['beta_0'] = 1
X_train3 = X_train3.drop(columns=['U10', 'V10', 'U100', 'V100', 'POWER'])
X_train3['W10'] = np.sqrt(train_df['U10']**2 + train_df['V10']**2)
X_train3['W100'] = np.sqrt(train_df['U100']**2 + train_df['V100']**2)
X_train3['W10^3'] = X_train3['W10']**3
X_train3['W100^3'] = X_train3['W100']**3

y_train3 = train_df[['POWER']].copy()

X_est3 = pd.DataFrame()
X_est3['W10'] = np.sqrt(train_df['U10']**2 + train_df['V10']**2)
X_est3['W100'] = np.sqrt(train_df['U100']**2 + train_df['V100']**2)
X_est3['W10^3'] = X_est3['W10']**3
X_est3['W100^3'] = X_est3['W100']**3

model3 = LinearRegression()
model3.fit(X_train3,y_train3)

y_est3 = model3.predict(X_train3)
error3 = (y_est3 - y_train3)**2
RMSE3 = math.sqrt(error3.sum())/len(y_est3)
RMSE3

0.0009078858447008036

# Model 5

In [4]:
X_train2 = pd.DataFrame()

X_train2['W10'] = np.sqrt(train_df['U10']**2 + train_df['V10']**2)
X_train2['W100'] = np.sqrt(train_df['U100']**2 + train_df['V100']**2)

y_train2 = train_df[['POWER']].copy()

X_est2 = pd.DataFrame()
X_est2['W10'] = np.sqrt(train_df['U10']**2 + train_df['V10']**2)
X_est2['W100'] = np.sqrt(train_df['U100']**2 + train_df['V100']**2)

polynomial_features = PolynomialFeatures(degree=3)

X_train2 = pd.DataFrame(polynomial_features.fit_transform(X_train2))
X_est2 = pd.DataFrame(polynomial_features.fit_transform(X_est2))

X_train2['HOUR'] = train_df.index.hour 
X_est2['HOUR'] = train_df.index.hour

model2 = LinearRegression()
model2.fit(X_train2,y_train2)

y_est2 = model2.predict(X_train2)
error2 = (y_est2 - y_train2)**2
RMSE2 = math.sqrt(error2.sum())/len(y_est2)
RMSE2

0.0008950030704158654

# Testing second stage model here

In [9]:
X_train = pd.DataFrame()
X_train['U10'] = train_df['U10']
X_train['U100'] = train_df['U100']
X_train['V10'] = train_df['V10']
X_train['V100'] = train_df['V100']
X_train['U10^2'] = X_train['U10']**2
X_train['U100^2'] = X_train['U100']**2
X_train['V10^2'] = X_train['U10']**2
X_train['V100^2'] = X_train['U100']**2
X_train['U10^3'] = X_train['U10']**3
X_train['U100^3'] = X_train['U100']**3
X_train['V10^3'] = X_train['V10']**3
X_train['V100^3'] = X_train['V100']**3

X_train['W10'] = np.sqrt(train_df['U10']**2 + train_df['V10']**2)
X_train['W100'] = np.sqrt(train_df['U100']**2 + train_df['V100']**2)
X_train['W10^2'] = X_train['W10']**2
X_train['W100^2'] = X_train['W100']**2
X_train['W10^3'] = X_train['W10']**3
X_train['W100^3'] = X_train['W100']**3
X_train['beta_0'] = 1
X_train['hour'] = X_train.index.hour
X_train['year'] = X_train.index.year
X_train['month'] = X_train.index.month

y_train = train_df[['POWER']].copy()

X_pred = pd.DataFrame()

X_pred['U10'] = pred_df['U10']
X_pred['U100'] = pred_df['U100']
X_pred['V10'] = pred_df['V10']
X_pred['V100'] = pred_df['V100']
X_pred['U10^2'] = X_pred['U10']**2
X_pred['U100^2'] = X_pred['U100']**2
X_pred['V10^2'] = X_pred['V10']**2
X_pred['V100^2'] = X_pred['V100']**2
X_pred['U10^3'] = X_pred['U10']**3
X_pred['U100^3'] = X_pred['U100']**3
X_pred['V10^3'] = X_pred['V10']**3
X_pred['V100^3'] = X_pred['V100']**3

X_pred['W10'] = np.sqrt(pred_df['U10']**2 + pred_df['V10']**2)
X_pred['W100'] = np.sqrt(pred_df['U100']**2 + pred_df['V100']**2)
X_pred['W10^2'] = X_pred['W10']**2
X_pred['W100^2'] = X_pred['W100']**2
X_pred['W10^3'] = X_pred['W10']**3
X_pred['W100^3'] = X_pred['W100']**3

X_pred['beta_0'] = 1

X_pred['hour'] = X_pred.index.hour
X_pred['year'] = X_pred.index.year
X_pred['month'] = X_pred.index.month

In [10]:
model = LinearRegression()
model.fit(X_train, y_train)
y_est = model.predict(X_pred)

In [11]:
y_est

array([[0.60991036],
       [0.68115271],
       [0.88562281],
       [0.81306285],
       [0.79470488],
       [0.84321334],
       [0.81036848],
       [0.93958517],
       [0.96049432],
       [0.94517202],
       [0.96622873],
       [0.98461274],
       [0.90874919],
       [0.86298037],
       [0.88558971],
       [0.83786513],
       [0.77405452],
       [0.76529862],
       [0.75378145],
       [0.69534919],
       [0.66397156],
       [0.66143757],
       [0.60417649],
       [0.57452479]])