In [1]:
import math
import pandas as pd
import numpy as np
from scipy import stats
from datetime import datetime
from sklearn import preprocessing
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
import matplotlib.pyplot as plt
%matplotlib inline

# Reading data

In [2]:
# Load the training set and drop incomplete rows before any further processing.
train_df = pd.read_csv("data/TrainData2.csv", sep=",")
train_df = train_df.dropna().reset_index(drop=True)

# Parse the timestamp strings (format '%Y%m%d %H:%M') and use them as the index.
train_df["TIMESTAMP"] = pd.to_datetime(train_df["TIMESTAMP"], format="%Y%m%d %H:%M")
train_df = train_df.set_index("TIMESTAMP")

# Outlier filter: keep a row only when the absolute z-score of every column
# is strictly below `std_dev`.
std_dev = 3
abs_z = np.abs(stats.zscore(train_df))
train_df = train_df[(abs_z < float(std_dev)).all(axis=1)]

In [3]:
# Load the weather-forecast inputs and index them by the parsed timestamp
# (same '%Y%m%d %H:%M' format as the training file).
pred_df = pd.read_csv("data/WeatherForecastInput2.csv", sep=",")
pred_df = pred_df.assign(
    TIMESTAMP=pd.to_datetime(pred_df["TIMESTAMP"], format="%Y%m%d %H:%M")
).set_index("TIMESTAMP")

# Feature Engineering

In [4]:
def build_features(weather_df):
    """Build the regression design matrix from raw wind components.

    The same recipe is applied to the training frame and to the forecast
    frame so that both matrices always have identical columns in identical
    order.

    Parameters
    ----------
    weather_df : pandas.DataFrame
        Frame indexed by a DatetimeIndex and containing the columns
        ``U10``, ``U100``, ``V10`` and ``V100``.

    Returns
    -------
    pandas.DataFrame
        A new frame on the same index with the raw components, selected
        polynomial terms, the wind speeds ``W10``/``W100`` with their squares
        and cubes, a constant ``beta_0`` column, and the calendar fields
        ``hour``, ``year`` and ``month`` taken from the index.
    """
    feats = weather_df[['U10', 'U100', 'V10', 'V100']].copy()

    # Polynomial terms of the raw components.
    # Note: V^2 and U^3 terms are not part of the feature set.
    feats['U10^2'] = feats['U10']**2
    feats['U100^2'] = feats['U100']**2
    feats['V10^3'] = feats['V10']**3
    feats['V100^3'] = feats['V100']**3

    # Wind speed magnitude at each height: sqrt(U^2 + V^2), plus its powers.
    feats['W10'] = np.sqrt(weather_df['U10']**2 + weather_df['V10']**2)
    feats['W100'] = np.sqrt(weather_df['U100']**2 + weather_df['V100']**2)
    feats['W10^2'] = feats['W10']**2
    feats['W100^2'] = feats['W100']**2
    feats['W10^3'] = feats['W10']**3
    feats['W100^3'] = feats['W100']**3

    # Constant column.
    # NOTE(review): LinearRegression fits its own intercept by default, so
    # this column looks redundant -- kept to preserve the column schema.
    feats['beta_0'] = 1

    # Calendar features read from the DatetimeIndex.
    feats['hour'] = feats.index.hour
    feats['year'] = feats.index.year
    feats['month'] = feats.index.month
    return feats


X = build_features(train_df)
y = train_df[['POWER']].copy()

X_pred = build_features(pred_df)

# Guard against the two design matrices drifting apart.
assert list(X.columns) == list(X_pred.columns)


# Last Model

In [None]:
# Positional hold-out: the first 90% of the rows are used for fitting,
# the remaining rows for evaluation.
N = int(0.90 * len(X))

X_train, X_test = X.iloc[:N], X.iloc[N:]
y_train, y_test = y.iloc[:N], y.iloc[N:]

In [None]:
# Fit a linear regression on the training split and predict the held-out rows.
model = LinearRegression().fit(X_train, y_train)
y_est = model.predict(X_test)

# Root-mean-square error on the held-out rows, multiplied by 100.
squared_error_total = ((y_test - y_est) ** 2).sum().values
np.sqrt(squared_error_total / len(y_est)) * 100

In [None]:
# NOTE(review): this cell performs the same fit and computes the same metric
# as the preceding cell; it is a candidate for removal.
model = LinearRegression().fit(X_train, y_train)
y_est = model.predict(X_test)

# Root-mean-square error on the held-out rows, multiplied by 100.
holdout_sq_err = ((y_test - y_est) ** 2).sum().values
np.sqrt(holdout_sq_err / len(y_est)) * 100

# Using complete data

In [5]:
# Final model: refit on every row of X / y (no hold-out) and predict for the
# forecast inputs.
model_complete = LinearRegression()
model_complete.fit(X, y)

# Clip the predictions into [0, 1] and keep them in `y_pred`: the optional
# export below refers to that name, which was previously never assigned.
# NOTE(review): the clip presumably reflects POWER being normalised to [0, 1] -- confirm.
y_pred = model_complete.predict(X_pred).clip(min=0,max=1)
#np.savetxt("stage2_complete.csv", y_pred, delimiter=",")
y_pred

array([[4.21576337e-01],
       [4.92833915e-01],
       [4.33785997e-01],
       [3.85106222e-01],
       [3.75410560e-01],
       [3.69888415e-01],
       [4.06823540e-01],
       [3.71221593e-01],
       [3.34038474e-01],
       [3.19321117e-01],
       [3.02225973e-01],
       [3.81577023e-01],
       [4.85106798e-01],
       [3.28695342e-01],
       [3.77494389e-01],
       [4.72296542e-01],
       [5.76429681e-01],
       [5.67638350e-01],
       [6.15898718e-01],
       [6.38734113e-01],
       [6.74736693e-01],
       [6.44271086e-01],
       [7.64449743e-01],
       [8.29425997e-01],
       [8.16978589e-01],
       [8.37147084e-01],
       [8.43518106e-01],
       [8.38455895e-01],
       [8.24910945e-01],
       [8.43597825e-01],
       [8.23570876e-01],
       [5.68491992e-01],
       [4.15714102e-01],
       [6.54483939e-01],
       [5.86342969e-01],
       [4.91349729e-01],
       [3.79625084e-01],
       [2.97547927e-01],
       [2.82720123e-01],
       [1.79211982e-01],


# Third stage