In [10]:
import math
import pandas as pd
import numpy as np
from scipy import stats
from datetime import datetime
from sklearn import preprocessing
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
%matplotlib inline

# Reading data

In [11]:
train_df = pd.read_csv('data/TrainData1.csv', delimiter = ',').dropna().reset_index(drop=True)
train_df['TIMESTAMP'] = pd.to_datetime(train_df['TIMESTAMP'], format='%Y%m%d %H:%M')
train_df = train_df.set_index('TIMESTAMP')
std_dev = 3
train_df = train_df[(np.abs(stats.zscore(train_df)) < float(std_dev)).all(axis=1)]

In [12]:
pred_df = pd.read_csv('data/WeatherForecastInput1.csv', delimiter = ',')
pred_df['TIMESTAMP'] = pd.to_datetime(pred_df['TIMESTAMP'], format='%Y%m%d %H:%M')
pred_df = pred_df.set_index('TIMESTAMP')

# Model

In [13]:
X_train = pd.DataFrame()
X_train['W10'] = np.sqrt(train_df['U10']**2 + train_df['V10']**2)
X_train['W100'] = np.sqrt(train_df['U100']**2 + train_df['V100']**2)

y_train = train_df[['POWER']].copy()

X_est = pd.DataFrame()
X_est['W10'] = np.sqrt(pred_df['U10']**2 + pred_df['V10']**2)
X_est['W100'] = np.sqrt(pred_df['U100']**2 + pred_df['V100']**2)

polynomial_features = PolynomialFeatures(degree=3)

X_train = pd.DataFrame(polynomial_features.fit_transform(X_train))
X_est = pd.DataFrame(polynomial_features.fit_transform(X_est))

model = LinearRegression()
scores =[]

kfold = KFold(n_splits=2, shuffle=True, random_state=42)

for i, (train, test) in enumerate(kfold.split(X_train, y_train)):
    model.fit(X_train.iloc[train,:], y_train.iloc[train,:])
    y_est = model.predict(X_train.iloc[test,:])
    error = (y_est - y_train.iloc[test,:])**2
    RMSE = math.sqrt(error.sum())/len(y_est)
    scores.append(RMSE)
print(scores)

[0.0012717464942937533, 0.0012607977729050545]


# Training on entire dataset and making predictions:

In [14]:
model.fit(X_train, y_train)
predictions = model.predict(X_est)
stage1 = pd.DataFrame(predictions).to_csv('stage1.csv', index = False)

In [15]:
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.0,7.738767,11.812319,59.888518,91.412784,139.530871,463.463304,707.422261,1079.796936,1648.183110
1,1.0,8.408169,12.662342,70.697301,106.467107,160.334902,594.434840,895.193402,1348.122910,2030.215344
2,1.0,9.310131,13.839159,86.678544,128.844386,191.522321,806.988620,1199.558147,1783.097943,2650.507842
3,1.0,9.584993,15.038131,91.872084,144.140372,226.145374,880.593250,1381.584405,2167.601747,3400.803685
4,1.0,10.447402,15.547499,109.148202,162.430966,241.724721,1140.315111,1696.981548,2525.395257,3758.214821
5,1.0,11.587905,17.221163,134.279539,199.557195,296.568444,1556.018520,2312.449783,3436.606910,5107.253416
6,1.0,12.520565,18.685304,156.764559,233.950570,349.140583,1962.780912,2929.193423,4371.437514,6523.797911
7,1.0,13.823701,20.625344,191.094700,285.118583,425.404820,2641.635924,3941.393939,5880.668885,8774.120798
8,1.0,14.426024,21.541395,208.110168,310.756684,464.031708,3002.202275,4482.983376,6694.132544,9995.890405
9,1.0,13.501155,19.849790,182.281180,267.995090,394.014171,2461.006421,3618.243180,5319.646302,7821.098632
