# Interview case - Wind Forecast Denmark

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn import svm, metrics
import numpy as np
# Default canvas size (width, height) applied to every figure created below.
plt.rcParams["figure.figsize"] = (15, 10)

# Task 

Build a model of wind power production for one or both of the two Danish price zones (DK1 and DK2), based on the wind and temperature data in the attached file WindData.csv. The file contains the actual production for the two price zones, together with forecasted wind speed (WND) and wind direction (DD) at several locations in Denmark. The model should be fitted to this historical data.

#### Import the data

In [None]:
# The CSV lives in a "data" folder that is a sibling of the current working
# directory, so step one level up before descending into it.
base_dir = os.path.split(os.getcwd())[0]
filename = os.path.join(base_dir, os.path.join("data", "WindData.csv"))

In [None]:
# Load the raw CSV into a DataFrame; no columns are parsed or re-typed here.
df = pd.read_csv(filepath_or_buffer=filename)

In [None]:
# Quick visual check of the first five rows.
df.head(n=5)

#### Adjusting the index

In [None]:
# Timestamps are written day-first as "day/month/year hour:minute".
df["Date"] = pd.to_datetime(df["Date"], format="%d/%m/%Y %H:%M")

# Prepare the dataset: use the timestamp as index, put the rows in
# chronological order and cast every remaining column to float.
df = df.set_index("Date")
df = df.sort_index()
df = df.astype(float)

In [None]:
# Pick column labels by plain-substring match on the header text.
# ``DataFrame.groupby(..., axis=1)`` is deprecated since pandas 2.1, so the
# labels are filtered straight from ``df.columns`` instead. Each result is
# still a pandas Index holding the matching labels in their original order.
# ``regex=False`` keeps the match literal, like the ``in`` operator.
col_actual_prod = df.columns[df.columns.str.contains("Wind Production", regex=False)]
col_wind_speed = df.columns[df.columns.str.contains("WND", regex=False)]
col_wind_dir = df.columns[df.columns.str.contains("DD", regex=False)]

### Adjusting the variables

In [None]:
# Feature matrix: wind-speed columns followed by wind-direction columns, with
# the last two labels of each group left out.
# NOTE(review): the reason for dropping the last two columns of each group is
# not documented here — confirm which series they are.
X = df.loc[
    :,
    col_wind_speed[:-2].union(col_wind_dir[:-2], sort=False),
].to_numpy()

In [None]:
rows = np.arange(X.shape[0])
size_test = int(1.5*X.shape[0]) 
#The factor 1.5 is added in order achieve 80/20-distribution between the training and test set.

In [None]:
rng = np.random.default_rng(5)
index_training = np.unique(rng.choice(rows, size = size_test))
index_test = np.delete(rows, index_training, 0)
ratio = len(index_training)/X.shape[0]

print(f"The ratio of row distribution between the datasets for training and test is {ratio:.2%}.")

In [None]:
# Row subsets of the feature matrix for fitting and for evaluation; the
# integer index arrays are used directly for the selection.
X_training = X[index_training]
X_test = X[index_test]

In [None]:
# Target vectors, one per price zone.
# NOTE(review): assumes the first "Wind Production" column is DK1 and the
# second is DK2 — confirm against the CSV header.
y_dk1 = df.loc[:, col_actual_prod[0]].to_numpy()
y_dk2 = df.loc[:, col_actual_prod[1]].to_numpy()

# Wind Power Production model for DK1

In [None]:
# Support-vector regressor with a linear kernel, fitted on the training rows
# only so the remaining rows stay unseen for evaluation.
regr_dk1 = svm.SVR(kernel="linear")
regr_dk1.fit(X=X[index_training], y=y_dk1[index_training])

In [None]:
# Predict DK1 production for the held-out test rows.
y_pred_dk1 = regr_dk1.predict(X=X[index_test])

In [None]:
# Report the coefficient of determination on the held-out test rows. It is
# printed explicitly because a bare string expression shows nothing when the
# code runs outside an interactive cell.
print(
    "The R2 value for the Wind Power Production Model for the price zone DK1 is "
    f"{metrics.r2_score(y_dk1[index_test], y_pred_dk1):.2f}"
)

In [None]:
# Overlay predicted and actual DK1 production for the test rows; the x-axis is
# the position of each row within the test set.
plt.figure()
# Plain title text: the former raw string showed a literal "\n" in the title.
plt.title("Wind Power Production for DK1")
plt.plot(y_pred_dk1, label="Predicted production")
plt.plot(y_dk1[index_test], label="Actual production")
plt.legend()

In [None]:
# Show the full hyper-parameter set the DK1 model was built with.
regr_dk1.get_params(deep=True)

# Wind Power Production model for DK2

In [None]:
# Support-vector regressor with a linear kernel, fitted on the training rows
# only so the remaining rows stay unseen for evaluation.
regr_dk2 = svm.SVR(kernel="linear")
regr_dk2.fit(X=X[index_training], y=y_dk2[index_training])

In [None]:
# Predict DK2 production for the held-out test rows.
y_pred_dk2 = regr_dk2.predict(X=X[index_test])

In [None]:
# Report the coefficient of determination on the held-out test rows. It is
# printed explicitly because a bare string expression shows nothing when the
# code runs outside an interactive cell.
print(
    "The R2 value for the Wind Power Production Model for the price zone DK2 is "
    f"{metrics.r2_score(y_dk2[index_test], y_pred_dk2):.2f}"
)

In [None]:
# Overlay predicted and actual DK2 production for the test rows; the x-axis is
# the position of each row within the test set.
plt.figure()
# Title added so this figure can be told apart from the DK1 figure.
plt.title("Wind Power Production for DK2")
plt.plot(y_pred_dk2, label="Predicted production")
plt.plot(y_dk2[index_test], label="Actual production")
plt.legend()

In [None]:
# Show the full hyper-parameter set the DK2 model was built with.
regr_dk2.get_params(deep=True)