In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

In [2]:
df_train = pd.read_csv('train_data_FULL.csv')
df_test = pd.read_csv('test_data_FULL.csv')

In [3]:
X_train = df_train.iloc[:, :-1].values  # All columns except the last as features
y_train = df_train.iloc[:, -1].values   # Last column as target
X_test = df_test.iloc[:, 1:].values   # All columns except the first as features

In [4]:
# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Add a directory for TensorBoard logs
tensorboard_callback = TensorBoard(log_dir='D:\\Python\\TBlogs')

In [6]:
# Define the EarlyStopping callback
early_stopping = EarlyStopping(
    monitor='val_loss',  # Metric to monitor
    patience=30,         # Number of epochs to wait without improvement
    restore_best_weights=True  # Restore the weights of the best epoch
)

In [7]:
optimizer = Adam(learning_rate=0.001) # default is 0.001

In [8]:
# Define the FCNN model
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='linear')
])

# Compile the model
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

# Model summary
model.summary()

In [9]:
history = model.fit(
    X_train, y_train,
    validation_split=0.2,  # Use 20% of training data for validation
    epochs=200,  # Number of epochs (adjust as needed)
    batch_size=32,  # Batch size
    verbose=1 , # Display training progress
    callbacks=[tensorboard_callback]
)

Epoch 1/200
[1m3236/3236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - loss: 4.7649 - mae: 1.5620 - val_loss: 0.8635 - val_mae: 0.6756
Epoch 2/200
[1m3236/3236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 1.5289 - mae: 0.9106 - val_loss: 0.6238 - val_mae: 0.5801
Epoch 3/200
[1m3236/3236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 1.1252 - mae: 0.7764 - val_loss: 0.5356 - val_mae: 0.5415
Epoch 4/200
[1m3236/3236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 0.9035 - mae: 0.6978 - val_loss: 0.5636 - val_mae: 0.5599
Epoch 5/200
[1m3236/3236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 0.8314 - mae: 0.6661 - val_loss: 0.5699 - val_mae: 0.5835
Epoch 6/200
[1m3236/3236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 0.7698 - mae: 0.6446 - val_loss: 0.6063 - val_mae: 0.6016
Epoch 7/200
[1m3236/3236[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [10]:
y_pred = model.predict(X_test)

[1m2016/2016[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 496us/step


In [11]:
predictions_df = pd.DataFrame(y_pred)

In [12]:
column_from_df1 = df_test["Unnamed: 0"]
column_from_df2 = predictions_df.iloc[:, -1]

In [13]:
predictions_df = pd.concat([column_from_df1, column_from_df2], axis=1)

In [14]:
predictions_df.columns = ['id', 'target_feature']

In [15]:
predictions_df

Unnamed: 0,id,target_feature
0,2016,8.156261
1,2017,8.333298
2,2018,8.710829
3,2019,7.875579
4,2020,7.242989
...,...,...
64507,193531,5.773967
64508,193532,5.488265
64509,193533,5.438738
64510,193534,5.695416


In [16]:
print(predictions_df.dtypes)

id                  int64
target_feature    float32
dtype: object


In [56]:
sample_submission = pd.read_csv("sample_submission.csv")

In [57]:
sample_submission

Unnamed: 0,id,target_feature
0,2016,5.0
1,2017,5.0
2,2018,5.0
3,2019,5.0
4,2020,5.0
...,...,...
64507,193531,5.0
64508,193532,5.0
64509,193533,5.0
64510,193534,5.0


In [58]:
print(sample_submission.dtypes)

id                  int64
target_feature    float64
dtype: object


In [17]:
# Save submission as CSV file
predictions_df.to_csv('D:\\Python\\Predict the wind speed at a wind turbine\\submissions\\oleg_bissing_submission_6.csv', index=False)