In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score

# NOTE(review): this silences pandas' SettingWithCopyWarning globally. The
# train/test frames below are explicit copies, so this cell does not need it;
# the setting is kept only in case cells outside this view rely on it.
pd.options.mode.chained_assignment = None

# Load the dataset.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
file_path = "E:/Studia/Praca dyplomowa/kod/Ostateczna_baza_danych.csv"
data = pd.read_csv(file_path)

# Parse the timestamp column into datetimes so it can be compared with a date.
data['timestamp'] = pd.to_datetime(data['timestamp'])

# Chronological split: rows before 2019-01-01 go to training, the rest to testing.
# .copy() gives each subset its own frame, so the column assignments below
# never write into a slice of `data`.
split_date = pd.to_datetime('2019-01-01')
train_data = data[data['timestamp'] < split_date].copy()
test_data = data[data['timestamp'] >= split_date].copy()

# Feature scaling: divide each feature column by its maximum absolute value.
# The scaler is fitted on the training rows ONLY and then applied unchanged to
# the test rows. Scaling the test set by its own maximum (as was done before)
# maps the same raw value to different model inputs in the two sets and lets
# test-set statistics influence preprocessing.
scaled_columns = ['humidity', 'air_temp', 'sun_elev']
feature_scaler = preprocessing.MaxAbsScaler()
train_data[scaled_columns] = feature_scaler.fit_transform(train_data[scaled_columns])
test_data[scaled_columns] = feature_scaler.transform(test_data[scaled_columns])

print(test_data)
print(train_data)

                 timestamp     GHI     toa  wind_dir  humidity  precipitation  \
153810 2019-01-01 08:00:00   41.44   56.88    181.18  0.924242            0.0   
153811 2019-01-01 08:30:00   82.87  169.06    185.26  0.930202            0.0   
153812 2019-01-01 09:00:00  124.31  272.15    204.34  0.910303            0.0   
153813 2019-01-01 09:30:00  230.91  364.41    172.97  0.835960            0.0   
153814 2019-01-01 10:00:00  255.79  444.25    211.76  0.811313            0.0   
...                    ...     ...     ...       ...       ...            ...   
162867 2020-01-01 15:00:00  231.50  386.29    342.06  0.591212            0.0   
162868 2020-01-01 15:30:00  206.48  297.18    323.49  0.731313            0.0   
162869 2020-01-01 16:00:00  167.26  196.79    323.50  0.806263            0.0   
162870 2020-01-01 16:30:00  111.51   86.85    325.19  0.816061            0.0   
162871 2020-01-01 17:00:00   55.75    3.58    337.79  0.878283            0.0   

        air_temp  wind_sp  

In [11]:
# Fixed seed so the validation split and the network's weight initialisation
# are identical on every run; without it the reported metrics change from
# one execution to the next.
RANDOM_STATE = 42

# Split the data into features (X) and target (y).
feature_columns = ['humidity', 'air_temp', 'sun_elev']
target_column = 'GHI'

X_train_full = train_data[feature_columns]
y_train_full = train_data[target_column]
X_test = test_data[feature_columns]
y_test = test_data[target_column]

# Hold out 20% of the training rows for validation.
# NOTE(review): train_test_split shuffles rows by default; for time-ordered
# data a chronological validation split may be more representative - confirm.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.2, random_state=RANDOM_STATE
)

# Multi-layer perceptron with three hidden layers of 50 ReLU units each.
mlp = MLPRegressor(
    hidden_layer_sizes=(50, 50, 50),
    activation='relu',
    max_iter=500,
    random_state=RANDOM_STATE,
)

mlp.fit(X_train, y_train)

# Predictions on the validation rows, used for the metrics computed afterwards.
y_val_pred = mlp.predict(X_val)




In [None]:
# Score the validation predictions: mean squared error, its square root,
# and the coefficient of determination.
mse_val = mean_squared_error(y_val, y_val_pred)
r2_val = r2_score(y_val, y_val_pred)
rmse_val = np.sqrt(mse_val)

# Print the validation-set results, one metric per line.
validation_report = (
    ("Validation Set - MSE:", mse_val),
    ("Validation Set - RMSE:", rmse_val),
    ("Validation Set - R^2 Score:", r2_val),
)
for metric_label, metric_value in validation_report:
    print(metric_label, metric_value)

In [None]:
# Predict on the test features with the fitted network.
y_test_pred = mlp.predict(X_test)

# Score the test predictions: mean squared error, its square root,
# and the coefficient of determination.
mse_test = mean_squared_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)
rmse_test = np.sqrt(mse_test)

# Print the test-set results, one metric per line.
test_report = (
    ("Test Set - MSE:", mse_test),
    ("Test Set - RMSE:", rmse_test),
    ("Test Set - R^2 Score:", r2_test),
)
for metric_label, metric_value in test_report:
    print(metric_label, metric_value)