Import libraries and modules

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

Load the data from CSV

In [None]:
# Read the raw measurements into a DataFrame. The path is relative, so the CSV
# is looked up from the notebook's current working directory.
data = pd.read_csv(filepath_or_buffer='data_smfc_data.csv')

Convert TIME to datetime and extract features

In [None]:
# Parse TIME strings using the pattern 'day-%d; %I%p' (day number, then a
# 12-hour clock value with an AM/PM marker) and derive the numeric Day and Hour
# columns from the parsed timestamps. The callables are evaluated in order, so
# Day and Hour see the already-converted TIME column.
data = data.assign(
    TIME=lambda frame: pd.to_datetime(frame['TIME'], format='day-%d; %I%p'),
    Day=lambda frame: frame['TIME'].dt.day,
    Hour=lambda frame: frame['TIME'].dt.hour,
)

Create lag features

In [None]:
# Add the voltage from one and two rows earlier as predictors, then discard
# every row that contains a missing value (this includes the first two rows,
# whose lags are undefined).
# NOTE(review): shift() works on row order, so this presumes the CSV rows are
# already in chronological order -- confirm.
data = data.assign(
    VOLTAGE_LAG_1=lambda frame: frame['VOLTAGE'].shift(1),
    VOLTAGE_LAG_2=lambda frame: frame['VOLTAGE'].shift(2),
).dropna()

Prepare the data

In [None]:
# Target: the voltage of the current row.
y = data.loc[:, 'VOLTAGE']
# Predictors: calendar position plus the two previous voltage readings.
X = data.loc[:, ['Day', 'Hour', 'VOLTAGE_LAG_1', 'VOLTAGE_LAG_2']]

Split the data

In [None]:
# Chronological hold-out: train on the first 80% of rows and test on the final
# 20%, without shuffling.
#
# The rows form a time series and the features include the previous two
# voltages. A shuffled split interleaves test rows with training rows that
# contain their immediate neighbours, which makes the test score optimistic
# for a model that is later used to forecast forward in time.
# This relies on the rows being in time order, which the shift()-based lag
# features already assume.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

Train the model

In [None]:
# Random forest with 100 trees; the fixed seed makes the fitted forest
# repeatable from run to run.
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X=X_train, y=y_train)

Test the model

In [None]:
# Score the fitted model on the held-out rows.
y_pred = model.predict(X=X_test)
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=y_pred),
    r2_score(y_true=y_test, y_pred=y_pred),
)

# Report both metrics, one per line.
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

Predict future voltages

In [None]:
# Forecast grid: days 27 through 32 inclusive, at four readings per day
# spaced six hours apart (01:00, 07:00, 13:00, 19:00).
future_hours = list(range(1, 24, 6))
future_days = np.arange(27, 32 + 1)

Initialize with the last known data point and forecast recursively

In [None]:
# Recursive one-step-ahead forecast: every prediction is fed back in as the
# most recent voltage for the following step.
#
# The model is trained on rows of the form
#     (Day, Hour, V[t-1], V[t-2]) -> V[t]
# so the first step after the last known row T must be predicted from
# (V[T], V[T-1]), i.e. the last row's VOLTAGE and VOLTAGE_LAG_1.
# Seeding the loop with the last row's own lag columns (V[T-1], V[T-2]) would
# make every forecast use features that are one step stale.
last_row = data.iloc[-1]
last_known_voltage = last_row['VOLTAGE']              # V[T]   -> lag 1 of the next step
last_known_voltage_lag_1 = last_row['VOLTAGE_LAG_1']  # V[T-1] -> lag 2 of the next step
last_known_voltage_lag_2 = last_row['VOLTAGE_LAG_2']  # V[T-2]; tracked only to keep the name defined

future_predictions = []
for day in future_days:
    for hour in future_hours:
        # Single-row frame with the same column names and order used for training.
        future_data = pd.DataFrame(
            [(day, hour, last_known_voltage, last_known_voltage_lag_1)],
            columns=['Day', 'Hour', 'VOLTAGE_LAG_1', 'VOLTAGE_LAG_2'],
        )
        future_voltage = model.predict(future_data)[0]
        future_predictions.append([day, hour, future_voltage])

        # Slide the window forward: the new prediction becomes the most recent
        # voltage and the older values each move back one position.
        last_known_voltage_lag_2 = last_known_voltage_lag_1
        last_known_voltage_lag_1 = last_known_voltage
        last_known_voltage = future_voltage

Display future predictions

In [None]:
# One line per forecast slot, voltage rounded to three decimals.
for forecast_row in future_predictions:
    day, hour, voltage = forecast_row
    print(f'Day {day}, Hour {hour}: Predicted Voltage = {voltage:.3f} V')

Predict on training set for plotting

In [None]:
# The test-set predictions were computed earlier; reuse them under a name
# that parallels the training-set predictions.
y_test_pred = y_pred
# In-sample predictions, needed only for the training-set scatter plot.
y_train_pred = model.predict(X=X_train)

Calculate R² scores for the training and test sets

In [None]:
# Coefficient of determination for each partition, in (train, test) order.
r2_train, r2_test = (
    r2_score(y_true=actual, y_pred=fitted)
    for actual, fitted in ((y_train, y_train_pred), (y_test, y_test_pred))
)

Print the R² scores

In [None]:
# A spacer line followed by both scores, each on its own line.
print(
    " ",
    f'Training R² Score: {r2_train:.4f}',
    f'Test R² Score: {r2_test:.4f}',
    sep='\n',
)

Plot actual vs. predicted voltage for the training and test sets

In [None]:
# Side-by-side actual-vs-predicted scatter plots for the two partitions.
# Each panel also gets a red y = x reference line spanning the range of the
# actual values; points on that line are perfect predictions.
fig, axs = plt.subplots(1, 2, figsize=(14, 6))

panels = (
    (axs[0], 'Training Set', y_train, y_train_pred, 'royalblue', f'R²: {r2_train:.4f}'),
    (axs[1], 'Test Set', y_test, y_test_pred, 'seagreen', f'R²: {r2_test:.4f}'),
)
for panel_ax, panel_title, actual, predicted, colour, legend_label in panels:
    panel_ax.scatter(actual, predicted, color=colour, alpha=0.6, label=legend_label)
    lowest, highest = min(actual), max(actual)
    panel_ax.plot([lowest, highest], [lowest, highest], color='red', lw=2)  # y = x line
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Actual Voltage')
    panel_ax.set_ylabel('Predicted Voltage')
    panel_ax.legend()

fig.suptitle('Random Forest Regressor: Actual vs. Predicted Voltage')
plt.tight_layout()
plt.show()

Reshape future_predictions for the heatmap

In [None]:
# Long format: one row per (Day, Hour) forecast.
future_predictions_df = pd.DataFrame(
    data=future_predictions,
    columns=['Day', 'Hour', 'Voltage'],
)
# Wide format for the heatmap: days as rows, hours as columns, voltage in the cells.
heatmap_data = pd.pivot(
    future_predictions_df,
    index='Day',
    columns='Hour',
    values='Voltage',
)

Create the heatmap

In [None]:
# Annotated heatmap of the forecast grid; each cell shows the predicted
# voltage to three decimals.
plt.figure(figsize=(10, 6))
heatmap_ax = sns.heatmap(heatmap_data, annot=True, fmt='.3f', cmap='viridis')
heatmap_ax.set_title('Predicted voltage Heatmap')
heatmap_ax.set_xlabel('Hour')
heatmap_ax.set_ylabel('Day')
plt.show()