### 6COM2007 - Coursework 2: Intelligence
- Author: Marcelo Hernandez 
- University of Hertfordshire - ID: 23033126
- April 25, 2024

In [10]:
# Libraries
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
import numpy as np
import joblib

In [11]:
# Read the cleaned coursework dataset from disk
df = pd.read_csv('../IAS_DATA/6com2007-3_cleaned.csv')

# Model inputs are three room readings; the value to predict is 'Satisfaction'
feature_columns = ['CO2_room', 'Relative_humidity_room', 'Indoor_temperature_room']
features = df[feature_columns]
target = df['Satisfaction']

# Rescale each feature column into the [0, 1] range
# (fit learns the per-column min/max, transform applies them)
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(features)
scaled_features = scaler.transform(features)

# Sanity check: number of rows/columns going into the model
print('Features:', features.shape)
print('Target:', target.shape)

Features: (2764, 3)
Target: (2764,)


In [12]:
# Split the RAW (unscaled) features into training and testing sets first.
# Splitting before scaling keeps the test rows out of the scaler's min/max,
# which avoids leaking test-set information into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Refit the scaler on the training rows only, then apply that same mapping
# to both partitions. Test values may fall slightly outside [0, 1]; that is
# expected when the scaler has not seen them.
scaler.fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [13]:
# Random Forest regressor with 100 trees and a fixed random_state
forest_settings = {'n_estimators': 100, 'random_state': 42}
rf_model = RandomForestRegressor(**forest_settings)

# Train the forest on the training partition
rf_model.fit(X_train, y_train)

In [14]:
# Generate predictions for the held-out test rows
y_pred_rf = rf_model.predict(X_test)

# Root mean squared error = square root of the test-set MSE
mse_rf = mean_squared_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mse_rf)
print(f"Test RMSE with Random Forest model: {rmse_rf:.2f}")

# R^2 on the test set, multiplied by 100 so it prints as a percentage
r2_rf = 100 * r2_score(y_test, y_pred_rf)
print(f"R^2 Score as a percentage with Random Forest model: {r2_rf:.2f}%")

Test RMSE with Random Forest model: 3.63
R^2 Score as a percentage with Random Forest model: 88.23%


In [15]:
# Output file names for the persisted artefacts (written to the working directory)
model_path = 'random_forest_model.pkl'
scaler_path = 'scaler.pkl'

# Persist the trained forest so it can be reloaded without retraining
joblib.dump(rf_model, model_path)

# Persist the fitted scaler alongside it; new data must be scaled the same way
joblib.dump(scaler, scaler_path)

['scaler.pkl']

#### Apply Trained Model to "New_day" Dataset to Predict Satisfaction

In [16]:
# Read the cleaned measurements for the new day
new_day_df = pd.read_csv('../IAS_DATA/New_day_cleaned.csv')

# Keep the same three sensor columns, in the same order, used for training
new_day_columns = ['CO2_room', 'Relative_humidity_room', 'Indoor_temperature_room']
new_day_features = new_day_df.loc[:, new_day_columns]

# Apply the already-fitted scaler (transform only, no refit on the new data)
new_day_scaled_features = scaler.transform(new_day_features)

# Sanity check: rows x columns that will be fed to the model
print('New day features shape:', new_day_scaled_features.shape)

New day features shape: (96, 3)


In [17]:
# Run the trained forest over the scaled new-day features;
# yields one predicted satisfaction value per input row
predicted_satisfaction = rf_model.predict(X=new_day_scaled_features)

In [18]:
# Attach the model output to the new-day frame as an extra column
new_day_df['Predicted_Satisfaction'] = predicted_satisfaction

# Preview the first rows, including the new column
preview_rows = new_day_df.head()
print(preview_rows)

# Write the augmented frame to a new CSV file, omitting the index column.
# NOTE(review): to_csv raises PermissionError when the target cannot be written,
# presumably because the file is open in another program or the folder is
# read-only; confirm, then close the file and re-run this cell.
output_csv = '../IAS_DATA/New_day_with_Predicted_Satisfaction.csv'
new_day_df.to_csv(output_csv, index=False)

         Date   Time    CO2_room  Relative_humidity_room  Lighting_room  \
0  12/04/2012  00:00  198.967871               53.699924      13.814564   
1  12/04/2012  00:15  198.618986               52.509405      13.131954   
2  12/04/2012  00:30  198.828400               51.386424      14.304592   
3  12/04/2012  00:45  198.422066               54.805172      13.680236   
4  12/04/2012  01:00  198.931140               52.759482      13.547493   

   Meteo_Rain  Meteo_Sun_dusk  Meteo_Wind  Meteo_Sun_light_in_west_facade  \
0         0.0        1.222544    0.346890                               0   
1         0.0        0.658924    0.447105                               0   
2         0.0        1.605216    0.272932                               0   
3         0.0        1.376596    0.285899                               0   
4         0.0        0.752146    0.724640                               0   

   Meteo_Sun_light_in_east_facade  ...  Outdoor_relative_humidity_Sensor  \
0         

PermissionError: [Errno 13] Permission denied: '../IAS_DATA/New_day_with_Predicted_Satisfaction.csv'