<a href="https://colab.research.google.com/github/rabsonczimba/text/blob/main/Rabson_Zimba_Task_7HD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler
import random

In [2]:
# Set random seed for reproducibility
random.seed(42)
np.random.seed(42)

In [3]:
# prompt: reading Tetuan City power consumption.csv

df = pd.read_csv('Tetuan City power consumption.csv')
data = df.copy()


In [4]:
data.head()

Unnamed: 0,DateTime,Temperature,Humidity,Wind Speed,general diffuse flows,diffuse flows,Zone 1 Power Consumption,Zone 2 Power Consumption,Zone 3 Power Consumption
0,1/1/2017 0:00,6.559,73.8,0.083,0.051,0.119,34055.6962,16128.87538,20240.96386
1,1/1/2017 0:10,6.414,74.5,0.083,0.07,0.085,29814.68354,19375.07599,20131.08434
2,1/1/2017 0:20,6.313,74.5,0.08,0.062,0.1,29128.10127,19006.68693,19668.43373
3,1/1/2017 0:30,6.121,75.0,0.083,0.091,0.096,28228.86076,18361.09422,18899.27711
4,1/1/2017 0:40,5.921,75.7,0.081,0.048,0.085,27335.6962,17872.34043,18442.40964


In [5]:
# Simulating the dataset as described (this is a simplified version)
data = pd.DataFrame({
    'Month': np.random.randint(1, 13, size=52560),
    'Day': np.random.randint(1, 32, size=52560),
    'Hour': np.random.randint(0, 24, size=52560),
    'Temperature': np.random.uniform(5, 40, size=52560),
    'Humidity': np.random.uniform(10, 100, size=52560),
    'WindSpeed': np.random.uniform(0, 10, size=52560),
    'PowerConsumption': np.random.uniform(1000, 50000, size=52560)  # Target variable
})

In [7]:
# Feature selection (hour and temperature are important)
X = data[['Month', 'Day', 'Hour', 'Temperature', 'Humidity', 'WindSpeed']]
y = data['PowerConsumption']

In [8]:

# Normalization (Min-Max Scaling)
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split (75% training, 25% testing)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.25, random_state=42)

In [9]:

# Placeholder for results
results = {
    'Model': [],
    'Train_RMSE': [],
    'Test_RMSE': [],
    'Train_MAE': [],
    'Test_MAE': []
}

In [10]:
# Random Forest Model
rf = RandomForestRegressor(n_estimators=30, max_depth=None, min_samples_split=2, min_samples_leaf=1, random_state=42)
rf.fit(X_train, y_train)
rf_train_preds = rf.predict(X_train)
rf_test_preds = rf.predict(X_test)
results['Model'].append('Random Forest')
results['Train_RMSE'].append(np.sqrt(mean_squared_error(y_train, rf_train_preds)))
results['Test_RMSE'].append(np.sqrt(mean_squared_error(y_test, rf_test_preds)))
results['Train_MAE'].append(mean_absolute_error(y_train, rf_train_preds))
results['Test_MAE'].append(mean_absolute_error(y_test, rf_test_preds))


In [11]:

# Decision Tree Model
dt = DecisionTreeRegressor(max_depth=None, min_samples_split=2, min_samples_leaf=10, random_state=42)
dt.fit(X_train, y_train)
dt_train_preds = dt.predict(X_train)
dt_test_preds = dt.predict(X_test)
results['Model'].append('Decision Tree')
results['Train_RMSE'].append(np.sqrt(mean_squared_error(y_train, dt_train_preds)))
results['Test_RMSE'].append(np.sqrt(mean_squared_error(y_test, dt_test_preds)))
results['Train_MAE'].append(mean_absolute_error(y_train, dt_train_preds))
results['Test_MAE'].append(mean_absolute_error(y_test, dt_test_preds))

In [12]:
# Support Vector Regression (SVR)
svr = SVR(C=10, gamma=0.01)
svr.fit(X_train, y_train)
svr_train_preds = svr.predict(X_train)
svr_test_preds = svr.predict(X_test)
results['Model'].append('Support Vector Regression')
results['Train_RMSE'].append(np.sqrt(mean_squared_error(y_train, svr_train_preds)))
results['Test_RMSE'].append(np.sqrt(mean_squared_error(y_test, svr_test_preds)))
results['Train_MAE'].append(mean_absolute_error(y_train, svr_train_preds))
results['Test_MAE'].append(mean_absolute_error(y_test, svr_test_preds))

In [13]:
# Linear Regression Model
lr = LinearRegression()
lr.fit(X_train, y_train)
lr_train_preds = lr.predict(X_train)
lr_test_preds = lr.predict(X_test)
results['Model'].append('Linear Regression')
results['Train_RMSE'].append(np.sqrt(mean_squared_error(y_train, lr_train_preds)))
results['Test_RMSE'].append(np.sqrt(mean_squared_error(y_test, lr_test_preds)))
results['Train_MAE'].append(mean_absolute_error(y_train, lr_train_preds))
results['Test_MAE'].append(mean_absolute_error(y_test, lr_test_preds))

In [18]:
# Convert results to DataFrame and display
results_df = pd.DataFrame(results)
results_df  # This will display the DataFrame in the output

Unnamed: 0,Model,Train_RMSE,Test_RMSE,Train_MAE,Test_MAE
0,Random Forest,5644.49622,14588.541293,4685.553822,12523.367224
1,Decision Tree,11705.333385,16232.879349,9636.605264,13533.655662
2,Support Vector Regression,14136.165731,14163.079352,12247.777208,12286.49971
3,Linear Regression,14134.944333,14163.216567,12246.48327,12288.021105
