In [1]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Read the sensor recordings from the CSV in the working directory,
# then show the first five rows as a quick sanity check of the parsed columns.
train_data = pd.read_csv('sensor_dataset.csv')
train_data.head(n=5)

Unnamed: 0,last_changed,aqi_sciosense,eco2,humidity,sound,temperature,tvoc,fteg,radar_sensor,occupancy
0,2024-12-13T17:42:39,180,1270,31.36,493.79125,24.62,1494,180.42,1,1
1,2024-12-13T17:42:40,179,1267,31.36,568.813846,24.62,1485,181.36,1,1
2,2024-12-13T17:42:41,179,1267,31.34,554.6325,24.62,1485,182.36,1,1
3,2024-12-13T17:42:42,182,1274,31.34,584.920833,24.62,1506,180.36,1,1
4,2024-12-13T17:42:43,182,1274,31.34,651.671667,24.62,1506,176.12,1,1


In [3]:
# Separate the target from the predictors purely by column position:
# the last column is the label, and every column strictly between the first
# and the last one is a feature.
# NOTE(review): this depends on the CSV's column order — confirm that the first
# column is one that should be excluded from the features.
train_data_y = train_data.iloc[:, -1]
train_data_X = train_data.iloc[:, 1:-1]

In [4]:
# Hold out 35% of the rows for evaluation. With shuffle=False the split is
# sequential: the leading rows form the training set and the trailing rows the
# test set, so the two sets never interleave.
X_train, X_test, y_train, y_test = train_test_split(
    train_data_X,
    train_data_y,
    test_size=0.35,
    shuffle=False,
)

In [5]:
# Count how often each label value occurs in the training portion of the split
y_train.value_counts()

occupancy
3    265
5    186
2    158
4    132
1    102
0     89
Name: count, dtype: int64

In [6]:
# Count how often each label value occurs in the held-out portion, for
# comparison with the training distribution (the split is not shuffled, so the
# two distributions are not guaranteed to match)
y_test.value_counts()

occupancy
3    194
2    170
5     70
4     54
1     15
Name: count, dtype: int64

In [7]:
# Two-stage estimator: standardize the input features, then fit a seeded
# 100-tree random forest regressor on the scaled values.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('regressor', RandomForestRegressor(n_estimators=100, random_state=42)),
    ]
)

In [8]:
# Train the pipeline on the training split
model = pipeline.fit(X_train, y_train)

# Predict on the held-out test split and compute regression error metrics
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5  # square root of MSE, expressed in the target's own units

# Report every metric in one call, one per line, rounded to two decimals
print(
    "Model Performance:",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"R-squared (R2): {r2:.2f}",
    sep="\n",
)

Model Performance:
Mean Absolute Error (MAE): 0.76
Mean Squared Error (MSE): 0.96
Root Mean Squared Error (RMSE): 0.98
R-squared (R2): 0.15
