In [69]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample

# Load the CSV file into a DataFrame
df = pd.read_csv('/content/hvac_data.csv')

# Convert 'Thermal_comfort' column to numeric (if it's not already)
df['Thermal_comfort'] = pd.to_numeric(df['Thermal_comfort'], errors='coerce')

# Fill missing values with mean for each column
df['Air_temp'].fillna(df['Air_temp'].mean(), inplace=True)
df['Relative_humidity'].fillna(df['Relative_humidity'].mean(), inplace=True)
df['Outdoor_temp'].fillna(df['Outdoor_temp'].mean(), inplace=True)
df['Thermal_comfort'].fillna(df['Thermal_comfort'].mean(), inplace=True)


In [79]:
label_encoder = LabelEncoder()
categorical_cols = ['Air_temp', 'Relative_humidity', 'Outdoor_temp', 'Thermal_comfort']

for col in categorical_cols:
    df[col] = label_encoder.fit_transform(df[col])

In [80]:
df.head()

Unnamed: 0,Air_temp,Relative_humidity,Outdoor_temp,Thermal_comfort
0,170,436,282,22
1,170,436,282,22
2,170,436,282,25
3,170,436,282,17
4,170,436,282,25


In [95]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [96]:
# Split the data into features (X) and target (y)
X = df.drop(columns=['Thermal_comfort'])
y = df['Thermal_comfort']

In [97]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

MACHINE LEARNING

In [98]:
# Train Logistic Regression model
logistic_regression = LogisticRegression()
logistic_regression.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [99]:
# Train Decision Tree model
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)

In [105]:
# Train SVM model
svm = SVC()
svm.fit(X_train, y_train)

In [101]:
# Train Random Forest model
random_forest = RandomForestClassifier()
random_forest.fit(X_train, y_train)

In [106]:
# Make predictions
y_pred_lr = logistic_regression.predict(X_test)
y_pred_dt = decision_tree.predict(X_test)
y_pred_svm = svm.predict(X_test)
y_pred_rf = random_forest.predict(X_test)

In [107]:
from sklearn.metrics import accuracy_score
accuracy_lr = accuracy_score(y_test, y_pred_lr)
print("Logistic Regression Accuracy:", accuracy_lr)
from sklearn.metrics import accuracy_score
accuracy_vm = accuracy_score(y_test, y_pred_svm)
print("SVM Accuracy:", accuracy_svm)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
print("Decision Tree Accuracy:", accuracy_dt)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print("Random Forest Accuracy:", accuracy_rf)

Logistic Regression Accuracy: 0.6548310638100107
SVM Accuracy: 0.6664962587721337
Decision Tree Accuracy: 0.7485708974299391
Random Forest Accuracy: 0.7608402658363155


DEEP LEARNING

In [108]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Embedding, Conv1D, LSTM, Bidirectional, Flatten, Concatenate
from sklearn.model_selection import train_test_split

In [109]:
# Reshape the data for CNN
X_train_cnn = np.expand_dims(X_train.values, axis=2)
X_test_cnn = np.expand_dims(X_test.values, axis=2)

In [110]:
# CNN Model
cnn_model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='linear')  # Linear activation for regression
])

cnn_model.compile(optimizer='adam', loss='mean_squared_error')

In [111]:
# LSTM Model
lstm_model = Sequential([
    LSTM(64, input_shape=(X_train.shape[1], 1)),
    Dense(1, activation='linear')  # Linear activation for regression
])

lstm_model.compile(optimizer='adam', loss='mean_squared_error')

In [112]:
# Hybrid Model
input_layer = Input(shape=(X_train_cnn.shape[1], 1))
conv_layer = Conv1D(filters=32, kernel_size=3, activation='relu')(input_layer)
lstm_layer = LSTM(64)(conv_layer)
dense_layer = Dense(64, activation='relu')(lstm_layer)
output_layer = Dense(1, activation='linear')(dense_layer)  # Linear activation for regression

hybrid_model = Model(inputs=input_layer, outputs=output_layer)
hybrid_model.compile(optimizer='adam', loss='mean_squared_error')

In [113]:
# Training
cnn_model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test))
lstm_model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test))
hybrid_model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7b0b32356a10>

In [115]:
# Evaluate and predict
cnn_accuracy = cnn_model.evaluate(X_test_cnn, y_test)
lstm_accuracy = lstm_model.evaluate(X_test_cnn, y_test)
hybrid_accuracy = hybrid_model.evaluate(X_test_cnn, y_test)



In [116]:
print(f'CNN Accuracy: {cnn_accuracy:.2f}')
print(f'LSTM Accuracy: {lstm_accuracy:.2f}')
print(f'Hybrid Accuracy: {hybrid_accuracy:.2f}')


CNN Accuracy: 12.43
LSTM Accuracy: 12.46
Hybrid Accuracy: 12.71
