In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

In [45]:
# Load the datasets
train_file = "datatraining.txt"
test_file1 = "datatest.txt"
test_file2 = "datatest2.txt"

df_train = pd.read_csv(train_file, parse_dates=[0])
df_test1 = pd.read_csv(test_file1, parse_dates=[0])
df_test2 = pd.read_csv(test_file2, parse_dates=[0])

In [46]:
# Preprocessing
X_train = df_train.drop(columns=['date', 'Occupancy'])
y_train = df_train['Occupancy']
X_test1 = df_test1.drop(columns=['date', 'Occupancy'])
y_test1 = df_test1['Occupancy']
X_test2 = df_test2.drop(columns=['date', 'Occupancy'])
y_test2 = df_test2['Occupancy']

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test1_scaled = scaler.transform(X_test1)
X_test2_scaled = scaler.transform(X_test2)

In [43]:
# Machine Learning: Random Forest (for comparison)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_scaled, y_train)
rf_predictions1 = rf_classifier.predict(X_test1_scaled)
rf_accuracy1 = accuracy_score(y_test1, rf_predictions1)
print(f"Random Forest Accuracy on Test Set 1: {rf_accuracy1:.2f}")

rf_predictions2 = rf_classifier.predict(X_test2_scaled)
rf_accuracy2 = accuracy_score(y_test2, rf_predictions2)
print(f"Random Forest Accuracy on Test Set 2: {rf_accuracy2:.2f}")

Random Forest Accuracy on Test Set 1: 0.95
Random Forest Accuracy on Test Set 2: 0.97


In [47]:
# Deep Learning: 1D CNN
X_train_cnn = X_train_scaled.reshape((X_train_scaled.shape[0], X_train_scaled.shape[1], 1))
X_test1_cnn = X_test1_scaled.reshape((X_test1_scaled.shape[0], X_test1_scaled.shape[1], 1))
X_test2_cnn = X_test2_scaled.reshape((X_test2_scaled.shape[0], X_test2_scaled.shape[1], 1))

cnn_model = Sequential()
cnn_model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2))
cnn_model.add(Flatten())
cnn_model.add(Dense(50, activation='relu'))
cnn_model.add(Dense(1, activation='sigmoid'))
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

cnn_model.fit(X_train_cnn, y_train, epochs=10, batch_size=32, validation_data=(X_test1_cnn, y_test1))
cnn_accuracy1 = cnn_model.evaluate(X_test1_cnn, y_test1)[1]
print(f"1D CNN Accuracy on Test Set 1: {cnn_accuracy1:.2f}")

cnn_accuracy2 = cnn_model.evaluate(X_test2_cnn, y_test2)[1]
print(f"1D CNN Accuracy on Test Set 2: {cnn_accuracy2:.2f}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
1D CNN Accuracy on Test Set 1: 0.97
1D CNN Accuracy on Test Set 2: 0.93


In [48]:
# Print the accuracy of the Random Forest classifier
print(f"Random Forest Accuracy on Test Set 1: {rf_accuracy1:.2f}")
print(f"Random Forest Accuracy on Test Set 2: {rf_accuracy2:.2f}")

# Print the accuracy of the 1D CNN model
print(f"1D CNN Accuracy on Test Set 1: {cnn_accuracy1:.2f}")
print(f"1D CNN Accuracy on Test Set 2: {cnn_accuracy2:.2f}")

# Compare the two methods
print("\nComparison:")
print(f"Random Forest Accuracy on Test Set 1: {rf_accuracy1:.2f}")
print(f"1D CNN Accuracy on Test Set 1: {cnn_accuracy1:.2f}")

print(f"Random Forest Accuracy on Test Set 2: {rf_accuracy2:.2f}")
print(f"1D CNN Accuracy on Test Set 2: {cnn_accuracy2:.2f}")

Random Forest Accuracy on Test Set 1: 0.95
Random Forest Accuracy on Test Set 2: 0.97
1D CNN Accuracy on Test Set 1: 0.97
1D CNN Accuracy on Test Set 2: 0.93

Comparison:
Random Forest Accuracy on Test Set 1: 0.95
1D CNN Accuracy on Test Set 1: 0.97
Random Forest Accuracy on Test Set 2: 0.97
1D CNN Accuracy on Test Set 2: 0.93
