In [None]:
# Importing the libraries
import numpy as np #for numerical operations
import matplotlib.pyplot as plt #for plotting
import pandas as pd  #for data manipulatio
import seaborn as sns  # statistical data visualization
import joblib  # For saving and loading the model

In [None]:
# Read Bunny_Data.csv into a DataFrame and show it as the cell output.
dataset = pd.read_csv(filepath_or_buffer='Bunny_Data.csv')
dataset

In [None]:
# Rename the first column to 'Identifier'. Re-assigning the result (instead of
# mutating with inplace=True) keeps the step explicit and chain-friendly.
dataset = dataset.rename(columns={dataset.columns[0]: 'Identifier'})

# Binary target: 1 when the Bunny_Data value is below the threshold, else 0.
# The comparison is vectorised over the whole column rather than calling a
# Python lambda once per row; missing values compare as False and therefore
# map to 0, exactly as the row-wise version did.
HEALTHY_THRESHOLD = 20
dataset['Healthy'] = (dataset['Bunny_Data'] < HEALTHY_THRESHOLD).astype(int)

In [None]:
# Drop the Bunny_Data column from the frame.
dataset = dataset.drop(columns='Bunny_Data')

In [None]:
# Preview the first five rows of the frame.
dataset.head(n=5)

In [None]:
# Separate the model inputs (three feature columns) from the target label,
# both as plain NumPy arrays.
feature_columns = ['Humidity_Data', 'Light_Data', 'Temperature_Data']
x = dataset.loc[:, feature_columns].to_numpy()
y = dataset.loc[:, 'Healthy'].to_numpy()

In [None]:
# Hold out 30% of the rows as a test set; the fixed random_state makes the
# split reproducible from run to run.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=100
)

# Report the shape of each partition: train X, train y, test X, test y.
for split_part in (x_train, y_train, x_test, y_test):
    print(split_part.shape)

In [None]:
# Standardise the features. The scaler is fitted on the training set only and
# the same fitted scaler is then applied to both partitions.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

print(x_train)
print(x_test)

In [None]:
# Build and train the model using the logistic regression algorithm
# (default hyper-parameters).
from sklearn.linear_model import LogisticRegression

# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression().fit(x_train, y_train)
model  # display the fitted estimator as the cell output

In [None]:
# coef_      - learned weight of each (scaled) feature in the decision function
# intercept_ - bias term: the decision-function value when every feature is 0
coefficients, intercept = model.coef_, model.intercept_
coefficients, intercept

In [None]:
# Evaluate the logistic regression model on the test set.
# Produces a side-by-side actual/predicted table, the accuracy, the
# classification report and the confusion matrix (printed, then drawn as a
# seaborn heatmap).
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Compute every result first ...
y_pred = model.predict(x_test)
comparison_df = pd.DataFrame({'Actual Y': y_test, 'Predicted Y': y_pred})
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)

# ... then report them section by section, each followed by a blank separator.
print(comparison_df)
print("\n")

print(f'Accuracy Score: {accuracy}')
print(f'Accuracy Percentage: {accuracy * 100:.2f}%')
print("\n")

print("Classification Report:")
print(classification_report(y_test, y_pred))
print("\n")

print("Confusion Matrix:")
print(cm)
print("\n")

# Heatmap of the confusion matrix; labels are set on the Axes seaborn returns.
ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()
print("\n")


In [None]:
# Build and train the model using the support vector algorithm
# (linear kernel).
from sklearn import svm

# fit() returns the estimator itself, so construction and training are chained.
model1 = svm.SVC(kernel='linear').fit(x_train, y_train)
model1  # display the fitted estimator as the cell output

In [None]:
# Evaluate the SVM model on the test set.
# Produces a side-by-side actual/predicted table, the accuracy, the
# classification report and the confusion matrix (printed, then drawn as a
# seaborn heatmap).
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Compute every result first ...
y_pred_svm = model1.predict(x_test)
comparison_df_svm = pd.DataFrame({'Actual Y': y_test, 'Predicted Y': y_pred_svm})
accuracy_svm = accuracy_score(y_test, y_pred_svm)
cm_svm = confusion_matrix(y_test, y_pred_svm)

# ... then report them section by section, each followed by a blank separator.
print(comparison_df_svm)
print("\n")

print(f'Accuracy Score: {accuracy_svm}')
print(f'Accuracy Percentage: {accuracy_svm * 100:.2f}%')
print("\n")

print("Classification Report:")
print(classification_report(y_test, y_pred_svm))
print("\n")

print("Confusion Matrix:")
print(cm_svm)
print("\n")

# Heatmap of the confusion matrix; labels are set on the Axes seaborn returns.
ax_svm = sns.heatmap(cm_svm, annot=True, fmt='d', cmap='Blues')
ax_svm.set_xlabel('Predicted')
ax_svm.set_ylabel('Actual')
ax_svm.set_title('SVM - Confusion Matrix')
plt.show()
print("\n")


In [None]:
# Build and train the model using the decision tree algorithm.
from sklearn.tree import DecisionTreeClassifier

# Hyper-parameters: Gini impurity as the split criterion, a fixed seed, a tree
# at most 3 levels deep, and at least 3 samples in every leaf.
tree_params = {
    'criterion': 'gini',
    'random_state': 100,
    'max_depth': 3,
    'min_samples_leaf': 3,
}
classifier = DecisionTreeClassifier(**tree_params)

# Train the decision tree with the training data.
classifier.fit(x_train, y_train)

In [None]:
# Plot the trained decision tree to visualise its structure.
from sklearn.tree import plot_tree

plt.figure(figsize=(25, 20))
plot_tree(
    classifier,
    # Same column order as the feature matrix the tree was trained on.
    feature_names=['Humidity_Data', 'Light_Data', 'Temperature_Data'],
    # class_names must be listed in ascending order of the class labels.
    # The 'Healthy' target is 0 for not healthy and 1 for healthy, so
    # 'Not Healthy' comes first; the reverse order mislabels every node.
    class_names=['Not Healthy', 'Healthy'],
    filled=True,
)
# Render the figure explicitly so the cell does not echo the list of
# annotation objects returned by plot_tree.
plt.show()

In [None]:
# Evaluate the decision tree model on the test set.
# Produces a side-by-side actual/predicted table, the accuracy, the
# classification report and the confusion matrix (printed, then drawn as a
# seaborn heatmap).
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Compute every result first ...
y_pred_dt = classifier.predict(x_test)
comparison_df_dt = pd.DataFrame({'Actual Y': y_test, 'Predicted Y': y_pred_dt})
accuracy_dt = accuracy_score(y_test, y_pred_dt)
cm_dt = confusion_matrix(y_test, y_pred_dt)

# ... then report them section by section, each followed by a blank separator.
print(comparison_df_dt)
print("\n")

print(f'Accuracy Score: {accuracy_dt}')
print(f'Accuracy Percentage: {accuracy_dt * 100:.2f}%')
print("\n")

print("Classification Report:")
print(classification_report(y_test, y_pred_dt))
print("\n")

print("Confusion Matrix:")
print(cm_dt)
print("\n")

# Heatmap of the confusion matrix; labels are set on the Axes seaborn returns.
ax_dt = sns.heatmap(cm_dt, annot=True, fmt='d', cmap='Blues')
ax_dt.set_xlabel('Predicted')
ax_dt.set_ylabel('Actual')
ax_dt.set_title('Decision Tree - Confusion Matrix')
plt.show()
print("\n")

In [None]:
'''
Upon evaluating the Logistic Regression algorithm, Support Vector algorithm, and the Decision Tree algorithm,
the Decision Tree algorithm proved to be the best one to deploy for this model, as it gives the
highest accuracy rate.
'''

In [None]:
# Persist the trained decision tree and the fitted scaler to disk with joblib,
# one file per object.
artifacts = (
    (classifier, 'decision_tree_model.pkl'),
    (sc, 'scaler.pkl'),
)
for artifact, filename in artifacts:
    joblib.dump(artifact, filename)
print("Model and Scaler have been saved successfully.")