In [None]:
!pip install pandas matplotlib numpy scipy

In [1]:
import pandas as pd
import matplotlib
import numpy as np
import scipy

In [2]:
# Dataset setup: split helper, shared seed, and the raw housing table
from sklearn.model_selection import train_test_split

# Passed as random_state further down so the splits are repeatable
seed = 1234

# The file is read with header=None, so the column labels are supplied here
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
    'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV',
]

# Read the whitespace-separated Boston Housing file from the UCI archive
boston = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data',
    header=None,
    delimiter=r"\s+",
    names=column_names,
)

# MEDV is the target; every remaining column is a feature
y = boston['MEDV']
X = boston.drop(columns='MEDV')

In [3]:
# 1. Splitting the dataset into training and testing data for regression (3 marks)
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
regression_split = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)
X_train, X_test, y_train, y_test = regression_split

In [4]:
# 2. Building and training a model using Linear Regression and calculating evaluation metrics (8 marks)
from sklearn.linear_model import LinearRegression
from sklearn import metrics

# Fit the linear regression model on the training split
lr = LinearRegression()
lr.fit(X_train, y_train)

# Predict the held-out test rows
y_pred = lr.predict(X_test)

# Evaluation metrics on the test split.
# RMSE is the square root of the MSE computed on the line above, so it reuses
# that value instead of calling mean_squared_error a second time.
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = metrics.r2_score(y_test, y_pred)

print("Mean Absolute Error: ", mae)
print("Mean Squared Error: ", mse)
print("Root Mean Squared Error: ", rmse)
print("R^2 Score: ", r2)

Mean Absolute Error:  3.578934913833636
Mean Squared Error:  23.964571384956887
Root Mean Squared Error:  4.895362232251755
R^2 Score:  0.7665382927362872


In [5]:
# 3. Creating a final regression report/table of evaluation metrics (3 marks)
# (column label, value) pairs, in the order the columns should appear
metric_values = (
    ('Mean Absolute Error', mae),
    ('Mean Squared Error', mse),
    ('Root Mean Squared Error', rmse),
    ('R^2 Score', r2),
)

# Each value is wrapped in a one-element list so the frame has a single row
metrics_dict = {label: [value] for label, value in metric_values}

# Tabulate the metrics and show the table
metrics_df = pd.DataFrame(metrics_dict)
print(metrics_df)

   Mean Absolute Error  Mean Squared Error  Root Mean Squared Error  R^2 Score
0             3.578935           23.964571                 4.895362   0.766538


In [6]:
# 4. Building and training a model using KNN and calculating evaluation metrics (8 marks)
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

# Classification uses the iris data. X, y and the train/test names are rebound
# here, and the classifier cells further down read these rebound splits.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=seed
)

# k-nearest-neighbours classifier with k = 3, fitted on the training split
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Accuracy, weighted-average F1 and the confusion matrix on the test split
accuracy_knn = metrics.accuracy_score(y_test, y_pred)
f1_knn = metrics.f1_score(y_test, y_pred, average='weighted')
cm_knn = metrics.confusion_matrix(y_test, y_pred)

for caption, result in (
    ("Accuracy: ", accuracy_knn),
    ("F1 Score: ", f1_knn),
    ("Confusion Matrix: \n", cm_knn),
):
    print(caption, result)

Accuracy:  1.0
F1 Score:  1.0
Confusion Matrix: 
 [[ 9  0  0]
 [ 0 13  0]
 [ 0  0  8]]


In [7]:
# 5. Building and training a model using Decision Trees and calculating evaluation metrics (8 marks)
from sklearn.tree import DecisionTreeClassifier

# Create a Decision Tree model.
# random_state is fixed to the notebook-wide seed so the fitted tree, and
# therefore the reported metrics, are the same on every run.
dt = DecisionTreeClassifier(random_state=seed)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)

# Accuracy, weighted-average F1 and the confusion matrix on the test split
accuracy_dt = metrics.accuracy_score(y_test, y_pred)
f1_dt = metrics.f1_score(y_test, y_pred, average='weighted')
cm_dt = metrics.confusion_matrix(y_test, y_pred)
print("Accuracy: ", accuracy_dt)
print("F1 Score: ", f1_dt)
print("Confusion Matrix: \n", cm_dt)

Accuracy:  1.0
F1 Score:  1.0
Confusion Matrix: 
 [[ 9  0  0]
 [ 0 13  0]
 [ 0  0  8]]


In [8]:
# 6. Building and training a model using Logistic Regression and calculating evaluation metrics (9 marks)
from sklearn.linear_model import LogisticRegression

# Logistic regression with the iteration cap set to 1000, fitted on the training split
log_r = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = log_r.predict(X_test)

# Accuracy, weighted-average F1 and the confusion matrix on the test split
accuracy_log_r = metrics.accuracy_score(y_test, y_pred)
f1_log_r = metrics.f1_score(y_test, y_pred, average='weighted')
cm_log_r = metrics.confusion_matrix(y_test, y_pred)

for caption, result in (
    ("Accuracy: ", accuracy_log_r),
    ("F1 Score: ", f1_log_r),
    ("Confusion Matrix: \n", cm_log_r),
):
    print(caption, result)

Accuracy:  1.0
F1 Score:  1.0
Confusion Matrix: 
 [[ 9  0  0]
 [ 0 13  0]
 [ 0  0  8]]


In [9]:
# 7. Building and training a model using SVM and calculating evaluation metrics (8 marks)
from sklearn import svm

# Support vector classifier with a linear kernel, fitted on the training split
clf = svm.SVC(kernel='linear').fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Accuracy, weighted-average F1 and the confusion matrix on the test split
accuracy_svm = metrics.accuracy_score(y_test, y_pred)
f1_svm = metrics.f1_score(y_test, y_pred, average='weighted')
cm_svm = metrics.confusion_matrix(y_test, y_pred)

for caption, result in (
    ("Accuracy: ", accuracy_svm),
    ("F1 Score: ", f1_svm),
    ("Confusion Matrix: \n", cm_svm),
):
    print(caption, result)

Accuracy:  1.0
F1 Score:  1.0
Confusion Matrix: 
 [[ 9  0  0]
 [ 0 13  0]
 [ 0  0  8]]


In [13]:
# 8. Creating a final classification report/table of evaluation metrics (3 marks)
# Two tables are printed: the regression metrics first, then one row per classifier.

# Regression table: a single row for the linear regression model
metrics_dict = {
    'Model': ['linear_regression'],
    'Mean Absolute Error': [mae],
    'Mean Squared Error': [mse],
    'Root Mean Squared Error': [rmse],
    'R^2 Score': [r2],
}
metrics_df = pd.DataFrame(metrics_dict)
metrics_str = metrics_df.to_string(index=False)  # index=False omits the row-index column
print(metrics_str)

# Visual separator between the two tables
print('_' * 100)

# Classification table: the lists are aligned by position, one entry per model
metrics_dict = {
    'Model': ['knn', 'decision_tree', 'logistic_regression', 'svm'],
    'Accuracy': [accuracy_knn, accuracy_dt, accuracy_log_r, accuracy_svm],
    'F1 Score': [f1_knn, f1_dt, f1_log_r, f1_svm],
    'Confusion Matrix': [cm_knn, cm_dt, cm_log_r, cm_svm],
}
metrics_df = pd.DataFrame(metrics_dict)
metrics_str = metrics_df.to_string(index=False)
print(metrics_str)

            Model  Mean Absolute Error  Mean Squared Error  Root Mean Squared Error  R^2 Score
linear_regression             3.578935           23.964571                 4.895362   0.766538
____________________________________________________________________________________________________
              Model  Accuracy  F1 Score                   Confusion Matrix
                knn       1.0       1.0 [[9, 0, 0], [0, 13, 0], [0, 0, 8]]
      decision_tree       1.0       1.0 [[9, 0, 0], [0, 13, 0], [0, 0, 8]]
logistic_regression       1.0       1.0 [[9, 0, 0], [0, 13, 0], [0, 0, 8]]
                svm       1.0       1.0 [[9, 0, 0], [0, 13, 0], [0, 0, 8]]
