<h1 style="text-align:center">Evaluate Regression and Classifier Metrics</h1>
<p style="text-align:center">Robert Evans</p>
<p style="text-align:center">School of Technology & Engineering, National University</p>
<p style="text-align:center">DDS-8555: Predictive Analysis</p>
<p style="text-align:center">Dr. Mohammad Yavarimanesh</p>
<p style="text-align:center">January 12, 2025</p>

## Load Required Libraries

In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_percentage_error as MAPE
from sklearn.metrics import classification_report as cr
from numpy import percentile
import numpy as np

## Load the Data Set

In [2]:
iris = pd.DataFrame(datasets.load_iris().data)
iris.columns = datasets.load_iris().feature_names
iris['type'] = datasets.load_iris().target
iris['type'] = iris['type'].astype('object')
iris

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),type
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


## Creating a New Feature

New feature is the sepal length * sepal width divided by petal length * petal width

In [3]:
iris['new'] = (iris.iloc[:,0] * iris.iloc[:,1]) / (iris.iloc[:,2] * iris.iloc[:,3])

In [4]:
# Show the dataset with the new feature
iris

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),type,new
0,5.1,3.5,1.4,0.2,0,63.750000
1,4.9,3.0,1.4,0.2,0,52.500000
2,4.7,3.2,1.3,0.2,0,57.846154
3,4.6,3.1,1.5,0.2,0,47.533333
4,5.0,3.6,1.4,0.2,0,64.285714
...,...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2,1.680602
146,6.3,2.5,5.0,1.9,2,1.657895
147,6.5,3.0,5.2,2.0,2,1.875000
148,6.2,3.4,5.4,2.3,2,1.697262


## Split the data set

This will be an 80/20 train/test split

In [5]:
X_train, X_test, y_train, y_test = tts(iris.iloc[:,0:4], iris.iloc[:,4], test_size=0.2, random_state=42,
stratify=iris.iloc[:,4])

In [6]:
def myf(y, yhat):
  ME = np.round(np.mean(y-yhat), 3)
  MPE = np.round(np.mean((y-yhat)/y), 3)
  myMAE = np.round(MAE(y, yhat), 3)
  myMSE = np.round(MSE(y, yhat), 3)
  myMAPE = np.round(MAPE(y, yhat), 3)
  print(f'\tME: {ME}\n\tMPE: {MPE}\n\tMAE: {myMAE}\n\tMSE: {myMSE}\n\tMAPE: {myMAPE}')

## Evaluating Estimators for Sepal Width

In [7]:
est1=np.mean(X_train['petal length (cm)'])

est2=np.mean(X_train['sepal length (cm)']-X_train['petal width (cm)'])

est1=[est1]*len(y_test)

est2=[est2]*len(y_test)

print("Results for Mean of Petal Length")
myf(X_test['sepal width (cm)'],est1)
print()
print("Results for Mean of Sepal Length minus Mean of Petal Width")
myf(X_test['sepal width (cm)'],est2)

Results for Mean of Petal Length
	ME: -0.677
	MPE: -0.237
	MAE: 0.694
	MSE: 0.602
	MAPE: 0.242

Results for Mean of Sepal Length minus Mean of Petal Width
	ME: -1.543
	MPE: -0.522
	MAE: 1.543
	MSE: 2.526
	MAPE: 0.522


## Evaluating Classifiers

In [8]:
est3=percentile(X_train['sepal length (cm)'], [25, 50])
y_hat=np.zeros(len(y_test))
y_hat[X_test['sepal length (cm)']>est3[0]]=1
y_hat[X_test['sepal length (cm)']>est3[1]]=2
y_hat=y_hat.astype('int')
print("Classification of 1st and 2nd Quartile")
print(cr(y_test.astype('int'),y_hat))

print()

est4=percentile(X_train['sepal length (cm)'], [50,75])
y_hat2=np.zeros(len(y_test))
y_hat2[X_test['sepal length (cm)']>est3[0]]=1
y_hat2[X_test['sepal length (cm)']>est3[1]]=2
y_hat2=y_hat2.astype('int')
print("Classification of 3rd and 4th Quartile")
print(cr(y_test.astype('int'),y_hat2))

Classification of 1st and 2nd Quartile
              precision    recall  f1-score   support

           0       0.71      0.50      0.59        10
           1       0.33      0.20      0.25        10
           2       0.59      1.00      0.74        10

    accuracy                           0.57        30
   macro avg       0.55      0.57      0.53        30
weighted avg       0.55      0.57      0.53        30


Classification of 3rd and 4th Quartile
              precision    recall  f1-score   support

           0       0.71      0.50      0.59        10
           1       0.33      0.20      0.25        10
           2       0.59      1.00      0.74        10

    accuracy                           0.57        30
   macro avg       0.55      0.57      0.53        30
weighted avg       0.55      0.57      0.53        30

