# Code Demo
1. Feed-forward neural net on the Iris data set
2. Logistic Regression on the Breast Cancer Wisconsin (Diagnostic) data set
3. Linear Regression on the Boston Housing Market data set
4. K-Nearest Neighbors on the Iris data set
5. Learning Vector Quantization (LVQ) on the Iris data set

In [119]:
#Data set import
from ucimlrepo import fetch_ucirepo # used to fetch data from UCI
import pandas as pd # used to fetch data from lib.stat.cmu.edu

# imports for simpleml
import numpy as np
from simpleml.neuralnet import FFNeuralNet, FFLayer
from simpleml.regression import LogisticRegression, LinearRegression
from simpleml import utilities, misc

# Part 1: Feed Forward Neural Net

In [120]:
# Fetch the Iris data set (UCI id 53) and turn its species names into one-hot
# target rows for the neural net.

iris = fetch_ucirepo(id=53)

X = iris.data.features.to_numpy()
target = iris.data.targets.to_numpy()

# Integer class codes (1, 2, 3) stored in an array shaped like the raw labels.
y = np.zeros(target.shape, dtype=np.int32)
for class_code, species in enumerate(
        ('Iris-setosa', 'Iris-versicolor', 'Iris-virginica'), start=1):
    y[target == species] = class_code

# Replace the string labels with a (target.size, 3) indicator matrix:
# row r gets a 1 in column y[r] - 1.
i = np.arange(target.size)
y = np.squeeze(y)
target = np.zeros((i.size, 3))
target[i, y - 1] = 1

In [121]:
# perform scaling and splitting using simpleml utilities
# The scaler is fitted on every row of X; only afterwards is the data divided
# into train and test parts (train_split=0.7, presumably a 70/30 split).
# NOTE(review): fitting the scaler before the split lets test-set statistics
# influence the training features -- consider fitting on the training part
# only; confirm whether utilities.Scaler exposes a separate transform step.
# NOTE(review): no random seed is set in this cell, so the split (and the
# scores printed further down) may differ between runs -- confirm how
# utilities.split selects rows.
scaler = utilities.Scaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = utilities.split(X, target, train_split=0.7)



In [122]:
# Build, train and apply the feed-forward neural net.
# The FFLayer arguments look like (layer size, input size): the chain reads
# 4 -> 5 -> 6 -> 3 -- TODO confirm against simpleml.neuralnet.FFLayer.
nn = FFNeuralNet()
for layer_args in ((5, 4), (6, 5), (3, 6)):
    nn.add_layer(FFLayer(*layer_args))

nn.compile()

# Train on the training part, then predict for the held-out test features.
nn.fit(X_train, y_train)
y_pred = nn.predict(X_test)


In [123]:
# compute a few metrics for ffnn
# y_test and y_pred are handed to both metrics unchanged; y_test presumably
# holds rows of the one-hot target matrix passed to utilities.split.
# NOTE(review): for single-label classification micro-averaged F1 normally
# equals plain accuracy, yet the recorded output below shows 0.9556 vs 0.9333.
# That suggests utilities.accuracy compares the one-hot arrays element-wise
# rather than per sample -- confirm against simpleml.utilities.
acc = utilities.accuracy(y_test, y_pred)
f1_score = utilities.f1(y_test, y_pred, kind="MICRO")

print(f'simpleml feed forward neural net accuracy on Iris dataset: {acc}')
print(f'simpleml feed foward neural net f1 score on Iris dataset: {f1_score}')



simpleml feed forward neural net accuracy on Iris dataset: 0.9555555555555556
simpleml feed foward neural net f1 score on Iris dataset: 0.9333333333333333


# Part 2: Logistic Regression

In [124]:
# Fetch the Breast Cancer Wisconsin (Diagnostic) data set (UCI id 17).
cancer = fetch_ucirepo(id=17)

X = cancer.data.features.to_numpy()
target = cancer.data.targets.to_numpy()

# Binary int32 labels with the same shape as target:
# 'B' -> 1, every other value (including 'M') -> 0.
y = (target == 'B').astype(np.int32)

In [125]:
# perform scaling and splitting using simpleml utilities
# The scaler is fitted on every row of X; only afterwards are X and the 0/1
# labels y divided into train and test parts (train_split=0.7, presumably a
# 70/30 split).
# NOTE(review): fitting the scaler before the split lets test-set statistics
# influence the training features -- consider fitting on the training part
# only; confirm whether utilities.Scaler exposes a separate transform step.
# NOTE(review): no random seed is set in this cell, so the recorded scores may
# not be reproducible -- confirm how utilities.split selects rows.
scaler = utilities.Scaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = utilities.split(X, y, train_split=0.7)


In [126]:
#apply logistic regression
# The model is created with its constructor defaults, fitted on the training
# part, and then asked to predict for the held-out test features.
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)



In [127]:
# compute a few metrics for logistic regression
# Both metrics receive the held-out labels and the model's predictions as-is.
# NOTE(review): micro-averaged F1 normally equals accuracy for single-label
# data, but the recorded output below differs (0.9766 vs 0.9811) -- confirm how
# utilities.f1 interprets kind="MICRO" for 0/1 labels.
acc = utilities.accuracy(y_test, y_pred)
f1_score = utilities.f1(y_test, y_pred, kind="MICRO")

print(f'simpleml logistic regression accuracy on Wisconsin Breast Cancer dataset: {acc}')
print(f'simpleml logistic regression f1 score on Wisconsin Breast Cancer dataset: {f1_score}')



simpleml logistic regression accuracy on Wisconsin Breast Cancer dataset: 0.9766081871345029
simpleml logistic regression f1 score on Wisconsin Breast Cancer dataset: 0.9811320754716981


# Part 3: Linear Regression


In [128]:

# Download the raw Boston housing table from StatLib. The leading description
# lines are skipped via skiprows and the remainder is parsed as
# whitespace-separated values without a header row.
# The separator is written as a raw string: "\s" is not a valid string escape,
# so a non-raw "\s+" triggers an invalid-escape warning on current Python
# versions (and is slated to become an error).
dump = pd.read_csv('http://lib.stat.cmu.edu/datasets/boston', skiprows=21, sep=r"\s+", header=None).values
# Each record is wrapped over two physical lines: stitch every even row to the
# first three values of the odd row that follows it.
cleaned = np.hstack((dump[0::2, :], dump[1::2, :3]))

# All columns but the last are features; the last column is the regression
# target (the -1: slice keeps it two-dimensional).
X = cleaned[:, :-1]
target = cleaned[:, -1:]

In [129]:
# perform scaling and splitting using simpleml utilities
# Only the features X are scaled; the regression target is passed to the split
# unscaled. train_split=0.7 presumably gives a 70/30 split.
# NOTE(review): fitting the scaler before the split lets test-set statistics
# influence the training features -- consider fitting on the training part
# only; confirm whether utilities.Scaler exposes a separate transform step.
# NOTE(review): no random seed is set in this cell, so the recorded scores may
# not be reproducible -- confirm how utilities.split selects rows.
scaler = utilities.Scaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = utilities.split(X, target, train_split=0.7)

In [130]:
#apply linear regression
# The model is created with its constructor defaults, fitted on the training
# part, and then asked to predict for the held-out test features.
# Note that `lr` rebinds the name used for the logistic regression model in
# Part 2.
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)



In [131]:
# compute a few metrics for linear regression
# Both metrics compare the held-out targets with the model's predictions;
# r2 is printed first, then the mean squared error.
mse = utilities.mean_squared_error(y_test, y_pred)
r2 = utilities.r2(y_test, y_pred)

print(f'simpleml linear regression r2 on Boston housing dataset: {r2}')
print(f'simpleml linear regression mean squared error on Boston housing dataset: {mse}')




simpleml linear regression r2 on Boston housing dataset: 0.714990549737271
simpleml linear regression mean squared error on Boston housing dataset: 29.707766192792143


# Part 4: K-Nearest Neighbors

In [132]:
# Fetch the Iris data set (UCI id 53) again and one-hot encode the species
# names; the result feeds KNN here and LVQ later on.

iris = fetch_ucirepo(id=53)

X = iris.data.features.to_numpy()
target = iris.data.targets.to_numpy()

# Integer class codes (1, 2, 3) stored in an array shaped like the raw labels.
y = np.zeros(target.shape, dtype=np.int32)
for class_code, species in enumerate(
        ('Iris-setosa', 'Iris-versicolor', 'Iris-virginica'), start=1):
    y[target == species] = class_code

# Replace the string labels with a (target.size, 3) indicator matrix:
# row r gets a 1 in column y[r] - 1.
i = np.arange(target.size)
y = np.squeeze(y)
target = np.zeros((i.size, 3))
target[i, y - 1] = 1

In [133]:
# perform scaling and splitting using simpleml utilities
# The resulting X_train / X_test / y_train / y_test are used by both the KNN
# and the LVQ cells. train_split=0.7 presumably gives a 70/30 split.
# NOTE(review): fitting the scaler before the split lets test-set statistics
# influence the training features -- consider fitting on the training part
# only; confirm whether utilities.Scaler exposes a separate transform step.
# NOTE(review): no random seed is set in this cell, so the recorded scores may
# not be reproducible -- confirm how utilities.split selects rows.
scaler = utilities.Scaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = utilities.split(X, target, train_split=0.7)


In [134]:
# apply K-Nearest Neighbors
# The classifier is created with its constructor defaults, fitted on the
# training part, and then asked to predict for the held-out test features.
knn = misc.KNN()
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)


In [135]:
# compute a few metrics for KNN
# y_test and y_pred are handed to both metrics unchanged; y_test presumably
# holds rows of the one-hot target matrix passed to utilities.split.
# NOTE(review): for single-label classification micro-averaged F1 normally
# equals plain accuracy, yet the recorded output below shows 0.9556 vs 0.9333.
# That suggests utilities.accuracy compares the one-hot arrays element-wise
# rather than per sample -- confirm against simpleml.utilities.
acc = utilities.accuracy(y_test, y_pred)
f1_score = utilities.f1(y_test, y_pred, kind="MICRO")

print(f'simpleml K Nearest Neighbors accuracy on Iris dataset: {acc}')
print(f'simpleml K Nearest Neighbors f1 score on Iris dataset: {f1_score}')

simpleml K Nearest Neighbors accuracy on Iris dataset: 0.9555555555555556
simpleml K Nearest Neighbors f1 score on Iris dataset: 0.9333333333333333


# Part 5: LVQ


In [136]:
#using the same X_train, X_test, y_train, y_test from KNN
# (this cell therefore only works after the Part 4 split cell has been run)

# num_neurons=6 is passed to fit, not to the constructor.
# NOTE(review): presumably the number of LVQ prototype vectors -- confirm
# against simpleml.misc.LVQ.
lvq = misc.LVQ()
lvq.fit(X_train, y_train, num_neurons=6)
y_pred = lvq.predict(X_test)


In [137]:
# compute a few metrics for LVQ
# y_test and y_pred are handed to both metrics unchanged; y_test presumably
# holds rows of the one-hot target matrix passed to utilities.split.
# NOTE(review): for single-label classification micro-averaged F1 normally
# equals plain accuracy, yet the recorded output below shows 0.9852 vs 0.9778.
# That suggests utilities.accuracy compares the one-hot arrays element-wise
# rather than per sample -- confirm against simpleml.utilities.
acc = utilities.accuracy(y_test, y_pred)
f1_score = utilities.f1(y_test, y_pred, kind="MICRO")

print(f'simpleml LVQ accuracy on Iris dataset: {acc}')
print(f'simpleml LVQ F1 score on Iris dataset: {f1_score}')

simpleml LVQ accuracy on Iris dataset: 0.9851851851851852
simpleml LVQ F1 score on Iris dataset: 0.9777777777777777



## References

Fisher, R. A. (1988). Iris. UCI Machine Learning Repository. https://doi.org/10.24432/C56C76.

Wolberg, William, Mangasarian, Olvi, Street, Nick, and Street, W. (1995). Breast Cancer Wisconsin (Diagnostic). UCI Machine Learning Repository. https://doi.org/10.24432/C5DW2B.

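Boston housing data set (Harrison, D. and Rubinfeld, D. L., 1978), StatLib archive, Carnegie Mellon University. http://lib.stat.cmu.edu/datasets/boston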
