In [None]:
import numpy as np
import matplotlib.pyplot as plt

# import from different modules
from sklearn.neural_network import MLPClassifier 
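# Note: you could also `import sklearn.neural_network` and then call sklearn.neural_network.MLPClassifier
# (a bare `import sklearn` does not reliably expose the submodule in every scikit-learn version)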
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn import linear_model
%matplotlib inline

In [None]:
# The images are provided on Brightspace; the `images` folder must be in the
# current working directory for the relative paths below to resolve.
from IPython.display import Image, display

# (image file, caption) pairs in display order. Each caption is printed
# verbatim after its picture.
species_images = (
    ('images/iris_setosa.jpg', "Iris Setosa\n"),
    ('images/iris_versicolor.jpg', "Iris Versicolor\n"),
    ('images/iris_virginica.jpg', "Iris Virginica"),
)
for image_path, caption in species_images:
    display(Image(filename=image_path))
    print(caption)

# THREE KINDS OF IRIS FLOWERS

Which features would be useful for telling them apart?


In [None]:
# Load scikit-learn's bundled iris dataset; the result is kept in `iris`
# and is what every later cell reads its data and metadata from.
from sklearn.datasets import load_iris
iris = load_iris()

In [None]:
# List the keys available on the loaded dataset object.
iris.keys()

In [None]:
# Names of the features stored with the dataset.
iris['feature_names'] 

In [None]:
# Show the feature illustration. `Image` and `display` come from the
# IPython.display import in the earlier image cell, and the path is relative
# to the current working directory.
display(Image(filename='images/iris_features.jpg'))
print("Iris Features")

In [None]:
# Names of the target classes stored with the dataset.
iris['target_names']

In [None]:
# Keep the dataset's feature values in X; the last expression shows which
# container type holds them.
X = iris.data
type(X)

In [None]:
# Dimensions of the feature array (presumably (n_samples, n_features) — check the output).
X.shape

In [None]:
# Keep the dataset's class labels in y; the last expression shows which
# container type holds them.
y = iris.target
type(y)

In [None]:
# Dimensions of the label array (presumably one label per sample — check against X.shape).
y.shape

# Create test and training set
Here we create a training set and a test set. For more options, see the `train_test_split` documentation.

In [None]:
# Split features and labels into training and test parts. random_state=0
# fixes the shuffling, so the same split is produced on every run.
# No test_size/train_size is passed, so scikit-learn's default proportion applies.
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=0)

In [None]:
# Shapes of the training and test feature arrays, shown together as a tuple.
X_train.shape, X_test.shape

In [None]:
# Columns of iris.data to put on the x and y axes; change these to look at
# a different pair of features.
x_index = 2
y_index = 3


def _species_label(tick, *args):
    """Return the target name for a colorbar tick value."""
    return iris.target_names[int(tick)]


# The formatter labels the colorbar ticks with target names instead of numbers.
formatter = plt.FuncFormatter(_species_label)

fig, ax = plt.subplots()
points = ax.scatter(
    iris.data[:, x_index],
    iris.data[:, y_index],
    c=iris.target,
    cmap=plt.colormaps['RdYlBu'],
)
fig.colorbar(points, ax=ax, ticks=[0, 1, 2], format=formatter)
# Half-unit margins around the class ids 0..2 on the colour scale.
points.set_clim(-0.5, 2.5)
ax.set_xlabel(iris.feature_names[x_index])
ax.set_ylabel(iris.feature_names[y_index]);

# We are going to scale our data using z-scoring (standardization)

Note that we use only the training data to scale! 

The reason is that we do not want the test data to have any influence on training: the scaling parameters (mean and standard deviation) are learned from the training data only (using .fit).

The test data is therefore scaled as well, but using the mean and standard deviation of the training data (using .transform).


In [None]:
# Learn the scaling parameters from the training split only, then apply the
# same fitted scaler to both splits so the test data never influences them.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Per-feature standard deviation of the scaled training data (cell output).
X_train.std(axis=0)

# Load an example classifier

Note the standard sequence of .fit, .predict, and evaluate performance

In [None]:
# k-nearest-neighbours with k=5: fit on the training split, predict the test
# split, then print the per-class report followed by the confusion matrix.
knn = KNeighborsClassifier(n_neighbors=5)
classifier = knn.fit(X_train, y_train)
pred_knn = knn.predict(X_test)
print(
    classification_report(y_test, pred_knn),
    confusion_matrix(y_test, pred_knn),
    sep="\n",
)

# Load another example classifier: calling the model follows the same structure

Note the standard sequence of .fit, .predict, and evaluate performance

In [None]:
# Multi-layer perceptron with three hidden layers of 11 units each, trained
# for at most 2000 iterations.
# random_state pins the random weight initialisation, so the report and
# confusion matrix below are the same on every Restart & Run All (without it
# the scores change from run to run).
mlpc = MLPClassifier(hidden_layer_sizes=(11,11,11), max_iter=2000, random_state=0)
mlpc.fit(X_train,y_train)
pred_mlpc = mlpc.predict(X_test)
print(classification_report(y_test,pred_mlpc))
print(confusion_matrix(y_test,pred_mlpc))

# LET US VISUALIZE DIFFERENT CLASSIFIERS TO UNDERSTAND THEIR PROPERTIES


In [None]:
# Indices (into iris.feature_names / the columns of X_train) of the two
# features used for the 2-D plots; change these to compare another pair.
fet_1 = 2
fet_2 = 3

# Estimators to compare. To try an alternative model, comment out an entry
# or add a new one. The fixed random_state values make the stochastic models
# draw the same boundaries on every run.
models = [
    KNeighborsClassifier(n_neighbors=5),
    DecisionTreeClassifier(max_depth=4, random_state=0),
    linear_model.SGDClassifier(random_state=0),
    MLPClassifier(hidden_layer_sizes=(11,11,11), max_iter=2000, random_state=0),
]

# The two selected training columns, extracted once and reused for fitting,
# for the boundary grid and for the scatter overlay.
# NOTE: X_train is used as the earlier cells left it; if the scaling cell has
# run, the plotted values are z-scores rather than centimetres.
X_pair = X_train[:, [fet_1, fet_2]]

for num, clf in enumerate(models):
    classifier = clf.fit(X_pair, y_train)
    disp = DecisionBoundaryDisplay.from_estimator(
        classifier,
        X_pair,
        response_method="predict",
        # Labels follow the selected features, so they stay correct when
        # fet_1 / fet_2 are changed.
        xlabel=iris.feature_names[fet_1],
        ylabel=iris.feature_names[fet_2],
        alpha=0.5,
        cmap=plt.cm.coolwarm,
    )

    # Overlay the training points on top of the decision regions.
    disp.ax_.scatter(X_pair[:, 0], X_pair[:, 1],
                     c=y_train, edgecolor="k", cmap=plt.cm.coolwarm)

    # Title each figure with its position and the model's class name.
    disp.ax_.set_title(f"{num}: {type(clf).__name__}")
    plt.show()

# Data can also be converted to a pandas DataFrame, which is convenient to inspect and use

In [None]:
import pandas as pd

# Put the class label next to the feature values as one extra column, then
# wrap the combined array in a DataFrame with readable column names.
values = np.column_stack((iris.data, iris.target))
df = pd.DataFrame(values, columns=[*iris.feature_names, 'target'])

# Show the first 100 rows.
df.head(100)

# We can compute the means of the data to see which features pop out

In [None]:
# Mean of every feature per class, as a single table with one row per value
# of the 'target' column, so the classes can be compared side by side.
# Grouping on 'target' covers however many classes are present instead of
# repeating the computation for hand-listed class ids.
df.groupby('target').mean()