# Tour of machine learning

In [None]:
import numpy as np
from sklearn.datasets import load_iris 

# Load the Iris dataset and pull out the feature matrix and the target labels
obj = load_iris()
X, y = obj.data, obj.target

In [None]:
import pandas as pd

# Append the labels to the feature matrix as one extra, right-most column
comb = np.column_stack((X, y))

# Column headers: the dataset's feature names followed by the label column
cols = [*obj.feature_names, 'Class']

df = pd.DataFrame(comb, columns=cols)
display(df)

## Explore the data

In [None]:
# Preview the first ten rows of the combined table
df.head(10)

In [None]:
# Size of the table as (number of rows, number of columns)
print(df.shape)

In [None]:
# Summary statistics for every column of the table
print(df.describe())

In [None]:
# Class distribution: how many rows carry each distinct value of 'Class'
df['Class'].value_counts()

In [None]:
import matplotlib.pyplot as plt
# Default figure settings for all plots that follow: 6x6 size at 120 dpi
plt.rcParams['figure.figsize'] = (6, 6)
plt.rcParams['figure.dpi'] = 120

In [None]:
import matplotlib.pyplot as plt
# Box-and-whisker plots of the first four columns, one panel each on a 2x2 grid
# with independent axes per panel
df.iloc[:, :4].plot(
    kind='box',
    subplots=True,
    layout=(2, 2),
    sharex=False,
    sharey=False,
)
plt.show()

In [None]:
# Histogram of each of the first four columns (the label column is left out)
df.iloc[:, :4].hist()
plt.show()

In [None]:
# Pairwise scatter plots between the first four columns
pd.plotting.scatter_matrix(df.iloc[:, :4])
plt.show()

## Logistic regression with a single train/test split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state makes the
# split repeatable from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [None]:
# Report the shape of each piece of the split, one per line:
# training features, test features, training labels, test labels
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

In [None]:
from sklearn.linear_model import LogisticRegression

from sklearn.multiclass import OneVsRestClassifier

# Choose a one-vs-rest logistic regression model.
# The `multi_class` argument of LogisticRegression is deprecated in recent
# scikit-learn releases, so the one-vs-rest scheme is requested explicitly by
# wrapping the liblinear-based estimator in OneVsRestClassifier instead.
model = OneVsRestClassifier(LogisticRegression(solver='liblinear'))

In [None]:
# Fit the model to the training data (features X_train, labels y_train)
model.fit(X_train, y_train)

In [None]:
# Make predictions with the test data; y_pred holds the model's output for X_test
y_pred = model.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Score the predictions against the true test labels and print the accuracy
test_accuracy = accuracy_score(y_test, y_pred)
print(test_accuracy)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Try a different classifier: leave exactly one assignment to `model` uncommented
#model = KNeighborsClassifier()
#model = DecisionTreeClassifier()
#model = GaussianNB()
model = SVC(gamma='auto')

# Train on the training split, predict the test split, and print the accuracy
y_pred = model.fit(X_train, y_train).predict(X_test)
print(accuracy_score(y_test, y_pred))

In [None]:
from sklearn.multiclass import OneVsRestClassifier

# Classifiers to compare, listed in the same order as `names` below.
# Logistic regression is wrapped in OneVsRestClassifier because the
# `multi_class` argument of LogisticRegression is deprecated in recent
# scikit-learn releases; the wrapper keeps the one-vs-rest scheme explicit.
models = [
    OneVsRestClassifier(LogisticRegression(solver='liblinear')),
    KNeighborsClassifier(),
    DecisionTreeClassifier(),
    GaussianNB(),
    SVC(gamma='auto'),
]

# Short labels used when printing each model's result
names = ['LR', 'KNN', 'DT', 'NB', 'SVM']

# Fit every model on the same training split and report its test accuracy
for name, model in zip(names, models):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    ACC = accuracy_score(y_test, y_pred)
    print(f"{name:5s}: acc = {ACC:6.3f}")
