# Assignment 2
Team Members:
1. Khushvind Maurya (2021MT10238)
2. Aniket Singh (2021MT10256)
3. Rishabh Jaiswal (2021MT10924)

# Import Libraries

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from sklearn.metrics import classification_report,accuracy_score,precision_score
from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV
import matplotlib.pyplot as plt

# Import Data

In [None]:
# Load the MNIST train/test splits (images and their digit labels) via Keras.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [None]:
# Preview the first five training digits in a single row, each titled with its
# label. A 1x5 layout is used because only five images are drawn; a 5x5 grid
# would leave most of the figure empty.
plt.figure(figsize=(10, 2.5))
for i in range(5):
    plt.subplot(1, 5, i + 1)
    # Hide ticks and grid lines: pixel coordinates carry no information here.
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(X_train[i], cmap="gray")
    plt.title(f"Label: {y_train[i]}")
plt.show()

In [None]:
# Cast both splits to float32 and divide by 255 so pixel values are rescaled
# (assumes 8-bit intensities, i.e. a resulting range of [0, 1]).
X_train, X_test = (
    split.astype('float32') / 255.0 for split in (X_train, X_test)
)

In [None]:
# Sanity check: image array shape, laid out as (n_images, height, width).
print(X_train.shape)

In [None]:
# Sanity check: label array shape (one label per training image is expected).
print(y_train.shape)

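Before training the models, we need to flatten each 2-D image into a 1-D feature vector, since the scikit-learn classifiers used below expect one row of features per sample.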

In [None]:
# Collapse every image into one feature row; -1 lets NumPy infer the row
# length from the remaining dimensions.
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)

# Tasks
### A. Using MNIST Handwritten digits DATASET
Implement
1. Decision Tree
2. Random Forest
3. Naïve Bayes Classifier
4. KNN Classifier
5. Neural Network Classifier

Compare their performance using k-fold cross-validation and other tuning techniques on the multi-class classification task, where the goal is to assign each image to one of the ten digits (0-9).

### B. Exploration of Different Evaluation Metrics.
Evaluate your methods using different evaluation metrics.

### C. Parameter Tuning through Grid Search/Cross Validation
Tune the parameters using two powerful techniques: grid search and random (randomized parameter) search.

# Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

A. Implementing a Decision Tree Classifier, and checking the accuracy using k-fold Cross Validation.

In [None]:
# Baseline: decision tree with default hyperparameters, scored with 5-fold
# cross-validation; the mean of the per-fold scores is reported.
model = DecisionTreeClassifier()
cross_val = cross_val_score(estimator=model, X=X_train_flat, y=y_train, cv=5)
print("Cross Validation Score =", cross_val.mean())

B. Evaluating the method using different evaluation metrics.

In [None]:
# Fit on the full training set, predict the test set, and print the
# classification report for those predictions.
predictions = model.fit(X_train_flat, y_train).predict(X_test_flat)
print(classification_report(y_test, predictions))

C. Parameter Tuning through Grid Search and Random Search

In [None]:
# Exhaustive 5-fold grid search over tree depth and minimum split size
# (5 x 4 = 20 candidate settings), selecting on accuracy.
model = DecisionTreeClassifier()
param_grid = {
    'max_depth': [None, 2, 4, 6, 8],
    'min_samples_split': [2, 4, 6, 8],
}

grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train_flat, y_train)

best_params, best_score = grid_search.best_params_, grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

In [None]:
# Randomized search over the decision-tree grid: evaluates 10 sampled
# parameter combinations with 5-fold CV, selecting on accuracy.
# Relies on `model` and `param_grid` defined in the grid-search cell above.
# random_state pins which candidates are sampled, so reruns compare the same
# settings.
# NOTE(review): the estimator itself is still unseeded, so individual scores
# may vary slightly between runs.
randomized_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=10,
    cv=5,
    scoring='accuracy',
    random_state=42,
)
randomized_search.fit(X_train_flat, y_train)

best_params = randomized_search.best_params_
best_score = randomized_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

A. Implementing a Random Forest Classifier, and checking the accuracy using k-fold Cross Validation.

In [None]:
# Baseline: random forest with default hyperparameters, scored with 5-fold
# cross-validation; the mean of the per-fold scores is reported.
model = RandomForestClassifier()
cross_val = cross_val_score(estimator=model, X=X_train_flat, y=y_train, cv=5)
print("Cross Validation Score =", cross_val.mean())

B. Evaluating the method using different evaluation metrics.

In [None]:
# Fit on the full training set, predict the test set, and print the
# classification report for those predictions.
predictions = model.fit(X_train_flat, y_train).predict(X_test_flat)
print(classification_report(y_test, predictions))

C. Parameter Tuning through Grid Search and Random Search

In [None]:
# Exhaustive 5-fold grid search over tree depth and number of trees
# (5 x 5 = 25 candidate settings), selecting on accuracy.
model = RandomForestClassifier()
param_grid = {
    'max_depth': [None, 2, 4, 6, 8],
    'n_estimators': [80, 90, 100, 110, 120],
}

grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train_flat, y_train)

best_params, best_score = grid_search.best_params_, grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

In [None]:
# Randomized search over the random-forest grid: evaluates 5 sampled
# parameter combinations with 5-fold CV, selecting on accuracy.
# Relies on `model` and `param_grid` defined in the grid-search cell above.
# random_state pins which candidates are sampled, so reruns compare the same
# settings.
# NOTE(review): the estimator itself is still unseeded, so individual scores
# may vary slightly between runs.
randomized_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=5,
    cv=5,
    scoring='accuracy',
    random_state=42,
)
randomized_search.fit(X_train_flat, y_train)

best_params = randomized_search.best_params_
best_score = randomized_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

# Naïve Bayes Classifier

In [None]:
from sklearn.naive_bayes import GaussianNB

A. Implementing a Naive Bayes Classifier, and checking the accuracy using k-fold Cross Validation.

In [None]:
# Baseline: Gaussian Naive Bayes with default settings, scored with 5-fold
# cross-validation; the mean of the per-fold scores is reported.
model = GaussianNB()
cross_val = cross_val_score(estimator=model, X=X_train_flat, y=y_train, cv=5)
print("Cross Validation Score =", cross_val.mean())

B. Evaluating the method using different evaluation metrics.

In [None]:
# Fit on the full training set, predict the test set, and print the
# classification report for those predictions.
predictions = model.fit(X_train_flat, y_train).predict(X_test_flat)
print(classification_report(y_test, predictions))

C. Parameter Tuning through Grid Search and Random Search

Since the Gaussian Naive Bayes model in scikit-learn exposes very few hyperparameters (essentially only `var_smoothing`, plus optional class `priors`), there is little to tune through traditional methods like grid search or random search, so we have not shown it here.

# KNN Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

A. Implementing a KNN Classifier, and checking the accuracy using k-fold Cross Validation.

In [None]:
# Baseline: k-nearest-neighbours with default hyperparameters, scored with
# 5-fold cross-validation; the mean of the per-fold scores is reported.
model = KNeighborsClassifier()
cross_val = cross_val_score(estimator=model, X=X_train_flat, y=y_train, cv=5)
print("Cross Validation Score =", cross_val.mean())

B. Evaluating the method using different evaluation metrics.

In [None]:
# Fit on the full training set, predict the test set, and print the
# classification report for those predictions.
predictions = model.fit(X_train_flat, y_train).predict(X_test_flat)
print(classification_report(y_test, predictions))

C. Parameter Tuning through Grid Search and Random Search

In [None]:
# Exhaustive 5-fold grid search over the neighbour count k = 1..10,
# selecting on accuracy.
model = KNeighborsClassifier()
param_grid = {'n_neighbors': list(range(1, 11))}

grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train_flat, y_train)

best_params, best_score = grid_search.best_params_, grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

In [None]:
# Randomized search over the KNN grid: evaluates 5 sampled values of
# n_neighbors with 5-fold CV, selecting on accuracy.
# Relies on `model` and `param_grid` defined in the grid-search cell above.
# random_state pins which candidates are sampled, so reruns compare the same
# settings.
randomized_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=5,
    cv=5,
    scoring='accuracy',
    random_state=42,
)
randomized_search.fit(X_train_flat, y_train)

best_params = randomized_search.best_params_
best_score = randomized_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

# Neural Network Classifier

In [None]:
from sklearn.neural_network import MLPClassifier

A. Implementing a Neural Network Classifier, and checking the accuracy using k-fold Cross Validation.

In [None]:
# Baseline: multi-layer perceptron with default hyperparameters, scored with
# 5-fold cross-validation; the mean of the per-fold scores is reported.
model = MLPClassifier()
cross_val = cross_val_score(estimator=model, X=X_train_flat, y=y_train, cv=5)
print("Cross Validation Score =", cross_val.mean())


B. Evaluating the method using different evaluation metrics.

In [None]:
# Fit on the full training set, predict the test set, and print the
# classification report for those predictions.
predictions = model.fit(X_train_flat, y_train).predict(X_test_flat)
print(classification_report(y_test, predictions))

C. Parameter Tuning through Grid Search and Random Search

In [None]:
# Exhaustive 5-fold grid search over five hidden-layer architectures
# (three single-layer widths and two two-layer shapes), selecting on accuracy.
model = MLPClassifier()
param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (200,), (50, 50), (10, 10)],
}

grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train_flat, y_train)

best_params, best_score = grid_search.best_params_, grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

In [None]:
# Randomized search over the MLP grid: evaluates 3 sampled hidden-layer
# architectures with 5-fold CV, selecting on accuracy.
# Relies on `model` and `param_grid` defined in the grid-search cell above.
# random_state pins which candidates are sampled, so reruns compare the same
# settings.
# NOTE(review): the estimator itself is still unseeded, so individual scores
# may vary slightly between runs.
randomized_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=3,
    cv=5,
    scoring='accuracy',
    random_state=42,
)
randomized_search.fit(X_train_flat, y_train)

best_params = randomized_search.best_params_
best_score = randomized_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)