# Imports

In [None]:
import pandas as pd 
import numpy as np
import pickle

from PIL import Image

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.neural_network import MLPClassifier

import matplotlib.pyplot as plt

# Reading CSV and splitting data

In [None]:
# Load the MNIST training split from its CSV file under ../input.
# The bare name on the last line makes the notebook render a preview.
df_train = pd.read_csv('../input/mnist-in-csv/mnist_train.csv')
df_train

In [None]:
# Shuffle the training rows while keeping their original index labels.
# A fixed seed is used so the row order, and therefore every model fitted on
# it, is identical from run to run (the random forest is seeded as well, which
# only gives reproducible results if its input order is reproducible too).
df_train = df_train.sample(frac = 1, random_state = 1)

In [None]:
# Show the (rows, columns) size of the shuffled training frame.
df_train.shape

In [None]:
# Feature matrix: every column of the training frame except the target
# column 'label'. The trailing bare name displays it in the notebook.
X_train = df_train.drop(labels = 'label', axis = 1)
X_train

In [None]:
# Target vector: the 'label' column of the training frame.
y_train = df_train['label']
y_train

In [None]:
# Load the MNIST test split from its CSV file under ../input.
# The bare name on the last line makes the notebook render a preview.
df_test = pd.read_csv('../input/mnist-in-csv/mnist_test.csv')
df_test

In [None]:
# Shuffle the test rows while keeping their original index labels, so that
# label-based lookups such as `.loc[200]` still address the same sample.
# A fixed seed keeps the row order identical from run to run.
df_test = df_test.sample(frac = 1, random_state = 1)

In [None]:
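# Optional: uncomment to drop the rows at positions 999-9998 and evaluate on a smaller test set.
# Don't run more than once: every run drops rows again from the already-reduced frame.
# df_test = df_test.drop(df_test.index[999:9999])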

In [None]:
# Show the (rows, columns) size of the shuffled test frame.
df_test.shape

In [None]:
# Feature matrix for evaluation: every column of the test frame except the
# target column 'label'. The trailing bare name displays it in the notebook.
X_test = df_test.drop(labels = 'label', axis = 1)
X_test

In [None]:
# Target vector for evaluation: the 'label' column of the test frame.
y_test = df_test['label']
y_test

# Random Forest Classifier


In [None]:
# Baseline random forest with library-default hyperparameters; only the seed
# is fixed so repeated fits on the same data give the same model.
normal_forest = RandomForestClassifier(random_state = 1)

In [None]:
# Train the forest on the training features/labels and keep the object
# returned by fit() under the name used for prediction and saving later on.
nf_model = normal_forest.fit(X_train, y_train)

In [None]:
# Predict a label for every row of the test feature matrix.
nf_predictions = nf_model.predict(X_test)

In [None]:
# Mean absolute difference between the true and the predicted labels.
# NOTE(review): this treats the class labels as numbers (predicting 9 for a 0
# counts nine times worse than predicting 1) -- confirm it is wanted next to
# the accuracy/precision/recall cells.
mean_absolute_error(y_test, nf_predictions)

In [None]:
# Share of test rows whose predicted label equals the true label, scaled to a
# percentage.
accuracy_score(y_test, nf_predictions)*100 # %

In [None]:
# Macro-averaged precision (per-class precision averaged with equal weight per
# class), scaled to a percentage.
precision_score(y_test, nf_predictions, average='macro')*100 # %

In [None]:
# Macro-averaged recall (per-class recall averaged with equal weight per
# class), scaled to a percentage.
recall_score(y_test, nf_predictions, average='macro')*100 # %

# Optimizing Hyperparameters for Random Forest



# Neural Network (in progress)


In [None]:
# Multi-layer perceptron with library-default hyperparameters. The seed is
# fixed, matching the random forest, so that weight initialisation and
# shuffling are repeatable and the reported metrics and the saved model can
# be reproduced.
nn = MLPClassifier(random_state = 1)

In [None]:
# Train the network in place on the training features/labels.
nn.fit(X_train, y_train)

In [None]:
# Predict a label for every row of the test feature matrix.
nn_predictions = nn.predict(X_test)

In [None]:
# Mean absolute difference between the true and the predicted labels.
# NOTE(review): this treats the class labels as numbers (predicting 9 for a 0
# counts nine times worse than predicting 1) -- confirm it is wanted next to
# the accuracy/precision/recall cells.
mean_absolute_error(y_test, nn_predictions)

In [None]:
# Share of test rows whose predicted label equals the true label, scaled to a
# percentage.
accuracy_score(y_test, nn_predictions)*100 # %

In [None]:
# Macro-averaged precision (per-class precision averaged with equal weight per
# class), scaled to a percentage.
precision_score(y_test, nn_predictions, average='macro')*100 # %

In [None]:
# Macro-averaged recall (per-class recall averaged with equal weight per
# class), scaled to a percentage.
recall_score(y_test, nn_predictions, average='macro')*100 # %

# Saving models


In [None]:
# Persist the trained random forest to disk. The `with` block guarantees the
# file is flushed and closed as soon as pickling finishes (or fails), instead
# of leaving an open handle behind in the notebook session.
filename = 'rf_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(nf_model, model_file)

In [None]:
# Persist the trained neural network to disk. The `with` block guarantees the
# file is flushed and closed as soon as pickling finishes (or fails), instead
# of leaving an open handle behind in the notebook session.
filename2 = 'good_nn.sav'
with open(filename2, 'wb') as model_file:
    pickle.dump(nn, model_file)

# Testing with my own digits

In [None]:
# Load the first custom image, convert it to Pillow's 1-bit mode and flatten
# its pixel values into a 1-D array.
# NOTE(review): assumes the PNG is already 28x28 so that it yields the 784
# values per sample the models were trained on -- TODO confirm. Also verify
# that 1-bit pixels are a good match for the training data's pixel values.
img1 = Image.open("../input/shapes/0.png").convert(mode="1")
array1 = np.array(img1.getdata())

In [None]:
# Load the second custom image, convert it to Pillow's 1-bit mode and flatten
# its pixel values into a 1-D array.
# NOTE(review): assumes the PNG is already 28x28 so that it yields the 784
# values per sample the models were trained on -- TODO confirm. Also verify
# that 1-bit pixels are a good match for the training data's pixel values.
img2 = Image.open("../input/shapes/1.png").convert(mode="1")
array2 = np.array(img2.getdata())

In [None]:
# Stack the two flattened images into a two-row frame, one image per row.
# The columns reuse the training feature names: the models were fitted on a
# DataFrame with named columns, so a frame with anonymous integer columns
# would not match those names at predict time. This also fails right here,
# with a clear shape error, if an image does not have one value per feature.
df_myimages = pd.DataFrame(np.array([array1, array2]), columns = X_train.columns)

In [None]:
# Display the frame built from the custom images for a visual sanity check.
df_myimages

**Predict with my images**

In [None]:
# Ask the random forest for a predicted label for each custom image (one per
# row of df_myimages).
nf_model.predict(df_myimages)

# Predicting using MNIST images

In [None]:
# Pick the test sample whose index label is 200 and draw its pixel values as
# a 28x28 image.
i = X_test.loc[200]
plt.imshow(np.reshape(np.array(i), (28, 28)))

In [None]:
# Turn the selected sample into a one-row frame: building a DataFrame from the
# Series gives a single column, and transposing it puts the features back in
# columns, which is the layout the models were fitted on.
df = pd.DataFrame(i).T

In [None]:
# Random forest prediction for the single selected test sample.
nf_model.predict(df)

In [None]:
# Neural network prediction for the single selected test sample.
nn.predict(df)