<img src="./images/logo.png" alt="utech academy">

### Persian Handwritten Digit Classifier

In [None]:
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
import cv2
from scipy import io
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
def load_hoda(training_sample_size=1000, test_sample_size=200, size=5,
              dataset_path='./datasets/hoda.mat'):
    """Load the Hoda dataset and return resized, flattened train/test splits.

    The first ``training_sample_size`` entries form the training split and
    the following ``test_sample_size`` entries form the test split. Each
    image is resized to ``size`` x ``size`` with ``cv2.resize`` and then
    flattened to a vector of length ``size * size``.

    Parameters
    ----------
    training_sample_size : int
        Number of leading samples used for training.
    test_sample_size : int
        Number of samples, taken right after the training samples, used
        for testing.
    size : int
        Side length (in pixels) of the resized square images.
    dataset_path : str
        Location of the ``.mat`` file; it must provide the ``'Data'`` and
        ``'labels'`` variables.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, y_train, X_test, y_test)``.
    """
    # Load the dataset
    train_end = training_sample_size
    test_end = train_end + test_sample_size
    dataset = io.loadmat(dataset_path)
    # NOTE(review): assumes 'Data' and 'labels' hold one entry per sample
    # along the first axis -- confirm against the .mat file.
    images = dataset['Data']
    labels = dataset['labels']

    # Split training and test set.
    # np.squeeze turns a single-sample slice into a 0-d array that cannot be
    # iterated; np.atleast_1d restores the sample axis in that case and is a
    # no-op otherwise.
    X_train_original = np.atleast_1d(np.squeeze(images[:train_end]))
    y_train = np.atleast_1d(np.squeeze(labels[:train_end]))
    X_test_original = np.atleast_1d(np.squeeze(images[train_end:test_end]))
    y_test = np.atleast_1d(np.squeeze(labels[train_end:test_end]))

    # Resize every image to size x size, then flatten it to one feature row
    X_train = [cv2.resize(img, dsize=(size, size)).reshape(size * size)
               for img in X_train_original]
    X_test = [cv2.resize(img, dsize=(size, size)).reshape(size * size)
              for img in X_test_original]

    return np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)

In [None]:
# Request the first 1000 samples for training and the next 200 for testing,
# each resized to 5x5 and flattened to 25 features.
X_train, y_train, X_test, y_test = load_hoda(
    training_sample_size=1000,
    test_sample_size=200,
    size=5,
)

In [None]:
# Dimensions of the training feature matrix returned by load_hoda.
X_train.shape

In [None]:
# First training sample: a resized image flattened to a 1-D feature vector.
X_train[0]

In [None]:
# Dimensions of the training label array.
y_train.shape

In [None]:
# Label of the first training sample.
y_train[0]

In [None]:
# Project the training vectors onto their first two principal components so
# the digit classes can be inspected in a 2-D scatter plot.
pca = PCA(n_components=2)
X_r = pca.fit_transform(X_train)

plt.figure()

# One scatter series per digit 0-9; the legend label is the digit itself.
for digit in range(10):
    mask = y_train == digit  # rows of X_r that belong to this digit
    plt.scatter(X_r[mask, 0], X_r[mask, 1], alpha=.8, label=str(digit))

plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('PCA of Hoda dataset')

plt.show()

In [None]:
# Show the first training sample as an image. The side length is derived from
# the vector length so this cell keeps working when `size` in the load_hoda
# call is changed from 5.
side = int(np.sqrt(X_train[0].size))
plt.imshow(X_train[0].reshape((side, side)), cmap='gray')

In [None]:
# One-vs-rest logistic regression with a fixed seed, up to 500 solver
# iterations, and solver progress output enabled.
# NOTE(review): recent scikit-learn releases deprecate `multi_class` --
# confirm against the installed version.
clf = LogisticRegression(
    multi_class='ovr',
    max_iter=500,
    random_state=42,
    verbose=1,
)

In [None]:
# Train the classifier on the flattened training images and their labels.
clf.fit(X_train, y_train)

In [None]:
# Score on the data the model was fitted on (mean accuracy for scikit-learn
# classifiers).
clf.score(X_train, y_train)

In [None]:
# Score on the held-out test split (mean accuracy for scikit-learn
# classifiers).
clf.score(X_test, y_test)

In [None]:
# Show the first test sample as an image. The side length is derived from the
# vector length so this cell keeps working when `size` in the load_hoda call
# is changed from 5.
side = int(np.sqrt(X_test[0].size))
plt.imshow(X_test[0].reshape((side, side)), cmap='gray')

In [None]:
# Predict the class of the first test sample; the [:1, :] slice keeps the
# input 2-D (one row) rather than a bare 1-D vector.
clf.predict(X_test[:1, :])

In [None]:
# Per-class probability estimates for the first test sample.
clf.predict_proba(X_test[:1, :])

In [None]:
# Most probable class for the first test sample. The argmax is only a column
# index into predict_proba's output, so it is mapped through clf.classes_ to
# obtain the actual label; the two coincide only when the fitted classes are
# exactly 0..9 in order.
clf.classes_[np.argmax(clf.predict_proba(X_test[:1, :]), axis=1)]

In [None]:
# Predict a label for every sample in the test split.
y_pred = clf.predict(X_test)

In [None]:
# Display the predicted labels for the test split.
y_pred

In [None]:
# Display the true labels of the test split.
y_test

<div class="alert">
<div style="direction:ltr;text-align:left;font-family:B Tahoma"> Machine Learning Course
<br>Vahid Reza Khazaie<br>
</div>
<a href="https://www.linkedin.com/in/vahidrezakhazaie/">LinkedIn</a> - <a href="https://github.com/vrkh1996">GitHub</a>

</div>