In [45]:
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import numpy as np
import matplotlib.image as mpimg
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pickle

In [2]:
def read_and_preprocess(file):
    img = mpimg.imread(file)
    return np.reshape(img, -1)

In [3]:
def get_data():
    PATH = '../data/symbols/extracted_images/'
    labels = os.listdir(PATH)
    classes=[]
    data=[]
    for label in labels:
        print(label)
        folder=PATH+label+"/"
        images= os.listdir(folder)
        for image in images:
            img = read_and_preprocess(folder+image)
            data.append(img)
            classes.append(label)
    return data, classes
    

In [6]:
data, labels = get_data()

beta
pm
Delta
gamma
infty
rightarrow
div
gt
forward_slash
leq
mu
exists
in
times
sin
R
u
9
0
{
7
i
N
G
+
,
6
z
}
1
8
T
S
cos
A
-
f
o
H
sigma
sqrt
pi
int
sum
lim
lambda
neq
log
ldots
forall
lt
theta
ascii_124
M
!
alpha
j
C
]
(
d
v
prime
q
=
4
X
phi
3
tan
e
)
[
b
k
l
geq
2
y
5
p
w


In [7]:
ignore2, X_train, ignore, y_train = train_test_split(np.array(data), np.array(labels), test_size=0.1, shuffle=True,stratify=np.array(labels))

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.33, shuffle=True,stratify=y_train)

# SVM

In [9]:
clf = SVC(gamma='auto')

In [10]:
clf.fit(X_train,y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [12]:
print(classification_report(y_test, clf.predict(X_test)))

               precision    recall  f1-score   support

            !       1.00      0.44      0.61        43
            (       1.00      0.20      0.34       472
            )       1.00      0.23      0.37       474
            +       1.00      0.30      0.46       829
            ,       1.00      0.10      0.17        63
            -       0.12      1.00      0.21      1122
            0       1.00      0.23      0.37       228
            1       0.99      0.27      0.42       875
            2       1.00      0.26      0.41       863
            3       1.00      0.28      0.44       360
            4       1.00      0.27      0.43       244
            5       1.00      0.29      0.45       117
            6       1.00      0.23      0.38       103
            7       1.00      0.26      0.41        96
            8       1.00      0.29      0.45       101
            9       1.00      0.33      0.49       123
            =       1.00      0.18      0.31       432
         

  'precision', 'predicted', average, warn_for)


In [14]:
cls_report = classification_report(y_test, clf.predict(X_test),output_dict=True)

  'precision', 'predicted', average, warn_for)


In [15]:
type(cls_report)

dict

In [18]:
pickle.dump(cls_report,open('cls_report_svm.pkl','wb'))

# Naive Bayes

In [22]:
nb_clf = MultinomialNB()

In [23]:
nb_clf.fit(X_train,y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [24]:
print(classification_report(y_test,nb_clf.predict(X_test)))

               precision    recall  f1-score   support

            !       0.08      0.51      0.14        43
            (       0.58      0.72      0.64       472
            )       0.60      0.71      0.65       474
            +       0.59      0.28      0.38       829
            ,       0.14      0.35      0.20        63
            -       0.85      0.75      0.80      1122
            0       0.59      0.38      0.46       228
            1       0.49      0.09      0.15       875
            2       0.81      0.32      0.46       863
            3       0.48      0.61      0.53       360
            4       0.37      0.36      0.36       244
            5       0.37      0.43      0.40       117
            6       0.41      0.57      0.48       103
            7       0.46      0.46      0.46        96
            8       0.44      0.48      0.45       101
            9       0.43      0.55      0.48       123
            =       0.66      0.73      0.69       432
         

  'precision', 'predicted', average, warn_for)


In [25]:
cls_report_nb = classification_report(y_test, nb_clf.predict(X_test),output_dict=True)

  'precision', 'predicted', average, warn_for)


In [26]:
pickle.dump(cls_report_nb,open('cls_report_nb.pkl','wb'))

# Random Forest

In [28]:
clf_rf = RandomForestClassifier()

In [29]:
clf_rf.fit(X_train,y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [30]:
print(classification_report(y_test,clf_rf.predict(X_test)))

               precision    recall  f1-score   support

            !       0.51      0.47      0.49        43
            (       0.55      0.93      0.69       472
            )       0.55      0.93      0.69       474
            +       0.51      0.82      0.63       829
            ,       0.15      0.10      0.12        63
            -       0.85      0.99      0.91      1122
            0       0.45      0.51      0.48       228
            1       0.58      0.77      0.66       875
            2       0.47      0.73      0.57       863
            3       0.55      0.68      0.60       360
            4       0.52      0.37      0.43       244
            5       0.59      0.35      0.44       117
            6       0.64      0.29      0.40       103
            7       0.53      0.33      0.41        96
            8       0.58      0.30      0.39       101
            9       0.64      0.36      0.46       123
            =       0.72      0.77      0.74       432
         

  'precision', 'predicted', average, warn_for)


In [31]:
cls_report_rf = classification_report(y_test, clf_rf.predict(X_test),output_dict=True)

  'precision', 'predicted', average, warn_for)


In [32]:
pickle.dump(cls_report_rf,open('cls_report_rf.pkl','wb'))

# Logistic Regression

In [46]:
clf_lr = LogisticRegression(solver='lbfgs',multi_class='multinomial')

In [47]:
clf_lr.fit(X_train,y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)

In [48]:
print(classification_report(y_test,clf_lr.predict(X_test)))

               precision    recall  f1-score   support

            !       0.00      0.00      0.00        43
            (       0.68      0.86      0.76       472
            )       0.73      0.84      0.78       474
            +       0.58      0.69      0.63       829
            ,       0.29      0.06      0.10        63
            -       0.89      0.93      0.91      1122
            0       0.63      0.69      0.66       228
            1       0.52      0.76      0.61       875
            2       0.64      0.74      0.68       863
            3       0.68      0.78      0.73       360
            4       0.42      0.47      0.44       244
            5       0.47      0.47      0.47       117
            6       0.51      0.40      0.45       103
            7       0.57      0.42      0.48        96
            8       0.52      0.43      0.47       101
            9       0.53      0.63      0.58       123
            =       0.69      0.75      0.72       432
         

  'precision', 'predicted', average, warn_for)


In [49]:
cls_report_lr = classification_report(y_test, clf_lr.predict(X_test),output_dict=True)

  'precision', 'predicted', average, warn_for)


In [50]:
pickle.dump(cls_report_lr,open('cls_report_lr.pkl','wb'))