# Devanagari Character Recognition using Machine Learning

## Reading Dataset Images into a CSV file and Creating Sample Datasets

In [None]:
import os
import numpy as np
import pandas as pd
from scipy import misc

# Build dataset.csv: one row per image (flattened pixel values followed by an
# integer class label in the last column), with a single header row on top.
root_dir = '/home/rishi/Projects/Doing/Devanagari-Character-Recognition'

image_dir = os.path.join(root_dir, 'Images')

# Tracks whether dataset.csv has been (re)created during this run. The first
# chunk truncates the file and writes the header; later chunks append data
# only. Writing the header with every chunk would insert numeric header rows
# ("0,1,2,...") in the middle of the file, which read_csv then parses as
# bogus samples.
header_written = False

# os.listdir() returns entries in arbitrary order; sorting keeps the
# directory -> label mapping and the row order identical on every run.
for dir_index, dir_name in enumerate(sorted(os.listdir(image_dir))):
    dir_path = os.path.join(image_dir, dir_name)

    # Collect every row of this class first and build the frame once.
    # Growing a DataFrame row by row copies it on each append, and
    # DataFrame.append was removed in pandas 2.0.
    rows = []
    for filename in sorted(os.listdir(dir_path)):
        # NOTE(review): scipy.misc.imread is not available in recent SciPy
        # releases; on a newer SciPy this call needs a replacement image
        # reader. Left unchanged here to avoid adding a new dependency.
        image_list = misc.imread(os.path.join(dir_path, filename)).flatten().tolist()
        image_list.append(dir_index)  # class label goes in the last column
        rows.append(image_list)

    image_df = pd.DataFrame(rows, dtype=int)

    # Skip the write for an empty directory so it cannot emit an empty header.
    if rows:
        image_df.to_csv(
            'dataset.csv',
            index=False,
            mode='a' if header_written else 'w',
            header=not header_written,
        )
        header_written = True

    print("The Knights who say Ni " + str(dir_index))

# Draw reproducible random subsets of the full dataset and store each one in
# its own CSV file.
df = pd.read_csv('dataset.csv')

for fraction, out_name in (
    (0.5, 'one-half-dataset.csv'),
    (0.33, 'one-third-dataset.csv'),
    (0.1, 'one-tenth-dataset.csv'),
):
    # Same seed for every subset, so repeated runs give the same samples.
    df_sample = df.sample(frac=fraction, random_state=10)
    df_sample.to_csv(out_name, index=False)

## Random Forest Classifier Using One-Tenth Dataset sample

In [None]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

df = pd.read_csv('one-tenth-dataset.csv')
# Kept so that other cells referencing `lb` keep working; the target below is
# no longer binarized.
lb = preprocessing.LabelBinarizer()

# Every column except the last is used as a feature; the last column is the
# class label.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# 64 trees in the forest
clf = RandomForestClassifier(n_estimators=64)
# Pass the labels as a 1-D vector. One-hot encoding the target and dropping a
# column turns the task into a multilabel problem: the dropped class becomes
# an all-zero row, the score becomes subset accuracy, and cross_val_score
# cannot stratify the folds by class.
scores = cross_val_score(clf, X, y, cv=10)
print("Mean Score : ", scores.mean())

In [None]:
# 128 trees in the forest
clf = RandomForestClassifier(n_estimators=128)
# Use the label column directly as a 1-D multiclass target. A one-hot target
# with its last column dropped would be treated as a multilabel problem (the
# dropped class becomes an all-zero row and folds are not stratified).
scores = cross_val_score(clf, df.iloc[:, :-1], df.iloc[:, -1], cv=10)
print("Mean Score : ", scores.mean())

## Multi-Layer Perceptron Neural Network Classifier using One-Tenth Dataset sample

In [None]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPClassifier

df = pd.read_csv('one-tenth-dataset.csv')
# Kept for compatibility with any cell that still references `lb`; the target
# below is no longer binarized.
lb = preprocessing.LabelBinarizer()

# Every column except the last is used as a feature; the last column is the
# class label.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Multi-layer perceptron with scikit-learn's default settings.
clf = MLPClassifier()
# Pass the labels as a 1-D multiclass target. A one-hot target with its last
# column dropped would be fitted as independent multilabel outputs, with the
# dropped class represented only as an all-zero row.
scores = cross_val_score(clf, X, y, cv=10)
print("Mean Score : ", scores.mean())

## Random Decision Forest Regressor Using One-Tenth Dataset sample

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor

# Load the one-tenth sample and split it into inputs (all columns but the
# last) and the value to predict (last column).
df = pd.read_csv('one-tenth-dataset.csv')
features = df.iloc[:, :-1]
target = df.iloc[:, -1]

# 64 trees in the forest, evaluated with 10-fold cross-validation
clf = RandomForestRegressor(n_estimators=64)
scores = cross_val_score(clf, features, target, cv=10)
print("Mean Score : ", scores.mean())

In [None]:
# 128 trees in the forest, evaluated with 10-fold cross-validation on the
# frame loaded by the previous cell.
features = df.iloc[:, :-1]
target = df.iloc[:, -1]
clf = RandomForestRegressor(n_estimators=128)
scores = cross_val_score(clf, features, target, cv=10)
print("Mean Score : ", scores.mean())

## Random Decision Forest Regressor Using One-Third Dataset sample

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor

# Load the one-third sample; the last column is the value to predict and the
# remaining columns are the inputs.
df = pd.read_csv('one-third-dataset.csv')
features = df.iloc[:, :-1]
target = df.iloc[:, -1]

# 64 trees in the forest, evaluated with 10-fold cross-validation
clf = RandomForestRegressor(n_estimators=64)
scores = cross_val_score(clf, features, target, cv=10)
print("Mean Score : ", scores.mean())

In [None]:
# 128 trees in the forest, evaluated with 10-fold cross-validation on the
# frame loaded by the previous cell.
features = df.iloc[:, :-1]
target = df.iloc[:, -1]
clf = RandomForestRegressor(n_estimators=128)
scores = cross_val_score(clf, features, target, cv=10)
print("Mean Score : ", scores.mean())

## Random Decision Forest Regressor Using Whole Dataset

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor

# Load the whole dataset, then shuffle all rows with a fixed seed.
df = pd.read_csv('dataset.csv')
df = df.sample(frac=1, random_state=0)

features = df.iloc[:, :-1]
target = df.iloc[:, -1]

# 64 trees in the forest, evaluated with 3-fold cross-validation
clf = RandomForestRegressor(n_estimators=64)
scores = cross_val_score(clf, features, target, cv=3)
print("Mean Score : ", scores.mean())

In [None]:
# 128 trees in the forest, evaluated with 3-fold cross-validation on the
# shuffled frame from the previous cell.
features = df.iloc[:, :-1]
target = df.iloc[:, -1]
clf = RandomForestRegressor(n_estimators=128)
scores = cross_val_score(clf, features, target, cv=3)
print("Mean Score : ", scores.mean())

## Tuning Parameters

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

# Grid-search the number of trees on the one-tenth sample.
df = pd.read_csv('one-tenth-dataset.csv')
features = df.iloc[:, :-1]
target = df.iloc[:, -1]

# Candidate forest sizes: 10, 20, ..., 90
parameters = {'n_estimators': range(10, 100, 10)}
clf = GridSearchCV(RandomForestRegressor(), parameters)
clf.fit(features, target)

# Report the winning setting, its score, and the full results table.
print('Best Parameters', clf.best_params_)
print('Best Score : ', clf.best_score_)
print(pd.DataFrame(clf.cv_results_))