In [2]:
######## import all libraries #########
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,MinMaxScaler,scale
import pandas as pd

from keras.utils import to_categorical
import keras
from keras import activations
from keras.layers import Input, Dense, Dropout, Flatten, Activation
from keras.models import Model, Sequential
from sklearn.metrics import accuracy_score

In [3]:
######## Load Dataset ##########
path = 'letter-recognition.txt'
# read .txt file using pandas.
data=pd.read_csv(path)
# last 16 colomnn as a inputs
X=data.iloc[:,1:]
# first column as a labels.
Y=data.iloc[:,0]
# total number of classes
classes = len(np.unique(Y))
# convert inputs in the range of 0 to 1.
X = MinMaxScaler().fit_transform(X)
# convert letter type labels into numeric type( 0 to 25)
Y = LabelEncoder().fit_transform(Y)
# convert labels into one-hot encode respresntation. if label=2 then [0 0 1....0]
Y = to_categorical(Y,classes)
print(X.shape,Y.shape)

(20000, 16) (20000, 26)


In [4]:
########### train test split ##############
# 70 % training and 30% testing
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size = 0.3, random_state = 0)

In [27]:
################ feature-extraction(ANN model) #############
dim = X.shape[1]
# building custom model using keras
model = Sequential()
# first hidden layer with 300 nodes
model.add(Dense(300,activation='relu',input_shape=(dim,)))
# disable 20% of the nodes to reduce overfitting.
model.add(Dropout(0.2))
# first hidden layer with 150nodes
model.add(Dense(150,name="feature",activation='relu'))
# last layers with total classes.
model.add(Dense(classes,activation='softmax'))
model.summary()
# compile the model using these parameters.
model.compile(loss='categorical_crossentropy',optimizer=keras.optimizers.Adam(lr=0.01),metrics=['accuracy'])
# fit the model for training with perticular setup of the parameter.
model.fit(X_train,Y_train,batch_size=2096, epochs=150,verbose=1,validation_data=(X_test,Y_test))

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_18 (Dense)             (None, 300)               5100      
_________________________________________________________________
dropout_9 (Dropout)          (None, 300)               0         
_________________________________________________________________
feature (Dense)              (None, 150)               45150     
_________________________________________________________________
dense_19 (Dense)             (None, 26)                3926      
Total params: 54,176
Trainable params: 54,176
Non-trainable params: 0
_________________________________________________________________
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/1

Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


<tensorflow.python.keras.callbacks.History at 0x1e274aa8f40>

In [28]:
######### extract the features from an intemediate layers ##########
# generate new model from the above model with the output layer of the hidden layer.
extract = Model(model.inputs, model.get_layer('feature').output)
# predict the training and testing data. Generate new features.
features1 = extract.predict(X_train)
features2 = extract.predict(X_test)

In [29]:
########### scaling the data ###############
scaler = MinMaxScaler(feature_range=(0,1))
# scaling the training/teting data into range of 0 to 1.
features1 = scaler.fit_transform(features1)
features2 = scaler.transform(features2)
# convert back to the original labels.
y_train = np.argmax(Y_train, axis=-1)
y_test = np.argmax(Y_test, axis=-1)

In [30]:
######### SVM classifier ##########
from sklearn.svm import SVC
# SVC classifier using 'rbf' kernel because multi-label classes with perticular parameter after hyper-tuning.
model = SVC(C=1000, gamma=0.01,kernel='rbf')
model.fit(features1,y_train)
score = model.score(features2, y_test)
print("SVM score:",score*100)
#print("accuracy :", accuracy_score(y_pred,y_test), "\n")

SVM score: 97.55


In [31]:
######## KNN classifier ##########
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(features1, y_train)
acc = neigh.score(features2, y_test)
print("KNN score:",acc*100)

KNN score: 97.16666666666667


In [32]:
######### Logistic Regression ########
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(features1,y_train)
score = model.score(features2, y_test)
print("logistic Regression score:",score*100)

logistic Regression score: 94.33333333333334


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [33]:
######## DecisionTree Classifier #########
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()
model.fit(features1,y_train)
score = model.score(features2, y_test)
print("Decision Tree score:",score*100)

Decision Tree score: 86.21666666666667


In [34]:
######## Naive bayes Classifier #########
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(features1,y_train)
score = model.score(features2, y_test)
print("Naive bayes score:",score*100)

Naive bayes score: 77.55


In [35]:
######## RF Classifier #########
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
model.fit(features1,y_train)
score = model.score(features2, y_test)
print("Random Forest score:",score*100)

Random Forest score: 96.78333333333333


In [36]:
######## ELM classifier ##########
from sklearn_extensions.extreme_learning_machines.elm import GenELMClassifier
from sklearn_extensions.extreme_learning_machines.random_layer import RBFRandomLayer, MLPRandomLayer
clf = GenELMClassifier(hidden_layer=MLPRandomLayer(n_hidden=5000, activation_func='tanh')) 
clf.fit(features1, y_train)
res = clf.score(features2, y_test)
print("ELM score:",res*100)

ELM score: 98.31666666666666
