# **ARTIFICIAL NEURAL NETWORKS**

**Data Exploration and Preprocessing**

In [147]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Dense
# Dense: to add layers(hidden, output)
from tensorflow.keras import Sequential
# Sequential: to add layers in sequence, to initialize ann model i.e. initially random weight will be assigned
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

In [148]:
# Load the dataset
df = pd.read_csv('Alphabets_data.csv')

In [149]:
df.head()

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [150]:
# Display basic information about the dataset
print(df.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB
None


In [154]:
# Number of samples and features
num_samples = df.shape[0]
num_features = df.shape[1] - 1

In [155]:
# Number of classes
num_classes = df['letter'].nunique()

In [156]:
print(f'Number of samples: {num_samples}')
print(f'Number of features: {num_features}')
print(f'Number of classes: {num_classes}')

Number of samples: 20000
Number of features: 16
Number of classes: 26


In [157]:
#Data Preprocessing
from sklearn.preprocessing import StandardScaler

In [158]:
# Check for missing values
missing_values = df.isnull().sum()
print('Missing values in each column:')
print(missing_values)

Missing values in each column:
letter    0
xbox      0
ybox      0
width     0
height    0
onpix     0
xbar      0
ybar      0
x2bar     0
y2bar     0
xybar     0
x2ybar    0
xy2bar    0
xedge     0
xedgey    0
yedge     0
yedgex    0
dtype: int64


In [159]:
# Drop rows with missing values (if any)
df.dropna(inplace=True)

In [160]:

# Separate features and target variable
X = df.drop('letter', axis=1)
y = df['letter']


In [161]:
# Normalize the features
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)

In [162]:
# Encode the target variable (if it's not already numerical)
from sklearn.preprocessing import LabelEncoder

In [163]:
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# **Model Implementation**

 Construct a Basic ANN Model

In [164]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [165]:
# Define the ANN model
model = Sequential()
model.add(Dense(64, input_dim=num_features, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))


In [166]:
# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

 Split the Dataset

In [167]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_normalized, y_encoded, test_size=0.2, random_state=42)


Train and Evaluate the Model

In [168]:
# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [169]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_accuracy}')

Test accuracy: 0.9382500052452087


# **Hyperparameter Tuning**

In [170]:
!pip install tensorflow



In [171]:
!pip install -U keras-tuner



In [172]:
from sklearn.model_selection import GridSearchCV
import keras_tuner as kt


In [173]:
# tuning for optimizer(adam,rmsprop or sgd:stochastic gradient descent) only
def optimizer_selection(hp):
    #create instance of sequential class
    model = Sequential()
    #Add hidden layer
    model.add(Dense(units=10, activation='relu'))
    #Add output layer
    model.add(Dense(units=1, activation='sigmoid'))
    #Optimizer selection
    optim = hp.Choice('optimizer', values = ['sgd','adam','rmsprop']) #value of optimizer is categorical so use choice()function else use hp.Int()
    model.compile(optimizer=optim, loss = 'binary_crossentropy', metrics = 'accuracy')
    return model

In [174]:
# similar to GridSearchCV() we will use here RandomSearch()
tuner = kt.RandomSearch(
    optimizer_selection,# model name-here Sequential() class is our model
    objective='val_accuracy', # increase accuracy of test data i.e. validation test accuracy
    max_trials=3 # for each optimizer take 3 trials, can select 3 to 10
)

Reloading Tuner from ./untitled_project/tuner0.json


In [175]:
tuner.search(X_train,y_train, epochs = 3, validation_data = (X_test,y_test))

In [176]:
tuner.get_best_hyperparameters()[0].values

{'optimizer': 'adam'}

In [177]:
model = tuner.get_best_models(num_models=1)[0] # get best model: rmsprop
model.fit(X_train,y_train, epochs = 100, validation_data = (X_test,y_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7b1eba2de830>

In [178]:
model.evaluate(X_train,y_train)



[-939571.5, 0.03831249848008156]

# Evaluation

In [180]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Predict the labels on the test set
y_pred = model.predict(X_test)




In [181]:
# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')


In [182]:
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-score: {f1}')

Accuracy: 0.03825
Precision: 0.0014630625
Recall: 0.03825
F1-score: 0.0028183241030580305
