## Feed-Forward Neural Network

In [102]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

In [103]:
!pip install scikeras keras



In [104]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from scikeras.wrappers import KerasClassifier, KerasRegressor
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score

In [105]:
# Hyperparameters, grouped by the stage of the notebook that consumes them.

# Data split: fraction of rows held out as the test set.
TEST_SIZE = 0.4

# Network architecture: (units, activation) per hidden layer, then the output activation.
FIRST_LAYER, ACTIVATION_1 = 128, 'relu'
SECOND_LAYER, ACTIVATION_2 = 64, 'relu'
ACTIVATION_OUT = 'softmax'

# Optimisation settings handed to the optimizer, compile() and fit().
LEARN_RATE = 0.001
LOSS_TYPE = 'categorical_crossentropy'
METRICS = ['accuracy']
EPOCHS = 20


In [106]:
# Read the feature table from disk and render it for a quick visual check.
df = pd.read_csv(filepath_or_buffer='features.csv')
display(df)

Unnamed: 0,Breed,Feature 0,Feature 1,Feature 2,Feature 3,Feature 4,Feature 5,Feature 6,Feature 7,Feature 8,...,Feature 90,Feature 91,Feature 92,Feature 93,Feature 94,Feature 95,Feature 96,Feature 97,Feature 98,Feature 99
0,brittany_spaniel,-5.279870,-2.208426,5.211814,-8.099501,-13.185220,-0.753768,-3.015471,0.825370,-4.269372,...,-0.465695,-0.674558,0.123558,-1.240681,-1.069499,-0.714136,1.011624,-0.225715,-0.220330,1.140998
1,brittany_spaniel,0.524020,-4.321692,-5.808849,-0.097443,-13.157566,0.439865,-4.521323,-1.382092,-1.304059,...,-0.410429,0.396908,0.344853,0.243527,-0.416206,0.310021,0.025371,-0.221647,-0.306602,-0.855617
2,brittany_spaniel,4.484349,-11.409184,-2.061785,-8.795961,-10.736951,-0.650287,0.697348,8.464226,-2.961214,...,-1.486026,0.636264,-0.315544,1.400241,-0.116869,-0.253275,0.918089,-0.645903,0.545067,-1.274210
3,brittany_spaniel,8.630311,-9.028896,-4.177602,-3.575223,-7.698362,-5.857273,-3.473359,6.915802,1.972978,...,-1.718268,0.056995,0.152345,0.562226,0.279578,-0.277104,-0.956852,-0.531012,0.249064,-1.877944
4,brittany_spaniel,-6.459163,-5.178344,3.182314,-6.884826,-2.663270,-0.779802,-0.788943,4.521932,-3.636744,...,-0.964177,0.368517,-0.274313,-0.534803,0.797063,-0.156896,1.027882,1.130965,0.696718,0.098893
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20423,basenji,9.544751,-11.946300,6.532518,6.032665,-0.054745,-2.835599,8.757513,2.183208,-3.308503,...,2.823255,1.785358,-0.389777,2.354301,-0.200981,1.370857,0.069524,0.747879,-1.344105,-1.186251
20424,basenji,-14.772338,-8.129835,6.266210,9.255338,-0.755042,-10.281703,-4.626710,3.558872,-6.597734,...,-1.323281,-0.496995,-1.813071,0.129572,-0.680327,0.759279,-0.898638,0.825506,-1.794241,0.535946
20425,basenji,-0.651727,-14.567093,19.406995,0.646407,10.396984,-6.700563,-4.514317,13.566748,-2.614418,...,-0.489337,2.132769,0.234950,-1.367633,-1.451568,0.228860,-0.094061,0.696946,-0.543697,-1.004654
20426,basenji,-1.703676,-2.682114,14.418438,7.169161,6.828974,-6.077620,-2.837851,11.472995,-8.041656,...,1.308288,-0.401849,1.429980,-0.723179,-2.111574,-1.035175,-0.324995,0.698054,0.101706,0.878638


In [107]:
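# NOTE: this whole cell is a disabled experiment (per-breed subsampling); nothing in it is executed.
# # Calculate the number of samples to take for each breed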
# breed_counts = df['Breed'].value_counts()
# max_count = breed_counts.max()
# sample_sizes = np.maximum(20, 20 + (breed_counts / max_count * 20).astype(int))

# # Create an empty list to store sampled dataframes
# sampled_dfs = []

# # Iterate over each breed to sample data
# for breed, sample_size in sample_sizes.items():
#     # Get indices of rows corresponding to the current breed
#     breed_indices = df[df['Breed'] == breed].index
    
#     # Randomly sample rows for the current breed
#     sampled_indices = np.random.choice(breed_indices, size=sample_size, replace=False)
    
#     # Append sampled rows to the list
#     sampled_dfs.append(df.loc[sampled_indices])

# # Concatenate all sampled dataframes into one
# sampled_df = pd.concat(sampled_dfs, ignore_index=True)

# display(sample_sizes.head())
# display(sampled_df.head())

# x = sampled_df.drop(columns=['Breed'])
# y = sampled_df['Breed']

In [108]:
# Separate the feature columns from the breed label.
x = df.drop(columns=['Breed'])
y = df['Breed']

# Fixed seed so the split (and every metric computed from it) is reproducible
# across Restart & Run All.
SPLIT_SEED = 42

# Stratify on the label so both splits keep approximately the same breed
# proportions; an unstratified split can leave a rare breed under-represented
# in (or missing from) the training data.
# NOTE: stratification requires at least two rows per breed.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=TEST_SIZE, random_state=SPLIT_SEED, stratify=y
)

# Derive the class count from the data instead of hard-coding it.
num_classes = y.nunique()
print(num_classes)

120


In [109]:
# Encode breed names as integer class ids, then as one-hot vectors.
#
# The encoder is fitted on the full label column `y` so every breed has an id
# even if it is missing from the training split; fitting on `y_train` alone
# makes `transform` raise on any breed that appears only in the test split.
label_encoder = LabelEncoder()
label_encoder.fit(y)

# Look the string labels up from `y` via each split's row index rather than
# reading `y_train` / `y_test`: those names are overwritten with one-hot arrays
# below, so reading them here would make this cell fail when it is re-run.
# Assumes df has a unique row index (the read_csv default) — TODO confirm.
y_train_encoded = label_encoder.transform(y.loc[x_train.index])
y_test_encoded = label_encoder.transform(y.loc[x_test.index])

# Convert the encoded labels to one-hot encoding
y_train = to_categorical(y_train_encoded, num_classes=num_classes)
y_test = to_categorical(y_test_encoded, num_classes=num_classes)

In [110]:
# Size the input layer from the training data rather than hard-coding 100,
# so the model still builds if the number of feature columns changes.
num_features = x_train.shape[1]

# Two hidden Dense layers followed by an output layer with one unit per class.
model = Sequential()
model.add(Dense(FIRST_LAYER, input_shape=(num_features,), activation=ACTIVATION_1))
model.add(Dense(SECOND_LAYER, activation=ACTIVATION_2))
model.add(Dense(num_classes, activation=ACTIVATION_OUT))  # num_classes is the number of unique breed labels
model.compile(optimizer=Adam(learning_rate=LEARN_RATE), loss=LOSS_TYPE, metrics=METRICS)

# Train the model
# NOTE(review): the test split doubles as validation data here, so the per-epoch
# validation metrics are computed on the same rows as the final evaluation.
model.fit(x_train, y_train, epochs=EPOCHS, batch_size=32, validation_data=(x_test, y_test))

# Make predictions
y_pred = model.predict(x_test)





Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [112]:
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score

# Convert per-class prediction scores to class indices
y_pred_classes = np.argmax(y_pred, axis=1)

# Convert one-hot encoded y_test back to class indices
y_test_classes = np.argmax(y_test, axis=1)

# Inverse transform the predicted and ground truth class indices to original breed names
y_pred_breed = label_encoder.inverse_transform(y_pred_classes)
y_test_breed = label_encoder.inverse_transform(y_test_classes)

# Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Test Accuracy: {accuracy}')

# Report every breed known to the encoder, in encoder order, so the i-th
# matrix / report row always refers to breed_names[i] even when a breed is
# absent from the test labels or is never predicted.
breed_names = label_encoder.classes_

print("Confusion Matrix for each label : ")
print(multilabel_confusion_matrix(y_test_breed, y_pred_breed, labels=breed_names))

# zero_division=0 reports 0 (without emitting a warning) for breeds whose
# precision or recall is undefined because they have no predicted or no true samples.
print("Classification Report : ")
print(classification_report(y_test_breed, y_pred_breed, labels=breed_names, zero_division=0))

Test Accuracy: 0.7689672112464905
Confusion Matrix for each label : 
[[[8107    6]
  [   9   50]]

 [[8059    5]
  [  14   94]]

 [[8104    7]
  [   7   54]]

 [[8068   19]
  [  20   65]]

 [[8074   30]
  [  22   46]]

 [[8081   26]
  [  31   34]]

 [[8084   16]
  [  27   45]]

 [[8069   12]
  [  18   73]]

 [[8085   13]
  [  11   63]]

 [[8070   27]
  [  15   60]]

 [[8093   12]
  [   5   62]]

 [[8072   15]
  [   6   79]]

 [[8087   10]
  [   5   70]]

 [[8093    8]
  [  20   51]]

 [[8089   12]
  [   8   63]]

 [[8087   31]
  [  20   34]]

 [[8095    8]
  [  10   59]]

 [[8111    7]
  [  13   41]]

 [[8071   27]
  [   3   71]]

 [[8082   25]
  [  19   46]]

 [[8112   15]
  [   7   38]]

 [[8097   14]
  [   8   53]]

 [[8093   22]
  [  18   39]]

 [[8104    8]
  [  16   44]]

 [[8100   13]
  [   7   52]]

 [[8078   13]
  [  20   61]]

 [[8067   38]
  [  13   54]]

 [[8096   11]
  [  16   49]]

 [[8092   23]
  [  15   42]]

 [[8082    3]
  [   5   82]]

 [[8111    4]
  [   2   55]]

 