In [44]:
# Dependencies

import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Sequential
from keras.layers import Activation, Dense, Embedding, LSTM, SpatialDropout1D, Dropout, Flatten, GRU, Conv1D, MaxPooling1D, Bidirectional


# NLP

from sklearn.feature_extraction .text import TfidfVectorizer

# Processing

from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

In [45]:
df = pd.read_csv(r"../Data/processedData.csv")

X_train, X_test, y_train, y_test = train_test_split(df[['processedMessage','fingers','tail']],
                                                    df['species_group'])

# Vectorization #

In [46]:
vectorizer = TfidfVectorizer(ngram_range=(1,2),max_features=128)
vectorizer.fit(df['processedMessage'])

tfidf_X_train = (vectorizer.transform(X_train['processedMessage'])).toarray()
tfidf_X_test = (vectorizer.transform(X_test['processedMessage'])).toarray()

label_encoder = LabelEncoder()
label_encoder.fit(df['species_group'])

y_train = to_categorical(label_encoder.transform(y_train),num_classes=12)
y_test = to_categorical(label_encoder.transform(y_test),num_classes=12)

# 1-D CNN #

In [47]:
model_CNN = Sequential()
model_CNN.add(Conv1D(filters=32, kernel_size=3, activation='leaky_relu', input_shape=(tfidf_X_train.shape[1],1)))
model_CNN.add(MaxPooling1D(pool_size=3))
model_CNN.add(Flatten())
model_CNN.add(Dense(units=32, activation = 'leaky_relu'))
model_CNN.add(Dense(units = 16, activation = 'leaky_relu'))
model_CNN.add(Dropout(0.2))
model_CNN.add(Dense(units=12, activation='softmax'))

model_CNN.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model_CNN.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [48]:
# Training

model_CNN.fit(tfidf_X_train, y_train, epochs=200, validation_data=(tfidf_X_test, y_test))

Epoch 1/200


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 65ms/step - accuracy: 0.1223 - loss: 2.4775 - val_accuracy: 0.1520 - val_loss: 2.4492
Epoch 2/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.1945 - loss: 2.4108 - val_accuracy: 0.3120 - val_loss: 2.3613
Epoch 3/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.2558 - loss: 2.2885 - val_accuracy: 0.3120 - val_loss: 2.2332
Epoch 4/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.3140 - loss: 2.1677 - val_accuracy: 0.2800 - val_loss: 2.1220
Epoch 5/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.2982 - loss: 2.0367 - val_accuracy: 0.3120 - val_loss: 1.9792
Epoch 6/200
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.3870 - loss: 1.9249 - val_accuracy: 0.3360 - val_loss: 1.8787
Epoch 7/200
[1m12/12[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x247cd553320>

In [49]:
from sklearn.metrics import classification_report

# Testing

y_pred_msg_CNN = model_CNN.predict(tfidf_X_test)
y_pred_classes = y_pred_msg_CNN.argmax(axis=1)
y_test_classes = (y_test).argmax(axis=1)

print(classification_report(y_test_classes, y_pred_classes))

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
              precision    recall  f1-score   support

           0       0.18      0.25      0.21         8
           1       0.70      0.50      0.58        14
           2       0.27      0.60      0.38         5
           3       0.25      0.08      0.12        13
           4       0.12      0.25      0.17         4
           5       0.25      0.12      0.17         8
           6       0.57      0.44      0.50        18
           7       0.62      0.62      0.62        13
           8       0.22      0.40      0.29         5
           9       0.50      0.70      0.58        10
          10       0.47      0.53      0.50        17
          11       0.50      0.40      0.44        10

    accuracy                           0.42       125
   macro avg       0.39      0.41      0.38       125
weighted avg       0.45      0.42      0.42       125



# Gaussian Classifier #

In [50]:
# Finger probability array

from sklearn.naive_bayes import GaussianNB

y_train_class = np.argmax(y_train,axis=1)
y_test_class = np.argmax(y_test,axis=1)

gnb_fingers = GaussianNB()
gnb_fingers.fit(X_train['fingers'].values.reshape(-1, 1),y_train_class)

y_pred_fingers = gnb_fingers.predict_proba(X_test['fingers'].values.reshape(-1, 1))

In [51]:
# Tail probability array

from sklearn.naive_bayes import MultinomialNB

label_encoder = LabelEncoder()
label_encoder.fit(df['tail'])

X_train_tail = (label_encoder.transform(X_train['tail'])).reshape(-1,1)
X_test_tail = label_encoder.transform(X_test['tail']).reshape(-1,1)

mnb_tail = MultinomialNB()
mnb_tail.fit(X_train_tail, y_train_class)

y_pred_tail = mnb_tail.predict_proba(X_test_tail)

y_pred_tail

array([[0.10666667, 0.10933333, 0.04      , ..., 0.13333333, 0.072     ,
        0.10933333],
       [0.10666667, 0.10933333, 0.04      , ..., 0.13333333, 0.072     ,
        0.10933333],
       [0.10666667, 0.10933333, 0.04      , ..., 0.13333333, 0.072     ,
        0.10933333],
       ...,
       [0.10666667, 0.10933333, 0.04      , ..., 0.13333333, 0.072     ,
        0.10933333],
       [0.10666667, 0.10933333, 0.04      , ..., 0.13333333, 0.072     ,
        0.10933333],
       [0.10666667, 0.10933333, 0.04      , ..., 0.13333333, 0.072     ,
        0.10933333]])

In [56]:
y_pred_msg_log = np.log(y_pred_msg_CNN)
y_pred_fingers_log = np.log(y_pred_fingers)
y_pred_tail_log = np.log(y_pred_tail)

arrays = (y_pred_msg_log, y_pred_fingers_log, y_pred_tail_log)
final_probabilities = np.prod(arrays, axis=0)

y_pred_final_classes = np.argmax(final_probabilities, axis=1)
print(classification_report(y_test_classes,y_pred_final_classes))

              precision    recall  f1-score   support

           0       0.27      0.38      0.32         8
           1       0.70      0.50      0.58        14
           2       0.43      0.60      0.50         5
           3       0.50      0.23      0.32        13
           4       0.25      0.25      0.25         4
           5       0.50      0.25      0.33         8
           6       0.64      0.50      0.56        18
           7       0.67      0.92      0.77        13
           8       0.22      0.40      0.29         5
           9       0.54      0.70      0.61        10
          10       0.55      0.65      0.59        17
          11       0.56      0.50      0.53        10

    accuracy                           0.52       125
   macro avg       0.49      0.49      0.47       125
weighted avg       0.54      0.52      0.51       125

