In [1]:
import numpy as np
import pandas as pd


In [3]:
# Read the train and test symptom tables from the shared data directory.
train_b, test_b = (
    pd.read_csv("../data/train_symp_path.csv"),
    pd.read_csv("../data/test_symp_path.csv"),
)
def process(df):
    """Normalise the free-text ``Symptoms`` column of ``df`` in place.

    The text is lower-cased, every character that is neither a word
    character nor whitespace is dropped, and newlines are turned into
    single spaces.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string-valued ``Symptoms`` column. The column is
        overwritten on the frame that is passed in.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned so the call can be chained.
    """
    symptoms = df['Symptoms'].str.lower()  # Convert to lower case
    # The pattern must be applied as a regular expression. pandas >= 2.0
    # defaults to regex=False, which would look for the literal text
    # "[^\w\s]" and leave all punctuation untouched.
    symptoms = symptoms.str.replace(r'[^\w\s]', '', regex=True)  # Remove punctuation
    # "\n" counts as whitespace above, so it survives and is replaced here.
    symptoms = symptoms.str.replace('\n', ' ', regex=False)  # Newlines -> spaces
    df['Symptoms'] = symptoms
    return df
# process() normalises the Symptoms text and hands the frame back;
# the Disease column of that frame is the classification target.
train_processed = process(train_b)
test_processed = process(test_b)
train_label = train_processed['Disease']
test_label = test_processed['Disease']

In [77]:
# Per-sample probability tables, loaded as train/test pairs.
# NOTE(review): presumably these are the outputs of the upstream base
# models (text, BERT, combined) -- confirm against the notebooks that wrote them.
text_train, text_test = (
    pd.read_csv("../data/X_train_probabilities.csv"),
    pd.read_csv("../data/X_test_probabilities.csv"),
)

bert_train, bert_test = (
    pd.read_csv("../data/train_probabilities.csv"),
    pd.read_csv("../data/test_probabilities.csv"),
)

# These two files are read with header=None, so their first row is data.
p_train, p_test = (
    pd.read_csv("../data/train_combined_p.csv", header=None),
    pd.read_csv("../data/test_combined_p.csv", header=None),
)

In [25]:
# Build the fusion feature matrix: the p_* columns first, then the text_*
# columns, joined side by side (column-wise) for each split.
stacked_probabilities_train = np.concatenate((p_train, text_train), axis=1)
stacked_probabilities_test = np.concatenate((p_test, text_test), axis=1)

In [27]:
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping from the training labels only, then
# reuse that same mapping for the test labels. Refitting on the test split
# would build an independent mapping (and overwrite classes_), so identical
# integers could stand for different diseases if the two label sets differ.
# transform() raises ValueError if the test split has a label not seen in training.
label_encoder = LabelEncoder()
y_encoded_train = label_encoder.fit_transform(train_label)
y_encoded_test = label_encoder.transform(test_label)

In [28]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression meta-classifier trained on the stacked probability features.
fusion_model = LogisticRegression()
fusion_model.fit(stacked_probabilities_train, y_encoded_train)

In [30]:
from sklearn.metrics import classification_report, accuracy_score

# Predict encoded class ids for the test features and report plain accuracy.
y_pred_log = fusion_model.predict(stacked_probabilities_test)

accuracy = accuracy_score(y_true=y_encoded_test, y_pred=y_pred_log)
print(f'Accuracy of the fusion model: {accuracy * 100:.2f}%')


Accuracy of the fusion model: 87.80%


In [32]:
# Per-class precision / recall / F1, with rows named after the encoder's classes.
fusion_report = classification_report(
    y_encoded_test,
    y_pred_log,
    target_names=label_encoder.classes_,
)
print(fusion_report)


                                                      precision    recall  f1-score   support

                                   Atopic Dermatitis       0.84      1.00      0.91        32
          Lupus and other Connective Tissue diseases       0.94      0.94      0.94        17
Scabies Lyme Diease and other Infestations and Bites       0.90      0.72      0.80        25
        Seborrheic Keratoses and other Benign Tumors       0.86      0.75      0.80         8

                                            accuracy                           0.88        82
                                           macro avg       0.89      0.85      0.86        82
                                        weighted avg       0.88      0.88      0.87        82



In [53]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2


In [34]:
# Baseline fusion network: a single hidden layer on top of the stacked features.
num_classes = 4  # softmax output width; targets are assumed to be one-hot with this many columns
n_features = stacked_probabilities_train.shape[1]

fusion_model = Sequential()
fusion_model.add(Dense(64, input_shape=(n_features,), activation='relu'))
fusion_model.add(Dropout(0.5))
fusion_model.add(Dense(num_classes, activation='softmax'))

fusion_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [35]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer class ids of both splits (4 columns each).
y_train_encoded, y_val_encoded = (
    to_categorical(encoded_labels, num_classes=4)
    for encoded_labels in (y_encoded_train, y_encoded_test)
)

In [36]:
# Train for 10 epochs in mini-batches of 32.
# NOTE(review): the test split is what gets passed as validation_data here,
# so the per-epoch "val" metrics are computed on the test rows.
fusion_model.fit(
    x=stacked_probabilities_train,
    y=y_train_encoded,
    validation_data=(stacked_probabilities_test, y_val_encoded),
    epochs=10,
    batch_size=32,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x13d227e50>

In [37]:
# evaluate() yields the loss followed by the compiled accuracy metric.
eval_metrics = fusion_model.evaluate(x=stacked_probabilities_test, y=y_val_encoded)
val_loss, val_accuracy = eval_metrics
print(f"Validation Accuracy: {val_accuracy:.4f}")


Validation Accuracy: 0.8049


In [39]:
# Second variant: adds a 32-unit hidden layer after the dropout.
num_classes = 4  # softmax output width; targets are assumed to be one-hot with this many columns
n_features = stacked_probabilities_train.shape[1]

fusion_model = Sequential()
fusion_model.add(Dense(64, input_shape=(n_features,), activation='relu'))
fusion_model.add(Dropout(0.5))
fusion_model.add(Dense(32, activation='relu'))
fusion_model.add(Dense(num_classes, activation='softmax'))

fusion_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [40]:
# Same training schedule as before: 10 epochs, batch size 32.
# NOTE(review): validation_data is the test split, not a separate hold-out.
fusion_model.fit(
    x=stacked_probabilities_train,
    y=y_train_encoded,
    validation_data=(stacked_probabilities_test, y_val_encoded),
    epochs=10,
    batch_size=32,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x13df1abd0>

In [41]:
# Loss and accuracy of the current fusion_model on the test features.
eval_metrics = fusion_model.evaluate(x=stacked_probabilities_test, y=y_val_encoded)
val_loss, val_accuracy = eval_metrics
print(f"Validation Accuracy: {val_accuracy:.4f}")

Validation Accuracy: 0.8659


In [60]:
# Third variant: two hidden layers (64 and 32 units), each followed by dropout.
num_classes = 4  # softmax output width; targets are assumed to be one-hot with this many columns
n_features = stacked_probabilities_train.shape[1]

fusion_model = Sequential()
fusion_model.add(Dense(64, input_shape=(n_features,), activation='relu'))
fusion_model.add(Dropout(0.5))
fusion_model.add(Dense(32, activation='relu'))
fusion_model.add(Dropout(0.3))
fusion_model.add(Dense(num_classes, activation='softmax'))

fusion_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [61]:
# Fit the two-dropout variant: 10 epochs, batch size 32.
# NOTE(review): validation_data is the test split, not a separate hold-out.
fusion_model.fit(
    x=stacked_probabilities_train,
    y=y_train_encoded,
    validation_data=(stacked_probabilities_test, y_val_encoded),
    epochs=10,
    batch_size=32,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x13e08b4d0>

In [62]:
# Loss and accuracy of the current fusion_model on the test features.
eval_metrics = fusion_model.evaluate(x=stacked_probabilities_test, y=y_val_encoded)
val_loss, val_accuracy = eval_metrics
print(f"Validation Accuracy: {val_accuracy:.4f}")

Validation Accuracy: 0.8659


In [75]:
# The network outputs one score per class; take the highest-scoring
# column of each row as the predicted class id.
y_pred_test_nn = fusion_model.predict(stacked_probabilities_test)
predicted_classes = y_pred_test_nn.argmax(axis=1)




In [73]:
# Show the integer-encoded test labels (the bare expression is rendered as the cell output).
y_encoded_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3])

In [76]:
# Per-class precision / recall / F1 for the neural fusion model's predictions.
nn_report = classification_report(
    y_encoded_test,
    predicted_classes,
    target_names=label_encoder.classes_,
)
print(nn_report)


                                                      precision    recall  f1-score   support

                                   Atopic Dermatitis       0.82      1.00      0.90        32
          Lupus and other Connective Tissue diseases       0.94      0.88      0.91        17
Scabies Lyme Diease and other Infestations and Bites       0.86      0.72      0.78        25
        Seborrheic Keratoses and other Benign Tumors       1.00      0.75      0.86         8

                                            accuracy                           0.87        82
                                           macro avg       0.90      0.84      0.86        82
                                        weighted avg       0.87      0.87      0.86        82



In [54]:
# Fourth variant: same layers as the two-dropout network, plus an L2 penalty
# (factor 0.01) on the kernel of the first hidden layer.
num_classes = 4  # softmax output width; targets are assumed to be one-hot with this many columns
n_features = stacked_probabilities_train.shape[1]

fusion_model = Sequential()
fusion_model.add(
    Dense(64, input_shape=(n_features,), activation='relu', kernel_regularizer=l2(0.01))
)
fusion_model.add(Dropout(0.5))
fusion_model.add(Dense(32, activation='relu'))
fusion_model.add(Dropout(0.3))
fusion_model.add(Dense(num_classes, activation='softmax'))

fusion_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [55]:
# Fit the L2-regularised variant: 10 epochs, batch size 32.
# NOTE(review): validation_data is the test split, not a separate hold-out.
fusion_model.fit(
    x=stacked_probabilities_train,
    y=y_train_encoded,
    validation_data=(stacked_probabilities_test, y_val_encoded),
    epochs=10,
    batch_size=32,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x13e4b5dd0>

In [56]:
# Loss and accuracy of the current fusion_model on the test features.
eval_metrics = fusion_model.evaluate(x=stacked_probabilities_test, y=y_val_encoded)
val_loss, val_accuracy = eval_metrics
print(f"Validation Accuracy: {val_accuracy:.4f}")

Validation Accuracy: 0.8293
