# Text Model

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder



In [3]:
# Load your dataset
train_t = pd.read_csv("../data/train_symp.csv",encoding = 'unicode_escape')
test_t = pd.read_csv("../data/test_symp.csv",encoding = 'unicode_escape')
train_t.head()
test_t.head()

Unnamed: 0.1,Unnamed: 0,Disease,Symptoms
0,148,Atopic Dermatitis,"Breast eczema may affect the nipples, areolae..."
1,150,Atopic Dermatitis,\nSymptoms refer to the sensations that people...
2,201,Atopic Dermatitis,"Dry, itchy skin\nThick, rough skin that looks ..."
3,151,Atopic Dermatitis,The major symptoms of XLI include scaling of t...
4,153,Atopic Dermatitis,The dry scaling appearance is most noticeable ...


In [4]:
train_t = train_t[['Disease','Symptoms']]
test_t = test_t[['Disease','Symptoms']]

train_t.dropna()
test_t.dropna()


Unnamed: 0,Disease,Symptoms
0,Atopic Dermatitis,"Breast eczema may affect the nipples, areolae..."
1,Atopic Dermatitis,\nSymptoms refer to the sensations that people...
2,Atopic Dermatitis,"Dry, itchy skin\nThick, rough skin that looks ..."
3,Atopic Dermatitis,The major symptoms of XLI include scaling of t...
4,Atopic Dermatitis,The dry scaling appearance is most noticeable ...
...,...,...
77,Seborrheic Keratoses and other Benign Tumors,Symptoms of seborrheic keratosis are skin grow...
78,Seborrheic Keratoses and other Benign Tumors,Seborrheic keratoses can occur anywhere on the...
79,Seborrheic Keratoses and other Benign Tumors,Dermatofibromas are most often found on the ar...
80,Seborrheic Keratoses and other Benign Tumors,Rosacea is limited to the face and scalp and...


In [5]:
train_t

Unnamed: 0,Disease,Symptoms
0,Atopic Dermatitis,Symptoms of keratosis pilaris may include:You ...
1,Atopic Dermatitis,"Keratosis pilaris can occur at any age, but it..."
2,Atopic Dermatitis,Actinic keratoses vary in appearance. Symptoms...
3,Atopic Dermatitis,Symptoms of seborrheic keratosis are skin grow...
4,Atopic Dermatitis,Symptoms may include:Small bumps that look lik...
...,...,...
326,Seborrheic Keratoses and other Benign Tumors,"On an earlobe, youÂll likely see a round, sol..."
327,Seborrheic Keratoses and other Benign Tumors,A keratoacanthoma appears and grows rapidly ov...
328,Seborrheic Keratoses and other Benign Tumors,Feeling a lump just beneath the skin\nIt may b...
329,Seborrheic Keratoses and other Benign Tumors,The growths can:\n\nBe slightly raised from th...


In [6]:
test_t.groupby(test_t['Disease']).count()

Unnamed: 0_level_0,Symptoms
Disease,Unnamed: 1_level_1
Atopic Dermatitis,32
Lupus and other Connective Tissue diseases,17
Scabies Lyme Diease and other Infestations and Bites,25
Seborrheic Keratoses and other Benign Tumors,8


In [7]:
train_t.groupby(train_t['Disease']).count()

Unnamed: 0_level_0,Symptoms
Disease,Unnamed: 1_level_1
Atopic Dermatitis,128
Lupus and other Connective Tissue diseases,68
Scabies Lyme Diease and other Infestations and Bites,102
Seborrheic Keratoses and other Benign Tumors,33


In [8]:
def process(df):
    df['Symptoms'] = df['Symptoms'].str.lower()  # Convert to lower case
    df['Symptoms'] = df['Symptoms'].str.replace('[^\w\s]', '')  # Remove punctuation
    df['Symptoms'] = df['Symptoms'].str.replace('\n', ' ')  # Remove punctuation
    return df

In [9]:
train_t = process(train_t)
test_t = process(test_t)

In [10]:
Xtrain = train_t['Symptoms']  # Features (text data)
ytrain = train_t['Disease']     # Labels (class for each text entry)
label_encoder = LabelEncoder()
ytrain_e = label_encoder.fit_transform(ytrain)

In [11]:
Xtest = test_t['Symptoms']  # Features (text data)
ytest = test_t['Disease']     # Labels (class for each text entry)
label_encoder = LabelEncoder()
ytest_e = label_encoder.fit_transform(ytest)

In [65]:
Xtest

0     breast eczema  may affect the nipples, areolae...
1      symptoms refer to the sensations that people ...
2     dry, itchy skin thick, rough skin that looks d...
3     the major symptoms of xli include scaling of t...
4     the dry scaling appearance is most noticeable ...
                            ...                        
77    symptoms of seborrheic keratosis are skin grow...
78    seborrheic keratoses can occur anywhere on the...
79    dermatofibromas are most often found on the ar...
80      rosacea is limited to the face and scalp and...
81    a round or oval-shaped waxy or rough bump, typ...
Name: Symptoms, Length: 82, dtype: object

In [66]:
t

NameError: name 'Ytest' is not defined

In [16]:
Xtest.shape

(82,)

In [17]:
Xtrain.shape

(331,)

In [12]:
vectorizer = TfidfVectorizer(max_features=810)
X_train_vectors = vectorizer.fit_transform(Xtrain.values.astype('U'))
X_test_vectors = vectorizer.transform(Xtest.values.astype('U'))

In [13]:
X_test_vectors

<82x810 sparse matrix of type '<class 'numpy.float64'>'
	with 4533 stored elements in Compressed Sparse Row format>

In [14]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn import svm

In [15]:
svmmodel = svm.SVC(kernel='linear', probability=True)  # SVC is often a good starting point

In [16]:
svmmodel.fit(X_train_vectors, ytrain)


In [17]:
from sklearn.metrics import classification_report, accuracy_score

# Predict the labels for the test set
y_pred = svmmodel.predict(X_test_vectors)

# Evaluate the model
print(f'Accuracy: {accuracy_score(ytest, y_pred)}')
print(classification_report(ytest, y_pred, target_names=label_encoder.classes_))


Accuracy: 0.8780487804878049
                                                      precision    recall  f1-score   support

                                   Atopic Dermatitis       0.84      1.00      0.91        32
          Lupus and other Connective Tissue diseases       1.00      0.88      0.94        17
Scabies Lyme Diease and other Infestations and Bites       0.86      0.76      0.81        25
        Seborrheic Keratoses and other Benign Tumors       0.86      0.75      0.80         8

                                            accuracy                           0.88        82
                                           macro avg       0.89      0.85      0.87        82
                                        weighted avg       0.88      0.88      0.88        82



In [18]:
# Predict the labels for the test set
y_pred = svmmodel.predict(X_train_vectors)

# Evaluate the model
print(f'Accuracy: {accuracy_score(ytrain, y_pred)}')
print(classification_report(ytrain, y_pred, target_names=label_encoder.classes_))

Accuracy: 0.9879154078549849
                                                      precision    recall  f1-score   support

                                   Atopic Dermatitis       0.98      0.99      0.99       128
          Lupus and other Connective Tissue diseases       0.99      0.99      0.99        68
Scabies Lyme Diease and other Infestations and Bites       0.99      0.98      0.99       102
        Seborrheic Keratoses and other Benign Tumors       1.00      1.00      1.00        33

                                            accuracy                           0.99       331
                                           macro avg       0.99      0.99      0.99       331
                                        weighted avg       0.99      0.99      0.99       331



In [19]:
text_probabilities = svmmodel.predict_proba(X_test_vectors)
text_probabilities

array([[7.89768558e-01, 1.05959867e-01, 6.34288353e-02, 4.08427401e-02],
       [6.09160603e-01, 3.97346107e-02, 3.45450748e-01, 5.65403866e-03],
       [9.65676989e-01, 8.61786901e-03, 2.02498771e-02, 5.45526521e-03],
       [5.80781693e-01, 8.23347204e-02, 2.91142892e-01, 4.57406951e-02],
       [9.75094957e-01, 1.53221077e-02, 6.75406309e-03, 2.82887179e-03],
       [5.80781693e-01, 8.23347204e-02, 2.91142892e-01, 4.57406951e-02],
       [8.17980382e-01, 2.83172198e-02, 1.49344024e-01, 4.35837422e-03],
       [9.59067418e-01, 1.27298365e-02, 2.46999993e-02, 3.50274619e-03],
       [7.37818945e-01, 4.37009423e-03, 2.00261489e-01, 5.75494724e-02],
       [9.66116027e-01, 1.51200793e-02, 1.43739508e-02, 4.38994256e-03],
       [9.79581561e-01, 3.16949368e-03, 1.44033604e-02, 2.84558533e-03],
       [7.37818945e-01, 4.37009423e-03, 2.00261489e-01, 5.75494724e-02],
       [9.78916525e-01, 5.79538562e-03, 1.26528715e-02, 2.63521815e-03],
       [8.35051873e-01, 6.28509520e-02, 4.88799704e

# CNN Model

In [20]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# CNN libraries
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [21]:
train_data = "../data/images/dermnet/train1"
test_data = "../data/images/dermnet/test1/"

In [22]:
# Preprocess
input_shape = (224, 224, 3) 
num_classes = 4

train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        train_data,
        target_size=input_shape[:2],
        batch_size=32,
        class_mode='categorical')

validation_generator = validation_datagen.flow_from_directory(
        test_data,
        target_size=input_shape[:2],
        batch_size=32,
        class_mode='categorical')

Found 2711 images belonging to 4 classes.
Found 679 images belonging to 4 classes.


In [23]:
def build_model(input_shape, num_classes):
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))
    return model

In [24]:
model = build_model(input_shape, num_classes)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [25]:
history = model.fit(
      train_generator,
      steps_per_epoch=50, 
      epochs=10,
      validation_data=validation_generator,
      validation_steps=25)

Epoch 1/10


2023-11-10 23:44:24.977084: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2023-11-10 23:45:19.391347: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [31]:
test_loss, test_acc = model.evaluate(validation_generator, steps=25)
print('\nTest accuracy:', test_acc)

2023-11-10 15:49:00.563270: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Test accuracy: 0.5375552177429199


In [10]:
image_probabilities = model.predict(validation_generator)


2023-11-04 23:44:10.495731: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




In [11]:
image_probabilities

array([[0.17795499, 0.14731216, 0.29345915, 0.38127375],
       [0.0707379 , 0.0477184 , 0.29970348, 0.5818402 ],
       [0.03478829, 0.13660417, 0.45125857, 0.37734893],
       ...,
       [0.02061159, 0.00706999, 0.11229225, 0.8600261 ],
       [0.65240234, 0.05708424, 0.21682031, 0.07369312],
       [0.725041  , 0.13644284, 0.11727757, 0.02123863]], dtype=float32)

# Combining models

In [26]:
train_b = pd.read_csv("../data/train_symp_path.csv")
test_b = pd.read_csv("../data/test_symp_path.csv")

In [27]:
train_text = process(train_b)['Symptoms']
test_text = process(test_b)['Symptoms']

In [28]:
train_label = process(train_b)['Disease']
test_label = process(test_b)['Disease']

In [29]:
train_c = "../data/images/dermnet/train_combined/"
test_c = "../data/images/dermnet/test_combined/"

In [30]:
# Preprocess
input_shape = (224, 224, 3) 
num_classes = 4

train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

c_train_generator = train_datagen.flow_from_directory(
        train_c,
        target_size=input_shape[:2],
        batch_size=32,
        class_mode='categorical')

c_validation_generator = validation_datagen.flow_from_directory(
        test_c,
        target_size=input_shape[:2],
        batch_size=32,
        class_mode='categorical')

Found 331 images belonging to 4 classes.
Found 82 images belonging to 4 classes.


In [31]:
test_loss, test_acc = model.evaluate(c_validation_generator, steps=25)
print('\nTest accuracy:', test_acc)

2023-11-11 02:21:13.317124: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]



Test accuracy: 0.26829269528388977


In [32]:
image_probabilities_test= model.predict(c_validation_generator)
image_probabilities_train = model.predict(c_train_generator)



2023-11-11 02:21:20.012529: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2023-11-11 02:21:21.829456: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




In [33]:
label_encoder = LabelEncoder()
y_encoded_train = label_encoder.fit_transform(train_label)
y_encoded_test = label_encoder.fit_transform(test_label)

In [34]:
c_train_vector = vectorizer.fit_transform(train_text.values.astype('U'))


In [35]:
c_test_vector = vectorizer.fit_transform(test_text.values.astype('U'))


In [36]:
y_pred_train_c = svmmodel.predict(X_train_vectors)

# Evaluate the model
print(f'Accuracy: {accuracy_score(train_label, y_pred_train_c)}')
print(classification_report(train_label, y_pred_train_c, target_names=label_encoder.classes_))


Accuracy: 0.9879154078549849
                                                      precision    recall  f1-score   support

                                   Atopic Dermatitis       0.98      0.99      0.99       128
          Lupus and other Connective Tissue diseases       0.99      0.99      0.99        68
Scabies Lyme Diease and other Infestations and Bites       0.99      0.98      0.99       102
        Seborrheic Keratoses and other Benign Tumors       1.00      1.00      1.00        33

                                            accuracy                           0.99       331
                                           macro avg       0.99      0.99      0.99       331
                                        weighted avg       0.99      0.99      0.99       331



In [37]:
c_test_vector

<82x810 sparse matrix of type '<class 'numpy.float64'>'
	with 5573 stored elements in Compressed Sparse Row format>

In [38]:
y_pred_test_c = svmmodel.predict(X_test_vectors)

# Evaluate the model
print(f'Accuracy: {accuracy_score(test_label, y_pred_test_c)}')
print(classification_report(test_label, y_pred_test_c, target_names=label_encoder.classes_))


Accuracy: 0.8780487804878049
                                                      precision    recall  f1-score   support

                                   Atopic Dermatitis       0.84      1.00      0.91        32
          Lupus and other Connective Tissue diseases       1.00      0.88      0.94        17
Scabies Lyme Diease and other Infestations and Bites       0.86      0.76      0.81        25
        Seborrheic Keratoses and other Benign Tumors       0.86      0.75      0.80         8

                                            accuracy                           0.88        82
                                           macro avg       0.89      0.85      0.87        82
                                        weighted avg       0.88      0.88      0.88        82



In [39]:
train_text_probabilities = svmmodel.predict_proba(X_train_vectors)
test_text_probabilities = svmmodel.predict_proba(X_test_vectors)


In [40]:
stacked_probabilities_train = np.hstack((image_probabilities_train, train_text_probabilities))
stacked_probabilities_test = np.hstack((image_probabilities_test, test_text_probabilities))

In [41]:
from sklearn.linear_model import LogisticRegression

fusion_model = LogisticRegression().fit(stacked_probabilities_train, y_encoded_train)

In [42]:
y_pred_log = fusion_model.predict(stacked_probabilities_test)


accuracy = accuracy_score(y_encoded_test, y_pred_log)
print(f'Accuracy of the fusion model: {accuracy * 100:.2f}%')


Accuracy of the fusion model: 86.59%


In [70]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Number of classes
num_classes = 4  # Assuming y_train is one-hot encoded

fusion_model = Sequential([
    Dense(64, input_shape=(stacked_probabilities_train.shape[1],), activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

fusion_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [71]:
from tensorflow.keras.utils import to_categorical

y_train_encoded = to_categorical(y_encoded_train, num_classes=4)
y_val_encoded = to_categorical(y_encoded_test, num_classes=4)

In [72]:
fusion_model.fit(stacked_probabilities_train, y_train_encoded, validation_data=(stacked_probabilities_test, y_val_encoded), epochs=10, batch_size=32)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x165c03d50>

In [74]:
val_loss, val_accuracy = fusion_model.evaluate(stacked_probabilities_test, y_val_encoded)
print(f"Validation Accuracy: {val_accuracy:.4f}")


Validation Accuracy: 0.8537


In [63]:
(stacked_probabilities_train.shape[1],)

(8,)

In [66]:
print(stacked_probabilities_train.T.shape, y_train_encoded.shape)
print(stacked_probabilities_test.shape, y_val_encoded.shape)


(8, 331) (331, 4)
(82, 8) (82, 4)


In [69]:
y_val_encoded.shape[1]

4