In [1]:
#Dataset: https://www.kaggle.com/datasets/ehababoelnaga/anemia-types-classification
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [2]:
#load the data
data = pd.read_csv('Data/diagnosed_cbc_data_v4.csv')
print(data.head())

    WBC  LYMp  NEUTp  LYMn  NEUTn   RBC  HGB    HCT   MCV   MCH  MCHC    PLT  \
0  10.0  43.2   50.1   4.3    5.0  2.77  7.3   24.2  87.7  26.3  30.1  189.0   
1  10.0  42.4   52.3   4.2    5.3  2.84  7.3   25.0  88.2  25.7  20.2  180.0   
2   7.2  30.7   60.7   2.2    4.4  3.97  9.0   30.5  77.0  22.6  29.5  148.0   
3   6.0  30.2   63.5   1.8    3.8  4.22  3.8   32.8  77.9  23.2  29.8  143.0   
4   4.2  39.1   53.7   1.6    2.3  3.93  0.4  316.0  80.6  23.9  29.7  236.0   

    PDW   PCT                      Diagnosis  
0  12.5  0.17  Normocytic hypochromic anemia  
1  12.5  0.16  Normocytic hypochromic anemia  
2  14.3  0.14         Iron deficiency anemia  
3  11.3  0.12         Iron deficiency anemia  
4  12.8  0.22  Normocytic hypochromic anemia  


In [24]:
#encode the diagnosis column
label_encoder = LabelEncoder()
data['Diagnosis'] = label_encoder.fit_transform(data['Diagnosis'])

In [25]:
#separate features and target
X = data.drop('Diagnosis', axis=1)
y = data['Diagnosis']

In [26]:
#standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [27]:
#one-hot encode the target
y_encoded = to_categorical(y)

In [28]:
#split the data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

In [40]:
#build the model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(y_encoded.shape[1], activation='softmax'))

In [41]:
#compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [42]:
#train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 200ms/step - accuracy: 0.1945 - loss: 2.1245 - val_accuracy: 0.4293 - val_loss: 2.0198
Epoch 2/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 133ms/step - accuracy: 0.4691 - loss: 1.8935 - val_accuracy: 0.4341 - val_loss: 1.8435
Epoch 3/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 96ms/step - accuracy: 0.4855 - loss: 1.6130 - val_accuracy: 0.4732 - val_loss: 1.6845
Epoch 4/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 111ms/step - accuracy: 0.5665 - loss: 1.3943 - val_accuracy: 0.5268 - val_loss: 1.5735
Epoch 5/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 84ms/step - accuracy: 0.6008 - loss: 1.3215 - val_accuracy: 0.5317 - val_loss: 1.4637
Epoch 6/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 124ms/step - accuracy: 0.6477 - loss: 1.1214 - val_accuracy: 0.6439 - val_loss: 1.3947
Epoch 7/50
[1m26/26[0m [32

<keras.src.callbacks.history.History at 0x1fecdb04c10>

In [43]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 97ms/step - accuracy: 0.8841 - loss: 0.3449
Test Accuracy: 0.89


In [44]:
#make predictions
y_pred = model.predict(X_test)

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 362ms/step
