Importing libraries

In [64]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

Read Data

In [65]:
# Location of the drug200 dataset. The original local path stays as the
# default so the existing setup keeps working; set the DRUG200_CSV
# environment variable to point at the file on any other machine.
DATA_PATH = os.environ.get(
    "DRUG200_CSV",
    r"E:\my ai projects\NN_classification\drug200.csv",
)
data = pd.read_csv(DATA_PATH)

# Feature columns fed to the network; 'Drug' is the label to predict.
X_columns = ['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K']
X_data = data[X_columns]
y_data = data['Drug']

One-hot encoding and splitting into train/test sets

In [66]:
# One-hot encode the categorical features straight from the raw frame, so this
# cell can be re-run without failing on columns that were already encoded.
# drop_first=True drops one level per categorical feature.
X_data = pd.get_dummies(data[X_columns], columns=['Sex', 'BP', 'Cholesterol'], drop_first=True)

# The label is a Series, so get_dummies encodes it directly; its `columns`
# argument only applies to DataFrames and is therefore omitted. All levels
# are kept, giving one indicator column per drug.
y_data = pd.get_dummies(data['Drug'], drop_first=False)

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2, random_state=42)
print(X_train)
print(y_train)

     Age  Na_to_K  Sex_M  BP_LOW  BP_NORMAL  Cholesterol_NORMAL
79    32   10.840  False    True      False                True
197   52    9.894   True   False       True               False
38    39    9.709  False   False       True                True
24    33   33.486  False    True      False               False
122   34   22.456   True   False       True               False
..   ...      ...    ...     ...        ...                 ...
106   22   11.953   True   False       True               False
14    50   12.703  False   False       True               False
92    29   29.450  False   False      False               False
179   67   15.891  False   False       True               False
102   28   13.127  False    True      False               False

[160 rows x 6 columns]
     DrugY  drugA  drugB  drugC  drugX
79   False  False  False  False   True
197  False  False  False  False   True
38   False  False  False  False   True
24    True  False  False  False  False
122   True  F

Standardizing the numeric features (Age, Na_to_K)

In [67]:
# Only the continuous features are standardized; the one-hot columns are left untouched.
numeric_cols = ['Age', 'Na_to_K']

# Learn mean/variance from the training split only, then apply the same
# statistics to both splits so no information from the test set leaks in.
scaler = StandardScaler()
scaler.fit(X_train[numeric_cols])
X_train[numeric_cols] = scaler.transform(X_train[numeric_cols])
X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])

Convert to numpy arrays

In [68]:
# Convert every split to a float ndarray for Keras (the boolean one-hot
# columns become 0.0 / 1.0). np.asarray accepts DataFrames and ndarrays alike,
# so re-running this cell is harmless; calling .to_numpy() a second time
# would fail because ndarrays have no such method.
X_train, X_test, y_train, y_test = (
    np.asarray(split, dtype=float)
    for split in (X_train, X_test, y_train, y_test)
)

Building, training and evaluating the model

In [82]:
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling (and hence the reported metrics) can be reproduced on the
# same setup.
tf.keras.utils.set_random_seed(42)

# Take the layer sizes from the data instead of hard-coding them, so the
# model keeps matching the inputs if the encoded feature set or the number
# of drug classes changes.
n_features = X_train.shape[1]
n_classes = y_train.shape[1]

model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    Dense(units=50, activation='relu'),
    Dense(units=20, activation='relu'),
    # Linear output = raw logits; the softmax is applied inside the loss
    # (from_logits=True below).
    Dense(units=n_classes, activation='linear')
])

model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(0.001),
    metrics=['accuracy'],
)
model.fit(X_train, y_train, epochs=40, batch_size=16)

# Report performance on the held-out split.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"loss: {loss}, accuracy: {accuracy}")

Epoch 1/40
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.1609 - loss: 1.6886  
Epoch 2/40
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2285 - loss: 1.5557 
Epoch 3/40
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4529 - loss: 1.4556 
Epoch 4/40
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6074 - loss: 1.3598 
Epoch 5/40
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6679 - loss: 1.2879 
Epoch 6/40
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6955 - loss: 1.1957 
Epoch 7/40
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7176 - loss: 1.1079 
Epoch 8/40
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7283 - loss: 1.0487 
Epoch 9/40
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━

In [85]:
# Sanity check on one example: take the first training row as a batch of one.
sample = X_train[:1]

# The network outputs logits, so apply softmax to get class probabilities
# before picking the most likely class index.
logits = model.predict(sample)
prediction = tf.nn.softmax(logits)
predicted_class = np.argmax(prediction, axis=1)
print(predicted_class)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[4]
