# Import library (Basic)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv("diabetes.csv")

In [3]:
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
data.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


# Correlation

In [6]:
data.corr()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
Pregnancies,1.0,0.129459,0.141282,-0.081672,-0.073535,0.017683,-0.033523,0.544341,0.221898
Glucose,0.129459,1.0,0.15259,0.057328,0.331357,0.221071,0.137337,0.263514,0.466581
BloodPressure,0.141282,0.15259,1.0,0.207371,0.088933,0.281805,0.041265,0.239528,0.065068
SkinThickness,-0.081672,0.057328,0.207371,1.0,0.436783,0.392573,0.183928,-0.11397,0.074752
Insulin,-0.073535,0.331357,0.088933,0.436783,1.0,0.197859,0.185071,-0.042163,0.130548
BMI,0.017683,0.221071,0.281805,0.392573,0.197859,1.0,0.140647,0.036242,0.292695
DiabetesPedigreeFunction,-0.033523,0.137337,0.041265,0.183928,0.185071,0.140647,1.0,0.033561,0.173844
Age,0.544341,0.263514,0.239528,-0.11397,-0.042163,0.036242,0.033561,1.0,0.238356
Outcome,0.221898,0.466581,0.065068,0.074752,0.130548,0.292695,0.173844,0.238356,1.0


# Data Preprocessing

In [7]:
# Separate features from the label: the final column is the target and
# every column before it is a feature.
*feature_names, target_name = data.columns
X = data[feature_names]
y = data[target_name]

In [8]:
X

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63
764,2,122,70,27,0,36.8,0.340,27
765,5,121,72,23,112,26.2,0.245,30
766,1,126,60,0,0,30.1,0.349,47


In [9]:
y

0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Outcome, Length: 768, dtype: int64

## Train - Test Split

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
# Hold out 20% of the rows as a test set; random_state is fixed so the split
# is the same on every run.
# NOTE(review): no `stratify=` argument is passed, so the class proportions
# can differ between the train and test splits — consider stratify=y.
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=10)

In [12]:
from collections import Counter

In [13]:
print(f'Banyak train: {Counter(y_train)}')
print(f'Banyak test: {Counter(y_test)}')

Banyak train: Counter({0: 405, 1: 209})
Banyak test: Counter({0: 95, 1: 59})


## Scaling `[0,1]`

In [14]:
from sklearn.preprocessing import MinMaxScaler

In [15]:
# Fit the min-max scaler on the training split only, so the scaling
# parameters are not derived from the test rows.
sc = MinMaxScaler()
sc.fit(X_train)

MinMaxScaler()

In [16]:
# Apply the already-fitted scaler to both splits.
# NOTE(review): X_train / X_test are rebound to the scaled arrays, so this
# cell is not idempotent — running it again without re-running the split
# scales the already-scaled values a second time.
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [17]:
(X_train, X_test)

(array([[0.        , 0.81407035, 0.62295082, ..., 0.7928465 , 0.29077711,
         0.06666667],
        [0.11764706, 0.43718593, 0.        , ..., 0.43070045, 0.29675491,
         0.06666667],
        [0.        , 0.68844221, 0.55737705, ..., 0.36959762, 0.02775406,
         0.        ],
        ...,
        [0.17647059, 0.58291457, 0.60655738, ..., 0.39195231, 0.01238258,
         0.05      ],
        [0.05882353, 0.44221106, 0.24590164, ..., 0.81967213, 0.17847993,
         0.08333333],
        [0.29411765, 0.48241206, 0.60655738, ..., 0.50074516, 0.39239966,
         0.36666667]]),
 array([[0.23529412, 0.77386935, 0.59016393, ..., 0.46646796, 0.11101623,
         0.26666667],
        [0.11764706, 0.56281407, 0.70491803, ..., 0.57228018, 0.07173356,
         0.11666667],
        [0.05882353, 0.67839196, 0.44262295, ..., 0.39791356, 0.26003416,
         0.68333333],
        ...,
        [0.47058824, 0.42713568, 0.45081967, ..., 0.36363636, 0.02476516,
         0.35      ],
        [0.2

# Deep Neural Network

In [18]:
X.shape

(768, 8)

In [19]:
from keras.models import Sequential
from keras.layers import Dense

ModuleNotFoundError: No module named 'keras'

In [None]:
# Fully connected classifier: 8 inputs -> 15 -> 40 -> 20 ReLU units, ending
# in a 2-unit softmax layer. The layers are handed to the constructor as a
# list instead of being appended one by one.
model = Sequential([
    Dense(15, input_dim=8, activation='relu'),
    Dense(40, activation='relu'),
    Dense(20, activation='relu'),
    Dense(2, activation='softmax'),
])

In [None]:
model.summary()

In [None]:
from keras.callbacks import ModelCheckpoint
import os

In [None]:
# File-name template for the checkpoints; the `{epoch:04d}` placeholder is
# filled in with the epoch number each time a checkpoint is written.
checkpoint_path = "./Hasil Model/cp-{epoch:04d}.hdf5"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Create the target folder up front so the first save cannot fail because
# the directory is missing (exist_ok makes re-running this cell harmless).
os.makedirs(checkpoint_dir, exist_ok=True)

# Save only the model weights, once at the end of every epoch.
cp_callback = ModelCheckpoint(
    checkpoint_path, verbose = 1, save_weights_only = True,
    save_freq = 'epoch'
)

In [None]:
model.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [None]:
# Train for 50 epochs in mini-batches of 16, holding out 15% of the training
# data for validation and passing the checkpoint callback to `fit`.
history = model.fit(X_train,y_train,validation_split=0.15, epochs = 50, batch_size = 16, verbose=1, callbacks = [cp_callback])

In [None]:
# Training vs. validation accuracy, one point per epoch.
plt.plot(history.history['accuracy'], label = 'train accuracy')
plt.plot(history.history['val_accuracy'], label = 'val accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')  # axis label was previously misspelled
plt.xlabel('Epoch')
plt.legend()
plt.show()

In [None]:
# Training vs. validation loss, one point per epoch. Each (history key,
# legend label) pair is drawn as its own line.
loss_curves = (('loss', 'train loss'), ('val_loss', 'val loss'))
for history_key, curve_label in loss_curves:
    plt.plot(history.history[history_key], label = curve_label)
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Predicted class for each test row = index of the largest model output.
class_scores = model.predict(X_test)
pred = np.argmax(class_scores, axis=1)
pred

In [None]:
from sklearn.metrics import classification_report

In [None]:
print(classification_report(y_test,pred))

# Covid-19 Image Classification Case

In [None]:
import cv2

In [None]:
# Training image folders, one per class (relative paths).
train_covid = 'Covid19-dataset/train/Covid'
train_normal = 'Covid19-dataset/train/Normal'
train_pneumonia = 'Covid19-dataset/train/Viral Pneumonia'

# Matching test image folders for the same three classes.
test_covid = 'Covid19-dataset/test/Covid'
test_normal = 'Covid19-dataset/test/Normal'
test_pneumonia = 'Covid19-dataset/test/Viral Pneumonia'

## Load Train Data

In [None]:
# Load every readable image in the Covid training folder, resized to 50x50
# and divided by 255.
covid_train = []

# os.listdir returns names in arbitrary order; sort for a reproducible order.
for file_name in sorted(os.listdir(train_covid)):
    img = cv2.imread(os.path.join(train_covid, file_name))
    if img is None:
        # cv2.imread returns None for anything it cannot decode (hidden or
        # non-image files); skip it instead of crashing in cv2.resize.
        continue
    img = cv2.resize(img, (50,50), interpolation = cv2.INTER_AREA)
    img = img / 255
    covid_train.append(img)

# Covid is class 1. The label count comes from the images actually loaded so
# the two always line up; labels are stored as integers.
covid_label = np.ones(len(covid_train), dtype=int)

In [None]:
# Load every readable image in the Normal training folder, resized to 50x50
# and divided by 255.
normal_train = []

# os.listdir returns names in arbitrary order; sort for a reproducible order.
for file_name in sorted(os.listdir(train_normal)):
    img = cv2.imread(os.path.join(train_normal, file_name))
    if img is None:
        # cv2.imread returns None for anything it cannot decode (hidden or
        # non-image files); skip it instead of crashing in cv2.resize.
        continue
    img = cv2.resize(img, (50,50), interpolation = cv2.INTER_AREA)
    img = img / 255
    normal_train.append(img)

# Normal is class 0. The label count comes from the images actually loaded so
# the two always line up; labels are stored as integers.
normal_label = np.zeros(len(normal_train), dtype=int)

In [None]:
# Load every readable image in the Viral Pneumonia training folder, resized
# to 50x50 and divided by 255.
pneumonia_train = []

# os.listdir returns names in arbitrary order; sort for a reproducible order.
for file_name in sorted(os.listdir(train_pneumonia)):
    img = cv2.imread(os.path.join(train_pneumonia, file_name))
    if img is None:
        # cv2.imread returns None for anything it cannot decode (hidden or
        # non-image files); skip it instead of crashing in cv2.resize.
        continue
    img = cv2.resize(img, (50,50), interpolation = cv2.INTER_AREA)
    img = img / 255
    pneumonia_train.append(img)

# Viral Pneumonia is class 2. The label count comes from the images actually
# loaded so the two always line up.
pneumonia_label = np.full(len(pneumonia_train), 2)

## Load Test Data

In [None]:
# Load every readable image in the Covid test folder, resized to 50x50 and
# divided by 255.
covid_test = []

# os.listdir returns names in arbitrary order; sort for a reproducible order.
for file_name in sorted(os.listdir(test_covid)):
    img = cv2.imread(os.path.join(test_covid, file_name))
    if img is None:
        # cv2.imread returns None for anything it cannot decode (hidden or
        # non-image files); skip it instead of crashing in cv2.resize.
        continue
    img = cv2.resize(img, (50,50), interpolation = cv2.INTER_AREA)
    img = img / 255
    covid_test.append(img)

# Covid is class 1. The label count comes from the images actually loaded so
# the two always line up; labels are stored as integers.
covid_label_test = np.ones(len(covid_test), dtype=int)

In [None]:
# Load every readable image in the Normal test folder, resized to 50x50 and
# divided by 255.
normal_test = []

# os.listdir returns names in arbitrary order; sort for a reproducible order.
for file_name in sorted(os.listdir(test_normal)):
    img = cv2.imread(os.path.join(test_normal, file_name))
    if img is None:
        # cv2.imread returns None for anything it cannot decode (hidden or
        # non-image files); skip it instead of crashing in cv2.resize.
        continue
    img = cv2.resize(img, (50,50), interpolation = cv2.INTER_AREA)
    img = img / 255
    normal_test.append(img)

# Normal is class 0. The label count comes from the images actually loaded so
# the two always line up; labels are stored as integers.
normal_label_test = np.zeros(len(normal_test), dtype=int)

In [None]:
# Load every readable image in the Viral Pneumonia test folder, resized to
# 50x50 and divided by 255.
pneumonia_test = []

# os.listdir returns names in arbitrary order; sort for a reproducible order.
for file_name in sorted(os.listdir(test_pneumonia)):
    img = cv2.imread(os.path.join(test_pneumonia, file_name))
    if img is None:
        # cv2.imread returns None for anything it cannot decode (hidden or
        # non-image files); skip it instead of crashing in cv2.resize.
        continue
    img = cv2.resize(img, (50,50), interpolation = cv2.INTER_AREA)
    img = img / 255
    pneumonia_test.append(img)

# Viral Pneumonia is class 2. The label count comes from the images actually
# loaded so the two always line up.
pneumonia_label_test = np.full(len(pneumonia_test), 2)

In [None]:
# cv2.imread yields channels in BGR order while matplotlib interprets the
# last axis as RGB, so reverse the channel axis before showing the first
# Covid training image.
plt.imshow(covid_train[0][..., ::-1])

# Construct Data

In [None]:
# Stack the per-class image lists and label vectors into single arrays.
X_train = np.concatenate((covid_train, normal_train, pneumonia_train), axis = 0)
y_train = np.concatenate((covid_label, normal_label, pneumonia_label), axis = 0)

# The stacked arrays are ordered class by class. Keras' `validation_split`
# takes the *last* fraction of the samples before any shuffling, so without
# this step the validation set would contain only the last class
# (Viral Pneumonia). Shuffle images and labels together with a fixed seed so
# the order is reproducible.
shuffle_order = np.random.default_rng(10).permutation(len(y_train))
X_train = X_train[shuffle_order]
y_train = y_train[shuffle_order]

In [None]:
len(y_train)

In [None]:
# Stack the three per-class test image lists and their label vectors, in the
# order Covid, Normal, Viral Pneumonia, into single arrays.
test_image_groups = [covid_test, normal_test, pneumonia_test]
test_label_groups = [covid_label_test, normal_label_test, pneumonia_label_test]
X_test = np.concatenate(test_image_groups, axis = 0)
y_test = np.concatenate(test_label_groups, axis = 0)

In [None]:
len(y_test)

In [None]:
X_train.shape

In [None]:
X_test.shape

# CNN

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPool2D

In [None]:
# Small CNN for 50x50 3-channel images: two convolution + max-pooling
# stages, then a flattened dense head ending in a 3-unit softmax layer.
# The layers are handed to the constructor as a list.
models = Sequential([
    Conv2D(filters = 16, kernel_size = (4,4), activation = 'relu', input_shape = (50,50,3)),
    MaxPool2D(pool_size = (2,2)),
    Conv2D(filters = 16, kernel_size = (3,3), activation = 'relu'),
    MaxPool2D(pool_size = (2,2), strides = (2,2)),
    Flatten(),
    Dense(256, activation = 'relu'),
    Dense(3, activation = 'softmax'),
])

In [None]:
models.summary()

In [None]:
models.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

In [None]:
# Train the CNN for 50 epochs in mini-batches of 10, holding out 10% of the
# training arrays for validation.
# NOTE(review): Keras takes the validation_split samples from the end of the
# arrays as passed in, before shuffling — confirm X_train / y_train are not
# ordered by class at this point.
history1 = models.fit(X_train, y_train, epochs = 50, validation_split = 0.1, batch_size = 10)

In [None]:
# Predicted class for each test image = index of the largest model output.
image_class_scores = models.predict(X_test)
pred1 = np.argmax(image_class_scores, axis = 1)
pred1

In [None]:
print(classification_report(y_test, pred1))