In [16]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# dataset and output folders used below are reachable through ordinary paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os

In [3]:
# Root folder of the chest X-ray dataset on the mounted Drive; the train/,
# test/ and val/ split folders are resolved relative to this path.
base_dir = '/content/drive/MyDrive/Pembelajaran Mesin/Tugas Kelompok Praktikum/Dataset/chest_xray'

In [4]:
# Split folders directly under the dataset root.
train_dir = f'{base_dir}/train'
test_dir = f'{base_dir}/test'
val_dir = f'{base_dir}/val'

# Per-class folders (NORMAL / PNEUMONIA) inside each split.
norm_train_dir = f'{train_dir}/NORMAL'
pneu_train_dir = f'{train_dir}/PNEUMONIA'

norm_test_dir = f'{test_dir}/NORMAL'
pneu_test_dir = f'{test_dir}/PNEUMONIA'

norm_val_dir = f'{val_dir}/NORMAL'
pneu_val_dir = f'{val_dir}/PNEUMONIA'

In [5]:
# Report the number of directory entries in every class folder, grouped by
# split, with a dashed rule between consecutive groups.
file_count_report = (
    (("Total Training NORMAL:", norm_train_dir),
     ("Total Training PNEUMONIA:", pneu_train_dir)),
    (("Total Testing NORMAL:", norm_test_dir),
     ("Total Testing PNEUMONIA:", pneu_test_dir)),
    (("Total validation NORMAL:", norm_val_dir),
     ("Total validation PNEUMONIA:", pneu_val_dir)),
)
for group_position, caption_and_folder in enumerate(file_count_report):
    if group_position > 0:
        print('-'*30)
    for caption, folder in caption_and_folder:
        print(caption, len(os.listdir(folder)))

Total Training NORMAL: 1341
Total Training PNEUMONIA: 3875
------------------------------
Total Testing NORMAL: 234
Total Testing PNEUMONIA: 390
------------------------------
Total validation NORMAL: 8
Total validation PNEUMONIA: 8


In [6]:
# Show the first three entries of each training class folder.
for class_folder in (norm_train_dir, pneu_train_dir):
    print(os.listdir(class_folder)[:3])

['IM-0524-0001.jpeg', 'IM-0525-0001-0001.jpeg', 'IM-0525-0001-0002.jpeg']
['person540_bacteria_2271.jpeg', 'person540_bacteria_2272.jpeg', 'person540_bacteria_2273.jpeg']


In [7]:
import cv2
import numpy as np

In [8]:
def gather_data(data_dir, image_size=(250, 250), extensions=('.jpeg',)):
    """Load every image under ``data_dir`` together with its class label.

    The directory tree is walked recursively. Each file whose extension is
    listed in ``extensions`` is read with ``cv2.imread``, resized to
    ``image_size`` with ``cv2.resize`` and labelled with the name of the
    folder that directly contains it.

    Parameters
    ----------
    data_dir : str
        Root directory to scan.
    image_size : tuple of int, optional
        Target size handed to ``cv2.resize`` as its ``dsize`` argument.
        Defaults to ``(250, 250)``.
    extensions : str or tuple of str, optional
        File extension(s), including the leading dot, that should be loaded.
        Matching is case-insensitive. Defaults to ``('.jpeg',)``.

    Returns
    -------
    dir_data : numpy.ndarray
        Array built from the resized images, in traversal order.
    dir_label : numpy.ndarray
        Array of containing-folder names, aligned index-for-index with
        ``dir_data``.

    Raises
    ------
    ValueError
        If a file with a matching extension cannot be decoded as an image.
    """
    # Accept a single extension given as a plain string.
    if isinstance(extensions, str):
        extensions = (extensions,)
    wanted_extensions = tuple(ext.lower() for ext in extensions)
    target_size = tuple(image_size)

    dir_data = []
    dir_label = []

    for dirpath, dirnames, filenames in os.walk(data_dir):
        # os.walk yields entries in filesystem order, which is not guaranteed
        # to be stable. Sorting dirnames in place (top-down walk) and the file
        # names makes the sample order reproducible between runs.
        dirnames.sort()
        for file in sorted(filenames):
            # Compare the real extension rather than searching for a
            # substring, so names such as "scan.jpeg.txt" are not picked up.
            if os.path.splitext(file)[1].lower() not in wanted_extensions:
                continue

            imagePath = os.path.join(dirpath, file)
            image = cv2.imread(imagePath)
            if image is None:
                # cv2.imread signals an unreadable/corrupt file by returning
                # None; fail here with the offending path instead of letting
                # cv2.resize raise an opaque error.
                raise ValueError("Could not read image file: " + imagePath)

            dir_data.append(cv2.resize(image, target_size))
            # The label is the name of the folder that holds the file.
            dir_label.append(os.path.basename(os.path.dirname(imagePath)))

    return np.array(dir_data), np.array(dir_label)

In [9]:
# Training split: images and their folder-name labels.
train_data, train_label = gather_data(data_dir=train_dir)

# Test split.
test_data, test_label = gather_data(data_dir=test_dir)

# Validation split.
val_data, val_label = gather_data(data_dir=val_dir)

In [10]:
# Print the shape of the image and label arrays of every split, with a dashed
# rule between consecutive splits.
shape_report = (
    (("Train Data = ", train_data), ("Train Label = ", train_label)),
    (("Test Data = ", test_data), ("Test Label = ", test_label)),
    (("Val Data = ", val_data), ("Val Label = ", val_label)),
)
for group_position, caption_and_array in enumerate(shape_report):
    if group_position > 0:
        print('-'*30)
    for caption, array in caption_and_array:
        print(caption, array.shape)

Train Data =  (5216, 250, 250, 3)
Train Label =  (5216,)
------------------------------
Test Data =  (624, 250, 250, 3)
Test Label =  (624,)
------------------------------
Val Data =  (16, 250, 250, 3)
Val Label =  (16,)


In [11]:
# Dataset normalisation: the first pixel of the first training image is
# printed before and after scaling as a quick visual check.
def _scale_pixels(pixels):
    """Return ``pixels`` cast to float32 and divided by 255.0."""
    return pixels.astype('float32') / 255.0


print("Data sebelum di-normalisasi ", train_data[0][0][0])

x_train = _scale_pixels(train_data)
x_test = _scale_pixels(test_data)
x_val = _scale_pixels(val_data)
print("Data setelah di-normalisasi ", x_train[0][0][0])

Data sebelum di-normalisasi  [0 0 0]
Data setelah di-normalisasi  [0. 0. 0.]


In [12]:
# Inspect the ten labels around index 1341, where the recorded output switches
# from 'NORMAL' to 'PNEUMONIA' (the training set holds 1341 NORMAL files).
train_label[1336:1346]

array(['NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'NORMAL', 'PNEUMONIA',
       'PNEUMONIA', 'PNEUMONIA', 'PNEUMONIA', 'PNEUMONIA'], dtype='<U9')

In [18]:
# Encode the string class labels as integers.
from sklearn.preprocessing import LabelEncoder

print("Label sebelum di-encoder ", train_label[1336:1346])

# Fit the encoder once, on the training labels only, and reuse that mapping for
# the other splits. Re-fitting on each split would derive the mapping from
# whatever classes that split happens to contain, so the same class name could
# receive a different integer in different splits. `transform` raises a
# ValueError if a split contains a label that was not seen during fitting.
lb = LabelEncoder()
y_train = lb.fit_transform(train_label)
y_test = lb.transform(test_label)
y_val = lb.transform(val_label)

print("Label setelah di-encoder ", y_train[1336:1346])

Label sebelum di-encoder  ['NORMAL' 'NORMAL' 'NORMAL' 'NORMAL' 'NORMAL' 'PNEUMONIA' 'PNEUMONIA'
 'PNEUMONIA' 'PNEUMONIA' 'PNEUMONIA']
Label setelah di-encoder  [0 0 0 0 0 1 1 1 1 1]


In [24]:
# Persist every preprocessed array as <name>.npy inside the output folder.
data_label_save_dir = '/content/drive/MyDrive/Pembelajaran Mesin/Tugas Kelompok Praktikum/Hasil Preprocessing - Skenario Praktikum Modul 2'
preprocessed_data = [x_train, y_train, x_test, y_test, x_val, y_val]
preprocessed_file_name = ['x_train', 'y_train', 'x_test', 'y_test', 'x_val', 'y_val']

# np.save does not create missing parent folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs(data_label_save_dir, exist_ok=True)

# Pair each array with its file name directly instead of indexing two
# parallel lists by position.
for file_name, array in zip(preprocessed_file_name, preprocessed_data):
    np.save(os.path.join(data_label_save_dir, file_name + '.npy'), array)