## **Notebook Setup**

In [9]:
import sys
from pathlib import Path

# Locate the project root: the nearest directory, starting at the current
# working directory and walking upwards, that contains a "src" folder.
cwd = Path().resolve()

for candidate in (cwd, *cwd.parents):
    if (candidate / "src").exists():
        ROOT_DIR = candidate
        break
else:
    # No ancestor qualifies. Fail here with a clear message rather than with a
    # NameError below, or by silently reusing a stale ROOT_DIR that a previous
    # run left in the kernel.
    raise FileNotFoundError(
        f"No 'src' directory found in {cwd} or any of its parent directories."
    )

# Put the project root first on sys.path so the `src.*` packages are importable.
sys.path.insert(0, str(ROOT_DIR))

print("ROOT_DIR:", ROOT_DIR)

ROOT_DIR: D:\Klasifikasi_Pneumonia


## **Import Required Libraries**

In [10]:
# Standard library
import os
import warnings

# Third-party
import pandas as pd

# Project-local (importable once ROOT_DIR has been inserted into sys.path)
from src.data.resize_image import resize_image
from src.config.load_config import load_config
from src.data.build_image_to_csv import build_image_to_csv

# Project configuration; later cells read their paths from config["paths"][...].
config = load_config()

# NOTE(review): this silences every warning category for the rest of the
# session, including deprecation and data-quality warnings. Consider narrowing
# it to the specific category/module that is noisy.
warnings.filterwarnings("ignore")

## **Data Preparation**

In [11]:
# Folder with the raw images, taken from config["paths"]["raw_data"].
data_dir = ROOT_DIR.joinpath(config["paths"]["raw_data"])
# Destination folder for the generated CSV, passed as a forward-slash string.
output_dir = (ROOT_DIR / "data" / "splits").as_posix()
# Third argument of build_image_to_csv; the logged run saved chest_xray_raw.csv.
namespace_df = "chest_xray_raw"

build_image_to_csv(data_dir, output_dir, namespace_df)

[INFO] Starting process from: D:\Klasifikasi_Pneumonia\data\raw\chest_xray_raw


Processing Normal: 100%|██████████| 10671/10671 [02:32<00:00, 70.00img/s] 
Processing Pneumonia: 100%|██████████| 10097/10097 [01:51<00:00, 90.25img/s] 

[SUCCESS] CSV successfully saved at: D:\Klasifikasi_Pneumonia\data\splits\chest_xray_raw.csv
[INFO] Total Images: 20768
[INFO] Class Distribution:
label
Normal       10671
Pneumonia    10097
Name: count, dtype: int64





In [12]:
# CSV describing the raw images, located via config["paths"]["dataset_csv_raw"].
df_path = ROOT_DIR.joinpath(config["paths"]["dataset_csv_raw"])
df = pd.read_csv(df_path)
# Bare last expression: rendered by the notebook as a (truncated) table.
df

Unnamed: 0,img_path,height,width,image_size_kb,label
0,D:/Klasifikasi_Pneumonia/data/raw/chest_xray_r...,1279,1954,503.665,Pneumonia
1,D:/Klasifikasi_Pneumonia/data/raw/chest_xray_r...,560,824,40.560,Pneumonia
2,D:/Klasifikasi_Pneumonia/data/raw/chest_xray_r...,1024,1024,123.627,Normal
3,D:/Klasifikasi_Pneumonia/data/raw/chest_xray_r...,1024,1024,137.751,Normal
4,D:/Klasifikasi_Pneumonia/data/raw/chest_xray_r...,256,256,14.295,Normal
...,...,...,...,...,...
20763,D:/Klasifikasi_Pneumonia/data/raw/chest_xray_r...,1080,1376,160.588,Pneumonia
20764,D:/Klasifikasi_Pneumonia/data/raw/chest_xray_r...,1136,1632,127.777,Pneumonia
20765,D:/Klasifikasi_Pneumonia/data/raw/chest_xray_r...,512,512,48.542,Normal
20766,D:/Klasifikasi_Pneumonia/data/raw/chest_xray_r...,512,512,45.410,Normal


In [13]:
# Destination folder for the resized images, as a forward-slash string.
output_dir_resized = (
    ROOT_DIR / "data" / "processed" / "chest_xray_processed"
).as_posix()

# Input is the raw-image CSV path (df_path); output goes to the folder above.
resize_image(input_csv_path=df_path, output_dir=output_dir_resized)

[INFO] Starting process from: D:\Klasifikasi_Pneumonia\data\splits\chest_xray_raw.csv
[INFO] Starting resizing process to size (224x224)...


Resizing Images: 100%|██████████| 20768/20768 [03:05<00:00, 112.09img/s]

[SUCCESS] Images successfully resized and saved at: D:\Klasifikasi_Pneumonia\data\processed\chest_xray_processed





In [14]:
# Folder with the processed images, taken from config["paths"]["processed_data"].
data_dir_resized = ROOT_DIR.joinpath(config["paths"]["processed_data"])
# NOTE(review): this rebinds output_dir_resized, which the resize cell used for
# the processed-image folder; from here on it names the CSV "splits" folder.
# A distinct name would be clearer - confirm no later cell relies on it first.
output_dir_resized = (ROOT_DIR / "data" / "splits").as_posix()
# Third argument of build_image_to_csv; the logged run saved chest_xray_dataset.csv.
namespace_df_resized = "chest_xray_dataset"

build_image_to_csv(data_dir_resized, output_dir_resized, namespace_df_resized)

[INFO] Starting process from: D:\Klasifikasi_Pneumonia\data\processed\chest_xray_processed


Processing Normal: 100%|██████████| 10671/10671 [02:19<00:00, 76.55img/s]
Processing Pneumonia: 100%|██████████| 10097/10097 [02:11<00:00, 77.02img/s]

[SUCCESS] CSV successfully saved at: D:\Klasifikasi_Pneumonia\data\splits\chest_xray_dataset.csv
[INFO] Total Images: 20768
[INFO] Class Distribution:
label
Normal       10671
Pneumonia    10097
Name: count, dtype: int64





In [17]:
# CSV describing the processed images, located via config["paths"]["dataset_csv"].
df_path_resized = ROOT_DIR.joinpath(config["paths"]["dataset_csv"])
df_resized = pd.read_csv(df_path_resized)
# Preview only the first rows.
df_resized.head()

Unnamed: 0,img_path,height,width,image_size_kb,label
0,D:/Klasifikasi_Pneumonia/data/processed/chest_...,224,224,17.817,Pneumonia
1,D:/Klasifikasi_Pneumonia/data/processed/chest_...,224,224,16.907,Pneumonia
2,D:/Klasifikasi_Pneumonia/data/processed/chest_...,224,224,11.37,Normal
3,D:/Klasifikasi_Pneumonia/data/processed/chest_...,224,224,13.966,Normal
4,D:/Klasifikasi_Pneumonia/data/processed/chest_...,224,224,18.032,Normal
