# Pre-Processing


In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used by the cells
# below all live under /content/drive/MyDrive.
# NOTE: google.colab is supplied by the Colab runtime, so this cell is Colab-only.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Source folder holding the raw images of one tumor class on the mounted Drive
directory = "/content/drive/MyDrive/Liver Tumor Classification/Cholangiocarcinoma"
# Preprocessed images are written to a "preprocessed" sub-folder of the source folder
preprocessed_directory = os.path.join(directory, "preprocessed")

# exist_ok=True keeps the cell re-runnable and avoids the race between a
# separate existence check and the directory creation
os.makedirs(preprocessed_directory, exist_ok=True)


In [None]:
# Function to perform image preprocessing
def preprocess_image(image_path, save_path, size=(640, 640), blur_kernel=(5, 5)):
    """Normalize, denoise and resize one image, then write the result to disk.

    The pipeline is: min-max normalization to the 0-255 range (8-bit output),
    Gaussian blur, resize to ``size``.

    Args:
        image_path: Path of the image to read.
        save_path: Path the processed image is written to.
        size: Target size handed to ``cv2.resize`` as ``dsize``, i.e.
            ``(width, height)``. Defaults to ``(640, 640)``.
        blur_kernel: Kernel size handed to ``cv2.GaussianBlur``.
            Defaults to ``(5, 5)``.

    Raises:
        FileNotFoundError: If ``image_path`` does not point to a file.
        ValueError: If the file exists but OpenCV cannot decode it.
        OSError: If the processed image cannot be written to ``save_path``.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    # cv2.imread signals failure by returning None rather than raising
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not decode image: {image_path}")

    # Image normalization: stretch intensities to the full 0-255 range
    normalized_image = cv2.normalize(
        image, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U
    )

    # Noise removal (Gaussian blur); sigma 0 lets OpenCV derive it from the kernel size
    blurred_image = cv2.GaussianBlur(normalized_image, blur_kernel, 0)

    # Resize the image
    resized_image = cv2.resize(blurred_image, size)

    # cv2.imwrite reports failure through its return value; surface it instead
    # of silently losing the output file
    if not cv2.imwrite(save_path, resized_image):
        raise OSError(f"Could not write preprocessed image: {save_path}")

In [None]:
def show_images(image_path, save_path):
    """Display an original image next to its preprocessed version.

    Args:
        image_path: Path of the original image.
        save_path: Path of the preprocessed image written earlier.

    Raises:
        FileNotFoundError: If either path does not point to a file.
        ValueError: If either file exists but OpenCV cannot decode it.
    """
    panels = (("Original Image", image_path), ("Preprocessed Image", save_path))

    loaded = []
    for title, path in panels:
        if not os.path.isfile(path):
            raise FileNotFoundError(f"Image not found: {path}")
        # cv2.imread signals failure by returning None rather than raising
        bgr = cv2.imread(path)
        if bgr is None:
            raise ValueError(f"Could not decode image: {path}")
        # OpenCV loads images as BGR while matplotlib expects RGB
        loaded.append((title, cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)))

    # Plot the original and preprocessed images side by side
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    for ax, (title, rgb) in zip(axes, loaded):
        ax.imshow(rgb)
        ax.set_title(title)
        ax.axis('off')

    plt.show()

In [None]:
# Preprocess every JPG located directly inside the source directory.
# Only names ending in ".jpg" are picked up (the match is case-sensitive).
jpg_names = [name for name in os.listdir(directory) if name.endswith(".jpg")]

for filename in jpg_names:
    # The output keeps the input file name, inside the "preprocessed" folder
    image_path = os.path.join(directory, filename)
    save_path = os.path.join(preprocessed_directory, filename)

    preprocess_image(image_path, save_path)

    # Uncomment to inspect the result visually:
    # show_images(image_path, save_path)



# Augmentation
Generate three randomly augmented variants of every preprocessed image, so the augmented set is 3× the size of the preprocessed set.

In [None]:
import os
import glob
from PIL import Image
from torchvision import transforms

import torch

# The torchvision random transforms draw from torch's global RNG; seeding it
# (together with the sorted file order below) makes the cell reproducible
torch.manual_seed(42)

# Class folder on the mounted Drive (named base_dir so the built-in dir() is not shadowed)
base_dir = "/content/drive/MyDrive/Liver Tumor Classification/Cholangiocarcinoma"
# Define the directory containing the original images
image_dir = os.path.join(base_dir, "preprocessed")

# Define the output directory for augmented images
# (the "augmentated" spelling is kept because the splitting step reads from this folder name)
output_dir = os.path.join(base_dir, "augmentated")

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Number of augmented variants written per input image
AUGMENTATIONS_PER_IMAGE = 3

# Define the transformations
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),       # Random horizontal flipping with probability 0.5
    transforms.RandomVerticalFlip(p=0.5),         # Random vertical flipping with probability 0.5
    transforms.RandomResizedCrop(size=(224, 224)), # Random resized crop to 224x224
    transforms.ColorJitter(brightness=0.05),       # Random brightness adjustment (-5%, +5%)
    transforms.ToTensor(),                         # Convert PIL Image to tensor
])

# Apply transformations to each image in the directory.
# sorted(): glob order is filesystem-dependent, and a stable order is needed
# for the seeded random draws to hit the same images on every run.
for image_path in sorted(glob.glob(os.path.join(image_dir, "*.jpg"))):
    # File name without extension; identical for all variants of this image
    filename = os.path.splitext(os.path.basename(image_path))[0]

    # Load the pixels inside the context manager so the file handle is closed
    # promptly; convert() returns an in-memory RGB copy that outlives the handle
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    for i in range(AUGMENTATIONS_PER_IMAGE):
        # Each call draws fresh random parameters, giving a different variant
        augmented_image = transform(image)

        # Save the augmented image as <name>_aug_<i>.jpg
        output_path = os.path.join(output_dir, f"{filename}_aug_{i}.jpg")
        transforms.functional.to_pil_image(augmented_image).save(output_path)



# Splitting Data into Train, Test, Val

In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Project root on the mounted Drive; every path below is derived from it so the
# dataset can be relocated by editing a single line
DATA_ROOT = "/content/drive/MyDrive/Liver Tumor Classification"

# Define the directories for each class (augmented images; the folder is spelled "augmentated" on disk)
class1_dir = f"{DATA_ROOT}/Cholangiocarcinoma/augmentated"
class2_dir = f"{DATA_ROOT}/HCC/augmentated"
class3_dir = f"{DATA_ROOT}/Normal Liver/augmentated"

# Define the output directories for train, test, and val sets
train_dir = f"{DATA_ROOT}/train"
test_dir = f"{DATA_ROOT}/test"
val_dir = f"{DATA_ROOT}/val"

# Function to split data and copy images to respective directories
def split_and_copy_images(src_dir, dst_train_dir, dst_test_dir, dst_val_dir):
    """Split the images of ``src_dir`` 80/10/10 and copy them to train/test/val folders.

    The split is done per *source image*, not per file: files named
    ``<stem>_aug_<i>.<ext>`` belong to the group ``<stem>``, and a whole group
    always lands in a single split. Splitting the augmented files individually
    would let variants of the same original appear in both the training and the
    evaluation sets, which leaks information and inflates the metrics. A file
    whose name has no ``_aug_`` marker forms a group keyed by its own stem.

    Args:
        src_dir: Folder containing the images of one class.
        dst_train_dir: Destination folder for the training split.
        dst_test_dir: Destination folder for the test split.
        dst_val_dir: Destination folder for the validation split.

    Raises:
        ValueError: Propagated from ``train_test_split`` when there are too few
            source images to form the requested splits.
    """
    # Create destination directories if they don't exist
    for dst_dir in (dst_train_dir, dst_test_dir, dst_val_dir):
        os.makedirs(dst_dir, exist_ok=True)

    # Regular files only (a sub-directory would make shutil.copy fail), sorted
    # because os.listdir order is arbitrary and the fixed random_state only
    # yields a reproducible split when the input order is stable
    image_files = sorted(
        name for name in os.listdir(src_dir)
        if os.path.isfile(os.path.join(src_dir, name))
    )

    # Group the files by the image they were derived from
    groups = {}
    for name in image_files:
        stem = os.path.splitext(name)[0]
        source_stem, marker, _ = stem.rpartition("_aug_")
        group_key = source_stem if marker else stem
        groups.setdefault(group_key, []).append(name)
    group_keys = list(groups)

    # Split the groups into train and test-val sets
    train_keys, test_val_keys = train_test_split(group_keys, test_size=0.2, random_state=42)
    # Further split the test_val set into test and val sets
    test_keys, val_keys = train_test_split(test_val_keys, test_size=0.5, random_state=42)

    # Expand the groups back into file names
    train_files = [name for key in train_keys for name in groups[key]]
    test_files = [name for key in test_keys for name in groups[key]]
    val_files = [name for key in val_keys for name in groups[key]]

    print("Total:",len(image_files))
    print("Train:",len(train_files))
    print("Test:",len(test_files))
    print("Val:",len(val_files))
    print("-------------")

    # Copy each split into its destination directory
    for files, dst_dir in (
        (train_files, dst_train_dir),
        (test_files, dst_test_dir),
        (val_files, dst_val_dir),
    ):
        for file in files:
            shutil.copy(os.path.join(src_dir, file), os.path.join(dst_dir, file))

# Split and copy images for each class.
# Each entry pairs a source folder with the class sub-folder name created
# under the train, test and val directories.
class_sources = [
    (class1_dir, "Cholangiocarcinoma"),
    (class2_dir, "HCC"),
    (class3_dir, "Normal"),
]

for class_src, class_name in class_sources:
    split_and_copy_images(
        class_src,
        os.path.join(train_dir, class_name),
        os.path.join(test_dir, class_name),
        os.path.join(val_dir, class_name),
    )

print("Data split into train, test, and val sets.")


Total: 210
Train: 168
Test: 21
Val: 21
-------------
Total: 210
Train: 168
Test: 21
Val: 21
-------------
Total: 210
Train: 168
Test: 21
Val: 21
-------------
Data split into train, test, and val sets.
