# Image pre-processing

In [1]:
import os
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Source directories holding the original images (train/test x non-bio/bio)
path_orig_train_non = "./original_images/train/non-bio/"
path_orig_train_bio = "./original_images/train/bio/"
path_orig_test_non = "./original_images/test/non-bio/"
path_orig_test_bio = "./original_images/test/bio/"

# Destination directories that will receive the pre-processed images
path_prep_train_non = "./preprocessed_images/train/non-bio/"
path_prep_train_bio = "./preprocessed_images/train/bio/"
path_prep_test_non = "./preprocessed_images/test/non-bio/"
path_prep_test_bio = "./preprocessed_images/test/bio/"

# Make sure every destination directory exists before anything is written to it
for _prep_dir in (
    "./preprocessed_images/train/non-bio",
    "./preprocessed_images/train/bio",
    "./preprocessed_images/test/non-bio",
    "./preprocessed_images/test/bio",
):
    os.makedirs(_prep_dir, exist_ok=True)

In [3]:
def img_preprocessing(img_path):
    """Load an image file and reduce it to a Canny edge map.

    Steps, in order: read with OpenCV, convert BGR to grayscale, min-max
    normalise to the 0-255 range, morphological opening, CLAHE contrast
    equalisation, 9x9 Gaussian blur, Canny edge detection.

    Parameters
    ----------
    img_path : str
        Path of the image file to process.

    Returns
    -------
    numpy.ndarray
        The edge image returned by ``cv.Canny``.

    Raises
    ------
    FileNotFoundError
        If ``img_path`` does not exist.
    ValueError
        If the path exists but OpenCV cannot decode it as an image.
    """
    # cv.imread does not raise on failure; it returns None, which would only
    # surface later as an opaque OpenCV assertion inside cvtColor.
    if not os.path.exists(img_path):
        raise FileNotFoundError(f"Image file not found: {img_path}")

    # Read the image
    img = cv.imread(img_path)
    if img is None:
        raise ValueError(f"File could not be read as an image: {img_path}")

    # Convert the image to grayscale
    gray_img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

    # Stretch the intensities so they span the full 0-255 range
    norm_img = cv.normalize(gray_img, None, 0, 255, norm_type=cv.NORM_MINMAX, dtype=cv.CV_8UC1)

    # Apply opening
    # NOTE(review): with a 1x1 structuring element the opening should leave the
    # image unchanged; kept as-is so the produced output does not change.
    kernel = np.ones((1, 1), np.uint8)
    open_img = cv.morphologyEx(norm_img, cv.MORPH_OPEN, kernel)

    # Apply CLAHE (local contrast equalisation)
    clahe = cv.createCLAHE(clipLimit=5.0, tileGridSize=(10, 10))
    clah_img = clahe.apply(open_img)

    # Apply Gaussian blurring to suppress noise before edge detection
    blur_img = cv.GaussianBlur(clah_img, (9, 9), 0)

    # Apply Canny Edge Detection; the median of the blurred image is used for
    # both hysteresis thresholds, so it is computed only once.
    median_level = np.percentile(blur_img, 50)
    cann_img = cv.Canny(blur_img, median_level, median_level)

    return cann_img

## Pre-processing the Training Data (Images)

In [4]:
# Empty frame that the cells below fill with one row per pre-processed
# training image: the image array plus its class label
df_train = pd.DataFrame(
    columns=["img_data", "label"],
)

### Non-biodegradable

In [5]:
# Pre-process every non-biodegradable training image, write the result to the
# pre-processed directory and record it in df_train with label 1.
# The listing is sorted because os.listdir returns entries in arbitrary order,
# and the single-row frames are concatenated once after the loop so df_train
# is not re-copied on every iteration.
new_rows = []
for file in sorted(os.listdir(path_orig_train_non)):
    img_path = os.path.join(path_orig_train_non, file)
    img = img_preprocessing(img_path)
    cv.imwrite(os.path.join(path_prep_train_non, file), img)
    new_rows.append(pd.DataFrame({'img_data' : [img], 'label' : [1]}))
df_train = pd.concat([df_train, *new_rows], ignore_index=True)

In [6]:
# Show df_train; when the cells are run in order it holds only the
# label-1 (non-biodegradable) rows at this point
df_train

Unnamed: 0,img_data,label
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
...,...,...
1995,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1996,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1997,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1998,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1


### Biodegradable

In [7]:
# Pre-process every biodegradable training image, write the result to the
# pre-processed directory and record it in df_train with label 0.
# The listing is sorted because os.listdir returns entries in arbitrary order,
# and the single-row frames are concatenated once after the loop so df_train
# is not re-copied on every iteration.
new_rows = []
for file in sorted(os.listdir(path_orig_train_bio)):
    img_path = os.path.join(path_orig_train_bio, file)
    img = img_preprocessing(img_path)
    cv.imwrite(os.path.join(path_prep_train_bio, file), img)
    new_rows.append(pd.DataFrame({'img_data' : [img], 'label' : [0]}))
df_train = pd.concat([df_train, *new_rows], ignore_index=True)

In [8]:
# Show df_train again; when the cells are run in order it now holds the
# label-1 rows followed by the label-0 (biodegradable) rows
df_train

Unnamed: 0,img_data,label
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
...,...,...
3995,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, ...",0
3996,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
3997,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
3998,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0


In [9]:
# Persist the processed training dataframe to ./pickled_data so that later
# runs can load it instead of pre-processing every image again
os.makedirs(name="./pickled_data", exist_ok=True)
df_train.to_pickle(path="./pickled_data/df_train.pkl")

## Pre-processing the Testing Data (Images)

In [10]:
# Empty frame that the cells below fill with one row per pre-processed
# testing image: the image array plus its class label
df_test = pd.DataFrame(
    columns=["img_data", "label"],
)

### Non-biodegradable

In [11]:
# Pre-process every non-biodegradable testing image, write the result to the
# pre-processed directory and record it in df_test with label 1.
# The listing is sorted because os.listdir returns entries in arbitrary order,
# and the single-row frames are concatenated once after the loop so df_test
# is not re-copied on every iteration.
new_rows = []
for file in sorted(os.listdir(path_orig_test_non)):
    img_path = os.path.join(path_orig_test_non, file)
    img = img_preprocessing(img_path)
    cv.imwrite(os.path.join(path_prep_test_non, file), img)
    new_rows.append(pd.DataFrame({'img_data' : [img], 'label' : [1]}))
df_test = pd.concat([df_test, *new_rows], ignore_index=True)

In [12]:
# Show df_test; when the cells are run in order it holds only the
# label-1 (non-biodegradable) rows at this point
df_test

Unnamed: 0,img_data,label
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
...,...,...
245,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
246,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
247,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
248,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1


### Biodegradable 

In [13]:
# Pre-process every biodegradable testing image, write the result to the
# pre-processed directory and record it in df_test with label 0.
# The listing is sorted because os.listdir returns entries in arbitrary order,
# and the single-row frames are concatenated once after the loop so df_test
# is not re-copied on every iteration.
new_rows = []
for file in sorted(os.listdir(path_orig_test_bio)):
    img_path = os.path.join(path_orig_test_bio, file)
    img = img_preprocessing(img_path)
    cv.imwrite(os.path.join(path_prep_test_bio, file), img)
    new_rows.append(pd.DataFrame({'img_data' : [img], 'label' : [0]}))
df_test = pd.concat([df_test, *new_rows], ignore_index=True)

In [14]:
# Show df_test again; when the cells are run in order it now holds the
# label-1 rows followed by the label-0 (biodegradable) rows
df_test

Unnamed: 0,img_data,label
0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
1,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",1
...,...,...
495,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
496,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
497,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0
498,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",0


In [15]:
# Persist the processed testing dataframe to ./pickled_data so that later
# runs can load it instead of pre-processing every image again
os.makedirs(name="./pickled_data", exist_ok=True)
df_test.to_pickle(path="./pickled_data/df_test.pkl")