In [10]:
import os, io, math
import pandas as pd
from rembg.bg import remove
import numpy as np
from multiprocessing import Pool
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [3]:
# Roots of the raw images; generate_paths() lists one sub-directory level
# below each root and then the image files inside those sub-directories.
PATH_DATA_TRAIN = 'data/data-raw/train'
PATH_DATA_TEST = 'data/data-raw/test'
# Roots under which the background-removed copies are written; the
# <sub-directory>/<file> layout of the raw roots is mirrored here.
PATH_DATA_PREPROCESSED_TRAIN = 'data/data-preprocessed/train'
PATH_DATA_PREPROCESSED_TEST = 'data/data-preprocessed/test'

In [6]:
def generate_paths(path_data, path_data_remove_bg):
    """Build a table mapping every raw image to its preprocessed destination.

    ``path_data`` is scanned one level deep: each sub-directory is listed and
    every entry in it (except names containing ``.DS_Store``) becomes one row.

    Parameters
    ----------
    path_data : str
        Root directory holding one sub-directory per group of images.
    path_data_remove_bg : str
        Root directory for the processed copies; the ``<sub-directory>/<file>``
        layout of ``path_data`` is mirrored beneath it. Nothing is created on
        disk here, only the path strings are generated.

    Returns
    -------
    pandas.DataFrame
        Columns ``path_original`` and ``path_remove_bg``, one row per image.
        The index restarts at 0 for every sub-directory, so it is not unique;
        call ``reset_index(drop=True)`` if unique labels are required.
        When no images are found, an empty frame with both columns is returned.
    """
    columns = ['path_original', 'path_remove_bg']

    # Only directories can hold images; stray top-level files would otherwise
    # make the inner os.listdir raise NotADirectoryError.
    folders = [
        name for name in os.listdir(path_data)
        if '.DS_Store' not in name and os.path.isdir(f'{path_data}/{name}')
    ]

    frames = []
    for folder in folders:
        images = [i for i in os.listdir(f'{path_data}/{folder}') if '.DS_Store' not in i]
        if not images:
            # An empty folder contributes no rows; skip it.
            continue
        frames.append(pd.DataFrame({
            'path_original': [f'{path_data}/{folder}/{i}' for i in images],
            'path_remove_bg': [f'{path_data_remove_bg}/{folder}/{i}' for i in images],
        }))

    if not frames:
        # pd.concat rejects an empty list; still hand back the expected columns.
        return pd.DataFrame(columns=columns)

    # Concatenate once instead of re-copying the accumulated frame per folder.
    return pd.concat(frames)

In [11]:
# Pair every raw training image path with the path of its preprocessed copy.
df_path_train = generate_paths(PATH_DATA_TRAIN, PATH_DATA_PREPROCESSED_TRAIN)
df_path_train

Unnamed: 0,path_original,path_remove_bg
0,data/data-raw/train/hatchback/02731.jpg,data/data-preprocessed/train/hatchback/02731.jpg
1,data/data-raw/train/hatchback/07113.jpg,data/data-preprocessed/train/hatchback/07113.jpg
2,data/data-raw/train/hatchback/07846.jpg,data/data-preprocessed/train/hatchback/07846.jpg
3,data/data-raw/train/hatchback/01748.jpg,data/data-preprocessed/train/hatchback/01748.jpg
4,data/data-raw/train/hatchback/00442.jpg,data/data-preprocessed/train/hatchback/00442.jpg
...,...,...
202,data/data-raw/train/wagon/06610.jpg,data/data-preprocessed/train/wagon/06610.jpg
203,data/data-raw/train/wagon/03032.jpg,data/data-preprocessed/train/wagon/03032.jpg
204,data/data-raw/train/wagon/03387.jpg,data/data-preprocessed/train/wagon/03387.jpg
205,data/data-raw/train/wagon/06599.jpg,data/data-preprocessed/train/wagon/06599.jpg


In [12]:
# Pair every raw test image path with the path of its preprocessed copy.
df_path_test = generate_paths(PATH_DATA_TEST, PATH_DATA_PREPROCESSED_TEST)
df_path_test

Unnamed: 0,path_original,path_remove_bg
0,data/data-raw/test/hatchback/02725.jpg,data/data-preprocessed/test/hatchback/02725.jpg
1,data/data-raw/test/hatchback/01238.jpg,data/data-preprocessed/test/hatchback/01238.jpg
2,data/data-raw/test/hatchback/05089.jpg,data/data-preprocessed/test/hatchback/05089.jpg
3,data/data-raw/test/hatchback/00867.jpg,data/data-preprocessed/test/hatchback/00867.jpg
4,data/data-raw/test/hatchback/06027.jpg,data/data-preprocessed/test/hatchback/06027.jpg
...,...,...
199,data/data-raw/test/wagon/03807.jpg,data/data-preprocessed/test/wagon/03807.jpg
200,data/data-raw/test/wagon/06957.jpg,data/data-preprocessed/test/wagon/06957.jpg
201,data/data-raw/test/wagon/06758.jpg,data/data-preprocessed/test/wagon/06758.jpg
202,data/data-raw/test/wagon/03387.jpg,data/data-preprocessed/test/wagon/03387.jpg


In [13]:
def remove_bg(df):
    """Strip the background from every listed image and save it on white.

    For each row the image at ``path_original`` is opened, re-encoded as JPEG
    bytes, passed through rembg's ``remove``, composited onto a white canvas
    and written as a JPEG to ``path_remove_bg``. A progress line is printed
    after every 50 finished images.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``path_original`` and ``path_remove_bg``.

    Returns
    -------
    None
        The written files are the only result.
    """
    path_pairs = list(zip(df['path_original'], df['path_remove_bg']))
    total = len(path_pairs)

    for done, (path_original, path_remove_bg) in enumerate(path_pairs, start=1):

        # load image and hand it to rembg as encoded bytes; the context
        # managers release the file handle and buffer for every image
        with Image.open(path_original) as source, io.BytesIO() as buf:
            # JPEG cannot store alpha or palette modes, so normalise first
            source.convert('RGB').save(buf, 'jpeg')
            cutout_bytes = remove(buf.getvalue())

        # decode the result and make sure it carries an alpha channel
        with Image.open(io.BytesIO(cutout_bytes)) as decoded:
            cutout = decoded.convert("RGBA")

        # change background to white, using the alpha channel as paste mask
        canvas = Image.new('RGB', cutout.size, (255, 255, 255))
        canvas.paste(cutout, mask=cutout.split()[-1])

        # the destination folder may not exist yet on a fresh run
        target_dir = os.path.dirname(path_remove_bg)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        # canvas is already RGB, so it can be written as JPEG directly
        canvas.save(path_remove_bg, 'JPEG')

        # display progress log (only once the image has really been written)
        if done % 50 == 0:
            print('Completed:', done, '/', total)

In [14]:
# Process the training split; each result is saved to its 'path_remove_bg' location.
remove_bg(df_path_train)

Completed: 50 / 8144
Completed: 100 / 8144
Completed: 150 / 8144
Completed: 200 / 8144
Completed: 250 / 8144
Completed: 300 / 8144
Completed: 350 / 8144
Completed: 400 / 8144
Completed: 450 / 8144
Completed: 500 / 8144
Completed: 550 / 8144
Completed: 600 / 8144
Completed: 650 / 8144
Completed: 700 / 8144
Completed: 750 / 8144
Completed: 800 / 8144
Completed: 850 / 8144
Completed: 900 / 8144
Completed: 950 / 8144
Completed: 1000 / 8144
Completed: 1050 / 8144
Completed: 1100 / 8144
Completed: 1150 / 8144
Completed: 1200 / 8144
Completed: 1250 / 8144
Completed: 1300 / 8144
Completed: 1350 / 8144
Completed: 1400 / 8144
Completed: 1450 / 8144
Completed: 1500 / 8144
Completed: 1550 / 8144
Completed: 1600 / 8144
Completed: 1650 / 8144
Completed: 1700 / 8144
Completed: 1750 / 8144
Completed: 1800 / 8144
Completed: 1850 / 8144
Completed: 1900 / 8144
Completed: 1950 / 8144
Completed: 2000 / 8144
Completed: 2050 / 8144
Completed: 2100 / 8144
Completed: 2150 / 8144
Completed: 2200 / 8144
Complete

In [15]:
# Process the test split; each result is saved to its 'path_remove_bg' location.
remove_bg(df_path_test)

Completed: 50 / 8041
Completed: 100 / 8041
Completed: 150 / 8041
Completed: 200 / 8041
Completed: 250 / 8041
Completed: 300 / 8041
Completed: 350 / 8041
Completed: 400 / 8041
Completed: 450 / 8041
Completed: 500 / 8041
Completed: 550 / 8041
Completed: 600 / 8041
Completed: 650 / 8041
Completed: 700 / 8041
Completed: 750 / 8041
Completed: 800 / 8041
Completed: 850 / 8041
Completed: 900 / 8041
Completed: 950 / 8041
Completed: 1000 / 8041
Completed: 1050 / 8041
Completed: 1100 / 8041
Completed: 1150 / 8041
Completed: 1200 / 8041
Completed: 1250 / 8041
Completed: 1300 / 8041
Completed: 1350 / 8041
Completed: 1400 / 8041
Completed: 1450 / 8041
Completed: 1500 / 8041
Completed: 1550 / 8041
Completed: 1600 / 8041
Completed: 1650 / 8041
Completed: 1700 / 8041
Completed: 1750 / 8041
Completed: 1800 / 8041
Completed: 1850 / 8041
Completed: 1900 / 8041
Completed: 1950 / 8041
Completed: 2000 / 8041
Completed: 2050 / 8041
Completed: 2100 / 8041
Completed: 2150 / 8041
Completed: 2200 / 8041
Complete