# 02 - Prétraitement Images: Zoom & Suppression Espaces Blancs

**Objectif:** Zoomer les images pour supprimer les espaces blancs

**Sorties:** Images zoomées organisées par classe

---

In [None]:
import sys, os, shutil
from pathlib import Path

# Runtime detection: the session is treated as Google Colab when the
# 'google.colab' module is already loaded, and as a local session otherwise.
IS_COLAB = 'google.colab' in sys.modules

if not IS_COLAB:
    print("üü¢ Local")
    # When started from a folder named 'notebooks', the project root is its parent.
    launch_dir = Path.cwd()
    PROJECT_ROOT = launch_dir.parent if launch_dir.name == 'notebooks' else launch_dir
else:
    print("üîµ Colab")
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    PROJECT_ROOT = Path('/content/drive/MyDrive/CLASSIFICATION-PRODUITS-RAKUTEN')

# Make the project root the working directory so the relative paths used by
# the following cells resolve against it.
os.chdir(PROJECT_ROOT)
print(f"üìÅ {PROJECT_ROOT}")

In [None]:
import pandas as pd, numpy as np, cv2, matplotlib.pyplot as plt
from tqdm.auto import tqdm
import warnings; warnings.filterwarnings('ignore')
# Seed NumPy's global random generator so random draws repeat between runs.
np.random.seed(42)

# Input locations (relative to the current working directory).
INPUT_DIR = Path('output') / '01_exploration'
IMAGE_DIR = Path('data') / 'images' / 'image_train'

# Output locations; both folders are created up front if they do not exist.
OUTPUT_DIR = Path('output') / '02_preprocessing_images'
OUTPUT_IMAGE_DIR = OUTPUT_DIR / 'images_zoomed'
for out_dir in (OUTPUT_DIR, OUTPUT_IMAGE_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Load the table saved under INPUT_DIR; the first CSV column becomes the index.
print("Chargement donn√©es notebook 01...")
explored_csv = INPUT_DIR / 'df_explored.csv'
df = pd.read_csv(explored_csv, index_col=0)
n_products = len(df)
print(f"‚úÖ {n_products:,} produits charg√©s")
# Last expression of the cell: displays the first rows as the cell output.
df.head()

In [None]:
def find_inner_img_box(img):
    """Return the bounding box of the non-white content of an image.

    A pixel counts as content when any of its values differs from 255.

    Args:
        img: Image array indexed as ``img[row, col]`` (2-D) or
            ``img[row, col, channel]`` (3-D or more).

    Returns:
        ``(top, left, bottom, right)`` as plain ``int`` indices, all
        inclusive, or ``None`` when the image has no content (entirely 255,
        or zero rows/columns).
    """
    content = np.asarray(img) != 255
    if content.ndim > 2:
        # Collapse channel axes: a pixel is content if any channel is not 255.
        content = content.any(axis=tuple(range(2, content.ndim)))

    # Indices of rows / columns holding at least one content pixel.
    rows = np.flatnonzero(content.any(axis=1))
    if rows.size == 0:
        return None
    cols = np.flatnonzero(content.any(axis=0))

    # First/last hits give the tight box, including when the content touches
    # the last row or column or is a single row/column wide.
    return (int(rows[0]), int(cols[0]), int(rows[-1]), int(cols[-1]))

def zoom_img(img_path, box):
    """Crop an image to ``box``, enlarge the crop and centre it on white.

    The crop is scaled with its aspect ratio preserved until it fills the
    original canvas along the limiting dimension, then pasted in the middle
    of a white (255) canvas of the original image's shape.

    Args:
        img_path: Path of the image file to read.
        box: Sequence ``(top, left, bottom, right)`` of inclusive pixel
            indices delimiting the region to keep.

    Returns:
        A ``uint8`` array with the same shape as the image read from disk.

    Raises:
        OSError: If the image cannot be read.
        ValueError: If ``box`` selects an empty region.
    """
    img = cv2.imread(str(img_path), cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None.
        raise OSError(f"Cannot read image: {img_path}")

    zoomed = img[box[0]:box[2] + 1, box[1]:box[3] + 1]
    zoomed_h, zoomed_w = zoomed.shape[:2]
    if zoomed_h == 0 or zoomed_w == 0:
        raise ValueError(f"Empty crop for box {tuple(box)!r} in image {img_path}")

    img_h, img_w = img.shape[:2]
    scale_w = img_w / zoomed_w
    scale_h = img_h / zoomed_h
    # Use the smaller scale so the enlarged crop always fits the canvas, also
    # for non-square images. For square images this is the same choice as
    # comparing the crop's width with its height. The other dimension is
    # clamped to [1, canvas size] so truncation can never produce 0.
    if scale_w < scale_h:
        new_size = (img_w, min(img_h, max(1, int(zoomed_h * scale_w))))
    else:
        new_size = (min(img_w, max(1, int(zoomed_w * scale_h))), img_h)

    # cv2.resize takes dsize as (width, height).
    zoomed = cv2.resize(zoomed, dsize=new_size, interpolation=cv2.INTER_LINEAR)

    final = np.full(shape=img.shape, fill_value=255, dtype=np.uint8)
    new_x = (final.shape[1] - zoomed.shape[1]) // 2
    new_y = (final.shape[0] - zoomed.shape[0]) // 2
    final[new_y:new_y + zoomed.shape[0], new_x:new_x + zoomed.shape[1]] = zoomed
    return final

# Console confirmation that this cell ran and the two helper functions above are defined.
print("‚úÖ Fonctions d√©finies")

In [None]:
# Create one sub-folder per distinct 'prdtypecode' value under OUTPUT_IMAGE_DIR.
print("Cr√©ation dossiers classes...")
class_codes = df['prdtypecode'].unique()
for prdtype in class_codes:
    class_dir = OUTPUT_IMAGE_DIR / str(prdtype)
    # exist_ok=True: folders left by a previous run are reused.
    class_dir.mkdir(exist_ok=True)
n_classes = df['prdtypecode'].nunique()
print(f"‚úÖ {n_classes} dossiers cr√©√©s")

In [None]:
# Path of each image relative to the class tree: "<prdtypecode>/<imagefile>".
class_folder = df['prdtypecode'].astype(str)
df['imagefile_relative'] = class_folder + '/' + df['imagefile']
# Same path anchored at OUTPUT_IMAGE_DIR, stored as a plain string.
df['imagefile_output'] = df['imagefile_relative'].map(
    lambda rel: str(OUTPUT_IMAGE_DIR / rel)
)

In [None]:
# Write every image of `df` to its class folder: rows whose 'inner_ratio' is
# at most 0.8 are zoomed with zoom_img, all other rows are copied unchanged.
# Destinations that already exist are skipped, so the cell can be re-run.
print("Traitement images (15-20 min)...\n")
zoom_count, copy_count, error_count = 0, 0, 0
failed_images = []  # (row index, error description) for each failed image

for idx, row in tqdm(df.iterrows(), total=len(df), desc="Traitement"):
    try:
        src = Path(row['imagepath'])
        dst = Path(row['imagefile_output'])
        if dst.exists(): continue

        if pd.notna(row['inner_ratio']) and row['inner_ratio'] <= 0.8:
            box = (int(row['top']), int(row['left']), int(row['bottom']), int(row['right']))
            zoomed = zoom_img(src, box)
            # cv2.imwrite returns False instead of raising when the write fails.
            if not cv2.imwrite(str(dst), zoomed):
                raise OSError(f"cv2.imwrite failed for {dst}")
            zoom_count += 1
        else:
            shutil.copy2(src, dst)
            copy_count += 1
    except Exception as exc:
        # Keep going on per-image failures, but remember why they failed.
        # KeyboardInterrupt / SystemExit are deliberately not caught, so the
        # long-running loop can still be interrupted.
        error_count += 1
        failed_images.append((idx, repr(exc)))

print(f"\n‚úÖ Termin√©!")
print(f"   ‚Ä¢ Zoom√©es: {zoom_count:,}")
print(f"   ‚Ä¢ Copi√©es: {copy_count:,}")
print(f"   ‚Ä¢ Erreurs: {error_count}")
# Show a few failure reasons so errors are diagnosable, not just counted.
for failed_idx, reason in failed_images[:5]:
    print(f"     - {failed_idx}: {reason}")

In [None]:
# Columns kept in the exported table; the other columns of `df` are left out.
export_columns = [
    'designation',
    'description',
    'productid',
    'imageid',
    'prdtypecode',
    'prdtype_label',
    'imagefile',
    'imagefile_relative',
]
df_output = df[export_columns].copy()
preprocessed_csv = OUTPUT_DIR / 'df_preprocessed.csv'
df_output.to_csv(preprocessed_csv)
print(f"‚úÖ Sauvegard√©: df_preprocessed.csv")

import json
# Run counters written next to the CSV as a small JSON report.
summary_path = OUTPUT_DIR / 'summary.json'
with open(summary_path, 'w') as f:
    summary = {
        'total': len(df),
        'zoomed': zoom_count,
        'copied': copy_count,
        'errors': error_count,
    }
    json.dump(summary, f, indent=2)
print(f"‚úÖ Sauvegard√©: summary.json")

In [None]:
# Final console recap of the run, framed by two 60-character rules.
banner = "=" * 60
print(banner)
print("üñºÔ∏è R√âSUM√â PR√âTRAITEMENT")
print(banner)
n_total = len(df)
print(f"Total: {n_total:,}")
# Share of zoomed images, as a percentage of all rows in `df`.
zoom_share = zoom_count / n_total * 100
print(f"Zoom√©es: {zoom_count:,} ({zoom_share:.1f}%)")
print(f"Copi√©es: {copy_count:,}")
print(f"\n‚úÖ Pr√™t pour notebook 03!")
print(banner)