#### Image Caching
Download images from the given links, resize them to 128×128, and save them as .jpeg files at 80% quality, each named by a hash of its URL so the same link always maps to the same file.

In [1]:

import os
import hashlib
import requests
from io import BytesIO
from PIL import Image
from tqdm import tqdm


In [None]:
# Parameters used as defaults by the caching function.
# Folder in which the downloaded and resized images are stored.
CACHE_DIR = "cache_images"
# Size every cached image is resized to; compared against PIL's `Image.size`.
RESIZE = (128, 128)


In [None]:
# Derive the cache file name from a hash of the URL so that a given link
# always maps to the same file.
def _get_cached_path(cache_dir, url):
    """Return the path inside *cache_dir* where the image for *url* is cached.

    The file name is the hexadecimal MD5 digest of the encoded URL followed
    by the ``.jpeg`` extension.
    """
    digest = hashlib.md5(url.encode()).hexdigest()
    file_name = digest + ".jpeg"
    return os.path.join(cache_dir, file_name)

# Save images in a cache folder to avoid downloading them again and again.
def precache_images(df_list, cache_dir=CACHE_DIR, resize=RESIZE):
    """Download, resize and cache every image referenced by the given frames.

    Each frame that has an ``image_link`` column contributes its non-null
    links. Every unique link is stored in *cache_dir* as a JPEG whose file
    name comes from ``_get_cached_path``. Links that are already cached are
    not downloaded again; a cached file with the wrong size is resized in
    place, and an unreadable cached file is downloaded afresh.

    If a download or decode fails, a plain white image of the target size is
    stored instead, so every processed link ends up with a file on disk.

    Args:
        df_list: Iterable of DataFrame-like objects; frames without an
            ``image_link`` column are ignored.
        cache_dir: Folder for the cached files. Created if it is missing.
        resize: Target size handed to PIL's ``resize``.

    Returns:
        None. The function works through its side effects on *cache_dir*.
    """
    # Without the folder every save fails, including the placeholder save in
    # the error handler, which would abort the whole run.
    os.makedirs(cache_dir, exist_ok=True)

    # PIL reports sizes as tuples; a list would never compare equal and
    # would force a re-save of every cached file on each run.
    target_size = tuple(resize)
    save_options = {"format": "JPEG", "quality": 80, "optimize": True}

    urls = []
    for df in df_list:
        if "image_link" in df.columns:
            urls.extend(df["image_link"].dropna().astype(str).tolist())
    # dict.fromkeys removes duplicates while keeping first-seen order.
    urls = list(dict.fromkeys(urls))
    print(f"Pre-caching {len(urls)} unique image URLs to '{cache_dir}'")

    # Iterate through each image URL.
    for url in tqdm(urls):
        if not url:
            continue
        try:
            cached = _get_cached_path(cache_dir, url)
        except Exception:
            # Best effort: a link that cannot be turned into a file name is
            # skipped rather than stopping the whole run.
            continue

        if os.path.exists(cached):
            try:
                resized = None
                # Close the file handle before overwriting the same path.
                with Image.open(cached) as img:
                    if img.size != target_size:
                        resized = img.convert("RGB").resize(target_size)
                if resized is not None:
                    resized.save(cached, **save_options)
                continue
            except Exception:
                # Unreadable or corrupt cache entry: fall through and
                # download it again.
                pass

        # Download and save new images in compressed JPEG format.
        try:
            response = requests.get(url, timeout=10)
            # Treat HTTP errors as failures instead of decoding the error body.
            response.raise_for_status()
            with Image.open(BytesIO(response.content)) as downloaded:
                image = downloaded.convert("RGB")
            image = image.resize(target_size)
            image.save(cached, **save_options)
        except Exception:
            # If the image cannot be downloaded or decoded, save a white image.
            Image.new("RGB", target_size, (255, 255, 255)).save(
                cached, **save_options
            )
    