In [None]:
import os
import time
import requests
import pandas as pd
from tqdm import tqdm
from PIL import Image
from io import BytesIO


In [None]:
# Project folder on the local machine; every path below hangs off it.
_CDC_ROOT = r"C:\Users\ammym\OneDrive\Desktop\CDC"
_IMAGES_ROOT = _CDC_ROOT + r"\images"

# CSV inputs, one per split (loaded with pd.read_csv further down).
TRAIN_DATA_PATH = _CDC_ROOT + r"\train_data.csv"
TEST_DATA_PATH = _CDC_ROOT + r"\test_data.csv"

# Image folders, one per split. The train folder receives the downloaded
# "<row index>.png" files; the test folder presumably mirrors it.
TRAIN_IMG_PATH = _IMAGES_ROOT + r"\train"
TEST_IMG_PATH = _IMAGES_ROOT + r"\test"

In [None]:
# Image request parameters handed to build_mapbox_url().
ZOOM_LEVEL: float = 17.5   # zoom value placed after the coordinates in the URL
IMAGE_SIZE: int = 256      # used for both width and height ("256x256")

# Pause, in seconds, after each attempted download in the main loop.
SLEEP_TIME: float = 0.05

In [None]:
# The access token comes from the environment so it never lives in the notebook.
MAPBOX_TOKEN = os.getenv("MAPBOX_TOKEN")
if not MAPBOX_TOKEN:
    # Without this guard the URL would carry the literal text "None" as the
    # token and every row would be requested (and retried) for nothing.
    raise RuntimeError(
        "MAPBOX_TOKEN environment variable is not set; "
        "export it before running the download cells."
    )

# Create both image output folders up front (no-op when they already exist).
os.makedirs(TRAIN_IMG_PATH, exist_ok=True)
os.makedirs(TEST_IMG_PATH, exist_ok=True)

df_train = pd.read_csv(TRAIN_DATA_PATH)
df_test = pd.read_csv(TEST_DATA_PATH)
df_train.shape

In [None]:
# Coordinate columns the download loop reads from every training row.
required_cols = ["lat", "long"]

# Fail fast with a readable message instead of a KeyError on the first
# iteration of the download loop.
missing_cols = [col for col in required_cols if col not in df_train.columns]
if missing_cols:
    raise KeyError(f"train data is missing required column(s): {missing_cols}")

In [None]:
def build_mapbox_url(lat, lon, zoom=17.5, size=256, style="satellite-v9"):
    """
    Build the Mapbox Static Image API request URL for one location.

    Args:
        lat: Latitude of the requested point.
        lon: Longitude of the requested point (the URL lists it first).
        zoom: Zoom value placed after the coordinates.
        size: Side length, used for both width and height.
        style: Style id inserted under the ``mapbox`` account in the path.

    Returns:
        The URL as a string, ending with the module-level ``MAPBOX_TOKEN``
        as the ``access_token`` query parameter.
    """
    position = f"{lon},{lat},{zoom}"
    dimensions = f"{size}x{size}"
    endpoint = f"https://api.mapbox.com/styles/v1/mapbox/{style}/static/"
    query = f"?access_token={MAPBOX_TOKEN}"
    return endpoint + position + "/" + dimensions + query

def download_image(url, save_path, retries=3):
    """
    Download an image, convert it to RGB and save it, retrying on failure.

    Args:
        url: Address to fetch with a 10 second timeout.
        save_path: Destination file; its extension selects the image format.
        retries: Maximum number of attempts.

    Returns:
        True once an image has been written to ``save_path``; False when all
        attempts failed or the server answered with a client error that a
        retry cannot fix.

    Notes:
        * The image is written to a sibling ``.part`` file and then renamed,
          so ``save_path`` never holds a half-written image that a later
          "file already exists" check would mistake for a finished download.
        * One second is waited between attempts for HTTP errors as well as
          exceptions, and not after the final attempt.
        * Anything that looks like ``access_token=...`` is masked in printed
          error text, because exception messages can include the request URL.
    """
    import re  # local import: only needed to scrub the token from error text

    # Keep the real extension last so the image library still infers the format.
    stem, ext = os.path.splitext(save_path)
    tmp_path = f"{stem}.part{ext}"

    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=10)
            status = response.status_code
            if status == 200:
                img = Image.open(BytesIO(response.content)).convert("RGB")
                img.save(tmp_path)
                os.replace(tmp_path, save_path)  # atomic hand-over
                return True

            print(f"HTTP {status} for {save_path}")
            # 4xx means the request itself is wrong (bad token, bad
            # coordinates, ...); only 408 and 429 are worth another try.
            if 400 <= status < 500 and status not in (408, 429):
                return False
        except Exception as e:
            safe_message = re.sub(r"access_token=[^&\s)]+", "access_token=***", str(e))
            print(f"Attempt {attempt + 1} failed: {safe_message}")
            # Best-effort cleanup of a partial temp file; it may not exist.
            try:
                os.remove(tmp_path)
            except OSError:
                pass

        if attempt < retries - 1:
            time.sleep(1)
    return False

In [None]:
failed_indices = []  # row labels whose download did not succeed

progress = tqdm(df_train.iterrows(), total=len(df_train))
for idx, row in progress:
    lat = row["lat"]
    lon = row["long"]
    image_path = os.path.join(TRAIN_IMG_PATH, f"{idx}.png")

    # Resume support: a file that is already on disk is left untouched and
    # costs neither a request nor a pause.
    if not os.path.exists(image_path):
        url = build_mapbox_url(lat=lat, lon=lon, zoom=ZOOM_LEVEL, size=IMAGE_SIZE)

        if not download_image(url, image_path):
            failed_indices.append(idx)

        # Short pause after every attempted download, successful or not.
        time.sleep(SLEEP_TIME)

In [None]:
if failed_indices:
    print(f"Failed downloads: {len(failed_indices)}")
    # failed_indices holds index *labels* collected from iterrows(), so the
    # rows must be selected by label (.loc), not by position (.iloc).
    failed_df = df_train.loc[failed_indices]
    # Keep the index in the file: it is the stem of the image file name
    # ("<index>.png"), and the message below promises the indices are saved.
    failed_df.to_csv("failed_image_downloads.csv", index=True, index_label="row_index")
    print("Failed indices saved to failed_image_downloads.csv")
else:
    print("All images downloaded successfully!")
