In [4]:
import os
import shutil
import zipfile
from io import BytesIO

def extract_nested_zip(local_zip_path: str, output_dir: str):
    """
    Recursively extract CSV/CSS files from a local ZIP file that may
    contain other ZIPs (nested).

    Every member whose name ends in ``.csv`` or ``.css`` (case-insensitive)
    is written directly into ``output_dir`` under its base name. The folder
    structure inside the archives is discarded, so two members sharing a
    base name overwrite each other (the last one processed wins).

    Members whose name ends in ``.zip`` are loaded into memory and processed
    the same way. A nested member that is named ``.zip`` but is not a valid
    archive is reported and skipped instead of aborting the whole run.

    :param local_zip_path: Path to the downloaded outer ZIP file.
    :param output_dir: Directory where extracted CSV/CSS files will go.
        It is created if it does not exist yet.
    :raises FileNotFoundError: If ``local_zip_path`` does not exist.
    :raises zipfile.BadZipFile: If the outer file is not a valid ZIP.
    """
    os.makedirs(output_dir, exist_ok=True)

    def _process_zip(zip_file, source_name: str):
        """
        Extract matching members from a single archive.

        :param zip_file: Path to the archive, or a seekable binary file
            object holding it.
        :param source_name: Label for the archive used in progress messages.
        """
        with zipfile.ZipFile(zip_file) as z:
            for info in z.infolist():
                member = info.filename
                lowered = member.lower()

                # If this member is itself a ZIP, recurse. Only nested
                # archives are decompressed into memory; everything else is
                # either streamed to disk or never read at all.
                if lowered.endswith('.zip'):
                    nested = BytesIO(z.read(info))
                    if not zipfile.is_zipfile(nested):
                        print(
                            f"[!] Skipping {member} inside {source_name}: "
                            "not a valid ZIP archive"
                        )
                        continue
                    print(f"[+] Found nested ZIP {member} inside {source_name}")
                    _process_zip(nested, member)

                # If it's a CSV or CSS, write it out
                elif lowered.endswith(('.csv', '.css')):
                    # Using only the base name flattens the archive layout
                    # and keeps every output file inside output_dir.
                    out_path = os.path.join(output_dir, os.path.basename(member))
                    print(f"    → Extracting {member} → {out_path}")
                    # Stream in chunks so large files are never held fully
                    # in memory.
                    with z.open(info) as src, open(out_path, 'wb') as dst:
                        shutil.copyfileobj(src, dst)

    # Open the outer ZIP straight from disk and kick off processing
    print(f"Processing outer ZIP: {local_zip_path}")
    _process_zip(local_zip_path, os.path.basename(local_zip_path))


if __name__ == "__main__":
    # Outer archive to unpack -- replace with your filename.
    LOCAL_ZIP = (
        "/Users/kaushalshivaprakash/Desktop/project3/data/raw/"
        "2023-citibike-tripdata.zip"
    )
    # Where to dump CSV/CSS files.
    OUTPUT_DIR = "raw_citibike_csvs"

    extract_nested_zip(local_zip_path=LOCAL_ZIP, output_dir=OUTPUT_DIR)

Processing outer ZIP: /Users/kaushalshivaprakash/Desktop/project3/data/raw/2023-citibike-tripdata.zip
[+] Found nested ZIP 2023-citibike-tripdata/202302-citibike-tripdata.zip inside 2023-citibike-tripdata.zip
    → Extracting 202302-citibike-tripdata_2.csv → raw_citibike_csvs/202302-citibike-tripdata_2.csv
    → Extracting 202302-citibike-tripdata_1.csv → raw_citibike_csvs/202302-citibike-tripdata_1.csv
[+] Found nested ZIP 2023-citibike-tripdata/202308-citibike-tripdata.zip inside 2023-citibike-tripdata.zip
    → Extracting 202308-citibike-tripdata_4.csv → raw_citibike_csvs/202308-citibike-tripdata_4.csv
    → Extracting 202308-citibike-tripdata_1.csv → raw_citibike_csvs/202308-citibike-tripdata_1.csv
    → Extracting 202308-citibike-tripdata_2.csv → raw_citibike_csvs/202308-citibike-tripdata_2.csv
    → Extracting 202308-citibike-tripdata_3.csv → raw_citibike_csvs/202308-citibike-tripdata_3.csv
[+] Found nested ZIP 2023-citibike-tripdata/202306-citibike-tripdata.zip inside 2023-citib