# Project Setup

## Data
Use this notebook to unzip the data files.

In [1]:
try:
    import zipfile
    import os
except:
    %pip install zipfile os

In [2]:
# Define the list of zip files to extract
zip_files = [
    "Green Space & Income/data/98-401-X2021007_eng_CSV.zip",
    "Green Space & Income/data/Green Spaces - 4326.zip",
    "Green Space & Income/data/lcsd000b21a_e.zip",
    "Green Space & Income/data/lct_000b21a_e.zip",
    "Green Space & Income/data/Neighbourhoods - 4326.zip",
    "Green Space & Income/data/Parks and Recreation Facilities - 4326.zip",
    "Green Space & Income/data/Property Boundaries - 4326.zip"
]

# Extract each zip file to a folder with the same name (without .zip extension)
for zip_path in zip_files:
    # Get the base name without .zip extension
    folder_name = os.path.splitext(zip_path)[0]
    
    # Create the output directory if it doesn't exist
    os.makedirs(folder_name, exist_ok=True)
    
    # Extract the zip file
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(folder_name)
        print(f"✓ Extracted: {zip_path} -> {folder_name}")
    except FileNotFoundError:
        print(f"✗ File not found: {zip_path}")
    except zipfile.BadZipFile:
        print(f"✗ Invalid zip file: {zip_path}")
    except Exception as e:
        print(f"✗ Error extracting {zip_path}: {str(e)}")

✓ Extracted: Green Space & Income/data/98-401-X2021007_eng_CSV.zip -> Green Space & Income/data/98-401-X2021007_eng_CSV
✓ Extracted: Green Space & Income/data/Green Spaces - 4326.zip -> Green Space & Income/data/Green Spaces - 4326
✓ Extracted: Green Space & Income/data/lcsd000b21a_e.zip -> Green Space & Income/data/lcsd000b21a_e
✓ Extracted: Green Space & Income/data/lcsd000b21a_e.zip -> Green Space & Income/data/lcsd000b21a_e
✓ Extracted: Green Space & Income/data/lct_000b21a_e.zip -> Green Space & Income/data/lct_000b21a_e
✓ Extracted: Green Space & Income/data/Neighbourhoods - 4326.zip -> Green Space & Income/data/Neighbourhoods - 4326
✓ Extracted: Green Space & Income/data/Parks and Recreation Facilities - 4326.zip -> Green Space & Income/data/Parks and Recreation Facilities - 4326
✓ Extracted: Green Space & Income/data/lct_000b21a_e.zip -> Green Space & Income/data/lct_000b21a_e
✓ Extracted: Green Space & Income/data/Neighbourhoods - 4326.zip -> Green Space & Income/data/Neighbou

In [3]:
# Verify the extraction by listing the created folders
print("\nExtracted folders:")
for zip_path in zip_files:
    folder_name = os.path.splitext(zip_path)[0]
    if os.path.exists(folder_name):
        files_in_folder = os.listdir(folder_name)
        print(f"\n{folder_name}:")
        print(f"  Contains {len(files_in_folder)} file(s)/folder(s)")
        for file in files_in_folder[:5]:  # Show first 5 items
            print(f"    - {file}")
        if len(files_in_folder) > 5:
            print(f"    ... and {len(files_in_folder) - 5} more")


Extracted folders:

Green Space & Income/data/98-401-X2021007_eng_CSV:
  Contains 4 file(s)/folder(s)
    - 98-401-X2021007_English_meta.txt
    - 98-401-X2021007_English_CSV_data.csv
    - 98-401-X2021007_Geo_starting_row.CSV
    - README_meta.txt

Green Space & Income/data/Green Spaces - 4326:
  Contains 6 file(s)/folder(s)
    - Green Spaces - 4326.shp
    - Green Spaces - 4326.dbf
    - Green Spaces - 4326.prj
    - Green Spaces fields.csv
    - Green Spaces - 4326.shx
    ... and 1 more

Green Space & Income/data/lcsd000b21a_e:
  Contains 5 file(s)/folder(s)
    - lcsd000b21a_e.shp
    - lcsd000b21a_e.prj
    - lcsd000b21a_e.xml
    - lcsd000b21a_e.dbf
    - lcsd000b21a_e.shx

Green Space & Income/data/lct_000b21a_e:
  Contains 5 file(s)/folder(s)
    - lct_000b21a_e.dbf
    - lct_000b21a_e.prj
    - lct_000b21a_e.xml
    - lct_000b21a_e.shx
    - lct_000b21a_e.shp

Green Space & Income/data/Neighbourhoods - 4326:
  Contains 6 file(s)/folder(s)
    - Neighbourhoods fields.csv
   