In [None]:
# Mount Google Drive at /content/drive (Google Colab only); later cells read and
# write files under /content/drive/MyDrive.
# NOTE(review): force_remount=True presumably re-mounts even when the drive is
# already mounted -- confirm against the Colab documentation.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import requests
import pandas as pd
import json

def fetch_hospital_data(bbox, timeout=330):
    """
    Fetch named hospitals inside a bounding box from the Overpass API.

    Nodes, ways and relations tagged amenity=hospital are requested; elements
    without a ``name`` tag are skipped.

    :param bbox: Bounding box in format (south, west, north, east)
    :param timeout: Client-side HTTP timeout in seconds. The default sits a little
        above the 300 s limit the query asks the server for, so the request can no
        longer hang forever on a stalled connection.
    :return: List of dictionaries with the keys 'name', 'latitude', 'longitude'
        and 'address'
    :raises requests.exceptions.RequestException: on connection problems, timeouts
        or an HTTP error status
    :raises ValueError: if the response body is not valid JSON
    """
    overpass_url = "https://overpass-api.de/api/interpreter"

    # The same (south,west,north,east) filter is appended to every statement.
    area = f"({bbox[0]},{bbox[1]},{bbox[2]},{bbox[3]})"
    overpass_query = f"""
    [out:json][timeout:300];
    (
      node["amenity"="hospital"]{area};
      way["amenity"="hospital"]{area};
      relation["amenity"="hospital"]{area};
    );
    out center;
    """

    response = requests.get(overpass_url, params={'data': overpass_query}, timeout=timeout)
    # Surface HTTP errors (rate limiting, gateway timeouts, bad query) directly
    # instead of failing later with an unrelated JSON decoding error.
    response.raise_for_status()
    data = response.json()

    hospitals = []
    for element in data.get('elements', []):
        tags = element.get('tags', {})
        if 'name' not in tags:
            continue

        # Nodes carry lat/lon themselves; ways and relations only have the
        # 'center' produced by "out center".
        center = element.get('center', {})
        hospitals.append({
            'name': tags['name'],
            'latitude': element.get('lat', center.get('lat')),
            'longitude': element.get('lon', center.get('lon')),
            'address': tags.get('addr:full', 'N/A')
        })

    return hospitals

def main():
    """Download hospitals for the configured bounding box and store them in hospitals.csv."""
    # Bounding box corners; latitudes 24.4..49.4 and longitudes -125..-66.9
    # (the original note describes this as covering the entire USA).
    south, west = 24.396308, -125.000000
    north, east = 49.384358, -66.934570
    search_area = (south, west, north, east)

    print("Fetching hospital data...")
    records = fetch_hospital_data(search_area)
    print(f"Found {len(records)} hospitals")

    # One row per hospital, written without the DataFrame index
    pd.DataFrame(records).to_csv('hospitals.csv', index=False)
    print("Done. Hospital data saved in hospitals.csv")


if __name__ == "__main__":
    main()

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import box, Point

# Load the patches data
# NOTE(review): this is an absolute Windows path, while the first cell of this
# notebook mounts Google Drive for Colab -- confirm where the file really lives.
patches_df = pd.read_csv(r"C:\Users\GradStudent\OneDrive\Documents\Madhu RA\area\DeKalb_Illinois_zipcode_60115_patches_info.csv")

# One bounding-box polygon per patch, built from its left/bottom/right/top columns.
# Iterating the four columns directly avoids the per-row Series overhead of
# DataFrame.apply(axis=1) and also copes with a CSV that has no rows.
patches_df['geometry'] = gpd.GeoSeries(
    [
        box(left, bottom, right, top)
        for left, bottom, right, top in zip(
            patches_df['left'], patches_df['bottom'], patches_df['right'], patches_df['top']
        )
    ],
    index=patches_df.index,
)

# Patches as a GeoDataFrame in WGS84 (EPSG:4326); the CRS is passed to the
# constructor, the same way the building map cell of this notebook does.
patches_gdf = gpd.GeoDataFrame(patches_df, geometry='geometry', crs="EPSG:4326")

# Load the hospitals data
# NOTE(review): absolute Windows path as well -- see the note on the patches file.
hospitals_df = pd.read_csv(r"C:\Users\GradStudent\Documents\Madhu RA\ALL\hospitals_dekalb_60115.csv")

# Point geometry per hospital, created in one vectorized call (x = longitude, y = latitude)
hospitals_df['geometry'] = gpd.points_from_xy(hospitals_df['longitude'], hospitals_df['latitude'])

# Hospitals as a GeoDataFrame in WGS84 (EPSG:4326)
hospitals_gdf = gpd.GeoDataFrame(hospitals_df, geometry='geometry', crs="EPSG:4326")

# Perform a spatial join to find which hospitals are within which patches
joined_gdf = gpd.sjoin(hospitals_gdf, patches_gdf, how='inner', predicate='within')

# Get a list of unique filenames of patches that contain hospitals
patches_with_hospitals = joined_gdf['filename'].unique()

# Filter the patches GeoDataFrame to include only patches with hospitals
patches_with_hospitals_gdf = patches_gdf[patches_gdf['filename'].isin(patches_with_hospitals)]

# Save the result to a new CSV file (patch metadata only, no geometry column)
output_columns = ['filename', 'left', 'bottom', 'right', 'top', 'zoom', 'pixel_width', 'pixel_height']
patches_with_hospitals_gdf[output_columns].to_csv('patches_with_hospitals.csv', index=False)

print("CSV file 'patches_with_hospitals.csv' has been created with patches containing hospitals.")


In [None]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import time

def fetch_building_data(bbox, timeout=330):
    """
    Fetch building data from Overpass API

    Selects ways and relations tagged building=commercial, building=apartments
    or building=office, plus building=residential with at least four levels.

    :param bbox: Bounding box in format [south, west, north, east]
    :param timeout: Client-side HTTP timeout in seconds (the query itself asks the
        server for a 300 s limit)
    :return: List of dictionaries containing building data; an empty list when
        every attempt failed
    """
    overpass_url = "https://overpass-api.de/api/interpreter"

    area = f"({bbox[0]},{bbox[1]},{bbox[2]},{bbox[3]})"
    # Overpass QL tag filters only offer =, !=, ~ and !~. A numeric comparison such
    # as ["building:levels">="4"] is rejected by the server, so the level threshold
    # is expressed as an evaluator filter instead.
    min_levels = '(if: number(t["building:levels"]) >= 4)'
    tag_filters = [
        '["building"="commercial"]',
        f'["building"="residential"]{min_levels}',
        '["building"="apartments"]',
        '["building"="office"]',
    ]
    statements = "\n".join(
        f"      {kind}{tag_filter}{area};"
        for kind in ("way", "relation")
        for tag_filter in tag_filters
    )
    overpass_query = f"""
    [out:json][timeout:300];
    (
{statements}
    );
    out center;
    """

    max_retries = 3
    retry_delay = 5  # seconds

    for attempt in range(max_retries):
        data = None
        try:
            response = requests.get(overpass_url, params={'data': overpass_query}, timeout=timeout)
            response.raise_for_status()  # Raise an exception for bad status codes
        except requests.exceptions.RequestException as e:
            print(f"Request failed (attempt {attempt + 1}): {e}")
        else:
            try:
                data = response.json()
            except ValueError:
                # Every JSON decoding error is a ValueError, regardless of the
                # installed requests version.
                print(f"Received non-JSON response (attempt {attempt + 1}):")
                print(response.text[:1000])  # Print first 1000 characters of the response

        if data is not None:
            buildings = []
            for element in data.get('elements', []):
                if 'tags' not in element:
                    continue
                tags = element['tags']
                # Only ways/relations are queried, so coordinates come from "out center".
                center = element.get('center', {})
                buildings.append({
                    'name': tags.get('name', 'N/A'),
                    'latitude': center.get('lat'),
                    'longitude': center.get('lon'),
                    'address': tags.get('addr:full', 'N/A'),
                    'levels': tags.get('building:levels', 'N/A'),
                    'type': tags.get('building', 'N/A')
                })
            return buildings

        if attempt < max_retries - 1:
            print(f"Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)

    print("Max retries reached. Could not fetch data.")
    return []

def main():
    """Fetch DeKalb (60115) buildings, export them to CSV and save a labelled point map as PNG."""
    zipcode = "60115"  # DeKalb, Illinois

    # Approximate extent of DeKalb, IL (60115)
    south, west, north, east = 41.885, -88.800, 41.995, -88.700
    bbox = [south, west, north, east]

    print("Fetching building data...")
    buildings = fetch_building_data(bbox)
    if not buildings:
        print("No building data fetched. Exiting.")
        return
    print(f"Found {len(buildings)} buildings")

    # Tabular export
    csv_filename = f'buildings_dekalb_{zipcode}.csv'
    df = pd.DataFrame(buildings)
    df.to_csv(csv_filename, index=False)
    print(f"Done. Building data saved in {csv_filename}")

    # Point layer used for the map
    points = gpd.points_from_xy(df.longitude, df.latitude)
    gdf = gpd.GeoDataFrame(df, geometry=points, crs="EPSG:4326")

    fig, ax = plt.subplots(figsize=(12, 12))

    # One colour per building type
    palette = (
        ('commercial', 'blue'),
        ('residential', 'green'),
        ('apartments', 'red'),
        ('office', 'purple'),
    )
    for building_type, color in palette:
        of_this_type = gdf['type'] == building_type
        gdf[of_this_type].plot(ax=ax, color=color, alpha=0.7, markersize=50, label=building_type)

    # Label only the buildings that actually have a name
    named = gdf[gdf['name'] != 'N/A']
    for _, row in named.iterrows():
        ax.annotate(
            row['name'],
            xy=(row.longitude, row.latitude),
            xytext=(3, 3),
            textcoords="offset points",
            fontsize=8,
        )

    # Clip the view to the bounding box and fix the aspect ratio
    ax.set_xlim(west, east)
    ax.set_ylim(south, north)
    ax.set_aspect(1.5)  # Adjust this value if needed to get a good looking map

    ax.set_title(f"High, Commercial, and Multi-Residential Buildings in DeKalb, IL ({zipcode})")
    ax.legend()
    plt.tight_layout()

    png_filename = f'buildings_dekalb_{zipcode}.png'
    plt.savefig(png_filename, dpi=300, bbox_inches='tight')
    print(f"Map saved as {png_filename}")


if __name__ == "__main__":
    main()

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import box, Point

# Load the patches data
# NOTE(review): this is an absolute Windows path, while the first cell of this
# notebook mounts Google Drive for Colab -- confirm where the file really lives.
patches_df = pd.read_csv(r"C:\Users\GradStudent\OneDrive\Documents\Madhu RA\area\DeKalb_Illinois_zipcode_60115_patches_info.csv")

# One bounding-box polygon per patch, built from its left/bottom/right/top columns.
# Iterating the four columns directly avoids the per-row Series overhead of
# DataFrame.apply(axis=1) and also copes with a CSV that has no rows.
patches_df['geometry'] = gpd.GeoSeries(
    [
        box(left, bottom, right, top)
        for left, bottom, right, top in zip(
            patches_df['left'], patches_df['bottom'], patches_df['right'], patches_df['top']
        )
    ],
    index=patches_df.index,
)

# Patches as a GeoDataFrame in WGS84 (EPSG:4326); the CRS is passed to the
# constructor, the same way the building map cell of this notebook does.
patches_gdf = gpd.GeoDataFrame(patches_df, geometry='geometry', crs="EPSG:4326")

# Load the buildings data
# NOTE(review): absolute Windows path as well -- see the note on the patches file.
buildings_df = pd.read_csv(r"C:\Users\GradStudent\Documents\Madhu RA\ALL\buildings_dekalb_60115.csv")

# Point geometry per building, created in one vectorized call (x = longitude, y = latitude)
buildings_df['geometry'] = gpd.points_from_xy(buildings_df['longitude'], buildings_df['latitude'])

# Buildings as a GeoDataFrame in WGS84 (EPSG:4326)
buildings_gdf = gpd.GeoDataFrame(buildings_df, geometry='geometry', crs="EPSG:4326")

# Function to categorize buildings
def categorize_building(row):
    """
    Map one building record to 'high', 'commercial', 'multi_residential' or 'other'.

    A building with four or more levels is 'high' regardless of its type. The
    level count is parsed defensively: pandas.read_csv turns the 'N/A' placeholder
    into NaN by default, and level values may be fractional or free text
    (e.g. '4.5', '2;3'); none of those may abort the categorisation.

    :param row: Mapping-like record with the keys 'levels' and 'type'
    :return: Category name as a string
    """
    try:
        levels = float(row['levels'])
    except (TypeError, ValueError):
        # Missing or non-numeric level information counts as "unknown"
        levels = float('nan')

    # NaN compares False, so unknown level counts fall through to the type checks
    if levels >= 4:
        return 'high'
    elif row['type'] == 'commercial':
        return 'commercial'
    elif row['type'] in ['residential', 'apartments']:
        return 'multi_residential'
    else:
        return 'other'

# Label every building row with its category
buildings_gdf['category'] = buildings_gdf.apply(categorize_building, axis=1)

# Attach each building to the patch whose bounding box contains it
joined_gdf = gpd.sjoin(buildings_gdf, patches_gdf, how='inner', predicate='within')


def save_patches_for_category(category):
    """Write patches_with_<category>_buildings.csv with the patches that contain such a building.

    Reads the module-level ``joined_gdf`` and ``patches_gdf``.
    """
    output_filename = f'patches_with_{category}_buildings.csv'
    output_columns = ['filename', 'left', 'bottom', 'right', 'top', 'zoom', 'pixel_width', 'pixel_height']

    # Filenames of patches hit by at least one building of this category
    in_category = joined_gdf['category'] == category
    matching_files = joined_gdf.loc[in_category, 'filename'].unique()

    # Keep the metadata of exactly those patches
    selected = patches_gdf['filename'].isin(matching_files)
    patches_gdf.loc[selected, output_columns].to_csv(output_filename, index=False)
    print(f"CSV file '{output_filename}' has been created with patches containing {category} buildings.")


# One CSV per category
for building_category in ('high', 'commercial', 'multi_residential'):
    save_patches_for_category(building_category)

print("Processing complete. Three CSV files have been created for patches containing high, commercial, and multi-residential buildings.")

In [None]:
import requests
import pandas as pd
import time
import math

def fetch_building_data(bbox, building_type, timeout=330):
    """
    Fetch building and structure data from Overpass API for a specific type

    :param bbox: Bounding box in format [south, west, north, east]
    :param building_type: Type of building to fetch; one of 'commercial', 'high',
        'hospital', 'industrial', 'multi', 'school', 'single'
    :param timeout: Client-side HTTP timeout in seconds (the query itself asks the
        server for a 300 s limit)
    :return: List of dictionaries containing building and structure data; an empty
        list when every attempt failed
    :raises KeyError: if ``building_type`` is not one of the supported types
    """
    overpass_url = "https://overpass-api.de/api/interpreter"

    # Per type: which OSM element kinds to query and the tag filter to apply.
    way_rel = ('way', 'relation')
    node_way_rel = ('node', 'way', 'relation')
    selectors = {
        'commercial': (way_rel, '["building"="commercial"]'),
        # Every building with a level count; the ">= 4" cut happens below in Python
        'high': (way_rel, '["building"]["building:levels"]'),
        'hospital': (node_way_rel, '["amenity"="hospital"]'),
        'industrial': (way_rel, '["building"="industrial"]'),
        'multi': (way_rel, '["building"="apartments"]'),
        'school': (node_way_rel, '["amenity"="school"]'),
        'single': (way_rel, '["building"="house"]'),
    }
    element_kinds, tag_filter = selectors[building_type]

    area = f"({bbox[0]},{bbox[1]},{bbox[2]},{bbox[3]})"
    statements = "".join(f"{kind}{tag_filter}{area};" for kind in element_kinds)
    overpass_query = f"""
    [out:json][timeout:300];
    ({statements});
    out center;
    """

    max_retries = 3
    retry_delay = 5  # seconds

    for attempt in range(max_retries):
        try:
            response = requests.get(overpass_url, params={'data': overpass_query}, timeout=timeout)
            response.raise_for_status()  # Raise an exception for bad status codes

            data = response.json()

            buildings = []
            for element in data.get('elements', []):
                if 'tags' not in element:
                    continue
                tags = element['tags']
                # Nodes carry lat/lon themselves; ways/relations use "out center"
                center = element.get('center', {})
                building = {
                    'name': tags.get('name', 'N/A'),
                    'latitude': element.get('lat', center.get('lat')),
                    'longitude': element.get('lon', center.get('lon')),
                    'address': tags.get('addr:full', 'N/A'),
                    'levels': tags.get('building:levels', 'N/A'),
                    'amenity': tags.get('amenity', 'N/A'),
                    'building': tags.get('building', 'N/A'),
                    'type': building_type
                }
                if building_type == 'high':
                    # Keep only level counts that parse to >= 4. 'N/A', free text
                    # and NaN raise ValueError, 'inf' raises OverflowError in
                    # math.floor -- all of them are skipped.
                    try:
                        if math.floor(float(building['levels'])) < 4:
                            continue
                    except (ValueError, OverflowError):
                        continue
                buildings.append(building)

            return buildings

        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose JSON
            # error is not a RequestException subclass.
            print(f"Request failed (attempt {attempt + 1}): {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                print("Max retries reached. Could not fetch data.")
                print(f"Failed query: {overpass_query}")
                return []

def main():
    """Fetch every building category for Illinois, save the combined table to Drive and print a summary."""
    # Illinois bounding box as [south, west, north, east]
    illinois_bbox = [36.9701, -91.5131, 42.5083, -87.0199]
    csv_filename = '/content/drive/MyDrive/Madhu RA Work Folder/xl/buildings_and_structures_il.csv'

    records = []
    for kind in ('commercial', 'high', 'hospital', 'industrial', 'multi', 'school', 'single'):
        print(f"Fetching {kind} buildings and structures...")
        fetched = fetch_building_data(illinois_bbox, kind)
        records.extend(fetched)
        print(f"Found {len(fetched)} {kind} buildings and structures")
        # Pause between requests to avoid overwhelming the API
        time.sleep(2)

    if not records:
        print("No data fetched. Exiting.")
        return

    print(f"\nTotal buildings and structures found: {len(records)}")

    # Combined table, written without the DataFrame index
    df = pd.DataFrame(records)
    df.to_csv(csv_filename, index=False)
    print(f"Done. Data saved in {csv_filename}")

    # Row count per category
    print("\nSummary of categories:")
    print(df['type'].value_counts())


if __name__ == "__main__":
    main()

Fetching commercial buildings and structures...
Found 16645 commercial buildings and structures
Fetching high buildings and structures...
Found 6985 high buildings and structures
Fetching hospital buildings and structures...
Found 387 hospital buildings and structures
Fetching industrial buildings and structures...
Found 11772 industrial buildings and structures
Fetching multi buildings and structures...
Found 25917 multi buildings and structures
Fetching school buildings and structures...
Found 8757 school buildings and structures
Fetching single buildings and structures...
Found 513720 single buildings and structures

Total buildings and structures found: 584183
Done. Data saved in /content/drive/MyDrive/Madhu RA Work Folder/xl/buildings_and_structures_il.csv

Summary of categories:
type
single        513720
multi          25917
commercial     16645
industrial     11772
school          8757
high            6985
hospital         387
Name: count, dtype: int64


In [None]:
import requests
import pandas as pd
import time
import math

def fetch_building_data(bbox, building_type, timeout=330):
    """
    Fetch building and structure data from Overpass API for a specific type

    :param bbox: Bounding box in format [south, west, north, east]
    :param building_type: Type of building to fetch; one of 'commercial', 'high',
        'hospital', 'industrial', 'multi', 'school', 'single'
    :param timeout: Client-side HTTP timeout in seconds (the query itself asks the
        server for a 300 s limit)
    :return: List of dictionaries containing building and structure data; records
        fetched as 'high' carry the type 'High Building'. An empty list is
        returned when every attempt failed.
    :raises KeyError: if ``building_type`` is not one of the supported types
    """
    overpass_url = "https://overpass-api.de/api/interpreter"

    # Per type: which OSM element kinds to query and the tag filter to apply.
    way_rel = ('way', 'relation')
    node_way_rel = ('node', 'way', 'relation')
    selectors = {
        'commercial': (way_rel, '["building"="commercial"]'),
        # Every building with a level count; the ">= 4" cut happens below in Python
        'high': (way_rel, '["building"]["building:levels"]'),
        'hospital': (node_way_rel, '["amenity"="hospital"]'),
        'industrial': (way_rel, '["building"="industrial"]'),
        'multi': (way_rel, '["building"="apartments"]'),
        'school': (node_way_rel, '["amenity"="school"]'),
        'single': (way_rel, '["building"="house"]'),
    }
    element_kinds, tag_filter = selectors[building_type]

    area = f"({bbox[0]},{bbox[1]},{bbox[2]},{bbox[3]})"
    statements = "".join(f"{kind}{tag_filter}{area};" for kind in element_kinds)
    overpass_query = f"""
    [out:json][timeout:300];
    ({statements});
    out center;
    """

    max_retries = 3
    retry_delay = 5  # seconds

    for attempt in range(max_retries):
        try:
            response = requests.get(overpass_url, params={'data': overpass_query}, timeout=timeout)
            response.raise_for_status()  # Raise an exception for bad status codes

            data = response.json()

            buildings = []
            for element in data.get('elements', []):
                if 'tags' not in element:
                    continue
                tags = element['tags']
                # Nodes carry lat/lon themselves; ways/relations use "out center"
                center = element.get('center', {})
                building = {
                    'name': tags.get('name', 'N/A'),
                    'latitude': element.get('lat', center.get('lat')),
                    'longitude': element.get('lon', center.get('lon')),
                    'address': tags.get('addr:full', 'N/A'),
                    'levels': tags.get('building:levels', 'N/A'),
                    'type': building_type
                }
                if building_type == 'high':
                    # Keep only level counts that parse to >= 4. 'N/A', free text
                    # and NaN raise ValueError, 'inf' raises OverflowError in
                    # math.floor -- all of them are skipped.
                    try:
                        if math.floor(float(building['levels'])) < 4:
                            continue
                    except (ValueError, OverflowError):
                        continue
                    building['type'] = 'High Building'
                buildings.append(building)

            return buildings

        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose JSON
            # error is not a RequestException subclass.
            print(f"Request failed (attempt {attempt + 1}): {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                print("Max retries reached. Could not fetch data.")
                print(f"Failed query: {overpass_query}")
                return []

def main():
    """
    Fetch every building category for Illinois, normalise the table and save it to Drive.

    The exported CSV has the columns name, latitude, longitude, address, levels
    and type. Type labels start with an upper-case letter ('Commercial',
    'High Building', 'Multi', ...), and a missing level count is stored as '0'.
    """
    # Define a bounding box for Illinois
    # Format: [south, west, north, east]
    bbox = [36.9701, -91.5131, 42.5083, -87.0199]

    building_types = ['commercial', 'high', 'hospital', 'industrial', 'multi', 'school', 'single']

    all_buildings = []

    for building_type in building_types:
        print(f"Fetching {building_type} buildings and structures...")
        buildings = fetch_building_data(bbox, building_type)
        all_buildings.extend(buildings)
        print(f"Found {len(buildings)} {building_type} buildings and structures")
        time.sleep(2)  # Add a delay between requests to avoid overwhelming the API

    if not all_buildings:
        print("No data fetched. Exiting.")
        return

    print(f"\nTotal buildings and structures found: {len(all_buildings)}")

    # Create a DataFrame restricted to the exported columns. The explicit copy
    # keeps the column assignments below from operating on a slice of another frame.
    columns = ['name', 'latitude', 'longitude', 'address', 'levels', 'type']
    df = pd.DataFrame(all_buildings)[columns].copy()

    # Human-readable type labels. Only the first character is upper-cased:
    # str.capitalize() would also lower-case the rest and turn the intended
    # 'High Building' into 'High building'.
    type_labels = {'high': 'High Building', 'multi': 'Multi', 'single': 'Single'}
    df['type'] = df['type'].replace(type_labels).map(lambda label: label[:1].upper() + label[1:])

    # Replace 'N/A' with '0' in the 'levels' column
    df['levels'] = df['levels'].replace('N/A', '0')

    # Save to CSV
    csv_filename = '/content/drive/MyDrive/Madhu RA Work Folder/xl/buildings_and_structures_illinois.csv'
    df.to_csv(csv_filename, index=False)
    print(f"Done. Data saved in {csv_filename}")

    # Print summary
    category_counts = df['type'].value_counts()
    print("\nSummary of categories:")
    print(category_counts)

    # Print the first few rows
    print("\nFirst few rows of the data:")
    print(df.head().to_string(index=False))

if __name__ == "__main__":
    main()

Fetching commercial buildings and structures...
Found 16649 commercial buildings and structures
Fetching high buildings and structures...
Found 6985 high buildings and structures
Fetching hospital buildings and structures...
Found 387 hospital buildings and structures
Fetching industrial buildings and structures...
Found 11772 industrial buildings and structures
Fetching multi buildings and structures...
Found 25929 multi buildings and structures
Fetching school buildings and structures...
Found 8757 school buildings and structures
Fetching single buildings and structures...
Found 514322 single buildings and structures

Total buildings and structures found: 584801
Done. Data saved in /content/drive/MyDrive/Madhu RA Work Folder/xl/buildings_and_structures_illinois.csv

Summary of categories:
type
Single           514322
Multi             25929
Commercial        16649
Industrial        11772
School             8757
High building      6985
Hospital            387
Name: count, dtype: int64

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import box, Point

# Load the patches data
patches_df = pd.read_csv(r"/content/drive/MyDrive/Madhu RA Work Folder/CSV/cook/Cook_Illinois_zipcode_60607_patches_info.csv")

# One bounding-box polygon per patch, built from its left/bottom/right/top columns.
# Iterating the four columns directly avoids the per-row Series overhead of
# DataFrame.apply(axis=1) and also copes with a CSV that has no rows.
patches_df['geometry'] = gpd.GeoSeries(
    [
        box(left, bottom, right, top)
        for left, bottom, right, top in zip(
            patches_df['left'], patches_df['bottom'], patches_df['right'], patches_df['top']
        )
    ],
    index=patches_df.index,
)

# Patches as a GeoDataFrame in WGS84 (EPSG:4326); the CRS is passed to the
# constructor, the same way the building map cell of this notebook does.
patches_gdf = gpd.GeoDataFrame(patches_df, geometry='geometry', crs="EPSG:4326")

# Load the buildings data
# low_memory=False parses each column in one pass so its dtype is inferred
# consistently. NOTE(review): the recorded output shows a warning pointing at this
# read_csv call, presumably pandas' mixed-dtype warning -- confirm.
buildings_df = pd.read_csv(
    r'/content/drive/MyDrive/Madhu RA Work Folder/xl/buildings_and_structures_illinois.csv',
    low_memory=False,
)

# Point geometry per building, created in one vectorized call (x = longitude,
# y = latitude) instead of one Python-level Point() call per row.
buildings_df['geometry'] = gpd.points_from_xy(buildings_df['longitude'], buildings_df['latitude'])

# Buildings as a GeoDataFrame in WGS84 (EPSG:4326)
buildings_gdf = gpd.GeoDataFrame(buildings_df, geometry='geometry', crs="EPSG:4326")
# def categorize_building(building):
#     if building['amenity'] == 'hospital':
#         return 'Hospital'
#     elif building['amenity'] == 'school':
#         return 'Schools'
#     elif building['building'] == 'commercial':
#         return 'Commercial'
#     elif building['building'] == 'industrial':
#         return 'Industrial'
#     elif building['building'] == 'apartments':
#         return 'Multi'
#     elif building['building'] == 'house':
#         return 'Single'
#     elif building['levels'] != 'N/A' and int(building['levels']) >= 4:
#         return 'High'
#     else:
#         return 'Other'
def categorize_building(row):
    """
    Map one building record to the category used for the per-category patch exports.

    The type label is compared case-insensitively. The buildings CSV stores
    capitalised labels such as 'School', so the former comparison against the
    lower-case 'school' never matched and the 'Schools' category stayed empty.
    Records with any other type are 'High' when their level count parses to four
    or more, otherwise 'Other'.

    :param row: Mapping-like record with the keys 'type' and 'levels'
    :return: One of 'Hospital', 'Schools', 'Commercial', 'Industrial', 'Multi',
        'Single', 'High', 'Other'
    """
    categories_by_type = {
        'hospital': 'Hospital',
        'school': 'Schools',
        'commercial': 'Commercial',
        'industrial': 'Industrial',
        'multi': 'Multi',
        'single': 'Single',
    }
    # str() also covers a missing type that was read back as NaN
    type_key = str(row['type']).strip().lower()
    if type_key in categories_by_type:
        return categories_by_type[type_key]

    try:
        levels = float(row['levels'])
    except (TypeError, ValueError):
        # 'N/A', free text such as '2;3', or a missing value: level count unknown
        levels = float('nan')

    # NaN compares False, so unknown level counts end up in 'Other'
    return 'High' if levels >= 4 else 'Other'

# Label every building row with its category
buildings_gdf['category'] = buildings_gdf.apply(categorize_building, axis=1)

# Attach each building to the patch whose bounding box contains it
joined_gdf = gpd.sjoin(buildings_gdf, patches_gdf, how='inner', predicate='within')


def save_patches_for_category(category):
    """Write the metadata of all patches containing a building of ``category`` to a CSV on Drive.

    Reads the module-level ``joined_gdf`` and ``patches_gdf``; the file name uses
    the lower-cased category.
    """
    slug = category.lower()
    output_filename = f'/content/drive/MyDrive/Madhu RA Work Folder/xl/cook/60607/patches_with_{slug}_buildings_illinois.csv'
    output_columns = ['filename', 'left', 'bottom', 'right', 'top', 'zoom', 'pixel_width', 'pixel_height']

    # Filenames of patches hit by at least one building of this category
    in_category = joined_gdf['category'] == category
    matching_files = joined_gdf.loc[in_category, 'filename'].unique()

    # Keep the metadata of exactly those patches
    selected = patches_gdf['filename'].isin(matching_files)
    patches_gdf.loc[selected, output_columns].to_csv(output_filename, index=False)
    print(f"CSV file '{output_filename}' has been created with patches containing {category} buildings.")


# One CSV per category
categories = ['Commercial', 'High', 'Hospital', 'Industrial', 'Multi', 'Schools', 'Single']
for category in categories:
    save_patches_for_category(category)

print(f"Processing complete. {len(categories)} CSV files have been created for patches containing different types of buildings in Illinois.")

# How many buildings ended up in each category
category_counts = buildings_gdf['category'].value_counts()
print("\nSummary of building categories:")
print(category_counts)

  buildings_df = pd.read_csv(r'/content/drive/MyDrive/Madhu RA Work Folder/xl/buildings_and_structures_illinois.csv')


CSV file '/content/drive/MyDrive/Madhu RA Work Folder/xl/cook/60607/patches_with_commercial_buildings_illinois.csv' has been created with patches containing Commercial buildings.
CSV file '/content/drive/MyDrive/Madhu RA Work Folder/xl/cook/60607/patches_with_high_buildings_illinois.csv' has been created with patches containing High buildings.
CSV file '/content/drive/MyDrive/Madhu RA Work Folder/xl/cook/60607/patches_with_hospital_buildings_illinois.csv' has been created with patches containing Hospital buildings.
CSV file '/content/drive/MyDrive/Madhu RA Work Folder/xl/cook/60607/patches_with_industrial_buildings_illinois.csv' has been created with patches containing Industrial buildings.
CSV file '/content/drive/MyDrive/Madhu RA Work Folder/xl/cook/60607/patches_with_multi_buildings_illinois.csv' has been created with patches containing Multi buildings.
CSV file '/content/drive/MyDrive/Madhu RA Work Folder/xl/cook/60607/patches_with_schools_buildings_illinois.csv' has been created w