In [2]:
%pip install geopandas requests shapely pandas tqdm




In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [1]:
# Run this cell to install required packages
%pip install segment-geospatial geopandas folium ipywidgets google-auth-oauthlib google-auth-httplib2 google-api-python-client matplotlib descartes rasterio overpy segment-geospatial


Collecting segment-geospatial
  Downloading segment_geospatial-0.12.2-py2.py3-none-any.whl.metadata (11 kB)
Collecting descartes
  Downloading descartes-1.1.0-py3-none-any.whl.metadata (2.4 kB)
Collecting rasterio
  Downloading rasterio-1.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting overpy
  Downloading overpy-0.7-py3-none-any.whl.metadata (3.5 kB)
Collecting fiona (from segment-geospatial)
  Downloading fiona-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (56 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.6/56.6 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
Collecting ipympl (from segment-geospatial)
  Downloading ipympl-0.9.4-py3-none-any.whl.metadata (8.7 kB)
Collecting leafmap (from segment-geospatial)
  Downloading leafmap-0.38.16-py2.py3-none-any.whl.metadata (16 kB)
Collecting localtileserver (from segment-geospatial)
  Downloading localtileserver-0.10.5-py3-none-any.whl.metadata (

In [None]:
import geopandas as gpd
import requests
import json
from shapely.geometry import Polygon, MultiPolygon
import pandas as pd
import time
import random
from tqdm import tqdm

# Lookup table: lower-cased OSM `building=*` value -> target class label.
# The groups are unpacked top to bottom, so key insertion order follows the listing below.
building_type_mapping = {
    # Residential
    **dict.fromkeys(('apartments', 'residential'), 'Multi'),
    **dict.fromkeys(('house', 'detached'), 'Single'),
    'terrace': 'Multi',
    **dict.fromkeys(('semidetached_house', 'bungalow', 'farm', 'cabin'), 'Single'),
    # Commercial
    **dict.fromkeys(
        (
            'commercial', 'retail', 'office', 'supermarket', 'hotel', 'mall', 'kiosk',
            'shop', 'store', 'bank', 'restaurant', 'bar', 'cafe', 'barber_shop',
        ),
        'Commercial',
    ),
    # Industrial
    **dict.fromkeys(
        (
            'industrial', 'warehouse', 'manufacture', 'factory', 'depot',
            'power_station', 'refinery', 'mining', 'mill', 'shipyard',
        ),
        'Industrial',
    ),
    # Educational
    **dict.fromkeys(
        (
            'school', 'university', 'college', 'kindergarten',
            'academy', 'institute', 'library', 'research_institute',
        ),
        'Schools',
    ),
    # Healthcare
    **dict.fromkeys(
        ('hospital', 'clinic', 'healthcare', 'medical_center', 'nursing_home'),
        'Hospital',
    ),
    # High-rise Buildings
    **dict.fromkeys(
        ('highrise', 'tower', 'skyscraper', 'high-rise', 'office_tower', 'residential_tower'),
        'High',
    ),
    # Add more mappings as needed
}

def _parse_number(raw, unit_suffix=''):
    """
    Converts a raw tag value to a float.

    Args:
        raw: The tag value (usually a string) or None.
        unit_suffix: Optional trailing unit (e.g. 'm') removed before parsing.

    Returns:
        The parsed float, or None when the value is missing, not numeric, or NaN.
    """
    if raw is None:
        return None
    text = str(raw).strip().lower()
    if unit_suffix and text.endswith(unit_suffix):
        text = text[:-len(unit_suffix)].strip()
    try:
        value = float(text)
    except ValueError:
        return None
    # NaN compares False against every threshold, so treat it as "no usable value"
    return None if value != value else value


def classify_building(tags):
    """
    Classifies a building into one of the target classes based on its tags.

    Rules, applied in this order (the first one that matches wins):
      1. A `building` value present in `building_type_mapping` decides the class.
      2. `building=public` is refined through `building:use`
         (school/education -> 'Schools', hospital/healthcare -> 'Hospital'),
         everything else is 'Commercial'.
      3. A numeric `building:levels`: >= 5 -> 'High', >= 2 -> 'Multi', otherwise 'Single'.
         Fractional values such as "2.5" are accepted.
      4. A numeric `height` (an optional trailing 'm' is ignored):
         >= 15 -> 'High', >= 7 -> 'Multi', otherwise 'Single'.
         This rule is also used when `building:levels` is present but cannot be parsed.
      5. 'Single' when nothing else applies.

    Args:
        tags: Dict of tag name -> value for one building.

    Returns:
        One of 'Single', 'Multi', 'Commercial', 'Industrial', 'Schools', 'Hospital', 'High'
        (or whatever label `building_type_mapping` holds for the building type).
    """
    # `or ''` also covers a tag that is present with a None value
    building_type = (tags.get('building') or '').lower()
    building_use = (tags.get('building:use') or '').lower()

    # Direct mapping based on building_type
    if building_type in building_type_mapping:
        return building_type_mapping[building_type]

    # 'public' buildings are refined through their declared use
    if building_type == 'public':
        if 'school' in building_use or 'education' in building_use:
            return 'Schools'
        if 'hospital' in building_use or 'healthcare' in building_use:
            return 'Hospital'
        # Government offices, unknown uses and missing uses all default to Commercial
        return 'Commercial'

    # Number of levels takes precedence over height when it can be parsed
    levels = _parse_number(tags.get('building:levels'))
    if levels is not None:
        if levels >= 5:
            return 'High'
        if levels >= 2:
            return 'Multi'
        return 'Single'

    # Fall back to the height, also when the levels value was present but unusable
    height = _parse_number(tags.get('height'), unit_suffix='m')
    if height is not None:
        if height >= 15:  # Approximate height for a high-rise
            return 'High'
        if height >= 7:
            return 'Multi'
        return 'Single'

    # Default classification
    return 'Single'

def get_zip_polygon(zip_code, zcta_gdf):
    """
    Looks up the geometry stored for a ZIP code in the ZCTA table.

    The code is left-padded with zeros to five characters before it is compared with
    the 'ZCTA5CE20' column. Returns the geometry of the first matching row, or None
    when no row matches.
    """
    padded_code = str(zip_code).zfill(5)
    is_match = zcta_gdf['ZCTA5CE20'] == padded_code
    matching_rows = zcta_gdf[is_match]

    if not matching_rows.empty:
        return matching_rows.geometry.values[0]
    return None  # ZIP code not found

def get_poly_coords(polygon):
    """
    Lists the exterior-ring vertices of a Polygon or MultiPolygon as (y, x) pairs.

    For a MultiPolygon the rings of all member polygons are appended one after another
    into a single flat list. Any other geometry type raises ValueError.
    """
    if isinstance(polygon, Polygon):
        parts = [polygon]
    elif isinstance(polygon, MultiPolygon):
        parts = polygon.geoms
    else:
        raise ValueError("Geometry must be a Polygon or MultiPolygon.")

    # Stored as (x, y); swapped so the second coordinate comes first in the output
    return [(y, x) for part in parts for x, y in part.exterior.coords]

def fetch_buildings_for_zip(zip_code, zcta_gdf, overpass_url, query_template, max_retries=3, delay=1):
    """
    Fetches building data for a specific ZIP code using the Overpass API.

    Args:
        zip_code: ZIP code to query; resolved to a geometry through `get_zip_polygon`.
        zcta_gdf: Table of ZIP code areas passed on to `get_zip_polygon`.
        overpass_url: URL of the Overpass API interpreter endpoint.
        query_template: Unused; kept so existing callers keep working.
        max_retries: Maximum number of request attempts.
        delay: Base delay in seconds; the wait before retry k is `delay * k`.

    Returns:
        The list stored under 'elements' in the JSON response, or an empty list when the
        ZIP code is unknown, its geometry is empty, or every attempt failed.
    """
    zip_polygon = get_zip_polygon(zip_code, zcta_gdf)
    # Explicit checks instead of relying on the truthiness of a geometry object
    if zip_polygon is None or zip_polygon.is_empty:
        print(f"ZIP code {zip_code} not found. Skipping.")
        return []

    # NOTE(review): for a MultiPolygon, get_poly_coords returns the rings of all parts in
    # one flat list, so the filter below receives them as a single polygon — confirm this
    # is acceptable for multi-part ZIP code areas.
    coords = get_poly_coords(zip_polygon)
    poly_string = ' '.join(f'{lat} {lon}' for lat, lon in coords)
    query = f'''
    [out:json][timeout:25];
    (
      way(poly:"{poly_string}") [building];
    );
    out geom;
    '''

    for attempt in range(max_retries):
        try:
            # Without a client-side timeout a stalled connection would block the whole run.
            # (connect, read) in seconds; the read limit leaves headroom over the 25 s
            # server-side query timeout requested above.
            response = requests.post(overpass_url, data={'data': query}, timeout=(10, 60))
            if response.status_code == 200:
                return response.json().get('elements', [])
            print(f"Overpass API request failed with status code {response.status_code}. Retrying...")
        except (requests.RequestException, ValueError) as e:
            # RequestException: connection problems and timeouts; ValueError: body is not valid JSON
            print(f"Exception during Overpass API request: {e}. Retrying...")
        # Linear backoff; no point in waiting once the last attempt has failed
        if attempt < max_retries - 1:
            time.sleep(delay * (attempt + 1))
    print(f"Failed to fetch data for ZIP code {zip_code} after {max_retries} attempts.")
    return []

def process_building_element(element):
    """
    Turns one Overpass API element into a flat record describing the building.

    The record holds the element id, selected tags, the class assigned by
    `classify_building`, and the four corners of the bounding box of the element's
    geometry. Returns None (after printing a message) when the element has no
    'geometry' entry or no polygon can be built from it.
    """
    element_id = element.get('id')
    osm_tags = element.get('tags', {})

    # Classify the building
    label = classify_building(osm_tags)

    if 'geometry' not in element:
        print(f"No geometry found for element ID {element_id}")
        return None

    ring = [(node['lon'], node['lat']) for node in element['geometry']]
    try:
        west, south, east, north = Polygon(ring).bounds  # Bounding box coordinates
    except Exception as e:
        print(f"Error creating polygon for element ID {element_id}: {e}")
        return None

    record = {
        'building_id': element_id,
        'building_type': osm_tags.get('building'),
        'building_class': label,
    }
    # Corners run counter-clockwise: bottom-left, bottom-right, top-right, top-left
    corners = ((south, west), (south, east), (north, east), (north, west))
    for number, (lat, lon) in enumerate(corners, start=1):
        record[f'corner{number}_lat'] = lat
        record[f'corner{number}_lon'] = lon
    record['building_levels'] = osm_tags.get('building:levels')
    record['building_material'] = osm_tags.get('building:material')
    record['building_height'] = osm_tags.get('height')
    record['building_use'] = osm_tags.get('building:use')
    return record

def collect_building_data():
    """
    Samples buildings ZIP code by ZIP code and writes a class-balanced table to CSV.

    ZIP codes from the shapefile are visited in random order. Every building returned by
    `fetch_buildings_for_zip` is converted with `process_building_element` and kept only
    while its class is still below DESIRED_COUNT_PER_CLASS. Collection stops as soon as
    every class in TARGET_CLASSES is full.

    A building id is stored at most once, so a building returned for two neighbouring
    ZIP code areas is not duplicated. Whatever has been collected is written to
    OUTPUT_CSV_PATH even when the run is interrupted or fails part-way; the error is
    re-raised after saving.
    """
    # Define constants
    OVERPASS_URL = 'http://overpass-api.de/api/interpreter'
    SHAPEFILE_PATH = "/content/drive/MyDrive/Madhu RA Work Folder/Zip/tl_2022_us_zcta520.shp"
    OUTPUT_CSV_PATH = '/content/drive/MyDrive/Madhu RA Work Folder/newdata/Buildings_USA.csv'
    DESIRED_COUNT_PER_CLASS = 2000
    TARGET_CLASSES = ['Single', 'Multi', 'Commercial', 'Industrial', 'Schools', 'Hospital', 'High']
    MAX_RETRIES = 3
    REQUEST_DELAY = 1

    # Load the ZIP code shapefile
    print("Loading ZIP code shapefile...")
    zcta_gdf = gpd.read_file(SHAPEFILE_PATH)
    print(f"Total ZIP codes loaded: {len(zcta_gdf)}")

    # Extract all ZIP codes
    all_zip_codes = zcta_gdf['ZCTA5CE20'].tolist()
    print("Total ZIP codes to process:", len(all_zip_codes))

    # Shuffle ZIP codes to ensure random distribution across states
    random.shuffle(all_zip_codes)

    # Initialize data storage
    building_data = []
    seen_building_ids = set()
    class_counts = {cls: 0 for cls in TARGET_CLASSES}

    def all_classes_full():
        return all(count >= DESIRED_COUNT_PER_CLASS for count in class_counts.values())

    finished = False
    try:
        for zip_code in tqdm(all_zip_codes, desc="Processing ZIP codes"):
            if all_classes_full():
                print("Desired number of buildings per class reached. Stopping data collection.")
                break

            elements = fetch_buildings_for_zip(
                zip_code=zip_code,
                zcta_gdf=zcta_gdf,
                overpass_url=OVERPASS_URL,
                query_template=None,  # Not used in current implementation
                max_retries=MAX_RETRIES,
                delay=REQUEST_DELAY
            )

            for element in elements:
                building = process_building_element(element)
                if not building:
                    continue

                cls = building['building_class']
                if cls not in TARGET_CLASSES or class_counts[cls] >= DESIRED_COUNT_PER_CLASS:
                    continue

                # Skip buildings already stored from an earlier ZIP code
                building_id = building['building_id']
                if building_id in seen_building_ids:
                    continue
                seen_building_ids.add(building_id)

                building_data.append(building)
                class_counts[cls] += 1

                # The "all full" state can only change at the moment a class fills up
                if class_counts[cls] >= DESIRED_COUNT_PER_CLASS:
                    print(f"Reached desired count for class: {cls}")
                    if all_classes_full():
                        break

            # Respect API rate limits, also after an empty or failed response
            time.sleep(REQUEST_DELAY + random.uniform(0, 1))  # Randomize delay to avoid patterns
        finished = True
    finally:
        # Runs on success, on errors and on a manual interrupt, so hours of collected
        # data are never lost
        if finished:
            print("Data collection completed.")
        else:
            print("Data collection stopped early; saving partial results.")
        print("Building counts per class:")
        for cls, count in class_counts.items():
            print(f"{cls}: {count}")

        # Convert to DataFrame and save to CSV
        df = pd.DataFrame(building_data)
        df.to_csv(OUTPUT_CSV_PATH, index=False)
        print(f"Building data saved to {OUTPUT_CSV_PATH}")


if __name__ == "__main__":
    collect_building_data()


Loading ZIP code shapefile...
Total ZIP codes loaded: 33791
Total ZIP codes to process: 33791


Processing ZIP codes:   0%|          | 1/33791 [00:01<16:59:53,  1.81s/it]

Reached desired count for class: Single


Processing ZIP codes:   0%|          | 14/33791 [00:44<36:44:00,  3.92s/it]

Reached desired count for class: Multi


Processing ZIP codes:   0%|          | 165/33791 [12:58<82:31:29,  8.84s/it]

Reached desired count for class: Commercial


Processing ZIP codes:   1%|          | 343/33791 [27:57<92:53:07, 10.00s/it]

Reached desired count for class: Schools


Processing ZIP codes:   1%|          | 385/33791 [31:48<84:40:21,  9.12s/it] 

Reached desired count for class: Industrial


Processing ZIP codes:   3%|▎         | 1045/33791 [1:28:07<33:58:29,  3.74s/it]

Reached desired count for class: High


Processing ZIP codes:   6%|▌         | 2095/33791 [2:54:29<20:57:40,  2.38s/it]

Overpass API request failed with status code 500. Retrying...
Overpass API request failed with status code 500. Retrying...
Overpass API request failed with status code 500. Retrying...


Processing ZIP codes:   6%|▌         | 2096/33791 [3:00:08<908:09:09, 103.15s/it]

Failed to fetch data for ZIP code 22202 after 3 attempts.


Processing ZIP codes:   9%|▉         | 3031/33791 [4:23:00<86:50:15, 10.16s/it]

Error creating polygon for element ID 195438334: A linearring requires at least 4 coordinates.


Processing ZIP codes:  20%|█▉        | 6625/33791 [9:21:10<20:01:14,  2.65s/it]

Error creating polygon for element ID 833587741: A linearring requires at least 4 coordinates.


Processing ZIP codes:  20%|█▉        | 6671/33791 [9:25:13<47:01:25,  6.24s/it]

Reached desired count for class: Hospital


Processing ZIP codes:  20%|█▉        | 6672/33791 [9:25:36<38:18:58,  5.09s/it]


Desired number of buildings per class reached. Stopping data collection.
Data collection completed.
Building counts per class:
Single: 2000
Multi: 2000
Commercial: 2000
Industrial: 2000
Schools: 2000
Hospital: 2000
High: 2000
Building data saved to /content/drive/MyDrive/Madhu RA Work Folder/newdata/Buildings_USA.csv


In [None]:
import geopandas as gpd
import requests
import json
from shapely.geometry import Polygon, MultiPolygon
import pandas as pd
import time
import random
from tqdm import tqdm

# Lookup table: lower-cased OSM `building=*` value -> target class label.
# The groups are unpacked top to bottom, so key insertion order follows the listing below.
building_type_mapping = {
    # Residential
    **dict.fromkeys(('apartments', 'residential'), 'Multi'),
    **dict.fromkeys(('house', 'detached'), 'Single'),
    'terrace': 'Multi',
    **dict.fromkeys(('semidetached_house', 'bungalow', 'farm', 'cabin'), 'Single'),
    # Commercial
    **dict.fromkeys(
        (
            'commercial', 'retail', 'office', 'supermarket', 'hotel', 'mall', 'kiosk',
            'shop', 'store', 'bank', 'restaurant', 'bar', 'cafe', 'barber_shop',
        ),
        'Commercial',
    ),
    # Industrial
    **dict.fromkeys(
        (
            'industrial', 'warehouse', 'manufacture', 'factory', 'depot',
            'power_station', 'refinery', 'mining', 'mill', 'shipyard',
        ),
        'Industrial',
    ),
    # Educational
    **dict.fromkeys(
        (
            'school', 'university', 'college', 'kindergarten',
            'academy', 'institute', 'library', 'research_institute',
        ),
        'Schools',
    ),
    # Healthcare
    **dict.fromkeys(
        ('hospital', 'clinic', 'healthcare', 'medical_center', 'nursing_home'),
        'Hospital',
    ),
    # High-rise Buildings
    **dict.fromkeys(
        ('highrise', 'tower', 'skyscraper', 'high-rise', 'office_tower', 'residential_tower'),
        'High',
    ),
    # Add more mappings as needed
}

def classify_building(tags):
    """
    Returns the class that `building_type_mapping` assigns to the lower-cased
    'building' tag, or 'Single' when the tag is missing or not in the mapping.
    """
    tag_value = tags.get('building', '').lower()
    if tag_value in building_type_mapping:
        return building_type_mapping[tag_value]
    return 'Single'

def get_location_info(zip_code, zcta_gdf):
    """
    Returns (zip_code, state, county) for a ZIP code found in the ZCTA table.

    The code is zero-padded to five characters and matched against 'ZCTA5CE20'.
    State and county are read from the first matching row's 'STATE' and 'COUNTY'
    columns and are None when the table has no such column. An unknown ZIP code
    yields (None, None, None).
    """
    padded_code = str(zip_code).zfill(5)
    matching_rows = zcta_gdf[zcta_gdf['ZCTA5CE20'] == padded_code]

    if matching_rows.empty:
        return None, None, None

    first_row = matching_rows.iloc[0]
    available = matching_rows.columns
    state = None
    if 'STATE' in available:
        state = first_row['STATE']
    county = None
    if 'COUNTY' in available:
        county = first_row['COUNTY']
    return padded_code, state, county

def process_building_element(element, zip_code, zcta_gdf):
    """
    Turns one Overpass API element into a flat record with class, bounding box and location.

    Args:
        element: Dict for one element; read keys are 'id', 'tags' and 'geometry'
            (a list of dicts with 'lon' and 'lat').
        zip_code: ZIP code the element was fetched for.
        zcta_gdf: ZIP code table passed on to `get_location_info`.

    Returns:
        A dict with the building id, the class from `classify_building`, the four
        bounding-box corners and the zip code / state / county from `get_location_info`,
        or None (after printing a message) when the element has no 'geometry' entry or
        no polygon can be built from it.
    """
    building_id = element.get('id')

    if 'geometry' not in element:
        print(f"No geometry found for element ID {building_id}")
        return None

    coords = [(node['lon'], node['lat']) for node in element['geometry']]
    try:
        minx, miny, maxx, maxy = Polygon(coords).bounds
    except Exception as e:
        # e.g. "A linearring requires at least 4 coordinates." for ways with too few nodes
        print(f"Error creating polygon for element ID {building_id}: {e}")
        return None

    # Classification and the table lookup run only for elements that are actually kept:
    # the lookup filters the whole ZIP code table, which is wasted work for rejected elements.
    building_class = classify_building(element.get('tags', {}))
    zip_code, state, county = get_location_info(zip_code, zcta_gdf)

    return {
        'building_id': building_id,
        'building_class': building_class,
        # Corners run counter-clockwise starting at the bottom-left
        'corner1_lat': miny,
        'corner1_lon': minx,
        'corner2_lat': miny,
        'corner2_lon': maxx,
        'corner3_lat': maxy,
        'corner3_lon': maxx,
        'corner4_lat': maxy,
        'corner4_lon': minx,
        'zip_code': zip_code,
        'state': state,
        'county': county,
    }

def collect_building_data():
    """
    Samples buildings ZIP code by ZIP code and writes a class-balanced table, including
    the location columns produced by `process_building_element`, to CSV.

    ZIP codes from the shapefile are visited in random order. Every building returned by
    `fetch_buildings_for_zip` is kept only while its class is still below
    DESIRED_COUNT_PER_CLASS, and collection stops once every class in TARGET_CLASSES is full.

    A building id is stored at most once, so a building returned for two neighbouring
    ZIP code areas is not duplicated. Whatever has been collected is written to
    OUTPUT_CSV_PATH even when the run is interrupted or fails part-way; the error is
    re-raised after saving.
    """
    OVERPASS_URL = 'http://overpass-api.de/api/interpreter'
    SHAPEFILE_PATH = "/content/drive/MyDrive/Madhu RA Work Folder/Zip/tl_2022_us_zcta520.shp"
    OUTPUT_CSV_PATH = '/content/drive/MyDrive/Madhu RA Work Folder/newdata/Buildings_USA_st.csv'
    DESIRED_COUNT_PER_CLASS = 2000
    TARGET_CLASSES = ['Single', 'Multi', 'Commercial', 'Industrial', 'Schools', 'Hospital', 'High']
    MAX_RETRIES = 3
    REQUEST_DELAY = 1

    print("Loading ZIP code shapefile...")
    zcta_gdf = gpd.read_file(SHAPEFILE_PATH)
    print(f"Total ZIP codes loaded: {len(zcta_gdf)}")

    all_zip_codes = zcta_gdf['ZCTA5CE20'].tolist()
    print("Total ZIP codes to process:", len(all_zip_codes))

    # Random visiting order spreads the sample across the country
    random.shuffle(all_zip_codes)
    building_data = []
    seen_building_ids = set()
    class_counts = {cls: 0 for cls in TARGET_CLASSES}

    def all_classes_full():
        return all(count >= DESIRED_COUNT_PER_CLASS for count in class_counts.values())

    finished = False
    try:
        for zip_code in tqdm(all_zip_codes, desc="Processing ZIP codes"):
            if all_classes_full():
                print("Desired number of buildings per class reached. Stopping data collection.")
                break

            elements = fetch_buildings_for_zip(
                zip_code=zip_code,
                zcta_gdf=zcta_gdf,
                overpass_url=OVERPASS_URL,
                query_template=None,
                max_retries=MAX_RETRIES,
                delay=REQUEST_DELAY
            )

            for element in elements:
                building = process_building_element(element, zip_code, zcta_gdf)
                if not building:
                    continue

                cls = building['building_class']
                if cls not in TARGET_CLASSES or class_counts[cls] >= DESIRED_COUNT_PER_CLASS:
                    continue

                # Skip buildings already stored from an earlier ZIP code
                building_id = building['building_id']
                if building_id in seen_building_ids:
                    continue
                seen_building_ids.add(building_id)

                building_data.append(building)
                class_counts[cls] += 1

                # The "all full" state can only change at the moment a class fills up
                if class_counts[cls] >= DESIRED_COUNT_PER_CLASS and all_classes_full():
                    break

            # Respect API rate limits, also after an empty or failed response
            time.sleep(REQUEST_DELAY + random.uniform(0, 1))
        finished = True
    finally:
        # Runs on success, on errors and on a manual interrupt, so hours of collected
        # data are never lost
        if finished:
            print("Data collection completed.")
        else:
            print("Data collection stopped early; saving partial results.")
        df = pd.DataFrame(building_data)
        df.to_csv(OUTPUT_CSV_PATH, index=False)
        print(f"Building data saved to {OUTPUT_CSV_PATH}")

if __name__ == "__main__":
    collect_building_data()


In [3]:
import os
import pandas as pd
from samgeo import tms_to_geotiff
from PIL import Image
import time
from math import cos, radians

def download_and_resize_image(image_path, bbox, zoom, source, output_size):
    """
    Downloads the imagery covering `bbox` and stores it at `image_path`, resized to `output_size`.

    Download and resize are done on a temporary sibling file that is moved onto
    `image_path` only after both steps succeeded. A failed call therefore never leaves a
    partial or un-resized file at `image_path` (which a later "skip if the file already
    exists" check would mistake for a finished image) and never damages a file that is
    already there.

    Args:
        image_path: Destination path of the image file.
        bbox: Bounding box passed through to `tms_to_geotiff`.
        zoom: Zoom level passed through to `tms_to_geotiff`.
        source: Tile source passed through to `tms_to_geotiff`.
        output_size: (width, height) in pixels of the stored image.

    Returns:
        (original_size, resized_size) as (width, height) tuples, or (None, None) when
        the download or the resize failed.
    """
    # Keep the original extension last so the file type is still recognised from the name
    root, ext = os.path.splitext(image_path)
    temp_path = f"{root}.part{ext}"
    try:
        # Download the image using the geographic bounding box
        tms_to_geotiff(output=temp_path, bbox=bbox, zoom=zoom, source=source, overwrite=True)

        # Open the downloaded image
        with Image.open(temp_path) as img:
            original_size = img.size

            # The image is stretched to exactly output_size; the caller is responsible
            # for choosing a size that matches the aspect ratio of the bounding box
            img_resized = img.resize(output_size, Image.LANCZOS)

        # The source file is closed at this point, so it can be overwritten safely
        img_resized.save(temp_path)
        os.replace(temp_path, image_path)

        return original_size, img_resized.size
    except Exception as e:
        # Best effort: one failing building must not stop a long download run
        print(f"Failed to download or resize image for bbox {bbox}. Error: {e}")
        return None, None
    finally:
        # Only present when something failed before the final move
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass

In [2]:
import os
import pandas as pd
import time
from math import radians, cos

def main():
    """
    Downloads a satellite image for every building listed in Buildings_USA.csv.

    For each row the bounding box given by the four stored corners is enlarged by
    `bbox_expansion_factor`, the image is fetched with `download_and_resize_image`, and
    the file is stored as <output_base_dir>/<building_class>/building_<building_id>.tif.
    The image height is fixed; the width follows the aspect ratio of the enlarged box
    measured in metres.

    At most `max_images_per_class` images are saved per class, and the loop stops once
    every class that occurs in the CSV has reached that limit. Rows without a class or
    with a degenerate (zero-width or zero-height) bounding box are skipped.
    """
    zoom = 22  # Adjust zoom level as needed
    source = "Satellite"

    # Bounding box expansion factor (e.g., 0.1 for 10% expansion)
    bbox_expansion_factor = 0.3  # Adjust this value to control the space around the building

    # Desired image height in pixels (you can adjust this)
    desired_image_height = 500  # pixels

    df = pd.read_csv('/content/drive/MyDrive/Madhu RA Work Folder/newdata/Buildings_USA.csv')

    output_base_dir = '/content/drive/MyDrive/Madhu RA Work Folder/newdata'

    # Dictionary to keep track of counts per class
    class_counts = {}
    max_images_per_class = 2000

    # Every class that occurs in the CSV; needed to decide when all of them are complete
    # (class_counts alone only knows the classes seen so far)
    all_classes = set(df['building_class'].dropna())

    # Loop over each building in the DataFrame
    for _, row in df.iterrows():
        building_id = row['building_id']
        building_class = row['building_class']
        if pd.isnull(building_class):
            continue  # Skip buildings with no class

        # Initialize count for this class if not already done
        class_counts.setdefault(building_class, 0)

        # Check if we have already downloaded the maximum number of images for this class
        if class_counts[building_class] >= max_images_per_class:
            continue  # Skip to next building

        # Compute min and max latitudes and longitudes from the four corners in the CSV
        latitudes = [row[f'corner{i}_lat'] for i in range(1, 5)]
        longitudes = [row[f'corner{i}_lon'] for i in range(1, 5)]
        min_lat, max_lat = min(latitudes), max(latitudes)
        min_lon, max_lon = min(longitudes), max(longitudes)

        # Calculate the width and height of the bounding box in degrees
        width_deg = max_lon - min_lon
        height_deg = max_lat - min_lat

        # A flat box would divide by zero below; written as `not (... > 0)` so that
        # missing (NaN) coordinates are rejected as well
        if not (width_deg > 0 and height_deg > 0):
            print(f"Skipping building ID {building_id}: degenerate bounding box")
            continue

        # Expand the bounding box by the expansion factor (half of it on each side)
        min_lon_expanded = min_lon - (width_deg * bbox_expansion_factor / 2)
        max_lon_expanded = max_lon + (width_deg * bbox_expansion_factor / 2)
        min_lat_expanded = min_lat - (height_deg * bbox_expansion_factor / 2)
        max_lat_expanded = max_lat + (height_deg * bbox_expansion_factor / 2)

        # Calculate the center latitude for distance calculations
        center_lat = (min_lat_expanded + max_lat_expanded) / 2.0
        lat_rad = radians(center_lat)

        # Approximate meters per degree latitude and longitude
        meters_per_deg_lat = 111320  # meters per degree latitude is approximately constant
        meters_per_deg_lon = 111320 * cos(lat_rad)  # varies with latitude

        # Calculate physical width and height in meters
        width_meters = (max_lon_expanded - min_lon_expanded) * meters_per_deg_lon
        height_meters = (max_lat_expanded - min_lat_expanded) * meters_per_deg_lat

        # Image width follows the aspect ratio of the box, but is at least 1 pixel
        aspect_ratio = width_meters / height_meters
        desired_image_width = max(int(desired_image_height * aspect_ratio), 1)
        output_size = (desired_image_width, desired_image_height)

        # Create expanded bbox
        bbox = [min_lon_expanded, min_lat_expanded, max_lon_expanded, max_lat_expanded]
        print(f"Processing building ID {building_id} with expanded bbox {bbox}")

        # Create the output directory for this class if it doesn't exist
        class_output_dir = os.path.join(output_base_dir, building_class)
        os.makedirs(class_output_dir, exist_ok=True)

        # Define the image file path using the building ID
        image_filename = f"building_{building_id}.tif"
        image_path = os.path.join(class_output_dir, image_filename)

        # Download and resize the image
        original_size, resized_size = download_and_resize_image(
            image_path, bbox, zoom, source, output_size)

        if original_size and resized_size:
            print(f"Image saved for building ID {building_id}")
            # Increment the count for this class
            class_counts[building_class] += 1
        else:
            print(f"Failed to download image for building ID {building_id}")

        # Check if we have reached the maximum images for this class
        if class_counts[building_class] >= max_images_per_class:
            print(f"Reached maximum images for class '{building_class}'")
            # The overall state can only change when a class fills up, so this is the
            # one place where the "everything done" test has to run
            if all(class_counts.get(cls, 0) >= max_images_per_class for cls in all_classes):
                print("Reached maximum images for all classes")
                break
            continue  # Continue to next building

        # Sleep to respect server rate limits
        time.sleep(1)

if __name__ == "__main__":
    main()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Downloaded image 061/154
Downloaded image 062/154
Downloaded image 063/154
Downloaded image 064/154
Downloaded image 065/154
Downloaded image 066/154
Downloaded image 067/154
Downloaded image 068/154
Downloaded image 069/154
Downloaded image 070/154
Downloaded image 071/154
Downloaded image 072/154
Downloaded image 073/154
Downloaded image 074/154
Downloaded image 075/154
Downloaded image 076/154
Downloaded image 077/154
Downloaded image 078/154
Downloaded image 079/154
Downloaded image 080/154
Downloaded image 081/154
Downloaded image 082/154
Downloaded image 083/154
Downloaded image 084/154
Downloaded image 085/154
Downloaded image 086/154
Downloaded image 087/154
Downloaded image 088/154
Downloaded image 089/154
Downloaded image 090/154
Downloaded image 091/154
Downloaded image 092/154
Downloaded image 093/154
Downloaded image 094/154
Downloaded image 095/154
Downloaded image 096/154
Downloaded image 097/154
Downloaded

KeyboardInterrupt: 

In [5]:
import os
import pandas as pd
import time
from math import radians, cos

def main(target_class='industrial'):
    """
    Downloads satellite images for the buildings of one class listed in Buildings_USA.csv.

    Only rows whose 'building_class' equals `target_class` (compared case-insensitively)
    are processed. For each of them the bounding box given by the four stored corners is
    enlarged by `bbox_expansion_factor`, the image is fetched with
    `download_and_resize_image`, and the file is stored as
    <output_base_dir>/<building_class>/building_<building_id>.tif.

    Buildings whose image file already exists are skipped, so an interrupted run can be
    resumed; existing files do not count towards `max_images_per_class`. Rows with a
    degenerate (zero-width or zero-height) bounding box are skipped as well.

    Args:
        target_class: Name of the building class to download images for.
    """
    zoom = 22  # Adjust zoom level as needed
    source = "Satellite"

    # Bounding box expansion factor (e.g., 0.1 for 10% expansion)
    bbox_expansion_factor = 0.3  # Adjust this value to control the space around the building

    # Desired image height in pixels (you can adjust this)
    desired_image_height = 500  # pixels

    df = pd.read_csv('/content/drive/MyDrive/Madhu RA Work Folder/newdata/Buildings_USA.csv')

    output_base_dir = '/content/drive/MyDrive/Madhu RA Work Folder/newdata'

    # Dictionary to keep track of counts per class
    class_counts = {}
    max_images_per_class = 2000

    wanted = target_class.lower()

    # Loop over each building in the DataFrame
    for _, row in df.iterrows():
        building_id = row['building_id']
        building_class = row['building_class']
        if pd.isnull(building_class) or building_class.lower() != wanted:
            continue  # Skip buildings that are not of the requested class

        # Initialize count for this class if not already done
        class_counts.setdefault(building_class, 0)

        # Check if we have already downloaded the maximum number of images for this class
        if class_counts[building_class] >= max_images_per_class:
            continue  # Skip to next building

        # Create the output directory for this class if it doesn't exist
        class_output_dir = os.path.join(output_base_dir, building_class)
        os.makedirs(class_output_dir, exist_ok=True)

        # Define the image file path using the building ID
        image_filename = f"building_{building_id}.tif"
        image_path = os.path.join(class_output_dir, image_filename)

        # Skip if the image already exists (checked before any geometry work is done)
        if os.path.exists(image_path):
            print(f"Image already exists for building ID {building_id}, skipping.")
            continue

        # Compute min and max latitudes and longitudes from the four corners in the CSV
        latitudes = [row[f'corner{i}_lat'] for i in range(1, 5)]
        longitudes = [row[f'corner{i}_lon'] for i in range(1, 5)]
        min_lat, max_lat = min(latitudes), max(latitudes)
        min_lon, max_lon = min(longitudes), max(longitudes)

        # Calculate the width and height of the bounding box in degrees
        width_deg = max_lon - min_lon
        height_deg = max_lat - min_lat

        # A flat box would divide by zero below; written as `not (... > 0)` so that
        # missing (NaN) coordinates are rejected as well
        if not (width_deg > 0 and height_deg > 0):
            print(f"Skipping building ID {building_id}: degenerate bounding box")
            continue

        # Expand the bounding box by the expansion factor (half of it on each side)
        min_lon_expanded = min_lon - (width_deg * bbox_expansion_factor / 2)
        max_lon_expanded = max_lon + (width_deg * bbox_expansion_factor / 2)
        min_lat_expanded = min_lat - (height_deg * bbox_expansion_factor / 2)
        max_lat_expanded = max_lat + (height_deg * bbox_expansion_factor / 2)

        # Calculate the center latitude for distance calculations
        center_lat = (min_lat_expanded + max_lat_expanded) / 2.0
        lat_rad = radians(center_lat)

        # Approximate meters per degree latitude and longitude
        meters_per_deg_lat = 111320  # meters per degree latitude is approximately constant
        meters_per_deg_lon = 111320 * cos(lat_rad)  # varies with latitude

        # Calculate physical width and height in meters
        width_meters = (max_lon_expanded - min_lon_expanded) * meters_per_deg_lon
        height_meters = (max_lat_expanded - min_lat_expanded) * meters_per_deg_lat

        # Image width follows the aspect ratio of the box, but is at least 1 pixel
        aspect_ratio = width_meters / height_meters
        desired_image_width = max(int(desired_image_height * aspect_ratio), 1)
        output_size = (desired_image_width, desired_image_height)

        # Create expanded bbox
        bbox = [min_lon_expanded, min_lat_expanded, max_lon_expanded, max_lat_expanded]
        print(f"Processing building ID {building_id} with expanded bbox {bbox}")

        # Download and resize the image
        original_size, resized_size = download_and_resize_image(
            image_path, bbox, zoom, source, output_size)

        if original_size and resized_size:
            print(f"Image saved for building ID {building_id}")
            # Increment the count for this class
            class_counts[building_class] += 1
        else:
            print(f"Failed to download image for building ID {building_id}")

        # Check if we have reached the maximum images for this class
        if class_counts[building_class] >= max_images_per_class:
            print(f"Reached maximum images for class '{building_class}'")
            continue  # Remaining rows of this class are skipped by the check above

        # Sleep to respect server rate limits
        time.sleep(1)

if __name__ == "__main__":
    main()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Downloaded image 415/700
Downloaded image 416/700
Downloaded image 417/700
Downloaded image 418/700
Downloaded image 419/700
Downloaded image 420/700
Downloaded image 421/700
Downloaded image 422/700
Downloaded image 423/700
Downloaded image 424/700
Downloaded image 425/700
Downloaded image 426/700
Downloaded image 427/700
Downloaded image 428/700
Downloaded image 429/700
Downloaded image 430/700
Downloaded image 431/700
Downloaded image 432/700
Downloaded image 433/700
Downloaded image 434/700
Downloaded image 435/700
Downloaded image 436/700
Downloaded image 437/700
Downloaded image 438/700
Downloaded image 439/700
Downloaded image 440/700
Downloaded image 441/700
Downloaded image 442/700
Downloaded image 443/700
Downloaded image 444/700
Downloaded image 445/700
Downloaded image 446/700
Downloaded image 447/700
Downloaded image 448/700
Downloaded image 449/700
Downloaded image 450/700
Downloaded image 451/700
Downloaded