# Joining Labeled Data

In [4]:
# Load Libraries 
import geopandas as gpd
import pandas as pd


In [6]:
# Load the labelled point shapefiles, one per land-use class.
formal_gdf = gpd.read_file("landuse_class/formal.shp")
informal_gdf = gpd.read_file("landuse_class/informal.shp")
vegetation_gdf = gpd.read_file("landuse_class/vegetation.shp")

# Drop the 'Id' column from the formal and vegetation layers so the three
# frames line up on the same columns when concatenated.
# errors='ignore' keeps this a no-op if the column is absent.
formal_gdf = formal_gdf.drop(columns=["Id"], errors='ignore')
vegetation_gdf = vegetation_gdf.drop(columns=["Id"], errors='ignore')

# Combine all classes into a single GeoDataFrame with a fresh 0..n-1 index.
combined_gdf = pd.concat([formal_gdf, informal_gdf, vegetation_gdf], ignore_index=True)

# Check the result (expecting 900 points per land-use class).
# A bare expression that is not the last statement of a cell is silently
# discarded by Jupyter, so the preview has to be printed explicitly.
print(combined_gdf.head())
print(combined_gdf["landuse"].value_counts())

  landuse  database                   geometry
0  formal      1984  POINT (36.81875 -1.28296)
1  formal      1984  POINT (36.82147 -1.28555)
2  formal      1984  POINT (36.82169 -1.28429)
3  formal      1984  POINT (36.82322 -1.28458)
4  formal      1984  POINT (36.81933 -1.28491)
landuse
formal        900
informal      900
vegetation    900
Name: count, dtype: int64


In [8]:
# Report the coordinate reference system attached to the combined layer
combined_crs = combined_gdf.crs
print(combined_crs)

EPSG:4326


# Preprocessing
## Load Landsat TIF files
Download the Landsat images from publicly shared Dropbox links.

In [11]:
import requests
import os 

In [14]:
# Create the output folder (no error if it already exists)
output_dir = "landsat_images"
os.makedirs(output_dir, exist_ok=True)

# Landsat composites keyed by acquisition year.
# NOTE: Dropbox share links must end in `dl=1` to return the file itself;
# `dl=0` returns the HTML preview page, which would be saved as a bogus .tif.
landsat_files = {
    "1984": "https://www.dropbox.com/scl/fi/nk6xavounp62ua13knj8z/nairobi_landsat_1984_allbands.tif?rlkey=1hy39oiei4fwcsahydg007n6s&st=smighyej&dl=1",
    "2009": "https://www.dropbox.com/scl/fi/flagfp77yvcyxt0b7plyj/nairobi_landsat_2009_allbands.tif?rlkey=etoqn6whwlkkz08cis68oyp6q&st=gpi4nlk5&dl=1",
    "2019": "https://www.dropbox.com/scl/fi/l09necnzqgyigbd13iddp/nairobi_landsat_2019_allbands.tif?rlkey=0gec6urstoqomk6141f2ag5re&st=4h1715d2&dl=1"
}

for year, url in landsat_files.items():
    output_path = os.path.join(output_dir, f"landsat_{year}.tif")
    # Stream the response so a large raster is never held fully in memory,
    # and bound the wait so a stalled connection cannot hang the notebook.
    with requests.get(url, stream=True, timeout=60) as r:
        # Fail loudly on HTTP errors instead of writing an error page to disk.
        r.raise_for_status()
        # Guard against being served a web page instead of the raster.
        if "text/html" in r.headers.get("Content-Type", ""):
            raise RuntimeError(
                f"Expected a GeoTIFF for {year} but received an HTML page from {url}"
            )
        with open(output_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    print(f"Downloaded landsat_{year}.tif to {output_path}")


Downloaded landsat_1984.tif to landsat_images/landsat_1984.tif
Downloaded landsat_2009.tif to landsat_images/landsat_2009.tif
Downloaded landsat_2019.tif to landsat_images/landsat_2019.tif
