<a href="https://colab.research.google.com/github/saudaminisahoo/HACKATHON_2025/blob/main/Unsupervised.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import geopandas as gpd
import folium
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
import os
import glob
import geopandas as gpd
import pandas as pd
import folium
import numpy as np
from yellowbrick.cluster import SilhouetteVisualizer
from folium import Popup
from folium.plugins import HeatMap

## Load all shapefiles of the dataset and merge them into a single GeoDataFrame

In [None]:
# Folder that contains the shapefiles.
# EDIT THIS: point it at the "25K" folder of the extracted dataset
# (multi_layer_geological_map_of_karnataka_and_andhra_pradesh_25k_scale_v1).
base_dir = os.path.join(
    "multi_layer_geological_map_of_karnataka_and_andhra_pradesh_25k_scale_v1",
    "25K",
)

# Recursively find all .shp files in the base directory.
# Sorted so that the row order of the merged frame is reproducible.
shapefile_paths = sorted(
    glob.glob(os.path.join(base_dir, "**", "*.shp"), recursive=True)
)
print("Found shapefiles:", shapefile_paths)

# Fail early with a readable message instead of an opaque pd.concat error.
if not shapefile_paths:
    raise FileNotFoundError(
        f"No .shp files found under {base_dir!r}; set base_dir to the dataset folder."
    )

# List to hold each GeoDataFrame, plus the union of all attribute field names.
# A dict is used as an ordered set so the column order is the same on every run.
geodfs = []
seen_columns = {}

# Load each shapefile, reproject to EPSG:4326, and gather attribute fields
for shp_path in shapefile_paths:
    gdf = gpd.read_file(shp_path)
    if gdf.crs != "EPSG:4326":
        gdf = gdf.to_crs(epsg=4326)
    # Record any column names not seen in earlier shapefiles
    seen_columns.update(dict.fromkeys(gdf.columns))
    geodfs.append(gdf)

# Union of columns, in first-seen order
all_columns = list(seen_columns)

# Ensure each GeoDataFrame has all columns; if missing, add them with None as value
aligned_geodfs = []
for gdf in geodfs:
    for col in all_columns:
        if col not in gdf.columns:
            gdf[col] = None
    # Reorder the columns to match the union so every frame has the same layout
    aligned_geodfs.append(gdf[all_columns])

# Merge all GeoDataFrames into one merged GeoDataFrame
merged_gdf = pd.concat(aligned_geodfs, ignore_index=True)

# Print the list of attribute fields from the merged shapefile
print("Attributes in the merged shapefile:")
print(merged_gdf.columns)


## Extract the Key Attributes with Meaningful data and Plot them

In [None]:
# Columns judged meaningful for classification after inspecting the data
columns_to_keep = ['geometry', 'accessory_', 'major_mine', 'lithologic', 'texture']

# Keep only those columns and drop every row that has a null in any of them.
# The explicit copy makes filtered_gdf an independent frame, so the label
# columns that later cells add to it are not written to a derived selection
# of merged_gdf (intended to avoid pandas' chained-assignment warning).
filtered_gdf = merged_gdf[columns_to_keep].dropna().copy()

print("Number of data points in merged_gdf:", len(merged_gdf))
print("Number of data points in filtered_gdf:", len(filtered_gdf))

## Plot the inputs for classification

In [None]:
# Calculate the center of the map based on the geometry centroids
# (centroids are computed once and reused for both coordinates)
centroids = filtered_gdf.geometry.centroid
center = [centroids.y.mean(), centroids.x.mean()]

# Create a Folium map using the CartoDB Positron basemap
m = folium.Map(location=center, zoom_start=10, tiles='CartoDB positron')

# Specify the fields you want to display in the tooltip
fields = ['accessory_', 'major_mine', 'lithologic', 'texture']

# Create a tooltip that displays all attributes when hovering.
# It has to exist before the GeoJson layer is built so it can be attached to it.
tooltip = folium.GeoJsonTooltip(
    fields=fields,
    aliases=fields,  # You can change these to more user-friendly names if needed
    localize=True,
    sticky=False,
    labels=True,
    style="""
        background-color: #F0EFEF;
        border: 2px solid black;
        border-radius: 3px;
        box-shadow: 3px;
    """,
    max_width=800,
)

# Add the shapefile data to the map as a GeoJSON layer with the hover tooltip
folium.GeoJson(filtered_gdf, tooltip=tooltip).add_to(m)

# Display the map as the cell output
m

## Define the Integer Category Codes for Each Attribute

In [None]:
# Lookup table that encodes the free-text 'accessory_' attribute as an
# integer category code (1-7 in this table).
# Keys are matched exactly by Series.map, i.e. the lookup is case- and
# whitespace-sensitive; that is why spelling/case variants of the same
# value appear as separate keys.
# NOTE(review): 'OPAQUE,SPHENE,ZIRCON,APATITE' maps to 6 while
# 'Opaque,Sphene,Zircon,Apatite' maps to 1 -- confirm this is intentional.
accessory_mapping = {
 'CHALCOPYRITE':1,
 'IRON OXIDE': 1,
 'iron oxide': 1,
 'iron oxides, epidote, apatite': 1,
 'magnetite': 1,
 'Magnetite': 1,
 'Magnetite, apatite, hornblende and biotite': 1,
 'magnetite, biotite and zircon': 1,
 'opaque iron ores': 1,
 'opaque minerals in which magnetite are the major one': 1,
 'Opaque oxides (magnetite) and sphene': 1,
 'Opaque, Pyrite': 1,
 'Opaque,Sphene,Zircon,Apatite': 1,
 'OPAQUE,SPHENE,ZIRCON,APATITE': 6,
 'opaques': 1,
 'opaques and minor K-feldspar, quartz and carbonates': 1,
 'Opaques and sphenes': 1,
 'opaques in a chlorite or siliceous matrix': 1,
 'opaques with minor amphiboles': 1,
 'opaques, amphiboles': 1,
 'opaques, epidote, sphene': 1,
 'opaques, pyrite': 1,
 'PYRITE': 1,
 'pyrite': 1,
 'PYRITE,CALACITE': 1,
 'biotite':2,
 'biotite, epidote and opaques':2,
 'biotite, hornblende and opaque':2,
 'Biotite, Opaque':2,
 'CHERT':3,
 'Chert and ferruginous zones':3,
 'Chert, siliceous and micaceous impurities':3,
 'chlorite':2,
 'chlorite, biotite and magnetite':2,
 'Chlorite, sericite, epidotes':2,
 'Chlorite,Sericite,Epidote':2,
 'Epidote, sphene and minor opaque':2,
 'epidote, sphene and minor opaque':2,
 'Epidote, sphene, apatite, zircon, magnetite, ilmenite, allanite':2,
 'Hornblende, apatite, zircon, magnetite':2,
 'hornblende, biotite, sphene and opaque':2,
 'MICA':2,
 'Mica, Amphibole, Epidote, Magnetite, Sphene':2,
 'Mica, Chlorite, Lithic Fragment, Zircon':2,
 'minor K-feldspar and opaques':2,
 'muscovite and fuchsite':2,
 'Muscovite and hornblende':2,
 'QUARTZ IN FRINGES':3,
 'quartz, opaques, carbonates':3,
 'sphene and apatite':6,
 'sphene and opaques':6,
 'Sphene, apatite, magnetite':6,
 'Sphene, apatite, zircon and magnetite':6,
 'sphene, epidote and zircon':6,
 'Cacite':4,
 'CALCITE,QUARTZ':4,
 'CARBONATE':4,
 'CLINOZOISITE,ANKERITE':2,
 'ZOISITE,AUGITE,EPIDOTE':2,
 'ZOISITE,CALCITE,PYRITE':2,
 'ZOISITE,PYRITE':2,
 'Apatite, Sphene and magnetite':5,
 'Apatite,Zircon,Rutile':5,
 'Sphene, zircon, allanite, epidote, opaque':6,
 'Sphene, zircon, monazite, apatite, opaques':6,
 'Titanite, Garnet, Magnetite':6,
 'Zircon rutile and apatite':6,
 'dunite-peridotite-pyroxenite':7,
 'epidote and magnetite':2,
 'epidote and opaques':2,
 'Epidote, opaques, pyrite':2,
 'fissile chlorite schist':2,
 'fluorite and sphene':7,
 'Goethite, limonite and chert':1,
 'hematite':1,
 'hematite/magnetite alternating with quartz':1,
 'minor opaques, quartz, K-feldspar':1,
 'minor opaques, quartz, K-feldspar with secondary chlorite and clay minerals':1,
 'myrmakite epidote, sphene, zircon, apatite, opaques':2,
 'occasionally hematite':1,
 'quartz chlorite epidote, garnet and magnetite':3,
 'Rutile and apatite':6,
 'siliceous and sericite rich matrix':3,
 'Siliceous and sericite rich matrix':3,
 'Sphene, magnetite, apatite, zircon':6,
 'Tremolite,Asbestos,Magnetite,Calcite':2,
}

# Encode 'accessory_' with the table above. Values that have no exact key
# match come back as NaN from map() and are filled with 0 before the int cast.
filtered_gdf['accessory_labels'] = filtered_gdf['accessory_'].map(accessory_mapping).fillna(0).astype(int)

In [None]:
# Lookup table that encodes the free-text 'major_mine' attribute as an
# integer category code (1-8 in this table).
# Keys are matched exactly by Series.map, i.e. the lookup is case- and
# whitespace-sensitive; spelling/case variants are therefore listed as
# separate keys (one key even carries a trailing ' \r\n').
major_mine_mapping = {
 'K- feldspar and hornblende':1,
 'K-feldspar, Quartz, Plagioclase':1,
 'K-feldspar, Quartz, Plagioclase \r\n':1,
 'K-feldspar, quartz, plagioclase, biotite, hornblende':1,
 'K-feldspar, Quartz, Plagioclase, Hornblende, Biotite':1,
 'K-feldspar,Quartz,Biotite,Plagioclase,Hornblende':1,
 'Plagiclase feldspar, K-feldspar,quartz, hornblende, biotite':1,
 'Plagiclase, quartz, K-Feldspar,hornblende, chlorite, biotite':1,
 'Plagioclase feldspar, alkali feldspar, quartz, hornblende':1,
 'plagioclase feldspar, potash feldspar, biotite and quartz':1,
 'plagioclase feldspar, quartz, microcline, hornblende':1,
 'Plagioclase, actinolite and hornblende':4,
 'plagioclase, actinolite, hornblende':4,
 'plagioclase, fibrous chlorite':1,
 'plagioclase, hornblende and actinolite':4,
 'Plagioclase, orthoclase and microcline, biotite, hornblende, epidote, apatite, sphene, zircon, opaques':1,
 'Plagioclase, orthoclase, microcline, orthoclase microcline perthite, quartz, hornblende, biotite':1,
 'plagioclase, quartz':1,
 'Plagioclase, quartz, hornblende, K-feldspar, biotite':1,
 'Plagioclase, Quartz, K-feldspar, Hornblende, Biotite':1,
 'Plagioclase, quartz, microclene, orthoclase':1,
 'Plagioclase, quartz, microcline':1,
 'Plagioclase, quartz, orthoclase, microclene':1,
 'PLAGIOCLASE,CHLORITE,PYROXENE':1,
 'Plagioclase,potash feldspar,Biotite,Hornblende':1,
 'PLAGIOCLASE,PYROXENE':1,
 'plagioclase,pyroxene':1,
 'PLAGIOCLASE,PYROXENE,AMPHIBOLE,ZOISITE':1,
 'banded ferruginous chert/banded ferruginous quartzite':8,
 'CHERT,QUARTZ':2,
 'CHERT,QUARTZ,ARGILLITE':2,
 'quartz':2,
 'Quartz':2,
 'QUARTZ':2,
 'quartz and feldspar':2,
 'Quartz and plagioclase':2,
 'quartz, biotite, plagioclase, potash feldspar, hornblende':1,
 'quartz, chlorite, actinolite, sericite, biotite, plagioclase, opaques':3,
 'Quartz, chlorite, sericite':2,
 'quartz, feldspar, biotite and hornblend':2,
 'quartz, feldspar, biotite and little hornblende':2,
 'quartz, feldspar, chlorite, biotite, sericite':3,
 'quartz, feldspar, microcline, orthoclase, plagioclase':1,
 'quartz, feldspar, sericite':2,
 'quartz, K- felspar and plagioclase':1,
 'Quartz, K-feldspar':2,
 'Quartz, K-Feldspar, plagiclase, mafics, opaque':1,
 'Quartz, K-Feldspar, plagioclase, hornblende, chlorite, biotite':1,
 'quartz, K-feldspar,plagioclase feldspar':1,
 'quartz, mica and plagioclase':2,
 'quartz, microcline, microcline perthite, orthoclase, plagioclase, biotite, hornblende':1,
 'quartz, Mn-amphibole, garnet and Fe- oxides':2,
 'Quartz, orthoclase, microclene, plagioclase':1,
 'quartz, plagioclase and biotite':2,
 'quartz, plagioclase,  orthoclase  microcline with hornblende and biotite':1,
 'quartz, plagioclase, K- feldspar, sericite, biotite, hornblende,':2,
 'quartz, plagioclase, K-feldspar, sericite':1,
 'quartz, rock fragments':2,
 'quartz, sericite':2,
 'QUARTZ,CHLORITE,CHERT':2,
 'Quartz,Chlorite,Sericite,Opaque and pyrite':2,
 'Quartz,Hematite,Magnetite':8,
 'QUARTZ,K-FELDSPAR,PLAGIOCLASE,BIOTITE,HORNBLENDE':1,
 'Quartz,Plagioclase,K-feldspar,Biotite':1,
 'QUARTZ,SERICITE':2,
 'QUARTZ,SERICITE,MICA':3,
 'QUARTZ,SHALE':2,
 'Quartz,Tourmaline':2,
 'quartz-chlorite-sericite':2,
 'quartzite':2,
 'quartzite and magnetite':2,
 'QUARTZITE,IRON':2,
 'silt':2,
 'subhedral grains of quartz with flakes of fuchsitic mica':2,
 'biotite, hornblende with garnet':3,
 'Biotite, plagioclase, quartz, microcline, orthoclase  and hornblende':1,
 'Biotite,Hornblende,Plahioclase,Quartz':3,
 'mica, chlorite and actinolite':3,
 'minor quartz, biotite':2,
 'actinolite, chlorite, epidote, quartz and opaques':4,
 'ACTINOLITE,CHLORITE,PYROXENE,PLAGIOCLASE':4,
 'actinolite,Chlorite,Pyroxene,Plagioclase and quartz':4,
 'Actinolite,Chlorite,Quartz,Opaque, Kaolinised-Plagioclase':4,
 'amphibole, chlorite, epidote':4,
 'Amphibolite, hornblende, acinolite, plagioclase':4,
 'augite and plagioclase':5,
 'Augite, plagioclase, iron oxide':5,
 'hornblende':4,
 'hornblende and plagioclase':4,
 'hornblende, actinolite and plagioclase':4,
 'hornblende, actinolite, plagioclase, quartz':4,
 'hornblende, actinolite, quartz and plagioclase':4,
 'hornblende, actinolite, tremolite, minor amounts of diopside, augite, epidote, mica, calcite, plagioclase and quartz.':4,
 'hornblende, feldspar and pyroxene':4,
 'Hornblende, plagiclase feldspar, quartz':4,
 'Hornblende, Plagioclase':4,
 'hornblende, plagioclase and quartz':4,
 'hornblende, plagioclase, quartz, epidote, chlorite':4,
 'hornblende, pyroxene, plagioclase, zoisite':4,
 'hornblende, relict pyroxene, plagioclase, quartz':4,
 'HORNBLENDE,PLAGIOCLASE,QUARTZ,MICROCLINE,BIOTITE':4,
 'Serpentine and Talc':6,
 'Talc, tremolite, chlorite, serpentine and carbonate':4,
 'talc, tremolite, serpentine, chlorite, carbonates (calcite), opaques':4,
 'talc-tremolite-chlorite schist with steatite-talc bands, talc- tremolite-chlorite schists and bands of hard serpentinised dunite':4,
 'PYROXENE, PLAGIOCLASE FELDSPAR':5,
 'PYROXENE,CHLORITE,ACTINOLITE':5,
 'chlorite, plagioclase, carbonates':6,
 'chlorite, plagioclase, quartz, actinolite':6,
 'Chlorite, quartz, calcite':6,
 'Chlorite, quartz, plagioclase, actinolite':6,
 'chlorite, quartz,plagioclase felspars':6,
 'Chlorite, sericite, biotite, quartz and plagioclase':3,
 'Chlorite,Actinolite,Epidote,Quartz,Calcite,Kaolinite, Plagioclase':6,
 'CHLORITE,QUARTZ,PLAGIOCLASE':6,
 'CHLORITE,QUARTZ,SERICITE':6,
 'chloritic matrix':6,
 'CALCITE':7,
 'CALCITE,ACTINOLITE,CHLORITE,PALGIOCLASE':7,
 'CALCITE,CHLORITE,FELDSPAR':7,
 'CALCITE,QUARTZ':7,
 'CARBONATE,QUARTZ':7,
 'greywacke, argillite':2,
 'Limestone':7,
 'FERRUGINOUS PHYLLITE, CHERT':8,
 'Haematite, quartzite':8,
 'HEMATITE':8,
 'Hematite and quartzite':8,
 'Iron and Manganese':8,
 'magnetite alternating with white/grayish white layers of quartz':8,
 'Magnetite, Hematite, Limonite, quartz, chert':8,
}

# Encode 'major_mine' with the table above. Values that have no exact key
# match come back as NaN from map() and are filled with 0 before the int cast.
filtered_gdf['major_mine_labels'] = filtered_gdf['major_mine'].map(major_mine_mapping).fillna(0).astype(int)

In [None]:
# Lookup table that encodes the free-text 'lithologic' attribute as an
# integer category code (1-11 in this table).
# Keys are matched exactly by Series.map, i.e. the lookup is case- and
# whitespace-sensitive.
lithologic_mapping = {
 'Dolerite':1,
 'Dolerite/Gabbro':1,
 'Gabbro':1,
 'Massive metabasalt/Meta gabbro':1,
 'Meta gabbro':1,
 'Meta ultramafite':7,
 'Metavolcanics and subvolcanics of Gadwal-Raichur Greenstone Belt':8,
 'Pillowed metabasalt':8,
 'Adamellite-granite Suite':2,
 'Alkali feldspar granite':2,
 'Aplite':2,
 'Banded gneiss':2,
 'Banded gneiss and migmatite (M1)':2,
 'Biotite gneiss':2,
 'Diorite':10,
 'Granite':2,
 'Granite gneiss':2,
 'Granite-Biotite gneiss':2,
 'Granitoid':2,
 'Granodiorite':2,
 'Granodiorite adamellite Suite':2,
 'Grey granite':2,
 'Grey granite and migmatite (M2)':2,
 'Grey hornblende biotite gneiss':2,
 'Grey hornblende biotite granite':2,
 'Hornblende biotite gneiss':2,
 'Hornblendite':10,
 'Migmatite gneiss':2,
 'MME rich Granodiorite':2,
 'Monzogranite':2,
 'Monzogranite Syenogranite':2,
 'Mylonitised granite':2,
 'Mylonitised granite gneiss':2,
 'Pink granite':2,
 'Pink granite and migmatite (M2)':2,
 'Porphyritic monzogranite':2,
 'RAMAGIRI GRANITE':2,
 'TGA Suite_adamelite dominant':2,
 'tonalite-Diorite gneiss':2,
 'Banded ferruginous quartzite':3,
 'Banded Magnetite Quartzite':3,
 'Fuchsite quartzite':3,
 'Quartz':3,
 'Quartz chlorite actinolite schist':4,
 'Quartz Chlorite Sericite Schist':4,
 'Quartz reef':3,
 'Quartz vein':3,
 'Quartz-chlorite schist':4,
 'Quartzite':3,
 'Quartz-mica schist':4,
 'Argillite':4,
 'Chlorite schist':4,
 'Chlorite-Actinolite schist':4,
 'Hornblende actinolite schist':4,
 'INTER BEDDED QUARTZITE & QUARTZ CHLORITE SCHIST':4,
 'Manganiferous phyllite':4,
 'Meta-tuff':8,
 'Quartz sericite schist':4,
 'Quartz-kyanite schist':4,
 'Pegmatite':5,
 'Agglomerate':9,
 'Banded Ferruginous Chert':3,
 'Banded Iron Formation':9,
 'Conglomerate':6,
 'Manganiferous chert':3,
 'Polymictic conglomerate':6,
 'Crystalline limestone':11,
 'Dolomite':11,
 'Dolomitic limestone':11,
 'Graywacke':3,
 'Limestone':11,
 'Mafic dyke':1,
 'Meta basalt':8,
 'Meta rhyolite':8,
 'Migmatite':2,
 'MS Suite':7,
 'Phyllite with bands of dolomite':4,
 'Syenite':2,
 'Tonalite Granodiorite Monzogranite':2,
 'Ultramafite':10,
 'Amphibolite':4
}

# Encode 'lithologic' with the table above. Values that have no exact key
# match come back as NaN from map() and are filled with 0 before the int cast.
filtered_gdf['lithologic_labels'] = filtered_gdf['lithologic'].map(lithologic_mapping).fillna(0).astype(int)

In [None]:
# Lookup table that encodes the free-text 'texture' attribute as an
# integer category code (1-9 in this table).
# Keys are matched exactly by Series.map, i.e. the lookup is case- and
# whitespace-sensitive; spelling/case variants are therefore listed as
# separate keys.
# NOTE(review): keys such as 'ine grained, ...' and 'IInequigranular, ...'
# look like typos; presumably they mirror misspellings in the raw data --
# confirm before "correcting" them.
texture_mapping = {
 'Banded':1,
 'Banded  texture':1,
 'Banded texture':1,
 'Bandind texture':1,
 'Banding':1,
 'banding':1,
 'carbonated, both schistose, massive':1,
 'fine grained, strongly foliated and carbonated':1,
 'gneissic':1,
 'Gneissic texture':1,
 'Schisose texture':1,
 'schistose':1,
 'Schistose':1,
 'schistose fabric':1,
 'SCHISTOSITY':1,
 'SCHISTOSE':1,
 'FINE':2,
 'FINE GRAINED':2,
 'FINE GRANIED WITH CIRCULAR RING':2,
 'grainy or sandpaper':2,
 'Granular':2,
 'Granular texture':2,
 'granular texture':2,
 'Medium to coarse grain':2,
 'Medium to fine grained':2,
 'holocrystalline porphyritic':3,
 'holocrystalline, equigranular sub-hedral texture':8,
 'porphyritic':3,
 'Porphyritic':3,
 'porphyritic texture':3,
 'Porphyroblasitc':3,
 'xenoblastic':3,
 'HONEY COMB':4,
 'PILLOW':4,
 'Pitted texture':4,
 'VESICULAR':4,
 'ophitic':5,
 'ophitic texture':5,
 'OPHITIC TEXTURE':5,
 'spinifex texture':5,
 'sub-ophitic to ophitic':5,
 'Sub-ophitic to ophitic':5,
 'clastic':6,
 'detrital':6,
 'BOX WORK':7,
 'mosaic':7,
 'MOSAIC':7,
 'PETROMICT':7,
 'RIBBON':7,
 'RIBBON STRETCHED':7,
 'STRECHED RIBBON':7,
 'Equigranula':8,
 'equigranular':8,
 'Equigranular hypidiomrophic':8,
 'Granophyric, myrmekitic, perthitic, granular':3,
 'Hypidiomorphic':8,
 'hypidiomorphic granular texture':8,
 'Hypidiomorphic, granular and inequigranular':8,
 'Inequigranular, Hypidiomorphic':8,
 'Inequigranular, Hypidiomorphic, Perthite':8,
 'Brittle':9,
 'Elephant skin weathering':9,
 'fine grained':2,
 'fine grained, shreds of feldspar and quartz':2,
 'graded':6,
 'Granodioritic texture':2,
 'holocrystalline  and hypidiomorphic':8,
 'IInequigranular, Hypidiomorphic, Perthite':8,
 'ILLSORTED':6,
 'ine grained, strongly foliated and carbonated':1,
 'LAMELLAR':1,
 'MEDIUM':2,
 'Medium to fine granular texture':2,
 'mesocratic, fine-medium grained, feebly foliated':2,
 'Myrmekitic and Graphic':3,
 'Perthitc and Myrmekitic':3,
 'perthite, myrmakite':3,
 'perthitic texture':3,
 'sub-hypidiomorphic and holo-crystalline':8,
 'sugary textures':8,
 'sutured':8,
}

# Encode 'texture' with the table above. Values that have no exact key
# match come back as NaN from map() and are filled with 0 before the int cast.
filtered_gdf['texture_labels'] = filtered_gdf['texture'].map(texture_mapping).fillna(0).astype(int)

## Perform Clustering

In [None]:
# Define the fields for clustering (the integer-encoded attribute columns)
n_fields = ['accessory_labels', 'major_mine_labels', 'lithologic_labels', 'texture_labels']

# Working frame for clustering. The notebook used `filtered_gdf_cleaned`
# without ever defining it (NameError); it is created here as an independent
# copy of the encoded frame.
# NOTE(review): if rows whose labels are 0 (unmapped values) were meant to be
# excluded from clustering, apply that filter here.
filtered_gdf_cleaned = filtered_gdf.copy()

# Extract the relevant columns for clustering
X = filtered_gdf_cleaned[n_fields].copy()

# Standardize the features to ensure all features have the same scale
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Elbow Method - Find the optimal number of clusters
k_values = range(1, 50)  # K = 1 .. 49 (the upper bound of range() is exclusive)
wcss = []  # Within-cluster sum of squares (KMeans inertia) for each K
for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X_scaled)
    wcss.append(kmeans.inertia_)

# Plot the Elbow Method
plt.figure(figsize=(8, 6))
plt.plot(k_values, wcss, marker='o')
plt.title('Elbow Method For Optimal K')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.grid(True)
plt.show()

In [None]:
# Silhouette analysis: score a K-Means fit for every K from 2 to 49
# (a silhouette score is undefined for a single cluster).
candidate_ks = range(2, 50)
silhouette_scores = []
for i in candidate_ks:
    # KMeans.fit returns the fitted estimator, so build and fit in one step
    kmeans = KMeans(n_clusters=i, random_state=42).fit(X_scaled)
    score = silhouette_score(X_scaled, kmeans.labels_)
    silhouette_scores.append(score)

# Draw the score as a function of K
plt.figure(figsize=(8, 6))
plt.plot(candidate_ks, silhouette_scores, marker='o')
plt.title('Silhouette Scores For Optimal K')
plt.xlabel('Number of clusters')
plt.ylabel('Silhouette Score')
plt.grid(True)
plt.show()

In [None]:
# Per-K silhouette diagnostics: for every K from 2 to 49, record the mean
# silhouette score and draw the yellowbrick silhouette plot for that K.
ks_to_try = range(2, 50)  # silhouette needs at least 2 clusters
silhouette_scores = []

for i in ks_to_try:
    # Fit K-Means on the scaled features
    kmeans = KMeans(n_clusters=i, random_state=42)
    kmeans.fit(X_scaled)

    # Mean silhouette score for this K
    score = silhouette_score(X_scaled, kmeans.labels_)
    silhouette_scores.append(score)

    # Silhouette plot for this K
    visualizer = SilhouetteVisualizer(kmeans, colors='yellowbrick')
    visualizer.fit(X_scaled)
    visualizer.show()

# Summary curve: silhouette score against K
plt.figure(figsize=(8, 6))
plt.plot(ks_to_try, silhouette_scores, marker='o')
plt.title('Silhouette Scores For Optimal K')
plt.xlabel('Number of clusters')
plt.ylabel('Silhouette Score')
plt.grid(True)
plt.show()

In [None]:
from html import escape  # tooltip text is rendered as HTML by folium

# Encoded attribute columns used as clustering features
n_fields = ['accessory_labels', 'major_mine_labels', 'lithologic_labels', 'texture_labels']

# Original free-text attributes, shown in the tooltip next to their codes
tooltip_fields = ['accessory_', 'major_mine', 'lithologic', 'texture']

# Number of clusters; used by the model, the colour list and the legend
n_clusters = 13

# Working frame for clustering. The notebook used `filtered_gdf_cleaned`
# without ever defining it (NameError); it is created here as an independent
# copy of the encoded frame.
# NOTE(review): if rows whose labels are 0 (unmapped values) were meant to be
# excluded from clustering, apply that filter here.
filtered_gdf_cleaned = filtered_gdf.copy()

# Extract the relevant columns for clustering
X = filtered_gdf_cleaned[n_fields].copy()

# Standardize the features to ensure all features have the same scale
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Perform K-Means clustering and store the cluster index of every feature
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
filtered_gdf_cleaned['cluster'] = kmeans.fit_predict(X_scaled)

# Save the clustered GeoDataFrame as a shapefile.
# Shapefile (DBF) field names are limited to 10 characters, so the *_labels
# columns would be truncated to 'accessory_', 'major_mine' and 'lithologic',
# colliding with the raw attribute columns. Export them under short,
# unambiguous names instead; the in-memory frame keeps the long names.
shp_field_names = {
    'accessory_labels': 'acc_lbl',
    'major_mine_labels': 'mine_lbl',
    'lithologic_labels': 'lith_lbl',
    'texture_labels': 'text_lbl',
}
filtered_gdf_cleaned.rename(columns=shp_field_names).to_file("clustered_data.shp")

# Create a Folium map centered around the data's centroid
centroids = filtered_gdf_cleaned.geometry.centroid
map_center = [centroids.y.mean(), centroids.x.mean()]
m = folium.Map(location=map_center, zoom_start=10, tiles='CartoDB positron')

# One colour per cluster, generated by spreading the index over the RGB range
cluster_colors = [f"#{(i * 1234567) % 16777215:06x}" for i in range(n_clusters)]

# Add every feature to the map, coloured by cluster, with a descriptive tooltip
for _, row in filtered_gdf_cleaned.iterrows():
    cluster_id = int(row['cluster'])
    cluster_color = cluster_colors[cluster_id]

    # Tooltip: cluster index, then the raw attribute values (HTML-escaped,
    # because they are free text), then the encoded labels
    tooltip_lines = [f"Cluster: {cluster_id}"]
    tooltip_lines += [f"{field}: {escape(str(row[field]))}" for field in tooltip_fields]
    tooltip_lines += [f"{field}: {row[field]}" for field in n_fields]
    tooltip_text = "<br>".join(tooltip_lines)

    folium.GeoJson(
        row['geometry'],
        # Bind the colour as a default argument so each layer keeps its own
        # colour instead of the last value of the loop variable
        style_function=lambda x, color=cluster_color: {
            'fillColor': color,
            'color': color,
            'weight': 1,
            'fillOpacity': 0.6
        },
        tooltip=tooltip_text
    ).add_to(m)

# Add a custom legend. The height scales with the number of entries and
# evaluates to the original 270px for 13 clusters.
legend_height = 36 + 18 * n_clusters
legend_html = f"""
    <div style="position: fixed;
                bottom: 50px; left: 50px; width: 150px; height: {legend_height}px;
                background-color: white; z-index:9999; border:2px solid grey;
                padding: 10px; font-size: 12px;">
        <strong>Cluster Legend</strong><br>
"""
legend_html += "".join(
    f'<i style="background: {color}; width: 15px; height: 15px; float: left; margin-right: 5px;"></i> Cluster {i}<br>'
    for i, color in enumerate(cluster_colors)
)
legend_html += "</div>"

# Add legend to the map
m.get_root().html.add_child(folium.Element(legend_html))

# Save the map to disk and display it as the cell output
m.save("kmeans_clusters_map_with_legend.html")
m