# AI-Based Mineral Targeting Pipeline
_Aganitha Space Technologies_

## Step 1: Load and Clean NGCM Dataset

In [None]:

import pandas as pd

# Load the raw NGCM stream-sediment workbook.
df = pd.read_excel('/mnt/data/NGCM-Stream-Sediment-Analysis-Updated.xlsx')

# Keep only rows that have coordinates and the U / Ta / Pt assays.
# The explicit .copy() makes df_clean an independent frame, so adding columns
# to it in later cells cannot raise pandas' SettingWithCopyWarning.
required_columns = ['Latitude', 'Longitude', 'U_ppm', 'Ta_ppm', 'Pt_ppb']
df_clean = df.dropna(subset=required_columns).copy()

# Persist the cleaned table, then preview the first rows as the cell output.
df_clean.to_csv('/mnt/data/Cleaned_NGCM.csv', index=False)
df_clean.head()


## Step 2: KMeans Clustering on Elemental Features

In [None]:

from sklearn.cluster import KMeans

# Partition the samples into three clusters using the U and Ta columns.
# n_init is pinned because the library default changed between scikit-learn
# releases; leaving it implicit emits a FutureWarning on some versions and
# changes the number of restarts on others. random_state fixes the seeding.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
cluster_labels = kmeans.fit_predict(df_clean[['U_ppm', 'Ta_ppm']])

# .assign returns a new frame rather than writing into the existing one, so
# the cell can be re-run safely and does not raise SettingWithCopyWarning.
df_clean = df_clean.assign(Cluster=cluster_labels)

# Persist the labelled table, then preview the clustered columns.
df_clean.to_csv('/mnt/data/Clustered_NGCM.csv', index=False)
df_clean[['U_ppm', 'Ta_ppm', 'Cluster']].head()


## Step 3: Visualize Spatial Distribution of Uranium

In [None]:

import geopandas as gpd
import matplotlib.pyplot as plt

# Build one point per sample (x = longitude, y = latitude) and attach it to
# the table as the geometry column, tagged with the EPSG:4326 CRS.
sample_points = gpd.points_from_xy(df_clean.Longitude, df_clean.Latitude)
gdf = gpd.GeoDataFrame(df_clean, geometry=sample_points, crs="EPSG:4326")

# Colour each sample location by its uranium concentration.
fig, ax = plt.subplots(figsize=(8, 6))
gdf.plot(column='U_ppm', cmap='viridis', legend=True, ax=ax)
ax.set_title("Uranium (U_ppm) Spatial Distribution")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()


## Step 4: Histograms of Key Element Distributions

In [None]:

import seaborn as sns

# Elements to plot, one histogram panel each.
cols = ['Ni_ppm', 'Co_ppm', 'Cr_ppm', 'U_ppm', 'Au_ppb']

fig, axes = plt.subplots(2, 3, figsize=(14, 8))
panels = axes.ravel()  # row-major order: top row left-to-right, then bottom row

for ax, col in zip(panels, cols):
    sns.histplot(df_clean[col], kde=False, ax=ax, color='orange', edgecolor='black')
    ax.set_title(f"{col} Concentration Distribution")

# The 2x3 grid has more panels than there are elements; hide the leftovers so
# the figure does not show an empty framed axes.
for ax in panels[len(cols):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()


## Step 5: Elemental Correlation Heatmap

In [None]:

import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise correlations between the selected element columns
# (DataFrame.corr with its default method).
corr_columns = ['U_ppm', 'Ta_ppm', 'Pt_ppb', 'Pd_ppb', 'Co_ppm', 'Ni_ppm', 'Cr_ppm', 'Au_ppb']
correlation_matrix = df_clean[corr_columns].corr()

# Draw the annotated heatmap on a fresh figure and title the axes it returns.
plt.figure(figsize=(10, 6))
heatmap_ax = sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
heatmap_ax.set_title("Elemental Correlation Matrix")
plt.show()


## Step 6: Principal Component Analysis (PCA)

In [None]:

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import numpy as np

features = ['U_ppm', 'Ta_ppm', 'Pt_ppb', 'Pd_ppb', 'Co_ppm', 'Ni_ppm', 'Cr_ppm', 'Au_ppb']

# PCA cannot handle missing values, so keep only samples with every feature.
X = df_clean[features].dropna()

# Standardise each feature so that columns with larger numeric ranges do not
# dominate the principal components.
X_scaled = StandardScaler().fit_transform(X)

pca = PCA(n_components=2)
principal_components = pca.fit_transform(X_scaled)

# Reuse X's index so every score row still maps back to its sample in
# df_clean; a default RangeIndex would silently misalign once rows are dropped.
df_pca = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'], index=X.index)

# Scatter of the first two components, with explained variance on the axes.
plt.figure(figsize=(8, 6))
sns.scatterplot(x='PC1', y='PC2', data=df_pca)
plt.title("PCA of Elemental Features")
plt.xlabel(f"PC1 ({pca.explained_variance_ratio_[0]*100:.1f}% variance)")
plt.ylabel(f"PC2 ({pca.explained_variance_ratio_[1]*100:.1f}% variance)")
plt.grid(True)
plt.show()
