In [88]:
# DATASET DE MINNERALES SUBTERRANEOS

import pandas as pd
import random

min0_samples = 300

# Define the bounding box of the Atacama region
bounding_box = ((-24.5, -70.5), (-22.5, -68.5))

# Define the list of metals
metals_list = ['copper', 'gold']

# Generate a list of 100 random lat long coordinates and associated metals
random_coordinates_metals = []
for _ in range(min0_samples):
    random_latitude = random.uniform(bounding_box[0][0], bounding_box[1][0])
    random_longitude = random.uniform(bounding_box[0][1], bounding_box[1][1])
    random_metal = random.choice(metals_list)
    random_weight = random.uniform(0, 1)
    random_coordinates_metals.append((random_latitude, random_longitude, random_metal, random_weight,0))

# Create a pandas dataframe
metals = pd.DataFrame(random_coordinates_metals, columns=['latitude', 'longitude', 'metal', 'weight','type'])

print(metals)

      latitude  longitude   metal    weight  type
0   -23.948722 -69.453128  copper  0.361609     0
1   -22.666166 -70.430131    gold  0.199405     0
2   -24.089698 -70.390163    gold  0.861836     0
3   -22.573329 -69.976613    gold  0.662121     0
4   -24.099462 -70.302353  copper  0.940996     0
..         ...        ...     ...       ...   ...
295 -23.899482 -69.620494  copper  0.840970     0
296 -24.232804 -69.034758    gold  0.384990     0
297 -22.764716 -69.532345  copper  0.212881     0
298 -23.392783 -69.845880    gold  0.644163     0
299 -23.048454 -69.389548  copper  0.666916     0

[300 rows x 5 columns]


In [89]:
# DATASET DE MINNERALES SUPERFICIALES (SATELITAL)

import pandas as pd
import random

min1_samples = 300

# Bounding box of the Atacama region
bounding_box = ((-24.5, -70.5), (-22.5, -68.5))


# List of minerals
minerals_list = ['kaolinita' , 'alunita', 'calcita' , 'Alunita+kalinita', 'Montmorillonita', 'silicio']


# List of random lat long coordinates and associated minerals and weights
random_coordinates_minerals_weights = []
for _ in range(min1_samples):
    random_latitude = random.uniform(bounding_box[0][0], bounding_box[1][0])
    random_longitude = random.uniform(bounding_box[0][1], bounding_box[1][1])
    random_mineral = random.choice(minerals_list)
    random_weight = random.uniform(0, 1)
    random_coordinates_minerals_weights.append((random_latitude, random_longitude, random_mineral, random_weight,1))


# Create a pandas dataframe
minerals = pd.DataFrame(random_coordinates_minerals_weights, columns=['latitude', 'longitude', 'metal', 'weight','type'])
print(minerals)

      latitude  longitude             metal    weight  type
0   -22.978388 -69.808940  Alunita+kalinita  0.673305     1
1   -22.876880 -68.864308           alunita  0.910468     1
2   -23.910392 -68.834252   Montmorillonita  0.995233     1
3   -22.901197 -69.332043           silicio  0.287753     1
4   -22.613390 -70.036423   Montmorillonita  0.068372     1
..         ...        ...               ...       ...   ...
295 -24.441751 -70.198157           alunita  0.938327     1
296 -23.922402 -69.130687           alunita  0.776521     1
297 -23.140388 -70.158652         kaolinita  0.920852     1
298 -22.577756 -69.485320           alunita  0.492827     1
299 -22.626656 -69.617357         kaolinita  0.509485     1

[300 rows x 5 columns]


In [90]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Merges the dataframes
df = pd.concat([minerals, metals], ignore_index=True)
# Calculate the Euclidean distance between each surface_material and underground_material.
df['distance'] = np.sqrt((df['latitude'] - df['longitude'])**2)
df['is_underground'] = df['type'].apply(lambda x: 1 if x == 0 else 1)


print(df)

      latitude  longitude             metal    weight  type   distance  \
0   -22.978388 -69.808940  Alunita+kalinita  0.673305     1  46.830553   
1   -22.876880 -68.864308           alunita  0.910468     1  45.987428   
2   -23.910392 -68.834252   Montmorillonita  0.995233     1  44.923860   
3   -22.901197 -69.332043           silicio  0.287753     1  46.430847   
4   -22.613390 -70.036423   Montmorillonita  0.068372     1  47.423032   
..         ...        ...               ...       ...   ...        ...   
595 -23.899482 -69.620494            copper  0.840970     0  45.721012   
596 -24.232804 -69.034758              gold  0.384990     0  44.801955   
597 -22.764716 -69.532345            copper  0.212881     0  46.767629   
598 -23.392783 -69.845880              gold  0.644163     0  46.453096   
599 -23.048454 -69.389548            copper  0.666916     0  46.341094   

     is_underground  
0                 1  
1                 1  
2                 1  
3                 1  
4

In [91]:
# CALCULATE DISTANCES
df['distance'] = np.sqrt((df['latitude'] - df['longitude'])**2)
print(df)

      latitude  longitude             metal    weight  type   distance  \
0   -22.978388 -69.808940  Alunita+kalinita  0.673305     1  46.830553   
1   -22.876880 -68.864308           alunita  0.910468     1  45.987428   
2   -23.910392 -68.834252   Montmorillonita  0.995233     1  44.923860   
3   -22.901197 -69.332043           silicio  0.287753     1  46.430847   
4   -22.613390 -70.036423   Montmorillonita  0.068372     1  47.423032   
..         ...        ...               ...       ...   ...        ...   
595 -23.899482 -69.620494            copper  0.840970     0  45.721012   
596 -24.232804 -69.034758              gold  0.384990     0  44.801955   
597 -22.764716 -69.532345            copper  0.212881     0  46.767629   
598 -23.392783 -69.845880              gold  0.644163     0  46.453096   
599 -23.048454 -69.389548            copper  0.666916     0  46.341094   

     is_underground  
0                 1  
1                 1  
2                 1  
3                 1  
4

In [92]:
# CORRELATION MATRIX BETWEEN
correlation_matrix = df.corr()
print(correlation_matrix)


                latitude  longitude    weight      type  distance  \
latitude        1.000000  -0.012923 -0.037764  0.011671  0.717298   
longitude      -0.012923   1.000000  0.025144 -0.012187 -0.705978   
weight         -0.037764   0.025144  1.000000  0.001567 -0.044269   
type            0.011671  -0.012187  0.001567  1.000000  0.016759   
distance        0.717298  -0.705978 -0.044269  0.016759  1.000000   
is_underground       NaN        NaN       NaN       NaN       NaN   

                is_underground  
latitude                   NaN  
longitude                  NaN  
weight                     NaN  
type                       NaN  
distance                   NaN  
is_underground             NaN  


In [93]:
# TRAINING

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

le = LabelEncoder()
features = ['latitude', 'longitude']

X = df[features]

print(X)

y = df['metal']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

      latitude  longitude
0   -22.978388 -69.808940
1   -22.876880 -68.864308
2   -23.910392 -68.834252
3   -22.901197 -69.332043
4   -22.613390 -70.036423
..         ...        ...
595 -23.899482 -69.620494
596 -24.232804 -69.034758
597 -22.764716 -69.532345
598 -23.392783 -69.845880
599 -23.048454 -69.389548

[600 rows x 2 columns]
Accuracy: 0.18333333333333332


In [94]:
# PREDICTION
import warnings
warnings.filterwarnings('ignore')

latitude , longitude = -23.545433, -69.169818
probabilities = clf.predict_proba([[latitude, longitude]])
predicted_metal = clf.predict([[latitude, longitude]])
most_likely_output = np.argmax(probabilities)
most_likely_output_probability = probabilities[0][most_likely_output]
print(f'The metal most likely to be found at {latitude}, {longitude} is {predicted_metal} with a probability of {most_likely_output_probability}')
print(f'The probabilities for the rest of the metals are {probabilities[0]}')


The metal most likely to be found at -23.545433, -69.169818 is ['copper'] with a probability of 0.45
The probabilities for the rest of the metals are [0.01 0.01 0.18 0.18 0.45 0.11 0.06 0.  ]


In [95]:
import numpy as np

# Define the latitude and longitude bounding box of the atacama region in chile
latitude_min, latitude_max = -24.5, -22.5
longitude_min, longitude_max = -70.5, -68.5

# Define the step size for traversing the bounding box
step_size = .1 # 1 km

# Generate the latitude and longitude coordinates for traversing the bounding box
latitudes = np.arange(latitude_min, latitude_max, step_size)
longitudes = np.arange(longitude_min, longitude_max, step_size)

coordinates = []

# Traverse the bounding box
for latitude in latitudes:
    for longitude in longitudes:
        print(f'Latitude: {latitude}, Longitude: {longitude}')
        probabilities = clf.predict_proba([[latitude, longitude]])
        predicted_metal = clf.predict([[latitude, longitude]])
        most_likely_output = np.argmax(probabilities)
        most_likely_output_probability = probabilities[0][most_likely_output]
        vals = (latitude,longitude,probabilities);
        print(vals)
        coordinates.append(vals)

Latitude: -24.5, Longitude: -70.5
(-24.5, -70.5, array([[0.16, 0.08, 0.05, 0.1 , 0.18, 0.06, 0.35, 0.02]]))
Latitude: -24.5, Longitude: -70.4
(-24.5, -70.4, array([[0.23, 0.21, 0.06, 0.11, 0.18, 0.09, 0.09, 0.03]]))
Latitude: -24.5, Longitude: -70.30000000000001
(-24.5, -70.30000000000001, array([[0.04, 0.  , 0.22, 0.15, 0.35, 0.01, 0.02, 0.21]]))
Latitude: -24.5, Longitude: -70.20000000000002
(-24.5, -70.20000000000002, array([[0.04, 0.  , 0.35, 0.18, 0.34, 0.09, 0.  , 0.  ]]))
Latitude: -24.5, Longitude: -70.10000000000002
(-24.5, -70.10000000000002, array([[0.07, 0.01, 0.02, 0.18, 0.69, 0.02, 0.01, 0.  ]]))
Latitude: -24.5, Longitude: -70.00000000000003
(-24.5, -70.00000000000003, array([[0.19, 0.01, 0.  , 0.21, 0.43, 0.14, 0.01, 0.01]]))
Latitude: -24.5, Longitude: -69.90000000000003
(-24.5, -69.90000000000003, array([[0.22, 0.01, 0.  , 0.29, 0.33, 0.14, 0.01, 0.  ]]))
Latitude: -24.5, Longitude: -69.80000000000004
(-24.5, -69.80000000000004, array([[0.19, 0.05, 0.01, 0.33, 0.35, 0

In [96]:
import folium
from folium import plugins
#from folium.plugins import HeatMap, MarkerCluster, GroupedLayerControl
import pandas as pd
from argparse import ArgumentParser
from IPython.display import display

map_lat = metals['latitude'].mean()
map_lon = metals['longitude'].mean()

map = folium.Map(location=[map_lat, map_lon], zoom_start=9, control_scale=True,width=800,height=600)

# MAPA DE CALOR DE LA PROBABILIDAD DE ENCONTRAR METALES (ORO, COBRE)

for index, mineral in enumerate(metals_list):
  g = folium.FeatureGroup(mineral).add_to(map)
  hmap_data = [[row[0], row[1], row[2][0][index]*1000] for row in coordinates]
  hm = plugins.HeatMap(hmap_data,
                            min_opacity=0,
                                    radius=50,
                                    blur=30)
  hm.add_to(g)

for index, location_info in minerals.iterrows():
    g = folium.FeatureGroup(location_info["metal"]).add_to(map)
    folium.Marker(
          [location_info["latitude"], location_info["longitude"]],
          popup=location_info["metal"]).add_to(g)

folium.LayerControl().add_to(map)
display(map)