# Creating a model to fill Null values into a Mountain Dataset

## Import Libraries

In [59]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
import geopy
from sklearn.preprocessing import LabelEncoder

## Fetch data from files

In [34]:
train = pd.read_csv("data/Mountain.csv")
unfilled = pd.read_csv("data/unfilled.csv")

## Information to the Data

In [35]:
train.sample(5)

Unnamed: 0,Name,Height,MountainRange,Country
323,Malika Parbat,5290.0,Himalaya,Pakistan
259,Shayaz,6045.0,Karakoram,Pakistan
1085,Guadalupe Peak,2667.0,Guadalupe Mountains,USA
102,Karjiang,7221.0,Himalaya,China
610,Vladimir Putin Peak,4446.0,Tian Shan,Kyrgyzstan


In [36]:
unfilled.sample(5)

Unnamed: 0,MountainID,Name,Height,FirstAcent,Mountains
1,2,K2,8611,1954-07-31,Karakorum
5,8,Mount Blanc,4810,1786-08-08,
3,5,Nanga Parbat,8125,1956-05-09,Himalaya
7,10,Makalu,8485,2009-02-09,Himalaya
6,9,Cho Oyu,8188,1985-02-12,Himalaya


In [37]:
train.isna().sum()

Name              0
Height            0
MountainRange     0
Country          45
dtype: int64

In [38]:
unfilled.isna().sum()

MountainID    0
Name          0
Height        0
FirstAcent    0
Mountains     3
dtype: int64

## Prepare the Data to model

In [39]:
mask_nan = unfilled['Mountains'].isnull()
predict = unfilled.loc[mask_nan, ['MountainID', 'Height', 'Name']]

In [40]:
predict.head()

Unnamed: 0,MountainID,Height,Name
0,1,2962,Zugspitze
2,4,2522,Hoher Göll
5,8,4810,Mount Blanc


In [41]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="Silly-Mountain-Model")
def get_lat_lon(name, geolocator):
    try:
        location = geolocator.geocode(name)
        if location:
            return pd.Series((location.latitude, location.longitude))
        else:
            return pd.Series((None, None))
    except Exception as e:
        print(f"Error with: '{name}': {e}")
        return pd.Series((None, None))

In [42]:
train[['latitude', 'longitude']] = train['Name'].apply(lambda x: get_lat_lon(x, geolocator))
train = train.drop(['Name', 'Country'], axis=1)
predict[['latitude', 'longitude']] = predict['Name'].apply(lambda x: get_lat_lon(x, geolocator))

Error with: 'Gasherbrum I': HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=Gasherbrum+I&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
Error with: 'Laram Q'awa (Charaña)': HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=Laram+Q%27awa+%28Chara%C3%B1a%29&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
Error with: 'Dufourspitze (Monte Rosa)': HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=Dufourspitze+%28Monte+Rosa%29&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
Error with: 'Ludwigshöhe (Monte Rosa)'

In [45]:
train = train.dropna()

In [52]:
train.head()

Unnamed: 0,Height,MountainRange,latitude,longitude
0,8848.86,Himalaya,27.988061,86.92521
1,8611.0,Karakorum,35.881682,76.513331
2,8586.0,Himalaya,27.703011,88.147477
3,8516.0,Himalaya,27.961986,86.932504
4,8485.0,Himalaya,27.891438,87.08844


In [50]:
predict_val = predict[['Height', 'latitude', 'longitude']]

In [51]:
predict_val.head()

Unnamed: 0,Height,latitude,longitude
0,2962,47.421215,10.986297
2,2522,47.593926,13.067141
5,4810,-26.814444,152.878611


## Modelbuilding

In [64]:
label_encoder = LabelEncoder()
classes = train['MountainRange'].nunique()
train['MountainRange_encoded'] = label_encoder.fit_transform(train['MountainRange'])
y_train_encoded = tf.keras.utils.to_categorical(train['MountainRange_encoded'], num_classes=classes)
x_train = train[['Height', 'latitude', 'longitude']].values
input_layer = tf.keras.Input(shape=(3,), name='input')
dense_layer = layers.Dense(64, activation='relu')(input_layer)
dense_layer_2 = layers.Dense(64, activation='relu')(dense_layer)
output_layer = layers.Dense(classes, activation='softmax', name='output')(dense_layer_2)
model = keras.Model(inputs=input_layer, outputs=output_layer, name='Mountain_model')
model.summary()

In [65]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [68]:
model.fit(x_train , y_train_encoded, epochs=200, verbose=1)

Epoch 1/200
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3873 - loss: 2.1242
Epoch 2/200
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3619 - loss: 2.1722
Epoch 3/200
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3874 - loss: 2.0949
Epoch 4/200
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3890 - loss: 2.1299
Epoch 5/200
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3684 - loss: 2.2048
Epoch 6/200
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3727 - loss: 2.2340
Epoch 7/200
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4040 - loss: 2.0391
Epoch 8/200
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4022 - loss: 2.0830
Epoch 9/200
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7fbc3147ad90>