<a href="https://colab.research.google.com/github/shashaaankk/GradientAscent/blob/main/GradientAscent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# imports
import pandas as pd
import numpy as np
import gpxpy
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [17]:
# Execution-mode switch read by the cells below: when True they take the local
# branch (curl/unzip into ./data); when False they take the Colab branch
# (kaggle.json upload + kagglehub download).
LOCAL = True

In [18]:
if not LOCAL:
    !pip install --quiet kaggle kagglehub[pandas-datasets]
    from google.colab import files
    uploaded = files.upload()   # click to select your kaggle.json
    if 'kaggle.json' not in uploaded:
        raise FileNotFoundError("You must upload the kaggle.json you downloaded from Kaggle.")


In [19]:
# Colab only: place the uploaded kaggle.json where the Kaggle tooling looks for it.
if not LOCAL:
    import os, shutil
    # make sure ~/.kaggle exists
    kaggle_dir = os.path.expanduser("~/.kaggle")
    os.makedirs(kaggle_dir, exist_ok=True)
    token_path = os.path.join(kaggle_dir, "kaggle.json")

    # move and secure
    # Only move when a freshly uploaded token is present in the working
    # directory, so re-running this cell after a successful first run does not
    # crash on the (already moved) source file.
    if os.path.exists("kaggle.json"):
        shutil.move("kaggle.json", token_path)
    elif not os.path.exists(token_path):
        raise FileNotFoundError(
            "kaggle.json not found in the working directory or in " + kaggle_dir
        )
    # Owner read/write only.
    os.chmod(token_path, 0o600)

    # sometimes needed:
    os.environ['KAGGLE_CONFIG_DIR'] = kaggle_dir


In [26]:
if not LOCAL:
    import kagglehub
    path = kagglehub.dataset_download("roccoli/gpx-hike-tracks")
else:
    !mkdir data
    !curl -L -o ./data/gpx-hike-tracks.zip https://www.kaggle.com/api/v1/datasets/download/roccoli/gpx-hike-tracks
    !unzip -o data/gpx-hike-tracks.zip -d data/

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 57.4M  100 57.4M    0     0  4410k      0  0:00:13  0:00:13 --:--:-- 4617k
Archive:  data/gpx-hike-tracks.zip
  inflating: data/gpx-tracks-from-hikr.org.csv  


In [27]:
# 1. Install gpxpy (to parse .gpx files)
import gpxpy
import pandas as pd
if not LOCAL:
    !pip install --quiet gpxpy

    # 2. Import libs
    import os, glob


    csv_files = glob.glob(os.path.join(path, "**", "*.csv"), recursive=True)
    csv_path = csv_files[0]
    print("Loading:", csv_path)

else:
    csv_path = "data/gpx-tracks-from-hikr.org.csv"

# Read and inspect
df = pd.read_csv(csv_path)
# print("Shape:", df.shape)
# print("Columns:", df.columns.tolist())
# print(df.head())



In [None]:
# Pre-processing 1: build the feature matrix and the integer difficulty target.

# Drop rows with any missing value; copy so the column assignments below work
# on an independent frame.
df = df.dropna().copy()

# Convert time columns to datetime (values that do not match the format become NaT)
df["start_time"] = pd.to_datetime(df["start_time"], format="%Y-%m-%d %H:%M:%S", errors='coerce')
df["end_time"] = pd.to_datetime(df["end_time"], format="%Y-%m-%d %H:%M:%S", errors='coerce')

# Compute total duration in seconds (elapsed time from start to end)
df["duration"] = (df["end_time"] - df["start_time"]).dt.total_seconds()

# Compute break time: duration - moving_time
df["break_time"] = df["duration"] - df["moving_time"]

# Select relevant features
selected = df[["duration", "length_3d", "min_elevation", "max_elevation", "break_time", "uphill", "downhill"]]

X = selected
# The second character of the difficulty label is used as the class id
# (e.g. "T2 - Mountain hike" -> 2).
y = df['difficulty'].str[1].astype(int)

# Keep only rows whose features and target are all present; rows with
# unparseable timestamps have NaN duration/break_time and are excluded here.
mask = X.notnull().all(axis=1) & y.notnull()

# Fit the scaler on the valid rows only, then transform every row so that
# X_scaled stays positionally aligned with X / mask.
# NOTE(review): the scaler is still fitted before the train/test split, so
# test-set statistics influence the scaling -- consider fitting on the training
# split only.
scaler = StandardScaler()
scaler.fit(X[mask])
X_scaled = scaler.transform(X)

# X_scaled is a NumPy array, so it is filtered with the positional boolean
# array; y is filtered by index alignment.
X_clean = X_scaled[mask.to_numpy()]
y_clean = y[mask]


In [None]:
# Hold out 20% of the cleaned samples for evaluation. The split is stratified
# on the target so both partitions keep the class proportions of y_clean, and
# the fixed random_state makes it reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_clean,
    y_clean,
    stratify=y_clean,
    random_state=42,
    test_size=0.2,
)

Unnamed: 0,_id,length_3d,user,start_time,max_elevation,bounds,uphill,moving_time,end_time,max_speed,gpx,difficulty,min_elevation,url,downhill,name,length_2d
0,5afb229e8f80884aaad9c6ea,10832.953016,Bergfritz,2018-05-11 07:37:40,1934.47,"{'min': {'type': 'Point', 'coordinates': [13.2...",612.88,12155.0,2018-05-11 11:38:23,1.595493,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<gpx x...",T2 - Mountain hike,1322.96,http://www.hikr.org/tour/post131855.html,609.67,"Remsteinkopf, 1945 m",10832.953016
1,5afb229e8f80884aaad9c6eb,12259.376315,Bergfritz,2018-05-12 07:25:08,2186.21,"{'min': {'type': 'Point', 'coordinates': [13.1...",614.753,13876.0,2018-05-12 12:08:28,1.39432,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<gpx x...",T3 - Difficult Mountain hike,1266.4,http://www.hikr.org/tour/post131856.html,1193.733,"Schuhflicker, 2214 m",12259.376315
2,5afb229e8f80884aaad9c6ec,22980.168081,igor,2018-05-11 06:29:38,2265.0,"{'min': {'type': 'Point', 'coordinates': [8.99...",2255.976,28971.0,2018-05-11 15:32:43,1.503002,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<gpx x...",T3 - Difficult Mountain hike,176.54,http://www.hikr.org/tour/post131839.html,2177.626,Cima d'erbea Est quota 2164m e Gaggio 2267m,22980.168081
3,5afb229e8f80884aaad9c6ed,24903.50347,rkroebl,2018-05-10 07:06:22,962.42,"{'min': {'type': 'Point', 'coordinates': [8.43...",882.312,26726.0,2018-05-10 16:15:18,1.516689,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<gpx x...",T2 - Mountain hike,388.51,http://www.hikr.org/tour/post131840.html,901.052,Waldstätterweg: Alpnachstad - Buochs,24903.50347
4,5afb229e8f80884aaad9c6ee,19581.273819,rkroebl,2018-05-11 05:44:58,697.57,"{'min': {'type': 'Point', 'coordinates': [8.61...",310.662,18197.0,2018-05-11 12:54:25,1.542405,"<?xml version=""1.0"" encoding=""UTF-8""?>\n<gpx x...",T2 - Mountain hike,438.5,http://www.hikr.org/tour/post131845.html,305.372,Waldstätterweg: Buochs - Beckenried und Gersau...,19581.273819


In [None]:
# Model: logistic regression on the scaled features.
# The `multi_class` argument is deprecated in recent scikit-learn releases and
# is redundant here: with the lbfgs solver a target with more than two classes
# is already fitted as a multinomial model, so it is omitted.
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_train, y_train)

# Evaluate on the held-out split: per-class precision / recall / F1.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))