In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import os
import numpy as np
from sklearn.model_selection import cross_val_score

In [2]:
# Load data from multiple CSV files
csv_dir = './data/csv_train/'
file_paths = os.listdir(csv_dir)

data_frames = [pd.read_csv(f'./data/csv_train/{file_path}')
               for file_path in file_paths]
combined_data = pd.concat(data_frames, ignore_index=False)

# combined_data.replace([np.inf, -np.inf], 5, inplace=True)
df = combined_data[combined_data['speed'] == np.inf]
combined_data.describe()


Unnamed: 0,latitude,longitude,altitude,altitude_diff,relative_elevation,distance,cum_distance,speed,lift?,lift_path
count,246395.0,246395.0,246395.0,246395.0,246395.0,246395.0,246395.0,246395.0,246395.0,246395.0
mean,46.183239,10.960967,1266.50668,-0.01268,262.499626,20.348112,26.923287,3.852743,0.000244,0.000467
std,0.0789,0.085127,424.490643,8.237209,426.892697,62.215384,19.580102,3.565807,0.015603,0.021599
min,45.884029,10.510484,30.0,-947.25,-1159.211,0.0,0.0,0.0,0.0,0.0
25%,46.142949,10.942188,1004.839,-0.827,-1.6745,4.64,11.217755,1.762172,0.0,0.0
50%,46.156417,10.989996,1247.733,-0.011,198.208,7.03,23.20787,3.065,0.0,0.0
75%,46.183007,11.015397,1544.0595,0.607,532.1545,12.52,39.206765,5.215893,0.0,0.0
max,46.551682,11.115855,2625.868,833.319,1867.893,12552.92,107.94184,739.37,1.0,1.0


In [3]:
# test
combined_data = pd.read_csv('./data/csv_train/bear-trail.csv')


In [4]:
# Select relevant features (columns)
# features = ['distance', 'altitude_diff']
features = ['distance', 'altitude_diff']
# Define the target column
target = 'lift?'

X = combined_data[features]
y = combined_data[target]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)


# Display the shape of the training and testing sets
print('the entire dataset\'s shape:', combined_data.shape)
print('x_train shape:', X_train.shape)
print('x_test shape:', y_test.shape)

the entire dataset's shape: (1765, 11)
x_train shape: (1412, 2)
x_test shape: (353,)


In [5]:
y = combined_data[target]
y[y == 1]


1       1
10      1
202     1
213     1
1042    1
1240    1
Name: lift?, dtype: int64

In [6]:
report = pd.read_csv(f'./data/report.csv')
report.describe()


Unnamed: 0.1,Unnamed: 0,n,sum_of_n
count,21.0,21.0,21.0
mean,10.0,2.857143,2.738095
std,6.204837,1.768777,1.757975
min,0.0,1.0,1.0
25%,5.0,1.0,1.0
50%,10.0,3.0,2.5
75%,15.0,4.0,4.0
max,20.0,6.0,6.0


## RF


In [7]:
# Initialize and train the Random Forest model
model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
model_rf.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model_rf.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


#### Cross validation


In [8]:
# Create a Random Forest classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Perform cross-validation with 5 folds
cross_val_scores = cross_val_score(rf_model, X, y, cv=5, scoring='accuracy')

# Print the cross-validation scores
print("Cross-validation Scores:", cross_val_scores)
print("Mean Accuracy:", cross_val_scores.mean())

Cross-validation Scores: [1.         0.99716714 1.         1.         1.        ]
Mean Accuracy: 0.9994334277620396


## NN


In [9]:
from sklearn.neural_network import MLPClassifier

# Create neural network model
model = MLPClassifier(hidden_layer_sizes=(
    6,), activation='relu', solver='adam', random_state=1)

# Perform 5-fold cross validation
scores = cross_val_score(model, X, y, cv=5)
print("Cross-validation scores: ", scores)

# Train model on training set
model.fit(X_train, y_train)

# Evaluate model performance on test set
print("Test set score: ", model.score(X_test, y_test))

Cross-validation scores:  [0.98583569 0.98583569 0.98016997 0.99433428 0.99150142]
Test set score:  0.9915014164305949


In [10]:
# L2 regularization
# model = MLPClassifier(alpha=0.001)

model = MLPClassifier(hidden_layer_sizes=(
    100,), activation='relu', solver='adam', alpha=0.001, random_state=1)
# model.add(Dropout(0.5))