In [1]:
! pip install lightgbm

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Training 5 LightGBM gradient-boosting regressors for $ Map_i $, i=1,...,5

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

from tqdm import tqdm
from lightgbm import LGBMRegressor

from main import import_dataset_from_file

In [4]:
# One slot per map; train_val_map(idx) stores its fitted model at gbs[idx].
gbs = [None for _ in range(5)]


def train_val_map(idx, random_state=42):
    """Fit a LightGBM regressor on one map file and store it in ``gbs[idx]``.

    Reads ``../Data/Map_{idx+1}.txt`` through ``import_dataset_from_file``,
    holds out 20% of the rows for validation, fits an ``LGBMRegressor`` that
    predicts column ``z`` from columns ``x`` and ``y``, prints the hold-out
    RMSE and saves the fitted model into the module-level ``gbs`` list.

    Parameters
    ----------
    idx : int
        Zero-based map index; selects both the input file (``Map_{idx+1}``)
        and the slot of ``gbs`` that receives the model.
    random_state : int or None, default 42
        Seed forwarded to ``train_test_split`` and ``LGBMRegressor`` so the
        split, the fitted model and the reported RMSE are reproducible
        between runs. Pass ``None`` for the previous unseeded behaviour.

    Returns
    -------
    float
        Root-mean-squared error of the model on the held-out 20%.
    """
    # dataframe
    map_df = import_dataset_from_file(f"../Data/Map_{idx+1}.txt")

    # train_test_split (seeded: an unseeded shuffle gives a different
    # validation set, and therefore a different RMSE, on every run)
    cols = ['x', 'y']
    X_train, X_test, y_train, y_test = train_test_split(
        map_df[cols],
        map_df['z'],
        test_size=0.2,
        shuffle=True,
        random_state=random_state,
    )

    # gb
    gb = LGBMRegressor(max_depth=100, random_state=random_state)

    # training
    gb.fit(X_train, y_train)

    # validating
    pred = gb.predict(X_test)
    rmse = mean_squared_error(y_test, pred) ** 0.5
    print('Validation RMSE:', rmse)

    # saving
    gbs[idx] = gb

    return rmse


In [5]:
# Fit and validate one model per map, in order Map1 .. Map5.
for map_number in range(1, 6):
    print(f'Training Map{map_number}')
    train_val_map(map_number - 1)  # train_val_map takes a zero-based index
    print()

Training Map1
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.025019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 957196, number of used features: 2
[LightGBM] [Info] Start training from score 0.155716
Validation RMSE: 0.00485810787405018

Training Map2
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012330 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 510
[LightGBM] [Info] Number of data points in the train set: 957196, number of used features: 2
[LightGBM] [Info] Start training from score 0.123917
Validation RMSE: 0.004673352447649967

Training Map3
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011069 seconds.
You can set `force_row_wise=tru

## Predicting $ map_i $ for pairs (x, y)

In [6]:
from main import import_dataset_from_file, export_dataset_to_file

In [7]:
# Load the Result_schedule file into a DataFrame and display it.
schedule_path = '../Data/Result_schedule.txt'
result_df = import_dataset_from_file(schedule_path)
result_df

Unnamed: 0,x,y,z
0,68239.21,24467.0,
1,68251.71,24467.0,
2,68264.21,24467.0,
3,68276.71,24467.0,
4,68289.21,24467.0,
...,...,...,...
1196585,53264.21,46942.0,
1196586,53276.71,46942.0,
1196587,53289.21,46942.0,
1196588,53301.71,46942.0,


In [9]:
# Predict from the same labelled float64 x/y columns the models were fitted on
# (train_val_map fits on a DataFrame slice). The previous float32 ndarray
# rounded the coordinates and dropped the feature names.
xy_features = result_df[['x', 'y']]

# Attach one prediction column per fitted model: map1 .. map5.
for i, gb in enumerate(gbs):
    result_df[f'map{i+1}'] = gb.predict(xy_features)



In [10]:
# Display result_df as the cell output to inspect its current columns.
result_df

Unnamed: 0,x,y,z,map1,map2,map3,map4,map5
0,68239.21,24467.0,,0.153215,0.112100,38.157089,4168.423933,1.677636
1,68251.71,24467.0,,0.153215,0.112100,38.157089,4168.423933,1.677636
2,68264.21,24467.0,,0.153215,0.112100,38.157089,4157.856382,1.677636
3,68276.71,24467.0,,0.152706,0.112100,38.157089,4157.856382,1.677636
4,68289.21,24467.0,,0.152706,0.112100,38.157089,4157.856382,1.677636
...,...,...,...,...,...,...,...,...
1196585,53264.21,46942.0,,0.163908,0.114286,38.574245,4085.483519,1.675672
1196586,53276.71,46942.0,,0.163908,0.114286,38.574245,4085.483519,1.675672
1196587,53289.21,46942.0,,0.163908,0.114286,38.574245,4085.483519,1.675672
1196588,53301.71,46942.0,,0.163908,0.114286,38.574245,4080.623922,1.675672


In [11]:
# Persist result_df (index included, pandas default) as CSV in the working directory.
result_df.to_csv(path_or_buf='LGBM_preresults.csv')

In [12]:
# Load the Point_dataset file into a DataFrame and display it.
point_dataset_path = '../Data/Point_dataset.txt'
training_df = import_dataset_from_file(point_dataset_path)
training_df

Unnamed: 0,x,y,z
0,44222.21,35600.90,14.974396
1,60003.40,26172.72,14.078868
2,44300.61,35291.79,14.261570
3,52776.47,33453.18,15.347562
4,49636.39,30697.34,14.422570
...,...,...,...
436,65193.40,26832.74,9.374795
437,50022.04,40606.42,7.636603
438,43745.18,35114.25,14.622497
439,71483.43,25152.76,11.014532


In [13]:
# Predict from the same labelled float64 x/y columns the models were fitted on
# (train_val_map fits on a DataFrame slice). The previous float32 ndarray
# rounded the coordinates and dropped the feature names.
xy_features = training_df[['x', 'y']]

# Attach one prediction column per fitted model: map1 .. map5.
for i, gb in enumerate(gbs):
    training_df[f'map{i+1}'] = gb.predict(xy_features)



In [14]:
# Display training_df as the cell output to inspect its current columns.
training_df

Unnamed: 0,x,y,z,map1,map2,map3,map4,map5
0,44222.21,35600.90,14.974396,0.176317,0.123170,41.349485,4053.175632,1.679941
1,60003.40,26172.72,14.078868,0.153543,0.119007,36.997702,4185.736815,1.678253
2,44300.61,35291.79,14.261570,0.174822,0.126810,41.273551,4057.709318,1.680239
3,52776.47,33453.18,15.347562,0.160092,0.132317,37.604019,4204.458212,1.677390
4,49636.39,30697.34,14.422570,0.142867,0.117498,37.369916,4215.434405,1.678432
...,...,...,...,...,...,...,...,...
436,65193.40,26832.74,9.374795,0.164266,0.132595,35.936368,4195.079415,1.672867
437,50022.04,40606.42,7.636603,0.154048,0.134098,40.954899,4190.107182,1.672080
438,43745.18,35114.25,14.622497,0.177520,0.123106,40.936217,4032.042864,1.679925
439,71483.43,25152.76,11.014532,0.169270,0.109169,39.975413,4143.529265,1.679192


In [15]:
# Persist training_df (index included, pandas default) as CSV in the working directory.
training_df.to_csv(path_or_buf='LGBM_training_maps.csv')