# General Statistic for Least-Cost-Path QGIS-Plugin

In [14]:
from pandas import read_csv, DataFrame
from geopandas import read_file
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error
import numpy as np

In [2]:
stat_data = read_csv('../results/least_cost_paths/execution_time_resolution.csv', sep=';', decimal=',')
stat_data.head()

Unnamed: 0,Resolution \m,Resolution \px,all touched,run number,execution time \s
0,5,17481 * 11270,True,1.0,2423.87
1,10,8741 * 5635,True,1.0,421.93
2,10,8741 * 5635,True,2.0,419.34
3,50,1749 * 1127,True,1.0,12.69
4,50,1749 * 1127,True,2.0,12.94


In [3]:
stat_data.dtypes

Resolution \m          int64
Resolution \px        object
all touched             bool
run number           float64
execution time \s    float64
dtype: object

In [4]:
stat_data.describe()

Unnamed: 0,Resolution \m,run number,execution time \s
count,14.0,13.0,14.0
mean,46.428571,1.384615,478.217857
std,39.488488,0.50637,864.955779
min,5.0,1.0,2.78
25%,10.0,1.0,5.23
50%,50.0,1.0,12.815
75%,87.5,2.0,421.2825
max,100.0,2.0,2516.9


In [5]:
X = np.log2(stat_data['Resolution \\m'].values).reshape(-1, 1)
y = np.log2(stat_data['execution time \\s'].values)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, random_state=42)

In [7]:
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

r2_score(y_train, lin_reg.predict(X_train)), r2_score(y_test, lin_reg.predict(X_test))

(0.9988503848986976, 0.9972624300029503)

In [8]:
lin_reg.coef_

array([-2.21510562])

In [9]:
stat_data['Resolution_px'] = stat_data['Resolution \\px']
products = [eval(x) for x in stat_data['Resolution_px'].to_list()]
X2 = np.log2(products).reshape(-1, 1)
X2

array([[27.55369999],
       [25.55378252],
       [25.55378252],
       [20.91058637],
       [20.91058637],
       [18.91269056],
       [18.91269056],
       [27.55369999],
       [25.55378252],
       [25.55378252],
       [20.91058637],
       [20.91058637],
       [18.91269056],
       [18.91269056]])

In [10]:
DataFrame({'Log pixel': np.log2(products),'Pixel':stat_data['Resolution \\px'], 'Log y': y, 'Original y': stat_data['execution time \\s']})

Unnamed: 0,Log pixel,Pixel,Log y,Original y
0,27.5537,17481 * 11270,11.243097,2423.87
1,25.553783,8741 * 5635,8.72086,421.93
2,25.553783,8741 * 5635,8.711977,419.34
3,20.910586,1749 * 1127,3.66562,12.69
4,20.910586,1749 * 1127,3.693766,12.94
5,18.912691,875 * 564,1.594549,3.02
6,18.912691,875 * 564,1.632268,3.1
7,27.5537,17481 * 11270,11.297432,2516.9
8,25.553783,8741 * 5635,8.68881,412.66
9,25.553783,8741 * 5635,8.778701,439.19


In [11]:
X2_train, X2_test, y2_train, y2_test = train_test_split(np.log2(products).reshape(-1, 1), y, test_size=0.10, random_state=42)

In [12]:
lin_reg2 = LinearRegression()
lin_reg2.fit(X2_train, y2_train)

mean_absolute_error(y2_train, lin_reg.predict(X2_train)), mean_absolute_error(y2_test, lin_reg.predict(X2_test))

(39.13992603933185, 41.40341266893853)

In [13]:
lin_reg2.coef_

array([1.10790803])

# Statistics over the aggregated costs

In [16]:
least_cost_path_5_false = read_file("../results/least_cost_paths/least_cost_path_test_points_res_5_al_false.gpkg")
least_cost_path_5_true = read_file("../results/least_cost_paths/least_cost_path_test_points_res_5_al_true.gpkg")

least_cost_path_10_false = read_file("../results/least_cost_paths/least_cost_path_test_points_res_10_al_false.gpkg")
least_cost_path_10_true = read_file("../results/least_cost_paths/least_cost_path_test_points_res_10_al_true.gpkg")

least_cost_path_50_false = read_file("../results/least_cost_paths/least_cost_path_test_points_res_50_al_false.gpkg")
least_cost_path_50_true = read_file("../results/least_cost_paths/least_cost_path_test_points_res_50_al_true.gpkg")

least_cost_path_100_false = read_file("../results/least_cost_paths/least_cost_path_test_points_res_100_al_false.gpkg")
least_cost_path_100_true = read_file("../results/least_cost_paths/least_cost_path_test_points_res_100_al_true.gpkg")

In [37]:
X = np.log2(np.array([5, 10, 50, 100]).reshape(-1, 1))
y_false = np.log2(np.array([least_cost_path_5_false['total cost'],
           least_cost_path_10_false['total cost'],
           least_cost_path_50_false['total cost'],
           least_cost_path_100_false['total cost'],
           ]))

y_true = np.log2(np.array([least_cost_path_5_true['total cost'],
           least_cost_path_10_true['total cost'],
           least_cost_path_50_true['total cost'],
           least_cost_path_100_true['total cost'],
           ]))

In [38]:
lin_reg_false = LinearRegression()
lin_reg_false.fit(X, y_false)

lin_reg_true = LinearRegression()
lin_reg_true.fit(X, y_true)

r2_score(y_false, lin_reg_false.predict(X)), r2_score(y_true, lin_reg_true.predict(X))

(0.9998469990875714, 0.9933976133506535)

In [42]:
lin_reg_false.coef_[0][0], lin_reg_true.coef_[0][0]

(-1.1305292427570628, -0.8531958731641293)