In [1]:
# To work with local eumap code
# import sys
# sys.path.append('../../')

import os
os.environ['USE_PYGEOS'] = '0'

from pathlib import Path
import pandas as pd
import geopandas as gpd
from eumap.mapper import LandMapper

import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn.metrics import log_loss

def log_loss_scorer(clf, X, y_true):
    """Score a fitted classifier by its negated log loss.

    Parameters
    ----------
    clf : fitted classifier exposing ``classes_`` and ``predict_proba``.
    X : feature matrix passed to ``clf.predict_proba``.
    y_true : true labels for the rows of ``X``.

    Returns
    -------
    The log loss multiplied by -1, so that a larger score means a smaller loss.
    """
    # Pass the classifier's own label set so the probability columns are
    # matched against ``clf.classes_`` rather than the labels found in y_true.
    known_labels = clf.classes_
    return -log_loss(y_true, clf.predict_proba(X), labels=known_labels)

In [3]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Base classifier whose hyperparameters are tuned by the grid search below.
estimator = RandomForestClassifier(n_estimators=100)

# Candidate values explored by the grid search (2 x 2 x 1 combinations).
rf_param_grid = {
    'min_samples_leaf': [1, 5],
    'max_depth': [5, None],
    'max_features': [0.5],
}

# Grid search scored with the negated log loss defined in `log_loss_scorer`.
hyperpar = GridSearchCV(
    estimator=estimator,
    param_grid=rf_param_grid,
    scoring=log_loss_scorer,
)


Training data preprocessing

In [4]:
# Prefer the real training file; fall back to the dummy sample when it is absent.
tdata_path = Path('tick_reports_training.gpkg')
if not tdata_path.is_file():
    tdata_path = Path('tick_reports_dummy.gpkg')

In [5]:
# Read the tick report points from the GeoPackage at `tdata_path`.
tick_dummy = gpd.read_file(tdata_path)

In [6]:
# Preview the first rows to check the loaded columns.
tick_dummy.head()

Unnamed: 0,id,acc,datetime,X,Y,geometry
0,48,74,2015-03-11 21:09:27+00:00,2703318.0,1233180.0,POINT (2703317.828 1233180.426)
1,52,73,2015-03-11 20:39:01+00:00,2700818.0,1233604.0,POINT (2700817.848 1233603.591)
2,59,257,2015-03-13 07:54:01+00:00,2559113.0,1153136.0,POINT (2559113.393 1153136.235)
3,63,190,2015-03-14 16:38:27+00:00,2604972.0,1200216.0,POINT (2604972.189 1200216.263)
4,66,333,2015-03-15 18:59:35+00:00,2681722.0,1247090.0,POINT (2681722.042 1247090.450)


In [7]:
# Derive `date` (YYYY-MM) and `year` (YYYY) strings from the report timestamp
# and keep only id, acc, geometry, date and year.
# - The vectorised `.dt.strftime` replaces the row-wise `.apply(lambda ...)`.
# - The former `month` column is no longer computed: it was dropped by the
#   column selection immediately after being created.
# - `.assign` builds a new frame instead of adding columns to the loaded one.
tick_dummy = tick_dummy.assign(
    date=tick_dummy['datetime'].dt.strftime('%Y-%m'),
    year=tick_dummy['datetime'].dt.strftime('%Y'),
)[['id', 'acc', 'geometry', 'date', 'year']]

In [8]:
# Display the reduced table (id, acc, geometry, date, year).
tick_dummy 

Unnamed: 0,id,acc,geometry,date,year
0,48,74,POINT (2703317.828 1233180.426),2015-03,2015
1,52,73,POINT (2700817.848 1233603.591),2015-03,2015
2,59,257,POINT (2559113.393 1153136.235),2015-03,2015
3,63,190,POINT (2604972.189 1200216.263),2015-03,2015
4,66,333,POINT (2681722.042 1247090.450),2015-03,2015
...,...,...,...,...,...
28441,49910,272,POINT (2710389.567 1092322.160),2020-06,2020
28442,49949,269,POINT (2693109.931 1254805.024),2020-07,2020
28443,56372,969,POINT (2650954.421 1268689.730),2018-09,2018
28444,58623,301,POINT (2627787.691 1236753.273),2016-08,2016


In [9]:
# Paths of the layers found under the `static` input folder; populated by the
# following cell.
static_fn_layers = []
# NOTE(review): the commented-out block below is an earlier variant that walked
# a machine-specific absolute path and kept only `.tif` files. It is dead code
# and will not work on another machine.
#for i in os.scandir('/home/opengeohub/faen/Desktop/OpenGeoHub2023_Hackathon_Tickbites_dummy/prepared_2/'):
#    for j in os.scandir(i.path):
#        if j.name.endswith('.tif'):
#            static_fn_layers.append(Path(j.path))

In [11]:
# Root directory holding `external_input/` and `training_points.gpkg`.
# NOTE(review): later cells read `root_path`, but no visible cell defines it
# (execution count 10 is missing). It defaults to the working directory, which
# is what the relative 'external_input' path in this cell resolved against
# before. Point it at your data location if that differs.
root_path = Path('.')

# Collect every entry of <root_path>/external_input/static/.
# The list is rebuilt rather than appended to, so re-running the cell does not
# duplicate layers. A missing `static` folder now raises FileNotFoundError
# here instead of silently leaving the list empty.
with os.scandir(os.path.join(root_path, 'external_input', 'static')) as static_entries:
    static_fn_layers = [Path(entry.path) for entry in static_entries]

FileNotFoundError: [Errno 2] No such file or directory: 'external_input'

In [None]:
from eumap.mapper import SpaceOverlay

# Overlay the training points on the static layers.
training_points_fn = os.path.join(root_path, 'training_points.gpkg')
spc_overlay = SpaceOverlay(training_points_fn, fn_layers=static_fn_layers)
result = spc_overlay.run()

# Quick size check of the overlay result.
print(result.shape)

In [None]:
# Preview the first rows of the static overlay result.
result.head()

In [None]:
# Overlay the training points on each year's layers, keep only the points that
# belong to that year, and stack the per-year tables into `all_year`.
# Fixes two unbalanced parentheses that made this cell a SyntaxError; the
# second one also placed `fn_layers=` inside `os.path.join`.
yearly_results = []
for year in ['2015','2016','2017','2018','2019','2020']:
    # Gather every entry of <root_path>/external_input/<year>/.
    # NOTE: `fn_layers` deliberately stays a top-level name; a later cell reads
    # the value left by the last iteration (2020).
    fn_layers = []
    for year_dir in os.scandir(os.path.join(root_path, 'external_input')):
        if year_dir.name == year:
            for layer in os.scandir(year_dir.path):
                fn_layers.append(Path(layer.path))

    spc_overlay = SpaceOverlay(os.path.join(root_path, 'training_points.gpkg'), fn_layers=fn_layers)

    result_tmp = spc_overlay.run()
    # Keep only the points recorded in this year.
    result_tmp = result_tmp[result_tmp.year==year]
    # Strip the year from the column names so all years share the same columns.
    result_tmp.columns = result_tmp.columns.str.replace(year, '')
    yearly_results.append(result_tmp)

# Concatenate once instead of growing a DataFrame inside the loop.
all_year = pd.concat(yearly_results)

In [None]:
# Combine the static overlay (`result`) with the stacked yearly overlays
# (`all_year`). Column names present in both keep their name from `result`;
# the copy coming from `all_year` gets the `_other` suffix.
# NOTE(review): no `on=` key is given, so rows are presumably matched by index.
# Confirm both tables carry the same point index.
df = result.join(all_year, lsuffix='', rsuffix='_other')

In [None]:
# Keep every column except the 'Population_2015-2021' layer and the duplicated
# columns that the join marked with the 'other' suffix.
columns = [
    name
    for name in df.columns
    if name != 'Population_2015-2021' and not name.endswith('other')
]

In [None]:
# Modelling configuration.
target_col = 'acc'
# NOTE(review): the features are picked by position (entries 6..9 of
# `columns`), so this depends on the column order of `df` — confirm the slice
# selects the intended layers.
feat_col_prfxs = list(columns[6:10])
min_samples_per_class = 0.05
cv = 5

# Probability-output mapper built on the random forest and the grid search
# configured earlier.
landmapper_prob = LandMapper(
    points=df,
    feat_col_prfxs=feat_col_prfxs,
    target_col=target_col,
    estimator=estimator,
    hyperpar_selection=hyperpar,
    cv=cv,
    min_samples_per_class=min_samples_per_class,
    pred_method='predict_proba',
    verbose=True,
)

In [None]:
# Inspect the mapper's `target_le` attribute (presumably the label encoding
# applied to the `acc` target — verify against the eumap documentation).
landmapper_prob.target_le

In [None]:
# Train the mapper configured above.
landmapper_prob.train()

In [None]:
# Report the log loss, followed by the full evaluation report.
log_loss_value = landmapper_prob.eval_metrics["log_loss"]
print(f'Log loss: {log_loss_value:.3f}\n\n')
print(landmapper_prob.eval_report)

In [None]:
# Layers used for prediction: the static layers plus `fn_layers`.
# NOTE(review): `fn_layers` is the variable left over from the last iteration
# of the yearly loop, i.e. only the 2020 layers. Confirm that predicting with
# the 2020 layers is intended.
fns_layers = static_fn_layers+fn_layers

In [None]:

# Destination passed to the mapper for the predicted raster(s).
fn_output = os.path.join(root_path, 'tick_acc.tif')

# Run the prediction over the combined layer list.
output_fn_files = landmapper_prob.predict(
    fn_layers=fns_layers,
    fn_output=fn_output,
    allow_additional_layers=True,
)

# List the file names returned by the prediction.
print('Output files:')
for written_fn in output_fn_files:
    print(f' - {Path(written_fn).name}')