In [None]:
#@title Install dependencies
%%capture
!pip install geopandas
!pip install scikeras
!pip install mapclassify
!pip install contextily

In [None]:
#@title Import libs
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Point, LineString
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import pandas as pd
import time
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
import sys
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from scikeras.wrappers import KerasRegressor
import pickle


# Function definitions

In [None]:
def gen_points(tl, tr, bl, br, N):
  """Sample a regular grid of points over an axis-aligned rectangle.

  The rectangle is described by its four corners (objects exposing ``.x`` and
  ``.y``). ``N`` evenly spaced points are placed on the top edge and on the
  bottom edge; every top/bottom pair is then joined by a vertical line that is
  sampled at ``N`` evenly spaced y values, keeping only the points that do not
  coincide with the two edges.

  Parameters:
    tl, tr: corners of the top edge; they must share the same ``y``.
    bl, br: corners of the bottom edge; they must share the same ``y``.
    N: number of samples per edge and per vertical line.

  Returns:
    A 4-tuple ``(all_points, tline_points, bline_points, vpoints)`` where
    ``all_points`` is the flat NumPy concatenation of the three lists, in the
    order top edge, bottom edge, vertical points.

  Raises:
    AssertionError: if an edge is not horizontal or the top and bottom samples
      do not line up on ``x``.
  """
  assert tl.y == tr.y, 'top edge must be horizontal (tl.y != tr.y)'
  tline_points = [Point(x, tl.y) for x in np.linspace(tl.x, tr.x, N)]

  assert bl.y == br.y, 'bottom edge must be horizontal (bl.y != br.y)'
  bline_points = [Point(x, bl.y) for x in np.linspace(bl.x, br.x, N)]

  vpoints = []
  for t, b in zip(tline_points, bline_points):
      assert t.x == b.x, 'top and bottom samples must share the same x'
      for y in np.linspace(t.y, b.y, N):
          # Every point on this vertical line has x == t.x == b.x, so it
          # duplicates an edge point exactly when its y equals that edge's y.
          # Comparing y directly replaces a scan of both edge lists for each
          # candidate point (O(N) per point, O(N^3) overall) with an O(1) check.
          if y != t.y and y != b.y:
              vpoints.append(Point(t.x, y))
  return np.concatenate([tline_points, bline_points, vpoints], axis=None), tline_points, bline_points, vpoints

def prepare_data(s, d):
  """Turn two point collections into feature and target matrices.

  Parameters:
    s: source points (objects exposing ``.x`` and ``.y``); they become the
      feature matrix with columns ``gis_x`` and ``gis_y``.
    d: destination points (same interface); they become the target matrix
      with columns ``u_x`` and ``u_y``.

  Returns:
    ``(X, y)``: two 2-column NumPy arrays, one row per point, built from ``s``
    and ``d`` respectively.
  """
  def _xy_matrix(points, x_col, y_col):
    # One row per point, x in the first column and y in the second.
    frame = pd.DataFrame({
        x_col: [p.x for p in points],
        y_col: [p.y for p in points],
    })
    return frame[[x_col, y_col]].to_numpy()

  y = _xy_matrix(d, 'u_x', 'u_y')
  X = _xy_matrix(s, 'gis_x', 'gis_y')
  return X, y

# Training Models

In [None]:
#@title Define Experiment Data
# Corner points of the source (GIS) rectangle, one row of the rectangle per line.
# The suffixes match the tl/tr/bl/br arguments of gen_points, which requires
# tl.y == tr.y and bl.y == br.y.
# NOTE(review): presumably x/y hold geographic coordinates — confirm which axis
# is latitude and which is longitude.
gis_tl, gis_tr = Point(21.3871803517162, 39.914469580065), Point(21.4300499951713, 39.914469580065)
gis_bl, gis_br = Point(21.3871803517162, 39.8638090805479), Point(21.4300499951713, 39.8638090805479)

# Matching corner points of the target (Unity) rectangle.
unity_tl, unity_tr = Point(0,4750), Point(5250,4750)
unity_bl, unity_br = Point(0,0), Point(5250,0)

# Grid resolutions (the N passed to gen_points) to run the experiment for.
# Full sweep used previously: [2, 4, 8, 10, 50, 100, 200, 400]
N_values = [400]

In [None]:
# Scratch check: how many values np.arange produces for a step of 1e-05 over
# the half-open interval [0, 1e-04); the recorded output of this cell is 10.
# NOTE(review): nothing else in the visible notebook uses this result — it looks
# like a leftover experiment; confirm whether the cell is still needed.
len(np.arange(0, 1e-04, 0.00001))

10

In [None]:
#@title Define ML Models

# Models that are instantiated directly (no wrapper, no configuration).
_direct_model_names = [
    'RandomForestRegressor',
    'MultiTaskLasso',
    'MultiTaskElasticNet',
]

# Base estimators that are wrapped in a MultiOutputRegressor by mk_model_objects.
_multi_output_estimator_names = [
    'LinearRegression',
    'SGDRegressor',
    'Ridge',
    'Lasso',
    'ElasticNet',
    'BayesianRidge',
    'PassiveAggressiveRegressor',
    'HuberRegressor',
    'ARDRegression',
    'LassoLars',
    'LassoLarsIC',
    'ElasticNetCV',
    'KNeighborsRegressor',
    'RadiusNeighborsRegressor',
    'LinearSVR',
    'NuSVR',
    'SVR',
    'DecisionTreeRegressor',
    'ExtraTreeRegressor',
    'PoissonRegressor',
    'RANSACRegressor',
    'Lars',
    'TheilSenRegressor',
    'LarsCV',
    'LassoLarsCV',
    'TweedieRegressor',
    'BaggingRegressor',
    'RandomForestRegressor',
    'GradientBoostingRegressor',
    'AdaBoostRegressor',
    'ExtraTreesRegressor',
    'HistGradientBoostingRegressor',
]

# Full catalogue of model specifications. It is kept under its own name so that
# choosing a subset below no longer overwrites (and thereby discards) it.
all_ml_models = (
    [{'name': name, 'config': None} for name in _direct_model_names]
    + [{'name': 'MultiOutputRegressor', 'config': {'estimator': name}}
       for name in _multi_output_estimator_names]
)

# Wrapped estimators to run in this session, in catalogue order.
# Edit this tuple to run a different subset, e.g. ('RadiusNeighborsRegressor',),
# or assign `ml_models = all_ml_models` to run the whole catalogue.
selected_estimators = ('NuSVR', 'SVR')

ml_models = [
    spec for spec in all_ml_models
    if spec['name'] == 'MultiOutputRegressor'
    and spec['config']['estimator'] in selected_estimators
]

# Instantiate all models as objects
def mk_model_objects(models):
    """Build scikit-learn estimator objects from a list of model specifications.

    Each specification is a dict with the keys ``'name'`` and ``'config'``:

    * ``'RandomForestRegressor'``, ``'MultiTaskLasso'`` and
      ``'MultiTaskElasticNet'`` are instantiated directly; a truthy ``config``
      dict is passed to the constructor as keyword arguments.
    * ``'MultiOutputRegressor'`` requires ``config['estimator']`` to name one of
      the supported base estimators; a default-constructed instance of it is
      wrapped in ``MultiOutputRegressor``.

    scikit-learn modules are imported lazily, and only after the specification
    has been validated.

    Parameters:
        models: iterable of specification dicts as described above.

    Returns:
        A list with one estimator object per specification, in input order.

    Raises:
        ValueError: if ``config`` is ``None`` for ``MultiOutputRegressor``, if
            the estimator name is not supported, or if the model name is
            unknown (unknown model names used to be skipped silently, which
            left the result shorter than the input).
    """
    from importlib import import_module

    # Models that are used as-is (not wrapped): class name -> defining module.
    direct_models = {
        'RandomForestRegressor': 'sklearn.ensemble',
        'MultiTaskLasso': 'sklearn.linear_model',
        'MultiTaskElasticNet': 'sklearn.linear_model',
    }

    # Base estimators accepted for MultiOutputRegressor, grouped by module.
    # 'ElasticNetDual' and 'ElasticNetEmbedding' were dropped on purpose:
    # sklearn.linear_model provides no such classes, so selecting them could
    # only ever fail with an ImportError.
    estimators_by_module = {
        'sklearn.linear_model': (
            'LinearRegression', 'SGDRegressor', 'Ridge', 'Lasso', 'ElasticNet',
            'BayesianRidge', 'PassiveAggressiveRegressor', 'HuberRegressor',
            'ARDRegression', 'LassoLars', 'LassoLarsIC', 'ElasticNetCV',
            'PoissonRegressor', 'RANSACRegressor', 'Lars', 'TheilSenRegressor',
            'LarsCV', 'LassoLarsCV', 'TweedieRegressor',
        ),
        'sklearn.neighbors': ('KNeighborsRegressor', 'RadiusNeighborsRegressor'),
        'sklearn.svm': ('LinearSVR', 'NuSVR', 'SVR'),
        'sklearn.tree': ('DecisionTreeRegressor', 'ExtraTreeRegressor'),
        'sklearn.ensemble': (
            'BaggingRegressor', 'RandomForestRegressor',
            'GradientBoostingRegressor', 'AdaBoostRegressor',
            'ExtraTreesRegressor', 'HistGradientBoostingRegressor',
        ),
    }
    estimator_module = {
        class_name: module_name
        for module_name, class_names in estimators_by_module.items()
        for class_name in class_names
    }

    def load_class(module_name, class_name):
        # Import on demand so that only the requested estimators are loaded.
        return getattr(import_module(module_name), class_name)

    model_objects = []
    for m in models:
        name = m['name']
        config = m['config']
        if name in direct_models:
            model_cls = load_class(direct_models[name], name)
            model_objects.append(model_cls(**config) if config else model_cls())
        elif name == 'MultiOutputRegressor':
            if config is None:
                raise ValueError('config must be specified for MultiOutputRegressor')
            estimator = config['estimator']
            if estimator not in estimator_module:
                raise ValueError('estimator must be a valid estimator name')
            wrapper_cls = load_class('sklearn.multioutput', 'MultiOutputRegressor')
            estimator_cls = load_class(estimator_module[estimator], estimator)
            model_objects.append(wrapper_cls(estimator_cls()))
        else:
            raise ValueError(f'unknown model name: {name!r}')
    return model_objects

# Report how many model specifications are currently held in `ml_models`.
print(f'Defined {len(ml_models)} models')

Defined 2 models


In [None]:
#@title Train, Evaluate and Collect Results
import tempfile
import os
import datetime

# Timestamp that makes the result file names of this run unique.
today_datatime = datetime.datetime.now().strftime("%Y-%m-%d-%H%M%S")
print(today_datatime)
# NOTE(review): hard-coded Google Drive location — presumably Drive is mounted
# before this cell runs; confirm, or make the path configurable.
output_path = '/content/drive/MyDrive/CrowdUnitProjects/CrowdSimulationProject/CSConversion/Paper/'
os.makedirs(output_path, exist_ok=True)

# Set the number of trials
trial_no = 5

sc = StandardScaler()

# Test set generation
N_test = 80
gis_output_test, _, _, _ = gen_points(gis_tl, gis_tr, gis_bl, gis_br,  N_test)
unity_output_test, _, _, _ = gen_points(unity_tl, unity_tr, unity_bl, unity_br, N_test)

# The test features stay unscaled here. They are standardised inside the loop
# with the scaler fitted on the training grid, so train and test inputs go
# through the same transformation (fitting a separate scaler on the test grid
# scales it differently from the data the models were trained on).
X_test_raw, y_test = prepare_data(gis_output_test, unity_output_test)

# NOTE: 'test_scoreing_time_std' is misspelled but kept unchanged, because the
# column name is part of the CSV files written below.
result_columns = ['N', 'model', 'train_score_avg', 'train_score_std', 'test_score_avg', 'test_score_std', 'fit_time_avg', 'fit_time_std', 'model_size_KB_avg','model_size_KB_std',  'train_scoring_time_avg', 'train_scoring_time_std', 'test_scoring_time_avg', 'test_scoreing_time_std']


def _mean_std(values, scale=1):
  """Return [mean, std] of `values`, each divided by `scale`; NaNs when empty."""
  if len(values) == 0:
    # Skipped models have no measurements; report NaN explicitly instead of
    # letting np.mean([]) produce NaN together with a RuntimeWarning.
    return [np.nan, np.nan]
  return [np.mean(values) / scale, np.std(values) / scale]


def _summarize_trials(N, m_name, train_scores, test_scores, fit_times,
                      model_sizes, train_scoring_times, test_scoring_times):
  """Build one result row, ordered like `result_columns`."""
  return [
      N, m_name,
      *_mean_std(train_scores),
      *_mean_std(test_scores),
      *_mean_std(fit_times),
      *_mean_std(model_sizes, 1024),  # bytes -> KB
      *_mean_std(train_scoring_times),
      *_mean_std(test_scoring_times),
  ]


mdf = pd.DataFrame(columns=result_columns)
for N in tqdm(N_values):

  gis_output, gis_tline_points, gis_bline_points, gis_vpoints = gen_points(gis_tl, gis_tr, gis_bl, gis_br, N)
  unity_output, unity_tline_points, unity_bline_points, unity_vpoints = gen_points(unity_tl, unity_tr, unity_bl, unity_br, N)
  X, y = prepare_data(gis_output, unity_output)

  mdf_per_n = pd.DataFrame(columns=mdf.columns)

  # Fit the scaler on the training data only, then apply that same
  # transformation to the held-out test grid.
  X = sc.fit_transform(X)
  X_test = sc.transform(X_test_raw)

  X_train = X
  y_train = y

  for m in tqdm(ml_models):
    if m['config'] is None or m['config'] == '':
      m_name = m['name']
    elif m['name'] == 'MultiOutputRegressor':
      m_name = 'MOP(' + m['config']['estimator'] + ')'
    else:
      m_name = m['name'] + ' ' + str(m['config'])

    print('----------------------------------------------------')
    print(f'Model: {m_name}, N = {N}')
    print('----------------------------------------------------')

    # Model initialization
    model = mk_model_objects([m])[0]  # Use a list with one element and take the first element

    train_scores = []
    model_sizes = []
    et_training_fit = []
    et_training_scoring = []
    et_testing = []
    test_scores = []

    # These models are not run on the smallest grid (N == 2); they are recorded
    # with NaN results instead.
    skip_model = N == 2 and m_name in (
        'MOP(LassoLarsIC)', 'MOP(ElasticNetCV)',
        'MOP(KNeighborsRegressor)', 'MOP(RadiusNeighborsRegressor)',
        'MOP(LarsCV)', 'MOP(LassoLarsCV)')

    if not skip_model:
      for i in range(trial_no):
        iter_start_time = time.perf_counter()

        # Fitting (the same model object is refitted in every trial)
        start_time = time.perf_counter()
        model.fit(X_train, y_train)
        et_training_fit.append(time.perf_counter() - start_time)

        # Model size: number of bytes in the pickled model. len() measures the
        # payload itself; sys.getsizeof would add the bytes-object overhead.
        model_sizes.append(len(pickle.dumps(model)))

        # Train scoring
        start_time = time.perf_counter()
        train_score = model.score(X_train, y_train)
        et_training_scoring.append(time.perf_counter() - start_time)
        train_scores.append(train_score)

        # Test scoring
        start_time = time.perf_counter()
        test_score = model.score(X_test, y_test)
        et_testing.append(time.perf_counter() - start_time)
        test_scores.append(test_score)

        print(f'Completed trial {i}. It took {time.perf_counter() - iter_start_time} Seconds.')

    # Append results to both DataFrames (all-NaN statistics for skipped models)
    agg_res = _summarize_trials(N, m_name, train_scores, test_scores,
                                et_training_fit, model_sizes,
                                et_training_scoring, et_testing)
    mdf.loc[len(mdf)] = agg_res
    mdf_per_n.loc[len(mdf_per_n)] = agg_res

    if skip_model:
      print('-------------- Skipped -----------------')
      continue

    print('----------------------------------------------------')
    print(agg_res)
    print('----------------------------------------------------')

  # Per-N checkpoint, so finished grid sizes survive an interrupted run.
  mdf_per_n.to_csv(os.path.join(output_path, f'nb_results_{N}_{trial_no}trials_rand_{today_datatime}.csv'), index=False)


# Save the combined results and display the DataFrame
mdf.to_csv(os.path.join(output_path, f'nb_results_{N_values[0]}-{N_values[-1]}_{trial_no}trials_rand_{today_datatime}.csv'), index=False)
mdf

2025-04-18-102959


  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/2 [00:00<?, ?it/s][A

----------------------------------------------------
Model: MOP(NuSVR), N = 400
----------------------------------------------------
Completed trial 0. It took 3162.816360197 Seconds.
Completed trial 1. It took 3201.5908427489994 Seconds.
Completed trial 2. It took 3207.4723876220014 Seconds.
Completed trial 3. It took 3198.053431974 Seconds.



 50%|█████     | 1/2 [4:28:41<4:28:41, 16121.04s/it][A

Completed trial 4. It took 3351.1035743850007 Seconds.
----------------------------------------------------
[400, 'MOP(NuSVR)', np.float64(0.998910689958195), np.float64(0.0), np.float64(0.9985441270373187), np.float64(0.0), np.float64(2491.9898145670004), np.float64(63.55029597023741), np.float64(4377.447265625), np.float64(0.0), np.float64(704.0753640282004), np.float64(3.3728173150427785), np.float64(28.140781642399997), np.float64(0.2835783486259754)]
----------------------------------------------------
----------------------------------------------------
Model: MOP(SVR), N = 400
----------------------------------------------------
Completed trial 0. It took 2991.5545462690025 Seconds.
Completed trial 1. It took 2930.9392140579985 Seconds.
Completed trial 2. It took 2909.6699041659995 Seconds.
Completed trial 3. It took 2907.657307163001 Seconds.



100%|██████████| 2/2 [8:32:46<00:00, 15383.39s/it]
100%|██████████| 1/1 [8:34:06<00:00, 30846.76s/it]

Completed trial 4. It took 2905.910859959 Seconds.
----------------------------------------------------
[400, 'MOP(SVR)', np.float64(0.9990310244167262), np.float64(0.0), np.float64(0.9986677632591888), np.float64(1.1102230246251565e-16), np.float64(1497.8653733670012), np.float64(24.760833573484067), np.float64(8583.296875), np.float64(0.0), np.float64(1376.2182665426), np.float64(7.338504990783167), np.float64(55.056460014200276), np.float64(0.47695035190118945)]
----------------------------------------------------





Unnamed: 0,N,model,train_score_avg,train_score_std,test_score_avg,test_score_std,fit_time_avg,fit_time_std,model_size_KB_avg,model_size_KB_std,train_scoring_time_avg,train_scoring_time_std,test_scoring_time_avg,test_scoreing_time_std
0,400,MOP(NuSVR),0.998911,0.0,0.998544,0.0,2491.989815,63.550296,4377.447266,0.0,704.075364,3.372817,28.140782,0.283578
1,400,MOP(SVR),0.999031,0.0,0.998668,1.110223e-16,1497.865373,24.760834,8583.296875,0.0,1376.218267,7.338505,55.05646,0.47695
