# Testowanie różnicy w czasie obliczeń pomiędzy różnymi konfiguracjami

### Konfiguracja

In [1]:
import os
# Step up one directory at a time until the working directory path no longer
# contains "notebooks" (presumably so the relative "models"/"data" paths and
# the `src` imports resolve from the project root -- confirm).
while True:
    if "notebooks" not in os.getcwd():
        break
    os.chdir("../")


In [2]:
from pathlib import Path
import pandas as pd
import time

from src.decision_tree.tree import load_trees
from src.decision_tree.prediction_gap import prediction_gap_by_exact_calc
from src.decision_tree.prediction_gap import NormalPredictionGap

from src.decision_tree.tree_float import load_trees as load_trees_float
from src.decision_tree.prediction_gap_float import prediction_gap_by_exact_calc as prediction_gap_by_exact_calc_float 
from src.decision_tree.prediction_gap_float import NormalPredictionGap as NormalPredictionGap_float

%load_ext autoreload
%autoreload 2


In [3]:
# Root directories, given relative to the current working directory.
models_path = Path("models")
data_path = Path("data")

# Wine-quality (red) model name and its scaled test set.
wine_model_name = "winequality_red"
wine_test_data_path = data_path.joinpath("wine_quality/test_winequality_red_scaled.csv")

# Housing model name and its scaled test set.
housing_model_name = "housing"
housing_test_data_path = data_path.joinpath("housing_data/test_housing_scaled.csv")


In [4]:
# Value handed to the NormalPredictionGap constructors further down
# (presumably the standard deviation of the normal perturbation -- confirm).
stddev = 0.3


### Wczytywanie modeli

In [5]:
import os
# NOTE(review): this repeats the directory-climbing loop from the first cell.
# Step up one directory at a time until the working directory path no longer
# contains "notebooks".
while True:
    if "notebooks" not in os.getcwd():
        break
    os.chdir("../")


In [6]:
from pathlib import Path
import pandas as pd

from src.decision_tree.tree import load_trees
from src.decision_tree.prediction_gap import (
    NormalPredictionGap,
    prediction_gap_on_single_feature_perturbation,
    prediction_gap_by_random_sampling,
    prediction_gap_by_exact_calc
)

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Root directories, given relative to the current working directory.
models_path = Path("models")
data_path = Path("data")

# Wine-quality (red): regular and "big" model names plus the scaled test set.
wine_model_name = "winequality_red"
big_wine_model_name = "winequality_red_big"
wine_test_data_path = data_path.joinpath("wine_quality/test_winequality_red_scaled.csv")

# Housing: regular and "big" model names. The housing test-set path is not
# set in this cell; it comes from the configuration section.
housing_model_name = "housing"
big_housing_model_name = "housing_big"

In [8]:
# Wine models: the regular model loaded through both the default loader and
# the float loader, followed by the "big" model via the default loader.
wine_trees = load_trees(models_path, wine_model_name)
wine_trees_float = load_trees_float(models_path, wine_model_name)
wine_trees_big = load_trees(models_path, big_wine_model_name)

# Housing models, loaded in the same three configurations.
housing_trees = load_trees(models_path, housing_model_name)
housing_trees_float = load_trees_float(models_path, housing_model_name)
housing_trees_big = load_trees(models_path, big_housing_model_name)


In [9]:
# Read both scaled test sets into DataFrames.
wine_data = pd.read_csv(filepath_or_buffer=wine_test_data_path)
housing_data = pd.read_csv(filepath_or_buffer=housing_test_data_path)

In [10]:
# NOTE(review): repeats the stddev assignment from the configuration section.
# Value handed to the NormalPredictionGap constructors further down.
stddev = 0.3


### Testowanie różnicy w czasie - Wine model

#### Testowanie zmiany z np.float32 na float

In [11]:
# Wine feature names; the timing loops below perturb growing prefixes of
# this list (features[0:i]), so the order matters.
features = [
    'alcohol',
    'sulphates',
    'total_sulfur_dioxide',
    'volatile_acidity',
    'chlorides',
    'residual_sugar',
    'pH',
    'citric_acid',
    'free_sulfur_dioxide',
    'fixed_acidity',
    'density',
]

In [12]:
# One prediction-gap object per implementation under test, both built from
# the same stddev: the default module and the `_float` module.
prediction_gap = NormalPredictionGap(stddev)
prediction_gap_float = NormalPredictionGap_float(stddev)

In [13]:
# Time the exact prediction-gap calculation of the default implementation on
# the wine model, perturbing the first 1, 2, 3 and 4 features in turn.
# Elapsed seconds are collected in `times`, one entry per feature count.
times = []

for i in range(1, 5):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() follows the wall clock, which can be
    # adjusted while a long measurement is running.
    start = time.perf_counter()
    exact_result = prediction_gap_by_exact_calc(
        prediction_gap, wine_trees, wine_data[:20], features[0:i], squared=True
    )
    t = time.perf_counter() - start
    times.append(t)

There are 20 datapoints in the dataset.
The following 1 features are subject to perturbation:
['alcohol']
Starting exact prediction gap calculation.


Datapoint 0 returned predgap value of 0.009961894626400574.
Datapoint 6 returned predgap value of 0.08256291449303822.
Datapoint 4 returned predgap value of 0.06822433780229976.
Datapoint 2 returned predgap value of 0.017130650184473687.
Datapoint 1 returned predgap value of 0.006269428552839615.
Datapoint 7 returned predgap value of 0.00466620184826839.
Datapoint 3 returned predgap value of 0.007913960483980687.
Datapoint 5 returned predgap value of 0.007354241393369156.
Datapoint 8 returned predgap value of 0.004228402540532536.
Datapoint 10 returned predgap value of 0.00883218425803068.
Datapoint 12 returned predgap value of 0.01833661776440575.
Datapoint 14 returned predgap value of 0.00988571589010846.
Datapoint 9 returned predgap value of 0.007220900555114093.
Datapoint 13 returned predgap value of 0.009962945945979199.
Datapoint 11 returned predgap value of 0.013004404318175525.
Datapoint 15 returned predgap value of 0.012405523953909066.
Datapoint 16 returned predgap value of 0

In [14]:
# Time the exact prediction-gap calculation of the `_float` implementation on
# the wine model, perturbing the first 1, 2, 3 and 4 features in turn.
# Elapsed seconds are collected in `times_float`, one entry per feature count.
times_float = []

for i in range(1, 5):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() follows the wall clock, which can be
    # adjusted while a long measurement is running.
    start = time.perf_counter()
    exact_result = prediction_gap_by_exact_calc_float(
        prediction_gap_float, wine_trees_float, wine_data[:20], features[0:i], squared=True
    )
    t = time.perf_counter() - start
    times_float.append(t)

There are 20 datapoints in the dataset.
The following 1 features are subject to perturbation:
['alcohol']
Starting exact prediction gap calculation.


Datapoint 0 returned predgap value of 0.003195137012516015.
Datapoint 6 returned predgap value of 0.002299968761725282.
Datapoint 4 returned predgap value of 0.06551769579726952.
Datapoint 2 returned predgap value of 0.017130624755765454.
Datapoint 1 returned predgap value of 0.002402166228184087.
Datapoint 7 returned predgap value of 0.005362242229147442.
Datapoint 3 returned predgap value of 0.007916692989029432.
Datapoint 5 returned predgap value of 0.007354237971035082.
Datapoint 8 returned predgap value of 0.004289196526012002.
Datapoint 10 returned predgap value of 0.008832176783223098.
Datapoint 14 returned predgap value of 0.009971076418964777.
Datapoint 12 returned predgap value of 0.019724264426875724.
Datapoint 9 returned predgap value of 0.007220903965404675.
Datapoint 13 returned predgap value of 0.00438117548096376.
Datapoint 11 returned predgap value of 0.013004407459116255.
Datapoint 15 returned predgap value of 0.021748072196588225.
Datapoint 16 returned predgap value 

In [15]:
# One line each: default timings, float timings, and the per-feature-count
# difference (default minus float).
print(
    times,
    times_float,
    [default_s - float_s for default_s, float_s in zip(times, times_float)],
    sep="\n",
)

[2.646014451980591, 6.580998182296753, 11.679245948791504, 24.988547325134277]
[2.225283145904541, 5.819832801818848, 10.708717346191406, 23.01026177406311]
[0.4207313060760498, 0.7611653804779053, 0.9705286026000977, 1.978285551071167]


#### Różnica w czasie po przejściu na mniejszy model

In [16]:
# Time the exact prediction-gap calculation on the "big" wine model,
# perturbing the first 1, 2, 3 and 4 features in turn.
# Elapsed seconds are collected in `times_big`, one entry per feature count.
times_big = []

for i in range(1, 5):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() follows the wall clock, which can be
    # adjusted while a long measurement is running.
    start = time.perf_counter()
    exact_result = prediction_gap_by_exact_calc(
        prediction_gap, wine_trees_big, wine_data[:20], features[0:i], squared=True
    )
    t = time.perf_counter() - start
    times_big.append(t)

There are 20 datapoints in the dataset.
The following 1 features are subject to perturbation:
['alcohol']
Starting exact prediction gap calculation.


Datapoint 0 returned predgap value of 0.0043070606183592845.
Datapoint 6 returned predgap value of 0.0017482469810372924.
Datapoint 2 returned predgap value of 0.04361715193581863.
Datapoint 4 returned predgap value of 0.03991054766229579.
Datapoint 1 returned predgap value of 0.0006586617573354249.
Datapoint 7 returned predgap value of 0.013657041870025114.
Datapoint 3 returned predgap value of 0.004440416971521214.
Datapoint 5 returned predgap value of 0.023687695847587495.
Datapoint 8 returned predgap value of 0.00820151680979141.
Datapoint 10 returned predgap value of 0.012061886759428745.
Datapoint 12 returned predgap value of 0.017374837956452033.
Datapoint 14 returned predgap value of 0.018082851192373052.
Datapoint 9 returned predgap value of 0.009954352718086467.
Datapoint 13 returned predgap value of 0.008369137248947882.
Datapoint 11 returned predgap value of 0.024041116284922684.
Datapoint 15 returned predgap value of 0.011052685381863376.
Datapoint 16 returned predgap valu

In [17]:
# One line each: big-model timings, regular-model timings, and the
# per-feature-count difference (regular minus big).
print(
    times_big,
    times,
    [regular_s - big_s for regular_s, big_s in zip(times, times_big)],
    sep="\n",
)

[2.767120838165283, 8.001223802566528, 14.030475378036499, 30.347758054733276]
[2.646014451980591, 6.580998182296753, 11.679245948791504, 24.988547325134277]
[-0.12110638618469238, -1.4202256202697754, -2.351229429244995, -5.359210729598999]


### Testowanie różnicy w czasie - Housing model

#### Testowanie zmiany z np.float32 na float

In [18]:
# Housing feature names; the timing loops below perturb growing prefixes of
# this list (features[0:i]), so the order matters. This rebinds the
# `features` name used earlier for the wine model.
features = [
    'longitude',
    'latitude',
    'median_income',
    'population',
    'households',
    'housing_median_age',
    'total_rooms',
    'total_bedrooms',
]

In [19]:
# One prediction-gap object per implementation under test, both built from
# the same stddev: the default module and the `_float` module.
prediction_gap = NormalPredictionGap(stddev)
prediction_gap_float = NormalPredictionGap_float(stddev)

In [20]:
# Time the exact prediction-gap calculation of the default implementation on
# the housing model, perturbing the first 1, 2, 3 and 4 features in turn.
# Elapsed seconds are collected in `times`, one entry per feature count.
times = []

for i in range(1, 5):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() follows the wall clock, which can be
    # adjusted while a measurement lasting several minutes is running.
    start = time.perf_counter()
    exact_result = prediction_gap_by_exact_calc(
        prediction_gap, housing_trees, housing_data[:20], features[0:i], squared=True
    )
    t = time.perf_counter() - start
    times.append(t)

There are 20 datapoints in the dataset.
The following 1 features are subject to perturbation:
['longitude']
Starting exact prediction gap calculation.


Datapoint 6 returned predgap value of 2147874103.1646175.
Datapoint 4 returned predgap value of 575547544.3247929.
Datapoint 0 returned predgap value of 3728844732.5504975.
Datapoint 2 returned predgap value of 3977975844.6321635.
Datapoint 3 returned predgap value of 7192604601.442774.
Datapoint 1 returned predgap value of 1036110156.0758018.
Datapoint 7 returned predgap value of 740560713.9488819.
Datapoint 5 returned predgap value of 192296999.24713215.
Datapoint 8 returned predgap value of 755252390.4681877.
Datapoint 14 returned predgap value of 344176786.7414682.
Datapoint 12 returned predgap value of 5252336281.303322.
Datapoint 10 returned predgap value of 6224725515.802461.
Datapoint 9 returned predgap value of 7155512849.652548.
Datapoint 13 returned predgap value of 317889326.5565662.
Datapoint 15 returned predgap value of 321798491.7218494.
Datapoint 11 returned predgap value of 6691382353.574624.
Datapoint 16 returned predgap value of 4376192521.669079.
Datapoint 18 return

In [21]:
# Time the exact prediction-gap calculation of the `_float` implementation on
# the housing model, perturbing the first 1, 2, 3 and 4 features in turn.
# Elapsed seconds are collected in `times_float`, one entry per feature count.
times_float = []

for i in range(1, 5):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() follows the wall clock, which can be
    # adjusted while a measurement lasting several minutes is running.
    start = time.perf_counter()
    exact_result = prediction_gap_by_exact_calc_float(
        prediction_gap_float, housing_trees_float, housing_data[:20], features[0:i], squared=True
    )
    t = time.perf_counter() - start
    times_float.append(t)

There are 20 datapoints in the dataset.
The following 1 features are subject to perturbation:
['longitude']
Starting exact prediction gap calculation.


Datapoint 6 returned predgap value of 1774160982.7764633.
Datapoint 4 returned predgap value of 575547417.2381544.
Datapoint 0 returned predgap value of 3800503920.06152.
Datapoint 2 returned predgap value of 3977976550.5886726.
Datapoint 3 returned predgap value of 8863088946.693634.
Datapoint 7 returned predgap value of 740560649.5373532.
Datapoint 1 returned predgap value of 282375767.3423549.
Datapoint 5 returned predgap value of 192297261.4901413.
Datapoint 8 returned predgap value of 768146645.023417.
Datapoint 14 returned predgap value of 344177201.6094975.
Datapoint 10 returned predgap value of 6224725217.293062.
Datapoint 12 returned predgap value of 5252336126.284694.
Datapoint 9 returned predgap value of 9053798176.4945.
Datapoint 13 returned predgap value of 317888826.0981266.
Datapoint 15 returned predgap value of 321798866.597871.
Datapoint 11 returned predgap value of 6691382512.862429.
Datapoint 16 returned predgap value of 4376192036.33982.
Datapoint 18 returned predga

In [22]:
# One line each: default timings, float timings, and the per-feature-count
# difference (default minus float).
print(
    times,
    times_float,
    [default_s - float_s for default_s, float_s in zip(times, times_float)],
    sep="\n",
)

[25.265269994735718, 147.60511565208435, 301.05758261680603, 420.3620150089264]
[21.862010955810547, 138.5977144241333, 291.7063150405884, 377.06349658966064]
[3.403259038925171, 9.00740122795105, 9.351267576217651, 43.29851841926575]



#### Różnica w czasie po przejściu na mniejszy model

Zakres eksperymentów został zmniejszony, ponieważ obliczenia dla dużego modelu trwały bardzo długo.

In [None]:
# Re-time the regular housing model over a reduced range (first 1, 2 and 3
# features) so the result lines up with the big-model run in the next cell.
# NOTE(review): the big-model run passes processes=8 while this call relies on
# the function's default -- confirm both use the same worker count, otherwise
# the two timing series are not directly comparable.
times = []

for i in range(1, 4):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() follows the wall clock, which can be
    # adjusted while a measurement lasting several minutes is running.
    start = time.perf_counter()
    exact_result = prediction_gap_by_exact_calc(
        prediction_gap, housing_trees, housing_data[:20], features[0:i], squared=True
    )
    t = time.perf_counter() - start
    times.append(t)

In [None]:
# Time the exact prediction-gap calculation on the "big" housing model over
# the reduced range (first 1, 2 and 3 features).
# Elapsed seconds are collected in `times_big`, one entry per feature count.
# NOTE(review): this call passes processes=8, whereas the regular-model run it
# is compared against does not pass `processes` -- confirm the default matches,
# otherwise the two timing series are not directly comparable.
times_big = []

for i in range(1, 4):
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() follows the wall clock, which can be
    # adjusted while a measurement lasting many minutes is running.
    start = time.perf_counter()
    exact_result = prediction_gap_by_exact_calc(
        prediction_gap, housing_trees_big, housing_data[:20], features[0:i], squared=True, processes=8
    )
    t = time.perf_counter() - start
    times_big.append(t)

In [27]:
# One line each: regular-model timings, big-model timings, and the
# per-feature-count difference (regular minus big).
print(
    times,
    times_big,
    [regular_s - big_s for regular_s, big_s in zip(times, times_big)],
    sep="\n",
)

[21.568105220794678, 124.8886775970459, 300.2667496204376]
[51.0811185836792, 338.42005372047424, 1449.2556111812592]
[-29.51301336288452, -213.53137612342834, -1148.9888615608215]
