# Testowanie różnicy w czasie obliczeń pomiędzy różnymi konfiguracjami

## Konfiguracja

In [1]:
import os
# Walk up the directory tree until the working directory path no longer
# contains "notebooks", so the relative paths used below ("models", "data")
# are resolved from there. Presumably that is the project root — confirm.
while True:
    if "notebooks" not in os.getcwd():
        break
    os.chdir("../")


In [2]:
from pathlib import Path
import pandas as pd
import time

from src.decision_tree.tree import load_trees
from src.decision_tree.prediction_gap import prediction_gap_by_exact_calc
from src.decision_tree.prediction_gap import NormalPredictionGap

from src.decision_tree.tree_float import load_trees as load_trees_float
from src.decision_tree.prediction_gap_float import prediction_gap_by_exact_calc as prediction_gap_by_exact_calc_float 
from src.decision_tree.prediction_gap_float import NormalPredictionGap as NormalPredictionGap_float

%load_ext autoreload
%autoreload 2


In [3]:
# Relative locations of the model files and the test data.
models_path, data_path = Path("models"), Path("data")

# Red-wine quality model name and its scaled test CSV.
wine_model_name = "winequality_red"
wine_test_data_path = data_path.joinpath("wine_quality/test_winequality_red_scaled.csv")

# Housing model name and its scaled test CSV.
housing_model_name = "housing"
housing_test_data_path = data_path.joinpath("housing_data/test_housing_scaled.csv")


In [4]:
# Standard deviation handed to the NormalPredictionGap constructors further down.
# Presumably the spread of the normal perturbation noise — TODO confirm in
# src.decision_tree.prediction_gap.
stddev = 0.3


### Wczytywanie modeli

In [5]:
import os
# Same working-directory fix-up as in the first cell of this notebook: walk up
# until the current path no longer contains "notebooks". After the first cell
# has run, this loop exits immediately.
while True:
    if "notebooks" not in os.getcwd():
        break
    os.chdir("../")


In [6]:
from pathlib import Path
import pandas as pd

from src.decision_tree.tree import load_trees
from src.decision_tree.prediction_gap import (
    NormalPredictionGap,
    prediction_gap_on_single_feature_perturbation,
    prediction_gap_by_random_sampling,
    prediction_gap_by_exact_calc
)

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Relative locations of the model files and the test data.
models_path, data_path = Path("models"), Path("data")

# Names of the two red-wine models compared below, plus the shared test CSV.
wine_model_name = "winequality_red"
big_wine_model_name = "winequality_red_big"
wine_test_data_path = data_path.joinpath("wine_quality/test_winequality_red_scaled.csv")


In [16]:
# Load the three tree sets that the timing experiments compare:
#   wine_trees       - "winequality_red" via the default loader
#   wine_trees_float - the same model name via the tree_float loader
#   wine_trees_big   - "winequality_red_big" via the default loader
wine_trees = load_trees(models_path, wine_model_name)
wine_trees_float = load_trees_float(models_path, wine_model_name)
wine_trees_big = load_trees(models_path, big_wine_model_name)


In [9]:
# Read the red-wine test CSV and show it as the cell output.
# NOTE(review): the rendered frame has an "Unnamed: 0" column, which suggests
# the CSV was saved together with its index — confirm whether the downstream
# prediction-gap code expects that column to be present.
wine_data = pd.read_csv(wine_test_data_path)
wine_data


Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
0,0.505795,2.190902,-0.159061,0.043416,-0.009916,-0.370562,1.414996,1.618302,1.094265,-0.697233,-0.584777,5
1,-0.356000,0.738418,-1.186070,-0.666062,-1.030094,-1.135608,-1.139357,-0.289747,0.575922,-1.110324,-0.960246,5
2,1.195232,-0.490607,2.357111,2.845854,-0.286214,2.115838,0.898044,1.194291,-0.914312,1.132173,1.480302,7
3,-0.873078,0.570823,-1.032019,-0.311323,0.032592,-0.466193,-0.075043,-0.766760,1.029472,-0.638220,-0.021574,6
4,2.401746,-0.881661,1.432803,0.043416,-0.094931,0.872638,0.654772,1.459298,-1.043897,0.955133,-0.021574,6
...,...,...,...,...,...,...,...,...,...,...,...,...
315,0.333436,-1.719632,1.073350,-0.524166,-0.222453,1.255161,0.411500,0.028261,0.511130,-0.756246,-0.960246,6
316,-0.126188,-1.328579,0.457144,-0.240375,-0.838811,1.828946,0.198638,-0.925764,-0.266384,2.076382,1.855771,7
317,0.046171,2.833346,-0.621215,2.455641,-0.307468,-0.466193,0.229047,1.194291,0.381544,-0.992298,-0.490910,5
318,-1.447608,0.459094,-0.826617,-0.524166,-0.456244,0.203223,-0.561586,-1.015866,1.547814,0.010924,0.447763,6


In [10]:
# Standard deviation passed to both NormalPredictionGap constructors below.
# Same value as the `stddev` assigned in the configuration section at the top.
# Presumably the spread of the normal perturbation noise — TODO confirm.
stddev = 0.3


### Testowanie różnicy w czasie

#### Testowanie zmiany z np.float32 na float

In [11]:
# Candidate features for perturbation. The order matters: the benchmarks below
# take growing prefixes of this list (features[0:i]).
features = [
    "alcohol",
    "sulphates",
    "total_sulfur_dioxide",
    "volatile_acidity",
    "chlorides",
    "residual_sugar",
    "pH",
    "citric_acid",
    "free_sulfur_dioxide",
    "fixed_acidity",
    "density",
]

In [12]:
# One gap object per implementation under test, both built with the same stddev:
# the class from src.decision_tree.prediction_gap and its counterpart from
# src.decision_tree.prediction_gap_float.
prediction_gap = NormalPredictionGap(stddev)
prediction_gap_float = NormalPredictionGap_float(stddev)

In [13]:
# Time the exact prediction-gap computation on `wine_trees` for the first 20
# rows of the test data, growing the perturbed feature set from 1 to 4 features
# (features[0:i]). One elapsed time in seconds is collected per feature count.
#
# time.perf_counter() is used rather than time.time(): it is monotonic and has
# the highest available resolution, so a system clock adjustment during these
# multi-second runs cannot distort the measurement.
times = []

for i in range(1, 5):
    start = time.perf_counter()
    exact_result = prediction_gap_by_exact_calc(
        prediction_gap, wine_trees, wine_data[:20], features[0:i], squared=True
    )
    elapsed = time.perf_counter() - start
    times.append(elapsed)

There are 20 datapoints in the dataset.
The following 1 features are subject to perturbation:
['alcohol']
Starting exact prediction gap calculation.
Datapoint 0 returned predgap value of 0.009961894626400574.
Datapoint 1 returned predgap value of 0.006269428552839615.
Datapoint 2 returned predgap value of 0.017130650184473687.
Datapoint 3 returned predgap value of 0.007913960483980687.
Datapoint 4 returned predgap value of 0.06822433780229976.
Datapoint 5 returned predgap value of 0.007354241393369156.
Datapoint 6 returned predgap value of 0.08256291449303822.
Datapoint 7 returned predgap value of 0.00466620184826839.
Datapoint 8 returned predgap value of 0.004228402540532536.
Datapoint 9 returned predgap value of 0.007220900555114093.
Datapoint 10 returned predgap value of 0.00883218425803068.
Datapoint 11 returned predgap value of 0.013004404318175525.
Datapoint 12 returned predgap value of 0.01833661776440575.
Datapoint 13 returned predgap value of 0.009962945945979199.
Datapoint 14

In [14]:
# Same benchmark as for `times`, but using the *_float implementation
# (prediction_gap_by_exact_calc_float with wine_trees_float): first 20 rows,
# 1 to 4 perturbed features, one elapsed time in seconds per feature count.
#
# time.perf_counter() is used rather than time.time(): it is monotonic and has
# the highest available resolution, so a system clock adjustment during these
# multi-second runs cannot distort the measurement.
#
# NOTE(review): the recorded outputs of this run and of the non-float run differ
# noticeably for some datapoints (e.g. datapoint 6: ~0.0826 vs ~0.0023), so the
# two implementations may not be computing the same thing — verify before
# trusting the speed-up.
times_float = []

for i in range(1, 5):
    start = time.perf_counter()
    exact_result = prediction_gap_by_exact_calc_float(
        prediction_gap_float, wine_trees_float, wine_data[:20], features[0:i], squared=True
    )
    elapsed = time.perf_counter() - start
    times_float.append(elapsed)

There are 20 datapoints in the dataset.
The following 1 features are subject to perturbation:
['alcohol']
Starting exact prediction gap calculation.
Datapoint 0 returned predgap value of 0.003195137012516015.
Datapoint 1 returned predgap value of 0.002402166228184087.
Datapoint 2 returned predgap value of 0.017130624755765454.
Datapoint 3 returned predgap value of 0.007916692989029432.
Datapoint 4 returned predgap value of 0.06551769579726952.
Datapoint 5 returned predgap value of 0.007354237971035082.
Datapoint 6 returned predgap value of 0.002299968761725282.
Datapoint 7 returned predgap value of 0.005362242229147442.
Datapoint 8 returned predgap value of 0.004289196526012002.
Datapoint 9 returned predgap value of 0.007220903965404675.
Datapoint 10 returned predgap value of 0.008832176783223098.
Datapoint 11 returned predgap value of 0.013004407459116255.
Datapoint 12 returned predgap value of 0.019724264426875724.
Datapoint 13 returned predgap value of 0.00438117548096376.
Datapoint

In [15]:
# Show both timing series, then the per-run difference (default minus float);
# a positive number means the float variant finished faster.
time_saved_by_float = [default - variant for default, variant in zip(times, times_float)]
print(times)
print(times_float)
print(time_saved_by_float)

[6.8901283740997314, 17.64706587791443, 30.75902819633484, 64.14338684082031]
[5.640876531600952, 15.365988492965698, 27.71587085723877, 60.47069549560547]
[1.2492518424987793, 2.2810773849487305, 3.0431573390960693, 3.6726913452148438]


#### Różnica w czasie po przejściu na mniejszy model

In [18]:
# Same benchmark as for `times`, but on the "winequality_red_big" trees
# (wine_trees_big): first 20 rows, 1 to 4 perturbed features, one elapsed time
# in seconds per feature count.
#
# time.perf_counter() is used rather than time.time(): it is monotonic and has
# the highest available resolution, so a system clock adjustment during these
# multi-second runs cannot distort the measurement.
times_big = []

for i in range(1, 5):
    start = time.perf_counter()
    exact_result = prediction_gap_by_exact_calc(
        prediction_gap, wine_trees_big, wine_data[:20], features[0:i], squared=True
    )
    elapsed = time.perf_counter() - start
    times_big.append(elapsed)

There are 20 datapoints in the dataset.
The following 1 features are subject to perturbation:
['alcohol']
Starting exact prediction gap calculation.
Datapoint 0 returned predgap value of 0.0043070606183592845.
Datapoint 1 returned predgap value of 0.0006586617573354249.
Datapoint 2 returned predgap value of 0.04361715193581863.
Datapoint 3 returned predgap value of 0.004440416971521214.
Datapoint 4 returned predgap value of 0.03991054766229579.
Datapoint 5 returned predgap value of 0.023687695847587495.
Datapoint 6 returned predgap value of 0.0017482469810372924.
Datapoint 7 returned predgap value of 0.013657041870025114.
Datapoint 8 returned predgap value of 0.00820151680979141.
Datapoint 9 returned predgap value of 0.009954352718086467.
Datapoint 10 returned predgap value of 0.012061886759428745.
Datapoint 11 returned predgap value of 0.024041116284922684.
Datapoint 12 returned predgap value of 0.017374837956452033.
Datapoint 13 returned predgap value of 0.008369137248947882.
Datapoi

In [21]:
# Show the big-model timings, the default-model timings, and their per-run
# difference (default minus big); a negative number means the big model took longer.
time_diff_vs_big = [default - big for default, big in zip(times, times_big)]
print(times_big)
print(times)
print(time_diff_vs_big)

[7.451418876647949, 21.639697551727295, 37.21090769767761, 82.87585544586182]
[6.8901283740997314, 17.64706587791443, 30.75902819633484, 64.14338684082031]
[-0.5612905025482178, -3.992631673812866, -6.451879501342773, -18.732468605041504]
