In [233]:
import numpy as np
import numpy.typing as npt
from data_exploration.helpers import find_file, save
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import keras

import os
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
import trackml_copy as outrunner_code
import trackml_2_solution_example as my_code

In [234]:
# Notebook configuration: output locations, data paths and the event under study.
# NOTE(review): DO_EXPORT is not read by any cell shown here — confirm it is still needed.
DO_EXPORT = True
# Base directories are taken from the solution module so both stay in sync.
DIRECTORY = my_code.DIRECTORY
SOLUTION_DIR = my_code.SOLUTION_DIR


# NOTE(review): absolute, user-specific path; the notebook will not run elsewhere
# without editing it.
DATA_ROOT = "/data/atlas/users/lschoonh/BachelorProject/data/"
DATA_SAMPLE = DATA_ROOT + "train_100_events/"
MODELS_ROOT = DIRECTORY + "trained_models/2nd_place/"

# Event compared throughout the notebook.
event_name: str = "event000001001"
# The four tables returned by get_event for that event. The same call is repeated
# in the feature-generation cell.
hits, cells, truth, particles = outrunner_code.get_event(event_name)
# NOTE(review): `preload` and `PATH_THR` are not read by any cell shown here.
preload = True
PATH_THR = 0.85

In [None]:
# Load the prediction sets that the rest of the notebook compares.
# Predictions looked up by name under DIRECTORY via find_file.
my_preds: list[npt.NDArray] = find_file(f"preds_{event_name}", dir=DIRECTORY)  # type: ignore
# Stored predictions shipped with the reference solution.
# NOTE(security): allow_pickle=True unpickles arbitrary Python objects; only load
# files produced by this project.
outrunner_preds = np.load(SOLUTION_DIR + "my_%s.npy" % event_name, allow_pickle=True)
# Presumably the reference predictions regenerated from a notebook run — TODO confirm.
outrunner_preds_regenerated = find_file(f"try_from_notebook", dir=DIRECTORY)  # type: ignore
# NOTE(review): module_id is not used by any cell shown here.
module_id = my_code.get_module_id(hits)

In [None]:
def verify_matrices(test_matrix: npt.NDArray | list[npt.NDArray], verification_matrix: npt.NDArray | list[npt.NDArray], limit: int | None = None, error: float = 0):
    for i, (test_row, verification_row) in tqdm(enumerate(zip(test_matrix, verification_matrix)), total=min(len(test_matrix), len(verification_matrix))):
        if limit is not None and i >= limit:
            break

        # test_row == np.array(verification_row)
        tracks_equal = np.all(test_row == np.array(verification_row))
        if not tracks_equal:
            print("Rows are not equal")
            print("test row", test_row)
            print("good row", verification_row)
            print("Instances not in agreement: ", np.where(test_row!= verification_row)[0])
            raise ValueError("Rows are not equal")
    print(f"All first {i+1} rows are equal")
    return True

Generate features

In [None]:
# Reload the event so this cell can be re-run on its own.
hits, cells, truth, particles = outrunner_code.get_event(event_name)

# Group the cell values by hit once and derive both per-hit statistics from it.
cell_values_by_hit = cells.groupby(['hit_id']).value
hit_cells = cell_values_by_hit.count().values  # number of cells per hit
hit_value = cell_values_by_hit.sum().values    # summed cell value per hit

# Five features per hit: x, y, z (each divided by 1000), cell count (divided by 10)
# and summed cell value.
# NOTE(review): the groupby output is ordered by hit_id; stacking it next to
# `hits` assumes `hits` rows are in that same order — TODO confirm.
outrunner_features = np.hstack((
    hits[['x','y','z']]/1000,
    hit_cells.reshape(len(hit_cells),1)/10,
    hit_value.reshape(len(hit_cells),1),
))

In [None]:
# Load the two stored networks of the reference solution from SOLUTION_DIR.
# NOTE(review): how "my_model.h5" and "my_model_h.h5" differ is not visible in this notebook.
outrunner_model = my_code.load_model(SOLUTION_DIR + "my_model.h5")
outrunner_model_h = my_code.load_model(SOLUTION_DIR + "my_model_h.h5")

# Generate a few predictions

In [None]:
# Inputs for the prediction cells: which features/model to use and which hit to inspect.
# outrunner_features is the result of np.hstack, i.e. an ndarray (not a DataFrame).
used_features: npt.NDArray = outrunner_features
used_model: keras.models.Model = outrunner_model_h # type: ignore
# Number of leading hits for which a prediction row is generated.
pred_matrix_limit: int = 10
# Hit inspected in the comparison tables; the index is the id minus one
# (presumably hit ids start at 1 — TODO confirm).
hit_id_test = 3
hit_index_test = hit_id_test - 1

In [None]:
# Every later hit j > i is paired with hit i, so there must be at least one hit
# left after the last processed row; otherwise predict() would get an empty batch.
assert pred_matrix_limit < len(used_features), "pred_matrix_limit must be smaller than the number of hits"

# Pair buffer: columns 0-4 hold the features of hit i, columns 5-9 those of hit j.
TestX = np.zeros((len(used_features), 10))
TestX[:,5:] = used_features

# for TTA: the same pairs with the two hits swapped (hit j first, hit i second)
TestX1 = np.zeros((len(used_features), 10))
TestX1[:,:5] = used_features

preds_new = []

for i in tqdm(range(pred_matrix_limit)):
    # Broadcast the features of hit i into the first half of every later row.
    TestX[i+1:,:5] = used_features[i]

    # verbose=0 is the documented "silent" value.
    pred = used_model.predict(TestX[i+1:], batch_size=20000, verbose=0)[:,0]
    # Pairs scoring above 0.2 are scored again in swapped order and averaged.
    idx = np.where(pred>0.2)[0]

    if len(idx) > 0:
        TestX1[idx+i+1,5:] = TestX[idx+i+1,:5]
        pred1 = used_model.predict(TestX1[idx+i+1], batch_size=20000, verbose=0)[:,0]
        pred[idx] = (pred[idx]+pred1)/2

    # Keep only pairs whose (averaged) score exceeds 0.5.
    idx = np.where(pred>0.5)[0]

    # `idx` is relative to the slice starting at i+1; shift back to absolute hit indices.
    preds_new.append([idx+i+1, pred[idx]])

# Trailing empty row, so preds_new has pred_matrix_limit + 1 entries.
preds_new.append([np.array([], dtype='int64'), np.array([], dtype='float32')])

In [None]:
# rebuild to NxN
for i in range(len(preds_new)):
    ii = len(preds_new)-i-1
    for j in range(len(preds_new[ii][0])):
        jj = preds_new[ii][0][j]
        if jj < pred_matrix_limit:
            preds_new[jj][0] = np.insert(preds_new[jj][0], 0 ,ii)
            preds_new[jj][1] = np.insert(preds_new[jj][1], 0 ,preds_new[ii][1][j])
    

## Compare my_preds with new outrunner preds

In [None]:
# Stored reference predictions for the probed hit: the first array of the pair
# becomes the index, the second becomes column "out_pre".
outrunner_preds_pre_df = (
    pd.DataFrame([outrunner_preds[hit_index_test][0], outrunner_preds[hit_index_test][1]])
    .T
    .set_index(0)
    .rename(columns={1: "out_pre"})
)
outrunner_preds_pre_df

In [None]:
# Regenerated reference predictions for the probed hit: the first array of the
# pair becomes the index, the second becomes column "out_preds_new".
outrunner_preds_new_df = (
    pd.DataFrame(outrunner_preds_regenerated[hit_index_test])
    .T
    .set_index(0)
    .rename(columns={1: "out_preds_new"})
)
outrunner_preds_new_df

In [None]:
# Own predictions for the probed hit: the first array of the pair becomes the
# index, the second becomes column "my_preds".
my_preds_df = (
    pd.DataFrame(my_preds[hit_index_test])
    .T
    .set_index(0)
    .rename(columns={1: "my_preds"})
)
my_preds_df

In [None]:
# Predictions produced in this notebook for the probed hit: the first array of
# the pair becomes the index, the second becomes column "preds_new".
preds_new_df = (
    pd.DataFrame(preds_new[hit_index_test])
    .T
    .set_index(0)
    .rename(columns={1: "preds_new"})
)
preds_new_df

In [None]:
# Row produced by my_code.make_predict for the probed hit id
# (presumably one score per hit — TODO confirm against make_predict).
one_row = my_code.make_predict(used_model, used_features, hits, hit_id_test)
# Positions 0..len(one_row)-1 become the index; the values become column "make_predict".
one_row_indcices = np.arange(len(one_row))
one_df = (
    pd.DataFrame([one_row_indcices, one_row])
    .T
    .set_index(0)
    .rename(columns={1: "make_predict"})
)
one_df

In [None]:
# Compare the first hit's entry in my_preds with the regenerated reference:
# first the index arrays, then the score arrays from the third element on.
# The first call raises on a mismatch, in which case the second is never reached.
# NOTE(review): why the first two scores are skipped is not visible here — confirm.
verify_matrices(my_preds[0][0], outrunner_preds_regenerated[0][0])
verify_matrices(my_preds[0][1][2:], outrunner_preds_regenerated[0][1][2:])

In [None]:
# Side-by-side view of all five prediction sources, aligned on the shared index.
combined_df = pd.concat(
    [
        outrunner_preds_new_df,
        my_preds_df,
        preds_new_df,
        one_df,
        outrunner_preds_pre_df,
    ],
    axis=1,
)
combined_df.head(50)

In [None]:
# Smallest value in the second array stored for the first hit
# (presumably the lowest score that was kept — TODO confirm).
outrunner_preds[0][1].min()

In [None]:
# Same five sources as combined_df, with the stored reference predictions first.
combined_df_small = pd.concat(
    [
        outrunner_preds_pre_df,
        outrunner_preds_new_df,
        my_preds_df,
        preds_new_df,
        one_df,
    ],
    axis=1,
)
combined_df_small.head(50)

In [None]:
# Display every generated row: a list of [index array, score array] pairs.
preds_new

In [None]:
# Check a single generated row against the stored reference predictions.
# The commented-out loop below would run the same check for every generated row.
# for i in range(pred_matrix_limit):
#     verify_matrices(preds_new[i][0], outrunner_preds[i][0], error = 0.0001)
# NOTE: `i` is a notebook-level name that the display cells after this one also read.
i = 5
# Compares the index arrays of row i; raises ValueError on the first mismatch.
verify_matrices(preds_new[i][0], outrunner_preds[i][0], error = 0.0001)

In [None]:
# Index array of row i as generated in this notebook (i is set in the verification cell).
preds_new[i][0]

In [None]:
# Index array of row i from the stored reference predictions, for visual comparison.
outrunner_preds[i][0]
