# imports

In [None]:
# Add the parent directory to the import path so the project-local
# `models.*` imports below can be resolved.
import sys
sys.path.append('../')

# Hide every GPU device from TensorFlow; with no visible GPU the notebook's
# TensorFlow ops run on CPU.
import tensorflow as tf
tf.config.set_visible_devices([], 'GPU')

import json
import numpy as np
import pandas as pd
import pickle
from plotly import express as px
from models.patch_to_score.dataset import PatchToScoreCrossValidationDataset
# NOTE(review): the two classes imported below are not referenced by name in
# the visible cells; presumably they are imported so the custom layers are
# known before the model is rebuilt/loaded — confirm before removing.
from models.patch_to_score.models.modules.transformer_encoder_mlp import TransformerEncoderMLP
from models.patch_to_score.models.modules.global_sum_pooling import GlobalSumPooling

# constants

In [None]:
# Results directory of the run under inspection. The adjacent literals are
# concatenated by the parser into one absolute path.
model_results_dir_path = (
    '/home/iscb/wolfson/doririmon/home/order/ubinet/repo/ubinet'
    '/results/patch_to_score/hypotheses'
    '/pts_shalllow_mlps_autoregressive_scannet'
    '/21_07/2025-08-08_22693c7950'
)
# Index of the cross-validation fold to inspect.
fold = 0

# load model

In [None]:
# Parse the run's configuration file and keep its 'model' section handy for
# rebuilding the network.
configuration_path = f'{model_results_dir_path}/configuration.json'
with open(configuration_path, 'rb') as configuration_file:
    configuration = json.load(configuration_file)
model_configuration = configuration['model']

In [None]:
from models.patch_to_score.bootstrappers.model import build_model_from_configuration

In [None]:
# Rebuild the network from the 'model' section of the configuration, then
# restore the weights stored under the selected fold's directory.
fold_weights_path = f'{model_results_dir_path}/fold_{fold}/model.keras'
model = build_model_from_configuration(**model_configuration)
model.load_weights(fold_weights_path)

# analysis

In [None]:
# Print an overview of the restored model's architecture.
model.summary()

# inference

In [None]:
# NOTE(review): `fold` is already assigned in the constants cell and was used
# to pick the weights file when the model was loaded. This re-assignment must
# keep the same value, otherwise the dataset fold selected below will not
# match the loaded weights.
fold = 0

In [None]:
# Build the cross-validation dataset from the 'data' section of the
# configuration and pick the dataset belonging to the selected fold.
data_configuration = configuration['data']
cross_validation_dataset = PatchToScoreCrossValidationDataset(**data_configuration)
dataset = cross_validation_dataset.fold_datasets[fold]

# Model input: every element of `test_set`, followed by `test_sizes` and
# `test_num_patch`, in that order.
batch = list(dataset.test_set) + [dataset.test_sizes, dataset.test_num_patch]

In [None]:
# The per-fold dictionaries are read from the dataset object rather than from
# disk. Disabled alternative, kept for reference:
#     with open(configuration['data']['path'], 'rb') as f:
#         folds_training_dicts = pickle.load(f)
folds_training_dicts = cross_validation_dataset.fold_dicts

# Dictionary of the selected fold and the 'uniprots_test' entry stored in it.
fold_training_dict = folds_training_dicts[fold]
test_uniprots = fold_training_dict['uniprots_test']

In [None]:
# Run the restored model on the test batch and load the predictions stored on
# disk for the same fold, so the two can be compared.
saved_predictions_path = f'{model_results_dir_path}/fold_{fold}/test/predictions.npy'
predictions = model(batch)
saved_predictions = np.load(saved_predictions_path)

In [None]:
# Sanity check: True when the predictions recomputed in this notebook agree
# (within np.allclose's default tolerances) with the ones loaded from disk.
np.allclose(predictions, saved_predictions)

In [None]:
# px.histogram(predictions.numpy())

In [None]:
# Flatten the first test-set tensor to 2-D: axis 0 is kept, axes 1 and 2 are
# merged into a single axis.
test_features = dataset.test_set[0].numpy()
flattened_width = test_features.shape[1] * test_features.shape[2]
mat = test_features.reshape(test_features.shape[0], flattened_width)

In [None]:
# Boolean mask over the rows of `mat`: True where the whole flattened row is
# zero (presumably samples that contain no patches — TODO confirm).
# `any` is used instead of `sum(axis=1) == 0` because a row whose positive and
# negative entries cancel out sums to zero without being empty; `any` flags
# only rows in which every entry is zero.
non_patches = ~mat.any(axis=1)
non_patches

In [None]:
# 2994 + 2994 + 9160

In [None]:
# px.histogram(tf.argmax(dataset.test_num_patch, axis=-1).numpy())

In [None]:
# Display the shape of the test-set sizes tensor.
dataset.test_sizes.shape

In [None]:
# Collect label, size (column 0 of `test_sizes`) and predicted score (column 0
# of `predictions`) for the test samples selected by the `non_patches` mask.
non_patches_labels = dataset.test_labels.numpy()[non_patches]
non_patches_sizes = dataset.test_sizes.numpy()[non_patches, 0]
non_patches_predictions = predictions.numpy()[non_patches, 0]

non_patches_columns = dict(
    label=non_patches_labels,
    size=non_patches_sizes,
    prediction=non_patches_predictions,
)
non_patches_df = pd.DataFrame(non_patches_columns)

In [None]:
# Overlaid, percent-normalised histograms of the predicted score, one per
# label, for the samples in `non_patches_df`.
score_histogram_title = f'Score Distribution - Non-Patches Proteins | Fold {fold} | Test'
px.histogram(
    non_patches_df,
    x='prediction',
    color='label',
    histnorm='percent',
    barmode='overlay',
    title=score_histogram_title,
)

In [None]:
# Build a table (label, size) of the training samples whose flattened feature
# row is entirely zero (presumably samples without patches — TODO confirm).

# Flatten the first train-set tensor to 2-D: axis 0 kept, axes 1 and 2 merged.
mat_train = dataset.train_set[0].numpy()
mat_train = mat_train.reshape(mat_train.shape[0], mat_train.shape[1] * mat_train.shape[2])

# `any` is used instead of `sum(axis=1) == 0`: a row whose positive and
# negative entries cancel out sums to zero without being empty, whereas `any`
# flags only rows in which every entry is zero.
non_patches_train = ~mat_train.any(axis=1)

# Size is taken from column 0 of `train_sizes`.
non_patches_train_sizes = dataset.train_sizes.numpy()[non_patches_train, 0]
non_patches_train_labels = dataset.train_labels.numpy()[non_patches_train]
non_patches_train_df = pd.DataFrame({
    'label': non_patches_train_labels,
    'size': non_patches_train_sizes,
})

In [None]:
# Overlaid, percent-normalised histograms of the 'size' column, one per label,
# for the samples in `non_patches_train_df`. A large bin count is requested to
# get fine resolution along the size axis.
# Fix: the title previously read "Distribuion".
px.histogram(
    non_patches_train_df,
    'size',
    color='label',
    histnorm='percent',
    barmode='overlay',
    nbins=1_000,
    title=f'Size Distribution - Non-Patches Proteins | Fold {fold} | Train',
)

In [None]:
# Positions of the test samples whose size (column 0 of `test_sizes`) lies
# strictly between 1.2 and 2.2.
test_size_column = dataset.test_sizes.numpy()[:, 0]
size_in_range = (1.2 < test_size_column) & (test_size_column < 2.2)
indexes = np.where(size_in_range)[0]

In [None]:
# Display the selected sample positions.
indexes

In [None]:
# Histogram of the model's predictions restricted to the samples selected by
# `indexes`.
selected_predictions = predictions.numpy()[indexes]
px.histogram(selected_predictions)