# Run scorer on old data

Please use the `old_data_investigation` notebook to export all required data.

## Imports

In [None]:
import pandas as pd
import os
import re
import numpy as np
import sys
import math

In [None]:
sys.path.insert(0, "../..")

from txmatching.scorers.split_hla_additive_scorer import SplitScorer
from txmatching.scorers.high_res_hla_additive_scorer import HighResScorer
from txmatching.scorers.high_res_other_hla_types_additive_scorer import \
    HighResWithDQDPScorer

from txmatching.utils.blood_groups import BloodGroup
from txmatching.utils.country_enum import Country

from txmatching.patients.patient import Donor, Recipient
from txmatching.patients.patient_parameters import PatientParameters

from tests.test_utilities.hla_preparation_utils import (create_antibodies,
                                                        create_hla_typing)

from tests.test_utilities.prepare_app_for_tests import DbTests

## Load data

In [None]:
# Read the pickled survival summary (presumably the export produced by the
# `old_data_investigation` notebook) and preview its first rows.
survival_summary_path = 'data/survival_summary.pkl'
df_survival_summary = pd.read_pickle(survival_summary_path)
df_survival_summary.head()

In [None]:
# Initialize db
# DbTests is the test fixture imported from
# tests.test_utilities.prepare_app_for_tests; its setUp() is called directly
# here instead of through a test runner.
# NOTE(review): presumably the hla/patient helpers used when scoring need this
# db to exist -- confirm. The matching test.tearDown() is kept commented out
# in the next cell.
test = DbTests()
test.setUp()

In [None]:
# test.tearDown()

In [None]:
# Select scorer that will be used

# One instance of each available scorer, so switching between them only
# requires changing the assignment to `active_scorer` below.
split_scorer = SplitScorer()
high_res_scorer = HighResScorer()
high_res_other_hla_types_scorer = HighResWithDQDPScorer()

# `row_to_score` reads this module-level name at call time; re-run the scoring
# cell after changing it to recompute the `score` column.
active_scorer = split_scorer

In [None]:
def row_to_score(row):
    """Score one donor/recipient pair described by a survival-summary row.

    Placeholder ``Donor`` and ``Recipient`` objects are built around the two
    typings (fixed blood group A, country CZE, no antibodies) and handed to
    the module-level ``active_scorer``.

    Args:
        row: Object exposing ``donor_typization`` and
            ``recipient_typization`` attributes, each a whitespace-separated
            string of HLA codes (e.g. a row passed by
            ``DataFrame.apply(..., axis=1)``).

    Returns:
        The value returned by ``active_scorer.score_transplant``, or ``None``
        when either typization is missing (``None``/NaN), not a string, or
        blank.
    """
    donor_typization = row.donor_typization
    recipient_typization = row.recipient_typization

    # Missing values in a DataFrame arrive as None/NaN rather than '', and a
    # whitespace-only string carries no codes either; none of them can be
    # scored, so treat them all like the empty string.
    for typization in (donor_typization, recipient_typization):
        if not isinstance(typization, str) or not typization.strip():
            return None

    # split() without an argument ignores repeated/leading/trailing
    # whitespace, which split(" ") would turn into empty codes.
    donor_typing = donor_typization.split()
    recipient_typing = recipient_typization.split()

    # Only the HLA typing varies between rows; the remaining patient fields
    # are fixed placeholder values.
    donor = Donor(
        db_id=-1,
        medical_id='donor',
        related_recipient_db_id=1,
        parameters=PatientParameters(
            blood_group=BloodGroup.A,
            country_code=Country.CZE,
            hla_typing=create_hla_typing(donor_typing)
        )
    )

    recipient = Recipient(
        db_id=1,
        acceptable_blood_groups=[],
        related_donor_db_id=1,
        medical_id='recipient',
        parameters=PatientParameters(
            blood_group=BloodGroup.A,
            country_code=Country.CZE,
            hla_typing=create_hla_typing(recipient_typing)
        ),
        hla_antibodies=create_antibodies([])
    )

    return active_scorer.score_transplant(
        donor=donor,
        recipient=recipient,
        original_donor=None
    )

# NOTE: this is a plain assignment, not a copy -- df_scored and
# df_survival_summary are the same object, so the 'score' column added below
# also shows up on df_survival_summary.
# The commented-out .iloc slice is presumably a leftover for scoring a single
# row while debugging.
df_scored = df_survival_summary #.iloc[61:62]
# Score every row; row_to_score returns None for rows it cannot score.
df_scored['score'] = df_scored.apply(row_to_score, axis=1)

### Score distribution

In [None]:
# Keep only the rows for which a score is present, then plot the histogram
# of those scores.
has_score = df_scored['score'].notna()
df_scored_filtered = df_scored[has_score]
df_scored_filtered['score'].hist(bins=26)

### Delay distribution on ended patients

In [None]:
# Restrict the scored rows to those with a non-null EndDate, report how many
# remain, and plot the histogram of their delay.
has_end_date = df_scored_filtered['EndDate'].notna()
df_scored_ended = df_scored_filtered[has_end_date]
print(len(df_scored_ended))
df_scored_ended['delay'].hist()

### Score distribution on ended patients

In [None]:
# Histogram of the score over the ended patients only.
df_scored_ended['score'].hist(bins=26)

### Score distribution on ended patients with delay < 10 years

In [None]:
# 3650 is the "10 years" cut-off from the heading (delay is presumably
# measured in days -- confirm against the export notebook).
within_ten_years = df_scored_ended['delay'] < 3650
df_scored_ended[within_ten_years]['score'].hist(bins=26)

### Dependency between delay and score on ended patients

In [None]:
# Scatter plot of score (y) against delay (x) for the ended patients.
df_scored_ended.plot(kind='scatter', x='delay', y='score')

In [None]:
# Correlation between score and delay on the ended patients, using the
# default method of Series.corr.
df_scored_ended['score'].corr(df_scored_ended['delay'])

### Ended patients with the smallest delay

In [None]:
# Show the first rows of the ended patients ordered by increasing delay.
df_scored_ended.sort_values('delay').head()