# Tests

## VDM
$$
d(x,y)=\sqrt{\sum_{a=1}^m{{vdm_a(x_a,y_a)}^2}} \\
vdm_a(x_a,y_a)=\sum_{c=1}^C{{|\frac{N_{a,x_a,c}}{N_{a,x_a}}-\frac{N_{a,y_a,c}}{N_{a,y_a}}|}^q}
$$
* $N_{a,x_a}$ is the number of instances in the training set $T$ that have value $x_a$ for attribute $a$
* $N_{a,x_a,c}$ is the number of instances in the training set $T$ that have value $x_a$ for attribute $a$ and class $c$
* $C$ is the number of classes
* $q$ is a constant. Usually $1$ or $2$

In [None]:
import numpy as np


def vdm(
    x_a: float,
    y_a: float,
    a: int,
    input_sample_space: np.ndarray = input_sample_space,
    output_sample_space=output_sample_space,
    q=1
) -> float:
    """Value Difference Metric between two values of a single attribute.

    Computes ``sum_c |N_{a,x_a,c}/N_{a,x_a} - N_{a,y_a,c}/N_{a,y_a}| ** q``
    over the distinct classes found in ``output_sample_space``.

    Note:
        The ``*_sample_space`` defaults are evaluated once, when this ``def``
        statement runs, so both names must already exist in the notebook
        namespace at that point.

    Args:
        x_a: Value of attribute ``a`` in the first instance.
        y_a: Value of attribute ``a`` in the second instance.
        a: Column index of the attribute inside ``input_sample_space``.
        input_sample_space: 2-D array sliced as ``[:, a]`` (assumed to hold
            one training instance per row -- TODO confirm).
        output_sample_space: Class labels; its equality mask is AND-ed with
            the column masks, so it must line up with the rows of
            ``input_sample_space``.
        q: Exponent applied to each absolute difference.

    Returns:
        The summed per-class difference. A value that never occurs in the
        column contributes a conditional probability of 0 for every class
        rather than the undefined 0/0 (which would yield NaN).
    """
    attribute_in = input_sample_space[:, a]
    x_mask = attribute_in == x_a
    y_mask = attribute_in == y_a

    # The per-value counts do not depend on the class, so compute them once
    # instead of once per class.
    n_x = x_mask.sum()
    n_y = y_mask.sum()

    diffs = []
    for c in np.unique(output_sample_space):
        class_mask = output_sample_space == c
        # Guard the 0/0 case: an unseen value has no class distribution, so its
        # conditional probability is taken as 0.
        p_x_c = (class_mask & x_mask).sum() / n_x if n_x else 0.0
        p_y_c = (class_mask & y_mask).sum() / n_y if n_y else 0.0
        diffs.append(p_x_c - p_y_c)

    return (np.abs(diffs) ** q).sum()


def dist_vdm(x: np.ndarray, y: np.ndarray) -> float:
    """Distance between two instances built from per-attribute VDM values.

    Args:
        x: First instance; element ``a`` is passed to ``vdm`` as the value of
            attribute ``a``.
        y: Second instance, with the same number of elements as ``x``.

    Returns:
        ``sqrt(sum_a vdm(x[a], y[a], a) ** 2)``, with ``vdm`` called using its
        default sample spaces and exponent.

    Raises:
        AssertionError: If ``x.size`` and ``y.size`` differ.
    """
    assert x.size == y.size, "The lengths of the arrays must be equal"
    # Accumulate the squared per-attribute terms left to right, then take the root.
    squared_terms = (vdm(x[idx], y[idx], idx) ** 2 for idx in range(x.size))
    return np.sqrt(sum(squared_terms))


# NOTE(review): x1 and x2 are not assigned anywhere above this line in the
# visible notebook; the LIME setup cell further down assigns them, so on a fresh
# kernel that cell has to run before this call.
dist_vdm(x1, x2)

## LIME

In [None]:
import joblib
import numpy as np
from src.datasets import IMDBDataset
from lime.lime_tabular import LimeTabularExplainer

# Build the dataset wrapper and call set_split("test") (presumably selects the
# test split for the indexing below -- confirm in IMDBDataset).
ds = IMDBDataset(config_path="./configs/datasets/imdb.yaml", root="datasets/imdb")
ds.set_split("test")
# First element of items 0 and 1 (presumably the feature vectors; verify
# against IMDBDataset.__getitem__).
x1 = ds[0][0]
x2 = ds[1][0]
# NOTE(review): this is not the last expression of the cell, so Jupyter does
# not display its value.
x1.shape
# joblib.load unpickles the file, which can execute arbitrary code; only load
# model files that come from a trusted source.
knn_classifier = joblib.load("models/analysis-models/knn.pkl")

In [None]:
# Choose an instance to explain (index of a test point)
instance_index = 0
instance = ds[instance_index][0]

# Create a LimeTabularExplainer instance
# NOTE(review): no random_state is passed, so repeated runs may produce
# different explanations -- confirm whether a fixed seed is wanted.
explainer = LimeTabularExplainer(ds.x_train, mode="classification")

# Generate an explanation for the chosen instance, using the classifier's
# predict_proba as the prediction function.
explanation = explainer.explain_instance(instance, knn_classifier.predict_proba)

# Display the explanation
# explanation.show_in_notebook()


In [None]:
# Export the explanation as a standalone HTML page.
cont = explanation.as_html()
# Write with an explicit UTF-8 encoding: without it open() falls back to the
# platform's locale encoding, which may fail on non-ASCII characters in the page.
with open("test.html", "w", encoding="utf-8") as handler:
    handler.write(cont)

In [None]:
# Explanation in dictionary form; per the LIME docs this maps each label to a
# list of (feature_id, weight) pairs -- TODO confirm for the installed version.
exp = explanation.as_map()

# Implementation

## Predefined configuration

In [None]:
from src.analyzers.knn import KNNAnalyzer
# Build the analyzer from paths to the saved KNN model, the TF-IDF vectorizer
# and the generator YAML config (roles inferred from the keyword and file names).
analyzer = KNNAnalyzer(
    knn_path="./models/analysis-models/knn.pkl",
    vectorizer_path="./models/analysis-models/tfidf.pkl",
    cf_generator_config="./configs/models/wf-cf-generator.yaml"
)
text="One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.<br /><br />The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.<br /><br />It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.<br /><br />I would say the main appeal of the show is due to the fact that it goes where other shows wouldn't dare. Forget pretty pictures painted for mainstream audiences, forget charm, forget romance...OZ doesn't mess around. The first episode I ever saw struck me as so nasty it was surreal, I couldn't say I was ready for it, but as I watched more, I developed a taste for Oz, and got accustomed to the high levels of graphic violence. Not just violence, but injustice (crooked guards who'll be sold out for a nickel, inmates who'll kill on order and get away with it, well mannered, middle class inmates being turned into prison bitches due to their lack of street skills or prison experience) Watching Oz, you may become comfortable with what is uncomfortable viewing....thats if you can get in touch with your darker side."
# Run the analyzer on the review text. The second positional argument (2) is
# not explained anywhere in this notebook -- TODO document what it controls.
analyzer(text, 2)
# Print whatever analyzer.explanation() returns after that call.
print(analyzer.explanation())

## Test bench

In [None]:
from src.test_bench import TestBench

# One generator configuration per word category. Every entry shares the same
# decay factor and flip probability; only the list of part-of-speech tags to
# flip differs (tag names look like Penn Treebank tags -- confirm against the
# generator).
configurations = [
    {
        "name": category,
        "generator_config": {
            "sample_prob_decay_factor": 0.2,
            "flip_prob": 0.5,
            "flipping_tags": list(tags),
        },
    }
    for category, tags in (
        ("adjectives", ("JJ", "JJR", "JJS")),
        ("nouns", ("NN", "NNP", "NNPS", "NNS")),
        ("adverbs", ("RB", "RBR", "RBS", "RP")),
        ("verbs", ("VB", "VBD", "VBG", "VBN", "VBP", "VBZ")),
    )
]
text="One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.<br /><br />The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.<br /><br />It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.<br /><br />I would say the main appeal of the show is due to the fact that it goes where other shows wouldn't dare. Forget pretty pictures painted for mainstream audiences, forget charm, forget romance...OZ doesn't mess around. The first episode I ever saw struck me as so nasty it was surreal, I couldn't say I was ready for it, but as I watched more, I developed a taste for Oz, and got accustomed to the high levels of graphic violence. Not just violence, but injustice (crooked guards who'll be sold out for a nickel, inmates who'll kill on order and get away with it, well mannered, middle class inmates being turned into prison bitches due to their lack of street skills or prison experience) Watching Oz, you may become comfortable with what is uncomfortable viewing....thats if you can get in touch with your darker side."

# Test bench pointed at the same model, vectorizer and generator-config paths
# that the KNNAnalyzer cell in "Predefined configuration" uses.
tb = TestBench(
    model_path="./models/analysis-models/knn.pkl",
    vectorizer_path="./models/analysis-models/tfidf.pkl",
    analyzer_name="knn",
    cf_generator_config="./configs/models/wf-cf-generator.yaml",
)

In [None]:
# Call the test bench with the list of generator configurations, the sample
# review and the literal 2 (same literal as in analyzer(text, 2); its meaning is
# not shown in this notebook -- TODO document).
reports = tb(configurations, text, 2)

In [None]:
# Print every report, each followed by one blank line.
for report in reports:
    print(report, end="\n\n")

In [None]:
from src.datasets import IMDBDataset

# Fresh dataset object: this rebinds `ds`, replacing the instance on which
# set_split("test") was called in the LIME section.
ds = IMDBDataset(config_path="./configs/datasets/imdb.yaml", root="datasets/imdb")
# Evaluate on the dataset's x_test / y_test attributes; save_dir is presumably
# where the results are written -- confirm in TestBench.evaluate.
tb.evaluate(ds.x_test, ds.y_test, save_dir="evaluations/knn")