In [101]:
import time
import random
import csv
import itertools
import json
import re
import os
import math
import pickle
from xgboost import XGBClassifier

import numpy as np
import pennylane as qml
from concurrent.futures import ThreadPoolExecutor

from sklearn.datasets import load_files
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression, Lasso
from sklearn.metrics import accuracy_score

# Classical LIME
from lime.lime_text import LimeTextExplainer

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from keras.utils import to_categorical


import tensorflow as tf

# PART 0: DATA LOADING AND PREPROCESSING

In [102]:

def clean_text(text):
    """
    Removes HTML tags and converts to lowercase.
    """
    # Remove anything between <...> tags, then lowercase the text
    cleaned = re.sub(r'<.*?>', '', text).lower()
    return cleaned

def load_imdb_subset(
    num_samples=5000, 
    min_df=1, 
    max_features=15, 
    stopwords_option=True,
    stop_words = 'english'
):
    """
    Loads a subset of IMDb data, returns:
      - X_train, X_test (lists of text)
      - y_train, y_test (0/1 sentiment)
      - vectorizer (CountVectorizer)
    
    Now with text cleaning for HTML, lowercase, etc.
    """
    data = load_files(
        "C:/Users/migue/Downloads/aclImdb_v1/aclImdb/train",
        categories=['pos','neg'], 
        encoding="utf-8", 
        decode_error="replace"                  
    )
    X_text_all, y_all = data.data, data.target

    # Clean text (HTML removal + lowercase)
    X_text_all = [clean_text(txt) for txt in X_text_all]

    # Shuffle & truncate to num_samples
    full_idx = np.arange(len(X_text_all))
    #np.random.shuffle(full_idx)
    subset_idx = full_idx[:num_samples]
    X_text = [X_text_all[i] for i in subset_idx]
    y = y_all[subset_idx]

    # Train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X_text, y, test_size=0.2, random_state=42
    )

    # Vectorizer: presence/absence
    if stopwords_option:
        vectorizer = CountVectorizer(
            binary=True, stop_words=stop_words, 
            min_df=min_df, max_features=max_features
        )
    else:
        vectorizer = CountVectorizer(
            binary=True, stop_words=None, 
            min_df=min_df, max_features=max_features
        )

    vectorizer.fit(X_train)
    return X_train, X_test, y_train, y_test, vectorizer

#def train_XGBoost_classifier(X_train, y_train, vectorizer):
    """
    Trains an XGBoost classifier on the binary presence/absence of words.
    Returns the fitted model.
    """
    X_train_bow = vectorizer.transform(X_train)
    # Use log(len(y_train)) as n_estimators (rounded to an int)
    clXGB = XGBClassifier(
        #booster="gblinear",
        objective="binary:logistic", 
        eval_metric="logloss", 
        random_state=42, 
        n_estimators=int(round(math.log(len(y_train)))),
        learning_rate=0.1, 
        max_depth=3
    )
    clXGB.fit(X_train_bow, y_train)
    return clXGB

#def get_cached_xgboost(X_train, y_train, vectorizer, num_samples, max_features, stopwords_option):
    """
    Checks if a classifier trained with the given parameters exists.
    If so, load it; otherwise, train it and save it.
    """
    filename = f"cached_xgboost_ns{num_samples}_mf{max_features}_sw{stopwords_option}_xgboost_classifier_seed42.pkl"
    if os.path.exists(filename):
        print("Loading cached xgboost from", filename)
        with open(filename, 'rb') as f:
            clXGB = pickle.load(f)
    else:
        print("No cached classifier found. Training a new one...")
        clXGB = train_XGBoost_classifier(X_train, y_train, vectorizer)
        with open(filename, 'wb') as f:
            pickle.dump(clXGB, f)
        print("Cached classifier saved as", filename)
    return clXGB
def train_NN_classifier(X_train, y_train, X_test, y_test, vectorizer):
    """
    Trains a neural network on the binary presence/absence of words.
    Returns the fitted model.
    """
    X_train_bow = vectorizer.transform(X_train)
    X_valid_bow = vectorizer.transform(X_test)
    input_dim = X_train_bow.shape[1]

    model = Sequential([
        Dense(64, activation='relu', input_shape=(input_dim,)),  # First hidden layer
        Dropout(0.3),  # Dropout with 30% probability
        Dense(32, activation='relu'),  # Second hidden layer
        Dropout(0.2),  # Dropout with 20% probability
        Dense(1, activation='sigmoid')  # Output layer for binary classification
    ])

    # Compile the model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train_bow, y_train, epochs=100, batch_size=10, validation_data=(X_valid_bow, y_test), verbose=1)
    return model

def get_cached_NN(X_train, y_train, vectorizer, num_samples, max_features, stop_words, X_valid, y_valid):
    """
    Checks if a classifier trained with the given parameters exists.
    If so, load it; otherwise, train it and save it.
    """
    filename = f"cached_classifier_ns{num_samples}_mf{max_features}_sw{stop_words}_NN_classifier_seed42.pkl"
    if os.path.exists(filename):
        print("Loading cached logistic from", filename)
        with open(filename, 'rb') as f:
            clNN = pickle.load(f)
    else:
        print("No cached classifier found. Training a new one...")
        clNN = train_NN_classifier(X_train, y_train, X_valid, y_valid, vectorizer)
        with open(filename, 'wb') as f:
            pickle.dump(clNN, f)
        print("Cached classifier saved as", filename)
    return clNN

#def train_logistic_classifier(X_train, y_train, vectorizer):
    """
    Trains a logistic regression on the binary presence/absence of words.
    Returns the fitted model.
    """
    X_train_bow = vectorizer.transform(X_train)
    clf = LogisticRegression()
    clf.fit(X_train_bow, y_train)
    return clf

#def get_cached_logistic(X_train, y_train, vectorizer, num_samples, max_features, stop_words):
    """
    Checks if a classifier trained with the given parameters exists.
    If so, load it; otherwise, train it and save it.
    """
    filename = f"cached_classifier_ns{num_samples}_mf{max_features}_sw{stop_words}_logistic_classifier_seed42.pkl"
    if os.path.exists(filename):
        print("Loading cached logistic from", filename)
        with open(filename, 'rb') as f:
            clf = pickle.load(f)
    else:
        print("No cached classifier found. Training a new one...")
        clf = train_logistic_classifier(X_train, y_train, vectorizer)
        with open(filename, 'wb') as f:
            pickle.dump(clf, f)
        print("Cached classifier saved as", filename)
    return clf

#def train_lasso_regression(X_train, y_train, vectorizer):
    """
    Trains a logistic regression on the binary presence/absence of words.
    Returns the fitted model.
    """
    X_train_bow = vectorizer.transform(X_train)
    lasso_model = Lasso(alpha=0.5)
    lasso_model.fit(X_train_bow, y_train)
    return lasso_model
    

#def get_cached_lasso(X_train, y_train, vectorizer, num_samples, max_features, stopwords_option, alpha):
    """
    Checks if a Lasso model trained with the given parameters exists.
    If so, load it; otherwise, train it and save it.
    """
    filename = f"cached_lasso_ns{num_samples}_mf{max_features}_sw{stopwords_option}_seed42_alpha{alpha}.pkl"
    if os.path.exists(filename):
        print("Loading cached Lasso model from", filename)
        with open(filename, 'rb') as f:
            lasso_model = pickle.load(f)
    else:
        print("No cached Lasso model found. Training a new one...")
        lasso_model = train_lasso_regression(X_train, y_train, vectorizer)
        with open(filename, 'wb') as f:
            pickle.dump(lasso_model, f)
        print("Cached Lasso model saved as", filename)
    return lasso_model

# CLASSICAL LIME

In [103]:



#CHANGE clXGB TO clf IF WE WANT LOGISTIC INSTEAD OF XGBOOST
def run_classical_lime(
    text_sample, clXGB, vectorizer,  # These values dont really matter, 
    k_features=10, num_samples=500   # The values used re the ones used when u call the function later
):
    """
    Runs classical LIME on a single text instance.
    Returns the top (word, weight) pairs.
    """
    class_names = ["negative", "positive"]
    explainer = LimeTextExplainer(class_names=class_names, feature_selection="auto")

    def predict_proba(texts):
        bow = vectorizer.transform(texts) 
        proba = clXGB.predict(bow)
        return np.hstack((1 - proba, proba))  # Return probabilities for both classes
        
        

    explanation = explainer.explain_instance(
        text_sample,
        predict_proba,
        num_features=k_features,
        num_samples=num_samples  # e.g. 300 or 500
    )
    return explanation.as_list()  # list of (word, weight)

# Q-LIME Pi (Flip Only 1->0)

# EXPERIMENTAL ROUTINE

In [104]:
def run_experiment( #Did I change these numbers? check if i fcked up smth here!!!!!!!!!!
    num_samples=10,
    min_df=1,
    max_features=15,
    stopwords_option=True,
    lime_num_samples=30,
    shots=None,
    n_test_explanations=10,
    stop_words = None
):
    """
    1) Load data with given params (includes text cleaning)
    2) Train logistic classifier
    3) Evaluate test accuracy
    4) Pick n_test_explanations random samples
    5) For each, run classical LIME vs. Q-LIME Pi
    6) Return summary stats
    """
    # A) Load data
    X_train, X_test, y_train, y_test, vectorizer = load_imdb_subset(
        num_samples=num_samples,
        min_df=min_df,
        max_features=max_features,
        stopwords_option=stopwords_option,
        stop_words = stop_words
    )
    # B) Train model

    #clf  = get_cached_logistic(X_train, y_train, vectorizer, num_samples, max_features, stopwords_option)
    #clXGB = get_cached_xgboost(X_train, y_train, vectorizer, num_samples, max_features, stopwords_option)
    clNN = get_cached_NN(X_train, y_train, vectorizer, num_samples, max_features, stop_words, X_test, y_test)

    # Evaluate
    X_test_bow = vectorizer.transform(X_test)
    test_acc = accuracy_score(y_test, clNN.predict(X_test_bow) > 0.5)  # Convert probabilities to binary class labels
    #test_acc = accuracy_score(y_test, y_pred)

    #test_acc = accuracy_score(y_test, clf.predict(X_test_bow))
    #logistic_weights = clf.coef_[0]
    #bias = clf.intercept_[0]


    # IT ONLY GIVES 1 WEIGHT NOT 15 !!!!
    #logistic_weights = clXGB.coef_[0]
    #bias = clXGB.intercept_[0]


    #lasso_model = get_cached_lasso(X_train, y_train, vectorizer, num_samples, max_features, stopwords_option, alpha=0.1)
    

    # We'll track times & top-feature overlap
    lime_times = []
    # qlime_times = []
    # overlaps = []
    instance_local_accuracies = []

    # Random samples for explanation
    #n_test = len(X_test)
    sample_indices = [5,6,12,11,10, 0, 1, 2, 3, 4]
    #random.sample(range(n_test), n_test_explanations)

    for idx in sample_indices:
        text_sample = X_test[idx]
        y_true = y_test[idx]

        # 1) Classical LIME
        start_lime = time.time()
        explanation_lime = run_classical_lime(
            text_sample, clNN, vectorizer, 
            k_features=15, num_samples=lime_num_samples
        )

        bow = vectorizer.transform([text_sample])
        bin_features = bow.toarray()[0]

        y_pred = clNN.predict(bow)[0]
        instance_accuracy = int(y_pred == y_true)
        instance_local_accuracies.append(instance_accuracy)

        #explanation_lime = run_classical_lime(
        #    text_sample, clf, vectorizer, 
        #    k_features=15, num_samples=lime_num_samples
        #)
        lime_time = time.time() - start_lime
        lime_times.append(lime_time)

        # parse top features
        lime_dict = dict(explanation_lime)
        top_words_lime = sorted(
            lime_dict.keys(),
            key=lambda w: abs(lime_dict[w]),
            reverse=True
        )[:5]

        # 2) Q-LIME Pi
        

        #start_qlime = time.time()
        #contributions_qlime = quantum_lime_explanation(bin_features, clf, lasso_model, shots=shots)
        #contributions_qlime = quantum_lime_explanation(
        #    bin_features, logistic_weights, bias=bias, shots=shots)


        contributions_lime_abs = [(word, abs(score)) for word, score in explanation_lime] # Absolute values for comparison; This is a tuple. PROB SHOULD MAKE QLIME A TUPLE TOO!
        
        #unsorted_contributions_qlime_abs = tuple(
            #(word, abs(score)) for word, score in zip(vectorizer.get_feature_names_out(), contributions_qlime)) # Absolute values for comparison
        
        #contributions_qlime_sorted = tuple(
        #sorted(unsorted_contributions_qlime_abs, key=lambda x: x[1], reverse=True))

        #print("X_test_bow",X_test_bow)
          
        print("text sample", text_sample, "bin_features", bin_features)
        #, "vectorizer", vectorizer.get_feature_names_out(), "contributions_qlime_abs", contributions_qlime_abs, "Contributions_Lime", top_words_lime
     
        print("Classical LIME Explanation:")
        for word, weight in contributions_lime_abs:
            print(f"Word: {word}, Importance: {weight}")

        # print("\nQ-LIME Pi Explanation:")
        #for word, weight in contributions_qlime_sorted:
        #    print(f"Word: {word}, Importance: {weight}")
        
        #print("\n weights", clf.coef_[0])
        #qlime_time = time.time() - start_qlime
        #qlime_times.append(qlime_time)

        # top 5 (by absolute value)
        # nonzero_indices = [
        #     (i, abs(contributions_qlime[i])) 
        #     for i in range(len(contributions_qlime))
        # ] 
        # top_indices_qlime = sorted(nonzero_indices, key=lambda x: x[1], reverse=True)[:5]
        # top_words_qlime = [
        #     vectorizer.get_feature_names_out()[i2]
        #     for (i2, val) in top_indices_qlime
        # ] 

        # # measure overlap
        #  overlap = set(top_words_lime).intersection(set(top_words_qlime))
        # overlaps.append(len(overlap)) 

    # Summary
    results = {
        "local_accuracy": np.mean(instance_local_accuracies),
        "lime_time_avg": round(np.mean(lime_times), 4),
        # "qlime_time_avg": round(np.mean(qlime_times), 4),
        # "overlap_avg": round(np.mean(overlaps), 4),
    }
    return results



# MAIN

In [105]:


if __name__ == "__main__":
    import pandas as pd

    # Parameter grid to systematically vary certain settings
    param_grid = {
        "num_samples": [500],
        "max_features": [20],
        "stopwords_option": [True],
        "lime_num_samples": [500],
        # Shots: None => analytic mode, 100 => finite sampling
        "shots": [100],
        "stop_words": ['english'],
        "n_test_explanations": [5]
    }

    combos = list(itertools.product(*param_grid.values()))
    all_results = []

    for combo in combos:
        (num_samples_, max_features_, stopwords_, lime_samps_, shots_, stop_words_, n_test_explanations_) = combo
        
        print("\n==================================")
        print(f"Running experiment with: "
              f"num_samples={num_samples_}, "
              f"max_features={max_features_}, "
              f"stopwords={stopwords_}, "
              f"lime_num_samples={lime_samps_}, "
              f"shots={shots_},"
              f"stop_words={stop_words_},"
              f"n_test_explanations={n_test_explanations_}")
        
        res = run_experiment(
            num_samples=num_samples_,
            max_features=max_features_,
            stopwords_option=stopwords_,
            lime_num_samples=lime_samps_,
            shots=shots_,
            stop_words=stop_words_,
            n_test_explanations=n_test_explanations_,
            
            
        )
        res_row = {
            "num_samples": num_samples_,
            "max_features": max_features_,
            "stopwords": stopwords_,
            "lime_num_samples": lime_samps_,
            "shots": shots_,
            "local_accuracy": res["local_accuracy"],
            "lime_time_avg": res["lime_time_avg"],
            # "qlime_time_avg": res["qlime_time_avg"],
            # "overlap_avg": res["overlap_avg"],
            "n_test_explanations": n_test_explanations_,
            "stop_words": stop_words_
        }
        print("Results =>", res_row)
        all_results.append(res_row)

    # Save results to CSV
    df = pd.DataFrame(all_results)
    df.to_csv("results_expanded_flips.csv", index=False)
    print("\nAll done! Saved results to 'results_expanded_flips.csv'.")


Running experiment with: num_samples=500, max_features=20, stopwords=True, lime_num_samples=500, shots=100,stop_words=english,n_test_explanations=5
Loading cached logistic from cached_classifier_ns500_mf20_swenglish_NN_classifier_seed42.pkl
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step
Classical LIME Explanation:
Word: plot, Importance: 0.33301511842435233
Word: just, Importance: 0.24613271944457665
Word: way, Importance: 0.2245547074301717
Word: best, Importance: 0.21062127354254187
Word: movies, Importance: 0.18368342994025386
Word: movie, Importance: 0.15765671007894988
Word: good, Importance: 0.13413046879996862
Word: on, Importance: 0.08096507616424309
Word: really, Importance: 0.06733995547729016
Word: dirty, Importance: 0.05366553741612623
Word: examine, Importance: 0.03783122197354466
Word: cinematic, Impor

  instance_accuracy = int(y_pred == y_true)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 149ms/step
text sample this is so incredibly bad. poor actors. you can tell they're trying really hard to polish a turd, but we all know you can't. the writing is so obvious and facile, it's sad watching them try to sell it. the humor and pacing are so labored, it's hard to believe any of these good actors signed on for this.that said, it's so awful that we're having a hard time looking away from the screen. we just have to know where this trainwreck goes. but that's only because we caught it on tv. if we had actually paid for this, we'd be disgusted. so it gets 2 stars for being at least amusingly/fascinatingly bad. and the incidental music (as opposed to the trying-too-hard indie soundtrack) is laughably reminiscent of an episode of scooby-doo... but not as good. bin_features [0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 1 0 0]
Classical LIME Explanation:
Word:

  instance_accuracy = int(y_pred == y_true)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step
text sample i chanced upon this movie because i had a free non-new release from blockbuster and needed to grab something quickly, as the store was getting ready to close for the evening. the plain white cover and title intrigued me. i'm a (relatively speaking) "old" lady and my son is a young man of 30. i adore movies that are sheer entertainment, such as the sixth sense, interview with a vampire, harry potter and beetlejuice. my son, on the other hand, is a film graduate and enjoys very specialized foreign films, such as those directed by bergman or hertzog. we generally hate each other's movie choices, however, we both watched and loved the movie nothing! it was unlike any movie we'd ever seen before. we're both cynical/critical personality types and we usually crack on movies while we watch them -- but in this case we just laughed and enjo

  instance_accuracy = int(y_pred == y_true)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step
text sample although the plot was a bit sappy at times, and very rushed at the end, as if the director had run out of his alloted time and needed to hurry up and finish the story, overall it was pretty good for the made-for-backwoods-cable-tv genre. however, the actress who played the babysitter, mariana klaveno, was very good! i hope to see more of her around in movie-land. the music was also well done, getting every possible chill out of the dah-duh-dah-duh (think "jaws") type music-based tension build-ups.i don't think i'd want to watch "while the children sleep" again, but if i did, it would be to focus on the performance of the talented klaveno. bin_features [0 0 0 0 1 0 1 0 0 0 0 1 0 0 1 0 1 1 1 0]
Classical LIME Explanation:
Word: don, Importance: 0.2698347607147438
Word: plot, Importance: 0.20520979415611257
Word: watch, Importance: 

  instance_accuracy = int(y_pred == y_true)


[1m 1/16[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 82ms/step

InvalidArgumentError: {{function_node __wrapped__IteratorGetNext_output_types_1_device_/job:localhost/replica:0/task:0/device:CPU:0}} Error in user-defined function passed to ParallelMapDatasetV2:1654 transformation with iterator: Iterator::Root::Prefetch::ParallelMapV2: indices[17] = 497 is not in [0, 497)
	 [[{{node RaggedGather/RaggedGather}}]] [Op:IteratorGetNext] name: 