# Exploratory Analysis for xᵢⁿAI Research Toolkit

This notebook provides an initial exploration of the data and model behavior for our AI Interpretability research project.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
from captum.attr import IntegratedGradients, LayerIntegratedGradients
from pyspark.ml import Pipeline
from pyspark.ml.feature import Tokenizer, StopWordsRemover, CountVectorizer, IDF
from pyspark.sql import SparkSession
from transformers import AutoModelForSequenceClassification, AutoTokenizer

%matplotlib inline

## Data Loading and Preprocessing

In [None]:
# Start (or reuse) the Spark session backing this notebook
session_builder = SparkSession.builder.appName("xᵢⁿai - Exploratory Analysis")
spark = session_builder.getOrCreate()

# Read the raw sample CSV: the first row supplies column names and
# Spark infers each column's type from the data
csv_options = {"header": True, "inferSchema": True}
df = spark.read.csv("../data/raw/sample_data.csv", **csv_options)
df.show(5)

In [None]:
# Preprocess the data: tokenize -> drop stop words -> term counts -> TF-IDF.
#
# `Pipeline` lives in `pyspark.ml` (not `pyspark.ml.feature`) and must be
# imported in the imports cell, otherwise this cell raises NameError.
#
# The Spark tokenizer and the fitted pipeline get Spark-specific names so they
# cannot be confused with the Hugging Face `tokenizer` / `model` objects that
# the model cell binds later in the notebook.
word_tokenizer = Tokenizer(inputCol="text", outputCol="words")
remover = StopWordsRemover(inputCol="words", outputCol="filtered")
cv = CountVectorizer(inputCol="filtered", outputCol="tf")
idf = IDF(inputCol="tf", outputCol="features")

# Fit all stages on the raw frame, then apply them to produce a new frame;
# `df` itself is left untouched so this cell can be re-run safely.
tfidf_pipeline = Pipeline(stages=[word_tokenizer, remover, cv, idf])
tfidf_model = tfidf_pipeline.fit(df)
processed_df = tfidf_model.transform(df)

processed_df.select("text", "label", "features").show(5)

## Data Visualization

In [None]:
# Bring the processed Spark frame onto the driver as a pandas DataFrame so it
# can be handed to seaborn.
# NOTE(review): this collects every column of `processed_df`; presumably the
# sample is small enough for that to be cheap - confirm before using on full data.
pandas_df = processed_df.toPandas()

# Bar chart of how many rows carry each label
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='label', data=pandas_df, ax=ax)
ax.set_title('Label Distribution')
plt.show()

## Model Loading and Interpretation

In [None]:
# Load pre-trained model and tokenizer.
# "bert-base-uncased" ships without a sequence-classification head, so
# `from_pretrained` initialises that head randomly. Seeding torch first makes
# the initialisation (and therefore the prediction below) reproducible across
# kernel restarts.
torch.manual_seed(42)
model_name = "bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model.eval()  # make inference mode explicit (no dropout)

# Prepare a sample input.
# truncation=True caps the sequence at the model's maximum length; without it
# a long text overruns BERT's position embeddings and the forward pass fails.
sample_text = pandas_df['text'].iloc[0]
inputs = tokenizer(sample_text, return_tensors="pt", truncation=True)

# Get model prediction. No gradients are needed for a plain forward pass, so
# skip building the autograd graph.
with torch.no_grad():
    outputs = model(**inputs)
# Take the argmax over the class dimension explicitly rather than over the
# flattened logits tensor.
prediction = torch.argmax(outputs.logits, dim=-1).item()
print(f"Sample text: {sample_text}")
print(f"Predicted label: {prediction}")

In [None]:
# Interpret the model's decision with integrated gradients.
#
# Plain IntegratedGradients cannot be applied to `input_ids`: they are integer
# token indices, which can neither be interpolated along the integration path
# nor carry gradients. LayerIntegratedGradients instead attributes with respect
# to the output of the embedding layer, giving one vector per token.
def forward_logits(input_ids, attention_mask):
    """Return the raw class logits as a tensor.

    Captum needs a plain tensor output, whereas the Hugging Face model returns
    an output object, so this wrapper unwraps `.logits`.
    """
    return model(input_ids=input_ids, attention_mask=attention_mask).logits


input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Reference input: same length, every token replaced by padding except the
# first and last positions (the special tokens added by the tokenizer), so the
# attribution measures the contribution of the actual words.
baseline_ids = torch.full_like(input_ids, tokenizer.pad_token_id)
baseline_ids[:, 0] = input_ids[:, 0]
baseline_ids[:, -1] = input_ids[:, -1]

model.eval()  # attributions must be computed without dropout noise
lig = LayerIntegratedGradients(forward_logits, model.get_input_embeddings())
attributions, delta = lig.attribute(
    inputs=input_ids,
    baselines=baseline_ids,
    additional_forward_args=(attention_mask,),
    target=prediction,
    return_convergence_delta=True,
)
# A small delta means the attributions sum (approximately) to the difference
# between the model output at the input and at the baseline.
print(f"Convergence delta: {delta.abs().max().item():.4f}")

# Visualize attributions: collapse the embedding dimension to one score per
# token, then scale to unit L2 norm (skipped when all scores are zero, which
# would otherwise produce NaNs).
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
attr_scores = attributions.sum(dim=-1).squeeze(0)
attr_norm = torch.norm(attr_scores)
if attr_norm > 0:
    attr_scores = attr_scores / attr_norm
attr_scores = attr_scores.cpu().detach().numpy()

# Plot by token position rather than by token string: using the strings as
# categories would merge repeated tokens into a single aggregated bar.
positions = list(range(len(tokens)))
fig, ax = plt.subplots(figsize=(20, 5))
ax.bar(positions, attr_scores)
ax.set_xticks(positions)
ax.set_xticklabels(tokens, rotation=90)
ax.set_xlabel('Token')
ax.set_ylabel('Normalized attribution')
ax.set_title('Token Attributions')
plt.show()

## Conclusion

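This notebook provides an initial exploration of our data and model behavior. We've seen how the data is distributed, how our model makes predictions, and how we can interpret these predictions using integrated gradients. Note that the classification head used here is newly initialized on top of pre-trained BERT and has not been fine-tuned, so the predictions and attributions shown above are illustrative only. Further analysis could involve: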

1. Exploring model drift over time
2. Analyzing attention patterns in different layers of the model
3. Investigating how different preprocessing steps affect model interpretability