In [None]:
import os
import re

import pandas as pd
import torch
from torch.nn.functional import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [None]:
def _detect_text_columns(df, text_col="texts"):
    # normalize column names (strip whitespace)
    df = df.rename(columns={c: c.strip() for c in df.columns})
    # regex: shuf_001, shuf_010, ..., shuf_070, etc.
    shuffle_cols = [c for c in df.columns if re.fullmatch(r"shuf_\d{3}", c)]
    cols = [text_col] + sorted(shuffle_cols)
    return df, cols

@torch.no_grad()
def _preds_attn(model_dir, texts, device=None, max_length=256):
    device = device or ("cuda" if torch.cuda.is_available() else "cpu")
    tok = AutoTokenizer.from_pretrained(model_dir)
    mdl = AutoModelForSequenceClassification.from_pretrained(
        model_dir, output_attentions=True
    ).to(device).eval()

    enc = tok(texts, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
    enc = {k: v.to(device) for k, v in enc.items()}
    out = mdl(**enc, output_attentions=True)

    probs = softmax(out.logits, dim=-1)              # [B, C]
    pred_label = probs.argmax(dim=-1).cpu().tolist()
    pred_prob  = probs.max(dim=-1).values.cpu().tolist()

    attn_stack = torch.stack(out.attentions, dim=0)  # [L, B, H, S, S]
    attn_mean  = attn_stack.mean(dim=(0, 2))         # [B, S, S]
    attn_cls   = attn_mean[:, 0, :].cpu().tolist()   # [B, S]

    return pred_label, pred_prob, attn_cls

def apply_model_to_df_strict(df, model_dir, text_col="texts", batch_size=64,
                             device=None, max_length=256):
    """Run the classifier over the text column and every ``shuf_NNN`` column.

    For each processed column ``col`` three new columns are added:
    ``{col}__pred_label``, ``{col}__pred_prob`` and ``{col}__attn_scores``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is copied, never modified.
    model_dir : str or path-like
        Checkpoint location forwarded to ``_preds_attn``.
    text_col : str
        Name of the main text column.
    batch_size : int
        Number of rows sent to the model per call; must be at least 1.
    device, max_length
        Forwarded unchanged to ``_preds_attn``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` (column names whitespace-stripped) with the prediction
        and attention columns appended.

    Raises
    ------
    ValueError
        If ``batch_size`` is below 1 or an expected input column is missing.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    df = df.copy()
    # 1) detect columns safely
    df, cols = _detect_text_columns(df, text_col=text_col)
    # `cols` always contains text_col, so check that the columns really exist
    # instead of testing for an empty list (which could never happen).
    missing = [c for c in cols if c not in df.columns]
    if missing:
        raise ValueError(
            f"No input columns found. Check your column names. Missing: {missing}"
        )
    print("Will process columns:", cols)

    # 2) run per column in batches
    for col in cols:
        print(f"Processing: {col}")
        # Convert the column once, not once per batch.
        col_texts = df[col].astype(str).tolist()
        labels, probs, attns = [], [], []
        for start in range(0, len(col_texts), batch_size):
            batch_texts = col_texts[start:start + batch_size]
            l, p, a = _preds_attn(
                model_dir, batch_texts, device=device, max_length=max_length
            )
            labels.extend(l)
            probs.extend(p)
            attns.extend(a)
        df[f"{col}__pred_label"]  = labels
        df[f"{col}__pred_prob"]   = probs
        df[f"{col}__attn_scores"] = attns

    return df

In [66]:
# Load the permuted samples, keeping only the first 50 rows of each.
imdb = pd.read_csv("shuffled_data/imdb_sample_permutated.csv").head(50)
sst2 = pd.read_csv("shuffled_data/sst2_sample_permutated.csv").head(50)

In [77]:
# Checkpoint to evaluate. Other checkpoint names noted for this cell:
# "albert_imdb", "albert_sst2", "distilbert_sst2", "distilbert_imdb", "tinybert_imdb"
model_data = "tinybert_sst2"

model_path = f"D:/master/NLP/models/{model_data}/final"

# Create the output directory before running inference, so the final to_csv
# cannot fail on a missing folder after all the model work has been done.
os.makedirs("extracted_data", exist_ok=True)

# NOTE(review): the checkpoint name ends in "sst2" but the IMDB frame is passed
# here, and `sst2` is loaded above without being used in the visible cells.
# Confirm this model/dataset pairing is intended.
df_out = apply_model_to_df_strict(imdb, model_path, text_col="texts")

df_out.to_csv(f"extracted_data/{model_data}_attention_scores.csv", index=False)

Will process columns: ['texts', 'shuf_001', 'shuf_010', 'shuf_020', 'shuf_030', 'shuf_050', 'shuf_070', 'shuf_100']
Processing: texts
Processing: shuf_001
Processing: shuf_010
Processing: shuf_020
Processing: shuf_030
Processing: shuf_050
Processing: shuf_070
Processing: shuf_100


In [None]:
# NOTE(review): unfinished cell. This `def` has no parameter list or body, so it
# is a syntax error as written. Complete the function or delete the cell.
def sta

Unnamed: 0,texts,labels,indices,shuf_001,shuf_010,shuf_020,shuf_030,shuf_050,shuf_070,shuf_100,...,shuf_030__attn_scores,shuf_050__pred_label,shuf_050__pred_prob,shuf_050__attn_scores,shuf_070__pred_label,shuf_070__pred_prob,shuf_070__attn_scores,shuf_100__pred_label,shuf_100__pred_prob,shuf_100__attn_scores
0,This movie had very few moments of real drama....,0,10476,This movie had very few moments of real drama....,This movie had very up moments of real drama. ...,to movie had us few moments of real drama. Aft...,This us spoof church few moments of real betwe...,quite movie had the earlier some of The minute...,characters copper the speak some the character...,the of have of was laughing. the that back one...,...,"[0.10838531702756882, 0.005025666672736406, 0....",0,0.93885,"[0.10730051249265671, 0.004677176475524902, 0....",0,0.953111,"[0.1109551265835762, 0.00907150935381651, 0.00...",0,0.760313,"[0.1055300161242485, 0.0035572645720094442, 0...."
1,"I watched this film when I was a kid, and I th...",0,1824,"I watched this film when I was a kid, and I th...",I watched were film when I was a this and I th...,"I watched this movies, when I was I kid, and I...","I in most film when I to needed kid, and the t...",I be this film all I was a Universal and I min...,Now watched squalid film make I to all that an...,extremely Umiversal out thought Karloff.<br al...,...,"[0.10655680298805237, 0.003989781718701124, 0....",1,0.541567,"[0.1009078249335289, 0.0029683413449674845, 0....",0,0.81135,"[0.10396168380975723, 0.005374686326831579, 0....",0,0.78891,"[0.099251389503479, 0.004198113456368446, 0.00..."
2,"Okul""The School"" is a result of a new trend in...",0,409,"Okul""The School"" is a result of a new trend in...","Okul""The School"" is concentrating result of a ...","Okul""The School"" is a result of a is of in Tur...","Okul""The used is stories what with a new a in ...","School"" is is a stories of a new trend harder....","Okul""The being I a think of a scary over of su...",that movie could pretty missed directors suppo...,...,"[0.10713869333267212, 0.004058623220771551, 0....",0,0.917696,"[0.10785745829343796, 0.006849894765764475, 0....",0,0.628115,"[0.10653405636548996, 0.0034043891355395317, 0...",1,0.62131,"[0.10515197366476059, 0.004150696098804474, 0...."
3,I thought that this movie might be a good spoo...,0,12149,I thought that this movie might be a good spoo...,I thought that this movie might be a good spoo...,I thought that this movie it be negative good ...,they the that this movie might be a good makeu...,I film that answer movie happens be to been te...,to it a 20 movie turn be a person doesn't or l...,good just Case makes in put I the were all eno...,...,"[0.10335562378168106, 0.00462707132101059, 0.0...",0,0.803202,"[0.10071989893913269, 0.004679196979850531, 0....",0,0.828591,"[0.10246330499649048, 0.0034393586684018373, 0...",0,0.788904,"[0.09946972131729126, 0.005184650886803865, 0...."
4,"Living in Edinburgh, and have a great thirst f...",0,4506,"Living in Edinburgh, and have a great thirst f...","Living in Edinburgh, and have a great thirst f...","Living in Edinburgh, and have a Bobby thirst f...",owner in and and have There great thirst for h...,padding to script. for have a great got Edinbu...,Living in old coming will flimsy actors great ...,and the coming hoping padding a owner a For ma...,...,"[0.10754916816949844, 0.006471964996308088, 0....",0,0.751703,"[0.10136308521032333, 0.003293124260380864, 0....",1,0.512849,"[0.1027863621711731, 0.008232557214796543, 0.0...",0,0.795897,"[0.10044253617525101, 0.007617470342665911, 0...."
5,The movie is about a day in the life of a woma...,0,4012,The movie is about a day in the life of a woma...,The movie fat about a day in the life of a wom...,The movie is about I day in later. life film. ...,The movie is Orson a day in film. it's of the ...,it and is about a day through the life make a ...,The just is you'd goes the mentally header lif...,to the Orson of the only building a off tells ...,...,"[0.10039528459310532, 0.0025850667152553797, 0...",0,0.868243,"[0.10390198230743408, 0.0024338664952665567, 0...",0,0.826978,"[0.10208037495613098, 0.0027967977803200483, 0...",0,0.661622,"[0.10585761070251465, 0.004017775412648916, 0...."
6,I've seen this programme a few times and the m...,0,3657,I've seen this programme a few times and the m...,I've seen this programme stuff. few times and ...,I've this poured programme a few times and the...,"I've seen popular money, a few times fashions ...","make seen bad more down. comes and to them, mo...",much also this the a is times and Lynn more sh...,"skirts too isn't mature Lynn them, the it. say...",...,"[0.10436192154884338, 0.0040102251805365086, 0...",0,0.547297,"[0.09912597388029099, 0.004294732119888067, 0....",0,0.601591,"[0.0991642102599144, 0.005006375256925821, 0.0...",1,0.530639,"[0.0956607386469841, 0.007292932365089655, 0.0..."
7,Now i have seen two movies by the director Che...,0,2286,Now i have seen two movies by the director Che...,Now i have seen two movies by the director Che...,"Now i have seen two movies by characters, dire...",Now i have seen two movies by The director dia...,Now Ryan have seen two in by nearly director C...,"seen i have dialogue Assassin movies (okay, is...","and be a battle 4/10 or was this cynic, events...",...,"[0.09491591900587082, 0.005336919333785772, 0....",1,0.69626,"[0.09754977375268936, 0.0065473392605781555, 0...",1,0.610451,"[0.09639585763216019, 0.005131721030920744, 0....",1,0.839517,"[0.09544067829847336, 0.006978588178753853, 0...."
8,"OK, so I know better than to watch movies on S...",0,12066,"OK, so I know better than to watch movies on S...",and so I know better than to watch movies on S...,Final it to I better than to watch the on the ...,"many so I know it, than to watch movies to Sci...","it films I know the a to to OK, no /><br . . O...","that but into entrepreneur, Or than favorite w...","it, or Did about one to scares. flopping movie...",...,"[0.09777116775512695, 0.0053496831096708775, 0...",0,0.86702,"[0.09403698891401291, 0.002303728833794594, 0....",0,0.814736,"[0.09453773498535156, 0.0036662037018686533, 0...",0,0.800233,"[0.09766300767660141, 0.0022278118412941694, 0..."
9,I starred as Eugene Morris Jerome in my high s...,0,1679,I starred as Eugene Morris Jerome in my high s...,I sarcastic as Eugene Morris Jerome in my high...,I was as the Morris Jerome in my high you adap...,and starred as Eugene Morris Jerome in my high...,I starred as backwards doesn't Jerome in The m...,A he honestly see Simon. character starred my ...,forwards honestly adaptation I know the other ...,...,"[0.11420188099145889, 0.010619019158184528, 0....",0,0.844455,"[0.1136832907795906, 0.009191364981234074, 0.0...",0,0.809653,"[0.10979849845170975, 0.0045877136290073395, 0...",0,0.985339,"[0.11054786294698715, 0.009944336488842964, 0...."
