In [1]:
import math
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import numpy as np
import pandas as pd

In [2]:
def calculate_sentence_log_prob(model, tokenizer, sentence, device):
    """Score a single sentence with a causal language model.

    The sentence is tokenized, moved to ``device`` and fed to ``model`` with
    its own token ids as labels. The returned ``loss`` is treated as the mean
    negative log-probability per scored token.

    Args:
        model: Callable accepting the tokenizer output as keyword arguments
            plus ``labels`` and returning an object with a ``loss`` attribute.
        tokenizer: Callable turning ``sentence`` into a mapping that contains
            ``"input_ids"`` and supports ``.to(device)``.
        sentence: Text to score.
        device: Device the encoded inputs are moved to.

    Returns:
        ``(sentence_log_prob, perplexity)`` where ``sentence_log_prob`` is
        ``-loss * (num_tokens - 1)`` and ``perplexity`` is ``exp(loss)``.
    """
    encoded = tokenizer(sentence, return_tensors="pt").to(device)
    token_ids = encoded["input_ids"]

    # Inference only: no autograd graph is needed to read the loss.
    with torch.no_grad():
        mean_nll = model(**encoded, labels=token_ids).loss.item()

    # The mean is scaled by (sequence length - 1): every token except the
    # first one is counted as a scored position.
    scored_positions = token_ids.shape[1] - 1

    return -mean_nll * scored_positions, math.exp(mean_nll)

In [3]:
# Checkpoint to evaluate and the device that encoded inputs are sent to.
model_id = "mistralai/Mistral-7B-v0.3"
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading {model_id}...")
# `trust_remote_code` is intentionally not enabled: the checkpoint loads as the
# Mistral classes that ship with transformers, so there is no reason to allow
# Python code from the model repository to be executed at load time.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Half-precision weights; `device_map="auto"` lets the loader decide where the
# weights are placed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)

Loading mistralai/Mistral-7B-v0.3...


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

2025-12-02 11:52:33.547191: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764676353.739292      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764676353.793360      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

### Testing on Animate Subject Passive

In [4]:
data1 = pd.read_csv("/kaggle/input/dataset/animate_subject_passive.csv")

# Score every good/bad sentence pair first and attach the result columns once
# afterwards. This avoids growing the frame cell-by-cell with `.loc` while it
# is being iterated, keeps the assignment positional (safe even if the index
# has repeated labels) and yields the same columns for an empty input file.
score_columns = [
    "log(P(S1))",
    "log(P(S2))",
    "log(P(S1)) > log(P(S2))",
    "Confidence",
    "Ppl(S1)",
    "Ppl(S2)",
]

pair_scores = []
for s1, s2 in zip(data1["rows__row__sentence_good"], data1["rows__row__sentence_bad"]):
    log_prob_s1, ppl_s1 = calculate_sentence_log_prob(model, tokenizer, s1, device)
    log_prob_s2, ppl_s2 = calculate_sentence_log_prob(model, tokenizer, s2, device)

    pair_scores.append({
        "log(P(S1))": log_prob_s1,
        "log(P(S2))": log_prob_s2,
        # 1.0 when the good sentence gets the higher log-probability, else 0.0
        # (stored as a float so the saved CSV keeps its 1.0 / 0.0 format).
        "log(P(S1)) > log(P(S2))": 1.0 if log_prob_s1 > log_prob_s2 else 0.0,
        # Absolute log-probability gap between the two sentences.
        "Confidence": abs(log_prob_s1 - log_prob_s2),
        "Ppl(S1)": ppl_s1,
        "Ppl(S2)": ppl_s2,
    })

scores = pd.DataFrame(pair_scores, columns=score_columns)
for column in score_columns:
    data1[column] = scores[column].to_numpy()

In [5]:
# Inspect the scored pairs; the bare expression renders the frame as the cell output.
data1

Unnamed: 0,rows__row__sentence_good,rows__row__sentence_bad,log(P(S1)),log(P(S2)),log(P(S1)) > log(P(S2)),Confidence,Ppl(S1),Ppl(S2)
0,Amanda was respected by some waitresses.,Amanda was respected by some picture.,-53.197736,-56.755071,1.0,3.557334,369.023226,3320.488670
1,Some lake was passed by some cashiers.,Some lake was passed by some phenomena.,-61.211786,-60.220716,0.0,0.991070,899.023870,805.280545
2,Lisa was kissed by the boys.,Lisa was kissed by the blouses.,-39.273139,-47.158657,1.0,7.885519,273.266733,363.155869
3,Amanda isn't respected by the children.,Amanda isn't respected by the cups.,-46.116250,-54.106533,1.0,7.990283,168.010710,408.232557
4,The glove was noticed by some woman.,The glove was noticed by some mouse.,-47.391754,-50.890230,1.0,3.498476,193.591527,285.565084
...,...,...,...,...,...,...,...,...
95,All public parks were explored by some teenager.,All public parks were explored by some screen.,-54.611351,-60.268464,1.0,5.657114,431.785025,809.564145
96,This cake is seen by the pedestrians.,This cake is seen by the parentheses.,-44.630265,-48.117137,1.0,3.486872,86.749662,122.942122
97,Every person wasn't cared for by the offspring.,Every person wasn't cared for by the fungus.,-54.526314,-56.067416,1.0,1.541102,94.053097,74.655047
98,Edward wasn't insulted by some alumni.,Edward wasn't insulted by some oxen.,-58.021603,-58.304277,1.0,0.282674,125.855390,200.414719


In [6]:
# Save the scored pairs to the Kaggle working directory.
# NOTE(review): no `index=` argument is passed, so pandas presumably writes the
# row index as an extra unnamed first column — confirm that is intended.
data1.to_csv("/kaggle/working/animate_subject_passive_output.csv")

### Testing on Animate Subject Trans

In [7]:
data2 = pd.read_csv("/kaggle/input/dataset/animate_subject_trans.csv")

# Score every good/bad sentence pair first and attach the result columns once
# afterwards. This avoids growing the frame cell-by-cell with `.loc` while it
# is being iterated, keeps the assignment positional (safe even if the index
# has repeated labels) and yields the same columns for an empty input file.
score_columns = [
    "log(P(S1))",
    "log(P(S2))",
    "log(P(S1)) > log(P(S2))",
    "Confidence",
    "Ppl(S1)",
    "Ppl(S2)",
]

pair_scores = []
for s1, s2 in zip(data2["rows__row__sentence_good"], data2["rows__row__sentence_bad"]):
    log_prob_s1, ppl_s1 = calculate_sentence_log_prob(model, tokenizer, s1, device)
    log_prob_s2, ppl_s2 = calculate_sentence_log_prob(model, tokenizer, s2, device)

    pair_scores.append({
        "log(P(S1))": log_prob_s1,
        "log(P(S2))": log_prob_s2,
        # 1.0 when the good sentence gets the higher log-probability, else 0.0
        # (stored as a float so the saved CSV keeps its 1.0 / 0.0 format).
        "log(P(S1)) > log(P(S2))": 1.0 if log_prob_s1 > log_prob_s2 else 0.0,
        # Absolute log-probability gap between the two sentences.
        "Confidence": abs(log_prob_s1 - log_prob_s2),
        "Ppl(S1)": ppl_s1,
        "Ppl(S2)": ppl_s2,
    })

scores = pd.DataFrame(pair_scores, columns=score_columns)
for column in score_columns:
    data2[column] = scores[column].to_numpy()

In [8]:
# Inspect the scored pairs; the bare expression renders the frame as the cell output.
data2

Unnamed: 0,rows__row__sentence_good,rows__row__sentence_bad,log(P(S1)),log(P(S2)),log(P(S1)) > log(P(S2)),Confidence,Ppl(S1),Ppl(S2)
0,Tina revealed Margaret.,The horse revealed Margaret.,-43.118892,-41.896906,0.0,1.221986,5562.363257,4356.312318
1,Danielle visited Irene.,The eye visited Irene.,-39.424081,-44.196545,1.0,4.772464,713.855201,1581.439480
2,Paul runs around the art galleries.,The river runs around the art galleries.,-42.273653,-42.069731,0.0,0.203922,109.625773,67.152965
3,Most banks have praised Raymond.,The jackets have praised Raymond.,-48.500696,-49.248203,1.0,0.747507,3240.391270,1136.214738
4,Every doctor was selling some restaurants.,A cup was selling some restaurants.,-53.299371,-52.008657,0.0,1.290714,2026.764227,1685.484165
...,...,...,...,...,...,...,...,...
95,Lawrence fled from Brad.,Literature fled from Brad.,-45.431013,-45.346725,0.0,0.084289,8832.581632,1915.602394
96,These dancers should investigate Caroline.,Glaciers should investigate Caroline.,-51.727142,-51.303811,0.0,0.423331,1619.044912,1524.033729
97,Mitchell can't complain about Peter.,This window can't complain about Peter.,-45.051517,-52.385679,1.0,7.334162,279.063592,337.184759
98,Every doctor wasn't describing a river.,This story wasn't describing a river.,-55.198686,-44.859190,0.0,10.339495,460.902855,146.109230


In [9]:
# Save the scored pairs to the Kaggle working directory.
# NOTE(review): no `index=` argument is passed, so pandas presumably writes the
# row index as an extra unnamed first column — confirm that is intended.
data2.to_csv("/kaggle/working/animate_subject_trans_output.csv")

### Testing on Self-Curated Data

In [10]:
data3 = pd.read_csv("/kaggle/input/dataset/Self_curated_data.csv")

# Score every good/bad sentence pair first and attach the result columns once
# afterwards. This avoids growing the frame cell-by-cell with `.loc` while it
# is being iterated, keeps the assignment positional (safe even if the index
# has repeated labels) and yields the same columns for an empty input file.
score_columns = [
    "log(P(S1))",
    "log(P(S2))",
    "log(P(S1)) > log(P(S2))",
    "Confidence",
    "Ppl(S1)",
    "Ppl(S2)",
]

pair_scores = []
for s1, s2 in zip(data3["rows__row__sentence_good"], data3["rows__row__sentence_bad"]):
    log_prob_s1, ppl_s1 = calculate_sentence_log_prob(model, tokenizer, s1, device)
    log_prob_s2, ppl_s2 = calculate_sentence_log_prob(model, tokenizer, s2, device)

    pair_scores.append({
        "log(P(S1))": log_prob_s1,
        "log(P(S2))": log_prob_s2,
        # 1.0 when the good sentence gets the higher log-probability, else 0.0
        # (stored as a float so the saved CSV keeps its 1.0 / 0.0 format).
        "log(P(S1)) > log(P(S2))": 1.0 if log_prob_s1 > log_prob_s2 else 0.0,
        # Absolute log-probability gap between the two sentences.
        "Confidence": abs(log_prob_s1 - log_prob_s2),
        "Ppl(S1)": ppl_s1,
        "Ppl(S2)": ppl_s2,
    })

scores = pd.DataFrame(pair_scores, columns=score_columns)
for column in score_columns:
    data3[column] = scores[column].to_numpy()

In [11]:
# Inspect the scored pairs; the bare expression renders the frame as the cell output.
data3

Unnamed: 0,rows__row__sentence_good,rows__row__sentence_bad,log(P(S1)),log(P(S2)),log(P(S1)) > log(P(S2)),Confidence,Ppl(S1),Ppl(S2)
0,I eat poha with tea,I eat plate with tea,-42.390831,-38.27961,0.0,4.111221,200.10733,2113.122385
1,I ordered a cycle,I ordered a wall,-28.28074,-22.32523,0.0,5.95551,286.044661,265.41839
2,I gave him water,I gave him air,-24.98711,-26.826954,1.0,1.839844,516.346233,817.898685
3,I eat a tablet,I eat an ipad,-34.161522,-35.26274,1.0,1.101218,927.325353,356.781431
4,I and he drank coffee together,I and he drank coffee mug together,-43.30484,-46.841705,1.0,3.536866,486.095768,349.049297
5,He has 10 lakh ruppes in his bank,He has 10 lakh cars in his bank,-51.228065,-56.218821,1.0,4.990756,38.828228,108.299627
6,He has calculator in his phone,He has car in his phone,-42.809395,-44.94725,1.0,2.137856,210.855772,614.639408


In [12]:
# Save the scored pairs to the Kaggle working directory.
# NOTE(review): no `index=` argument is passed, so pandas presumably writes the
# row index as an extra unnamed first column — confirm that is intended.
data3.to_csv("/kaggle/working/Self_curated_data_output.csv")

### Testing on Reference Tracking Data

In [13]:
data4 = pd.read_csv("/kaggle/input/dataset/Reference_tracking_data.csv")

# Score every good/bad sentence pair first and attach the result columns once
# afterwards. This avoids growing the frame cell-by-cell with `.loc` while it
# is being iterated, keeps the assignment positional (safe even if the index
# has repeated labels) and yields the same columns for an empty input file.
score_columns = [
    "log(P(S1))",
    "log(P(S2))",
    "log(P(S1)) > log(P(S2))",
    "Confidence",
    "Ppl(S1)",
    "Ppl(S2)",
]

pair_scores = []
for s1, s2 in zip(data4["rows__row__sentence_good"], data4["rows__row__sentence_bad"]):
    log_prob_s1, ppl_s1 = calculate_sentence_log_prob(model, tokenizer, s1, device)
    log_prob_s2, ppl_s2 = calculate_sentence_log_prob(model, tokenizer, s2, device)

    pair_scores.append({
        "log(P(S1))": log_prob_s1,
        "log(P(S2))": log_prob_s2,
        # 1.0 when the good sentence gets the higher log-probability, else 0.0
        # (stored as a float so the saved CSV keeps its 1.0 / 0.0 format).
        "log(P(S1)) > log(P(S2))": 1.0 if log_prob_s1 > log_prob_s2 else 0.0,
        # Absolute log-probability gap between the two sentences.
        "Confidence": abs(log_prob_s1 - log_prob_s2),
        "Ppl(S1)": ppl_s1,
        "Ppl(S2)": ppl_s2,
    })

scores = pd.DataFrame(pair_scores, columns=score_columns)
for column in score_columns:
    data4[column] = scores[column].to_numpy()

In [14]:
# Inspect the scored pairs; the bare expression renders the frame as the cell output.
data4

Unnamed: 0,rows__row__sentence_good,rows__row__sentence_bad,log(P(S1)),log(P(S2)),log(P(S1)) > log(P(S2)),Confidence,Ppl(S1),Ppl(S2)
0,Riya gave Anu a gift because she had a birthday.,Riya gave Anu a gift because she had a basket.,-55.073737,-65.548556,1.0,10.474819,51.102753,107.99012
1,Arjun called Kabir because he needed help with...,Arjun called Kabir because the project needed ...,-54.337789,-68.33842,1.0,14.000631,48.485784,131.803965
2,Meera hugged Sanya because she was crying.,Meera hugged Sanya because the hallway was cry...,-50.235772,-68.761339,1.0,18.525568,96.244163,308.003639
3,The dog barked at the stranger because he ente...,The dog barked at the stranger because the gat...,-43.306327,-55.225323,1.0,11.918996,27.973452,51.659076
4,Amit cleaned the table because it had food sta...,Amit cleaned the table because the food stains...,-50.619358,-67.126022,1.0,16.506664,67.916556,120.870035
5,Tina put the book in the bag because it was to...,Tina put the book in the bag because the bag w...,-51.264824,-57.534032,1.0,6.269208,24.632125,29.498955
6,Rohan thanked Neeraj because he fixed the comp...,Rohan thanked Neeraj because the computer fixe...,-62.479733,-71.225389,1.0,8.745657,122.257964,239.577298
7,Priya covered the pot because it was boiling o...,Priya covered the pot because the boiling wate...,-49.779676,-64.463325,1.0,14.683648,63.326657,99.935321
8,The car hit the pole because it slipped on the...,The car hit the pole because the pole slipped ...,-41.874022,-50.661323,1.0,8.787301,25.055131,37.28779
9,Anita washed the spoon because it was dirty.,Anita washed the spoon because she was dirty.,-45.292768,-51.623998,1.0,6.331229,92.691507,174.582912


In [15]:
# Save the scored pairs to the Kaggle working directory.
# NOTE(review): no `index=` argument is passed, so pandas presumably writes the
# row index as an extra unnamed first column — confirm that is intended.
data4.to_csv("/kaggle/working/Reference_tracking_data_output.csv")

In [16]:
# Switch the model to evaluation mode and report which architecture was loaded.
# NOTE(review): this cell runs after every scoring cell in the notebook, so it
# cannot have influenced those results; presumably the model was already in
# eval mode after loading — confirm, or move this call next to the loading code.
model.eval()
# `model_type` is read with getattr so a config without that attribute prints None.
print("model_type:", getattr(model.config, "model_type", None), "class:", type(model))

model_type: mistral class: <class 'transformers.models.mistral.modeling_mistral.MistralForCausalLM'>
