In [None]:
# Cell 1: Inference & Evaluation on Kaggle Dataset
!pip install transformers datasets scikit-learn sentencepiece pandas

import os
import zipfile
import pandas as pd
import ast
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm

# --- 1. SETUP MODEL ---
# The fine-tuned checkpoint is shipped as a zip archive inside a Kaggle input dataset.
zip_path = "/kaggle/input/finstructabsa/final_fin_model.zip"  # Ensure you uploaded this file!
extract_path = "my_trained_model"

# Unpack only when the target directory does not exist yet, so re-running
# the cell reuses the previous extraction.
already_extracted = os.path.exists(extract_path)
if not already_extracted:
    print(f"Extracting {zip_path}...")
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_path)

# The checkpoint may be nested inside sub-folders of the archive: use the
# first directory (in os.walk order) that contains a config.json.
model_dir = next(
    (folder for folder, _, names in os.walk(extract_path) if "config.json" in names),
    None,
)
if model_dir is None:
    raise ValueError("Model not found in zip!")

print(f"‚úÖ Model loaded from: {model_dir}")
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Prefer the GPU when one is visible to torch, otherwise stay on the CPU.
run_device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForSeq2SeqLM.from_pretrained(model_dir).to(run_device)

# --- 2. LOAD DATA FROM KAGGLE INPUT (The Modification) ---
print("Search for dataset...")
dataset_dir = "/kaggle/input/aspect-based-sentiment-analysis-for-financial-news"

# Take the FIRST CSV encountered while walking the dataset directory.
# A nested for/break only leaves the inner loop, which lets a CSV found in a
# later directory overwrite the result; next() on a flattened generator stops
# the whole walk at the first match.
csv_path = next(
    (
        os.path.join(root, name)
        for root, _, files in os.walk(dataset_dir)
        for name in files
        if name.endswith(".csv")
    ),
    None,
)

if not csv_path:
    raise FileNotFoundError("Could not find CSV in /kaggle/input/...")
print(f"Reading data from: {csv_path}")

df = pd.read_csv(csv_path)

# --- 3. PROCESS DATA (Match Training Logic) ---
# We need to extract (Sentence, Aspect, Sentiment) triplets:
# one evaluation row per aspect listed in a headline's 'Decisions' dict.
eval_rows = []
skipped_items = 0  # rows/aspects dropped because the annotation could not be used

# Choose 'test' or 'val'. Usually 'test' is for final report.
target_split = 'test'
print(f"Filtering for split: {target_split}")

# Normalise the split column once instead of once per row.
split_mask = df['split'].astype(str).str.strip().str.lower() == target_split

for _, row in df[split_mask].iterrows():
    raw_decisions = row['Decisions']

    # Parse Decisions (e.g., "{'Stocks': 'Positive'}")
    if isinstance(raw_decisions, str):
        # Fix common CSV quoting issues (doubled quotes left over from CSV escaping).
        raw_decisions = raw_decisions.replace('""', '"')
        try:
            decisions = ast.literal_eval(raw_decisions)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Only malformed literals are skipped. Schema problems such as a
            # missing column, and KeyboardInterrupt, are no longer swallowed.
            skipped_items += 1
            continue
    else:
        decisions = raw_decisions

    if not isinstance(decisions, dict):
        skipped_items += 1
        continue

    # Create One Row per Aspect (ATSC Task)
    for aspect, sentiment in decisions.items():
        if not isinstance(sentiment, str):
            # A non-string label cannot be lower-cased; drop just this aspect.
            skipped_items += 1
            continue
        eval_rows.append({
            'raw_text': row['Title'],
            'term': aspect,
            'labels': sentiment.lower(),
        })

# Explicit columns keep the frame usable downstream even when no row matched.
df_eval = pd.DataFrame(eval_rows, columns=['raw_text', 'term', 'labels'])
print(f"Loaded {len(df_eval)} examples for evaluation.")
if skipped_items:
    print(f"Skipped {skipped_items} row(s)/aspect(s) with unusable 'Decisions' annotations.")

# --- 4. FORMAT PROMPTS (Crucial: Must Match Training) ---
# InstructABSA-2 (ATSC) style instruction prompt.
# Layout: task definition + six worked examples (2 positive, 2 negative,
# 2 neutral) + the query to complete. The text below is kept verbatim,
# including its trailing whitespace.
prompt_prefix = """Definition: The output will be 'positive' if the sentiment of the identified financial entity or aspect in the input is positive (good news, growth, profit). If the sentiment is negative (loss, drop, risk), the answer will be 'negative'. Otherwise, the output should be 'neutral'. For aspects which are classified as noaspectterm, the sentiment is none.
Positive example 1- 
input: Profits for Apple surged by 20% this quarter exceeding expectations. The aspect is Profits. 
output: positive   
Positive example 2-
input: The bank maintains a healthy capital adequacy ratio. The aspect is capital adequacy ratio.    
output: positive
Negative example 1-
input: Stocks of Tesla fell sharply due to production delays. The aspect is Stocks.
output: negative
Negative example 2-
input: Rising debt levels are a major concern for the investors. The aspect is debt levels.
output: negative
Neutral example 1-
input: SpiceJet to issue 6.4 crore warrants to promoters. The aspect is SpiceJet.
output: neutral
Neutral example 2-
input: The merger discussion is still ongoing with no final decision. The aspect is merger.
output: neutral
Now complete the following example-
input: """


# Each query is appended after the trailing "input: " of the prefix and ends
# with "output:" so the model generates the sentiment label next.
query_template = "{prefix}{text} The aspect is {term}\noutput:"
inputs = [
    query_template.format(prefix=prompt_prefix, text=row['raw_text'], term=row['term'])
    for _, row in df_eval.iterrows()
]

# --- 5. RUN INFERENCE ---
print("Running predictions (This may take a few minutes)...")
batch_size = 32
predictions = []

# Gradients are not needed for generation, so the whole loop runs under no_grad.
with torch.no_grad():
    for start in tqdm(range(0, len(inputs), batch_size)):
        chunk = inputs[start : start + batch_size]

        # Pad to the longest prompt in the chunk and truncate at 512 tokens,
        # then move the tensors to the device the model lives on.
        encoded = tokenizer(
            chunk,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        ).to(model.device)

        # The answer is a single label word, so 10 new tokens is the cap.
        generated = model.generate(**encoded, max_new_tokens=10)

        decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
        predictions.extend(decoded)

# --- 6. FINAL REPORT ---
# Normalise gold labels and generated text the same way (case, whitespace,
# stray periods) before comparing them.
y_true = [str(label).lower().strip() for label in df_eval['labels']]
y_pred = [str(pred).lower().strip().replace('.', '') for pred in predictions]

# The model is generative, so it can emit strings outside the label set.
# Such outputs still count as errors for their gold class, but they are
# excluded from the per-class table below, so surface them explicitly
# instead of hiding them.
valid_labels = ['positive', 'negative', 'neutral']
unexpected_preds = [pred for pred in y_pred if pred not in valid_labels]
if unexpected_preds:
    print(
        f"Warning: {len(unexpected_preds)} prediction(s) outside {valid_labels}: "
        f"{sorted(set(unexpected_preds))}"
    )

print("\n" + "="*50)
print(f"CLASSIFICATION REPORT ({target_split.upper()} SET)")
print("="*50)
# When out-of-label predictions exist, classification_report replaces its
# accuracy row with a micro average, so print accuracy separately.
print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
print(classification_report(y_true, y_pred, labels=valid_labels, digits=4, zero_division=0))

Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.2 kB)
Downloading pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (47.7 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m47.7/47.7 MB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: pyarrow
  Attempting uninstall: pyarrow
    Found existing installation: pyarrow 19.0.1
    Uninstalling pyarrow-19.0.1:
      Successfully uninstalled pyarrow-19.0.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
pylibcudf-cu12 25.2.2 requires pyarrow<20.0.0a0,>=14.0.0; platform_machine == "x86_64", but you hav

2025-12-28 14:30:36.515216: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1766932236.687581      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1766932236.737576      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

Search for dataset...
Reading data from: /kaggle/input/aspect-based-sentiment-analysis-for-financial-news/SEntFiN-v1.1_with_split.csv
Filtering for split: test
Loaded 3000 examples for evaluation.
Running predictions (This may take a few minutes)...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 94/94 [00:36<00:00,  2.56it/s]


CLASSIFICATION REPORT (TEST SET)
              precision    recall  f1-score   support

    positive     0.9073    0.9192    0.9132      1065
    negative     0.8921    0.9141    0.9030       850
     neutral     0.8914    0.8627    0.8768      1085

    accuracy                         0.8973      3000
   macro avg     0.8969    0.8987    0.8977      3000
weighted avg     0.8973    0.8973    0.8972      3000






In [None]:
# Cell: Evaluation with Fixed Config for Unrecognized Model
import json
import os
import torch
from transformers import T5ForConditionalGeneration, T5Config, AutoTokenizer
from sklearn.metrics import classification_report
import pandas as pd
import ast
from tqdm import tqdm

# --- 1. ROBUST MODEL LOADING ---
# The uploaded checkpoint's config.json may lack 'model_type', which the Auto*
# classes need to resolve an architecture. Patch the config in memory and load
# through the explicit T5 classes instead.
target_model_path = "/kaggle/input/m/joemum/finstructabsa/pytorch/default/1"

print(f"üîÑ Attempting to load model from: {target_model_path}")

# Step A: Find the actual config file (the checkpoint may sit in nested sub-folders)
config_path = None
model_root = target_model_path
for root, dirs, files in os.walk(target_model_path):
    if "config.json" in files:
        config_path = os.path.join(root, "config.json")
        model_root = root
        break

if not config_path:
    raise ValueError("Could not find config.json in the input path!")

print(f"‚úÖ Found config at: {config_path}")

# Step B: Load Config & Force 't5' type
# We load the JSON manually to inject the missing key
with open(config_path, 'r', encoding='utf-8') as f:
    config_dict = json.load(f)

# FIX: Force the model type if missing
if 'model_type' not in config_dict:
    print("üõ†Ô∏è  Patching config: Adding 'model_type': 't5'")
    config_dict['model_type'] = 't5'
    # Also ensure architectures list is correct if missing
    if 'architectures' not in config_dict:
        config_dict['architectures'] = ["T5ForConditionalGeneration"]

# Create a config object from the dictionary
config = T5Config.from_dict(config_dict)

# Step C: Load Model using the specific T5 class and patched config
try:
    # We use T5ForConditionalGeneration directly instead of AutoModel
    model = T5ForConditionalGeneration.from_pretrained(model_root, config=config)
    # Hand the patched config to the tokenizer as well: without it,
    # AutoTokenizer may fall back to reading config.json from disk and hit
    # the same missing 'model_type' problem.
    tokenizer = AutoTokenizer.from_pretrained(model_root, config=config)

    # Move to GPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    print(f"‚úÖ Success! Model loaded on {device}")

except Exception as e:
    print(f"‚ùå Failed to load model: {e}")
    # Bare raise re-raises the active exception with its traceback unchanged.
    raise

# --- 2. LOAD DATA (Same as before) ---
dataset_dir = "/kaggle/input/aspect-based-sentiment-analysis-for-financial-news"

# Take the FIRST CSV encountered while walking the dataset directory.
# A nested for/break only leaves the inner loop, which lets a CSV found in a
# later directory overwrite the result; next() on a flattened generator stops
# the whole walk at the first match.
csv_path = next(
    (
        os.path.join(root, name)
        for root, _, files in os.walk(dataset_dir)
        for name in files
        if name.endswith(".csv")
    ),
    None,
)
if not csv_path:
    raise FileNotFoundError("CSV not found.")

df = pd.read_csv(csv_path)
# Build one evaluation row per (headline, aspect) pair of the chosen split.
eval_rows = []
skipped_items = 0  # rows/aspects dropped because the annotation could not be used
target_split = 'test'

# Normalise the split column once instead of once per row.
split_mask = df['split'].astype(str).str.strip().str.lower() == target_split

for _, row in df[split_mask].iterrows():
    raw_decisions = row['Decisions']

    if isinstance(raw_decisions, str):
        # Undo doubled quotes left over from CSV escaping before parsing the dict literal.
        raw_decisions = raw_decisions.replace('""', '"')
        try:
            decisions = ast.literal_eval(raw_decisions)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Only malformed literals are skipped. Schema problems such as a
            # missing column, and KeyboardInterrupt, are no longer swallowed.
            skipped_items += 1
            continue
    else:
        decisions = raw_decisions

    if not isinstance(decisions, dict):
        skipped_items += 1
        continue

    for aspect, sentiment in decisions.items():
        if not isinstance(sentiment, str):
            # A non-string label cannot be lower-cased; drop just this aspect.
            skipped_items += 1
            continue
        eval_rows.append({'raw_text': row['Title'], 'term': aspect, 'labels': sentiment.lower()})

# Explicit columns keep the frame usable downstream even when no row matched.
df_eval = pd.DataFrame(eval_rows, columns=['raw_text', 'term', 'labels'])
print(f"Loaded {len(df_eval)} examples.")
if skipped_items:
    print(f"Skipped {skipped_items} row(s)/aspect(s) with unusable 'Decisions' annotations.")

# --- 3. RUN INFERENCE ---
# Instruction prompt: task definition + six worked examples (2 positive,
# 2 negative, 2 neutral) + the query to complete. The text below is kept
# verbatim, including its trailing whitespace.
prompt_prefix = """Definition: The output will be 'positive' if the sentiment of the identified financial entity or aspect in the input is positive (good news, growth, profit). If the sentiment is negative (loss, drop, risk), the answer will be 'negative'. Otherwise, the output should be 'neutral'. For aspects which are classified as noaspectterm, the sentiment is none.
Positive example 1- 
input: Profits for Apple surged by 20% this quarter exceeding expectations. The aspect is Profits. 
output: positive   
Positive example 2-
input: The bank maintains a healthy capital adequacy ratio. The aspect is capital adequacy ratio.    
output: positive
Negative example 1-
input: Stocks of Tesla fell sharply due to production delays. The aspect is Stocks.
output: negative
Negative example 2-
input: Rising debt levels are a major concern for the investors. The aspect is debt levels.
output: negative
Neutral example 1-
input: SpiceJet to issue 6.4 crore warrants to promoters. The aspect is SpiceJet.
output: neutral
Neutral example 2-
input: The merger discussion is still ongoing with no final decision. The aspect is merger.
output: neutral
Now complete the following example-
input: """


# One prompt per evaluation row: the headline and its aspect follow the
# trailing "input: " of the prefix, and "output:" cues the label.
inputs = []
for _, example in df_eval.iterrows():
    headline = example['raw_text']
    aspect_term = example['term']
    inputs.append(f"{prompt_prefix}{headline} The aspect is {aspect_term}\noutput:")

print("Running predictions...")
batch_size = 64
predictions = []

# Gradients are not needed for generation, so the whole loop runs under no_grad.
with torch.no_grad():
    for offset in tqdm(range(0, len(inputs), batch_size)):
        prompt_batch = inputs[offset : offset + batch_size]

        # Pad to the longest prompt in the batch, truncate at 512 tokens,
        # and move the tensors to the model's device.
        model_inputs = tokenizer(
            prompt_batch,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        ).to(model.device)

        generated_ids = model.generate(**model_inputs, max_new_tokens=10)
        decoded_batch = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        predictions.extend(decoded_batch)

# --- 4. REPORT ---
# Normalise gold labels and generated text the same way (case, whitespace,
# stray periods) before comparing them.
y_true = [str(label).lower().strip() for label in df_eval['labels']]
y_pred = [str(pred).lower().strip().replace('.', '') for pred in predictions]

# Restrict the table to the real classes. Without `labels=`, any stray
# generation (e.g. "mixed") becomes its own zero-support class and drags the
# macro average down. Stray outputs still count as errors for their gold
# class; they are reported here rather than hidden.
valid_labels = ['positive', 'negative', 'neutral']
unexpected_preds = [pred for pred in y_pred if pred not in valid_labels]
if unexpected_preds:
    print(
        f"Warning: {len(unexpected_preds)} prediction(s) outside {valid_labels}: "
        f"{sorted(set(unexpected_preds))}"
    )

# When out-of-label predictions exist, classification_report replaces its
# accuracy row with a micro average, so print accuracy separately.
if y_true:
    n_correct = sum(gold == pred for gold, pred in zip(y_true, y_pred))
    print(f"Accuracy: {n_correct / len(y_true):.4f}")
print(classification_report(y_true, y_pred, labels=valid_labels, digits=4, zero_division=0))

üîÑ Attempting to load model from: /kaggle/input/m/joemum/finstructabsa/pytorch/default/1
‚úÖ Found config at: /kaggle/input/m/joemum/finstructabsa/pytorch/default/1/sentfin_model_output/atsc/googleflan-t5-base-run1/config.json
‚úÖ Success! Model loaded on cuda
Loaded 3000 examples.
Running predictions...


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 47/47 [00:10<00:00,  4.37it/s]


              precision    recall  f1-score   support

       mixed     0.0000    0.0000    0.0000         0
    negative     0.8613    0.9353    0.8968       850
     neutral     0.9229    0.7945    0.8539      1085
    positive     0.8660    0.9286    0.8962      1065

    accuracy                         0.8820      3000
   macro avg     0.6626    0.6646    0.6617      3000
weighted avg     0.8853    0.8820    0.8811      3000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
