In [None]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.0-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.7 kB)
Downloading faiss_cpu-1.13.0-cp39-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.6/23.6 MB[0m [31m114.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.0


In [None]:
# ===============================================================
# 1. IMPORTS
# ===============================================================
import pandas as pd
import torch
from transformers import (
    AutoTokenizer, AutoModelForSeq2SeqLM,
    RobertaTokenizerFast, RobertaForSequenceClassification
)
from retrieval import ReviewRetrieval
from sklearn.model_selection import train_test_split

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

Using device: cuda


In [None]:

# ===============================================================
# 2. LOAD ROUTING FILE
# ===============================================================
df = pd.read_csv("routing_output.csv")

# If your file uses different column names, rename them here, e.g.:
# df = df.rename(columns={
#     "cleanedreview2": "cleaned_review2",
#     "cleaned_reply2": "cleaned_response2"
# })

# Fail fast with a readable message if a column this notebook reads later
# (retrieval corpus, inference loop) is absent, instead of a bare KeyError
# several cells further down.
required_columns = {"cleaned_review2", "cleaned_response2", "review_sentiment"}
missing_columns = required_columns - set(df.columns)
if missing_columns:
    raise KeyError(
        f"routing_output.csv is missing expected columns: {sorted(missing_columns)}"
    )

print("Loaded routing file:", df.shape)

Loaded routing file: (9952, 18)


In [None]:
# ===============================================================
# 3. SPLIT DATA FIRST (70/15/15)
# ===============================================================

from sklearn.model_selection import train_test_split

# 70% of the rows go to train; the held-out 30% is then cut in half to give
# the validation and test partitions. Both calls use the same fixed seed.
train_df, temp_df = train_test_split(df, random_state=42, test_size=0.30)
val_df, test_df = train_test_split(temp_df, random_state=42, test_size=0.50)

# Renumber the test rows from 0 so the index can double as a row counter.
test_df = test_df.reset_index(drop=True)

print("Test samples:", test_df.shape[0])

Test samples: 1493


In [None]:
# ===============================================================
# 4. SENTIMENT FUNCTION
# ===============================================================
def get_sentiment(score):
    """Map a numeric sentiment score to a binary label.

    Returns "positive" only when ``score`` is strictly greater than zero;
    every other value (zero, negatives, NaN) yields "negative".
    """
    return "positive" if score > 0 else "negative"

In [None]:
# ===============================================================
# 5. LOAD TRIGGER MODEL (SHORT/LONG)
# ===============================================================
# Tokenizer and classifier are both read from the "triggerModule" checkpoint.
trigger_tokenizer = RobertaTokenizerFast.from_pretrained("triggerModule")
trigger_model = RobertaForSequenceClassification.from_pretrained("triggerModule")
# Move the classifier to the selected device and put it in evaluation mode.
trigger_model = trigger_model.to(device)
trigger_model.eval()

def trigger_predict(text):
    """Classify ``text`` with the trigger model and return "short" or "long".

    The text is tokenized (with truncation), run through ``trigger_model``
    without gradient tracking, and the arg-max class is mapped to a label:
    class 0 -> "short", any other class -> "long".
    """
    encoded = trigger_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
    )
    encoded = encoded.to(device)

    with torch.no_grad():
        class_scores = trigger_model(**encoded).logits

    predicted_class = class_scores.argmax(dim=1).item()
    if predicted_class == 0:
        return "short"
    return "long"

In [None]:
# ===============================================================
# 6. RETRIEVAL MODEL USING ONLY TRAIN DATA
# (so test data remains unseen!)
# ===============================================================
# Build the retrieval index from TRAIN rows only (rows missing either the review
# or the response are dropped), so validation/test reviews are never indexed.
retriever = ReviewRetrieval(
    df=train_df[["cleaned_review2", "cleaned_response2"]].dropna(),
    embed_col="cleaned_review2",
    reply_col="cleaned_response2",
    model_name="sentence-transformers/all-mpnet-base-v2",
    # Follow the notebook-wide device choice instead of always requesting the GPU.
    # NOTE(review): how ReviewRetrieval treats use_gpu=True on a CPU-only runtime
    # is not visible here — confirm against retrieval.py.
    use_gpu=(device == "cuda")
)

Loading model: sentence-transformers/all-mpnet-base-v2 ...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Using GPU for embeddings.
Encoding all reviews into embeddings...


Batches:   0%|          | 0/218 [00:00<?, ?it/s]

Building FAISS index...
FAISS index built. Number of items: 6966


In [None]:
# ===============================================================
# 7. LOAD YOUR 4 TRAINED GENERATORS
# ===============================================================
path_pos_short = "/content/drive/MyDrive/pos_short_generator_retrieval"
path_pos_long  = "/content/drive/MyDrive/pos_long_generator_retrieval"
path_neg_short = "/content/drive/MyDrive/neg_short_generator_retrieval"
path_neg_long  = "/content/drive/MyDrive/neg_long_generator_retrieval"


def _load_generator(checkpoint_dir):
    """Load one generator checkpoint.

    Returns a ``(tokenizer, model)`` pair read from ``checkpoint_dir``, with the
    seq2seq model already moved to ``device``.
    """
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
    generator = AutoModelForSeq2SeqLM.from_pretrained(checkpoint_dir).to(device)
    return tokenizer, generator


# One (tokenizer, model) pair per sentiment x length route.
tok_pos_short, model_pos_short = _load_generator(path_pos_short)
tok_pos_long, model_pos_long = _load_generator(path_pos_long)
tok_neg_short, model_neg_short = _load_generator(path_neg_short)
tok_neg_long, model_neg_long = _load_generator(path_neg_long)

In [None]:
# ===============================================================
# 8. MASTER INFERENCE FUNCTION
# ===============================================================
def _build_generator_prompt(sentiment, trigger, review_text, context):
    """Return the input string fed to the generator chosen for this route."""
    if sentiment == "negative" and trigger == "short":
        # Only the negative/short route uses the task-prefixed prompt.
        # NOTE(review): presumably this mirrors how that generator was trained — confirm.
        return (
            "task: negative_short_reply | review: "
            + review_text
            + " | retrieved: "
            + context
        )
    return "review: " + review_text + " retrieved: " + context


def full_pipeline(review_text, sentiment_score):
    """Generate a reply for one review by routing it to one of four generators.

    Steps: derive the sentiment label from ``sentiment_score``, predict the
    short/long trigger from ``review_text``, retrieve the top-3 items from
    ``retriever`` and join them with spaces, pick the generator matching
    (sentiment, trigger), and decode a beam-search generation.

    Args:
        review_text: The review to answer (string).
        sentiment_score: Numeric score passed to ``get_sentiment``.

    Returns:
        A tuple ``(sentiment, trigger, context, reply)`` where ``context`` is the
        space-joined retrieval result and ``reply`` is the decoded generation.
    """
    # STEP 1 — sentiment
    sentiment = get_sentiment(sentiment_score)

    # STEP 2 — trigger
    trigger = trigger_predict(review_text)

    # STEP 3 — retrieval (train data only)
    retrieved = retriever.retrieve(review_text, top_k=3)
    context = " ".join(retrieved)

    # STEP 4 — choose generator; any route not listed falls through to negative/long
    generators = {
        ("positive", "short"): (tok_pos_short, model_pos_short),
        ("positive", "long"): (tok_pos_long, model_pos_long),
        ("negative", "short"): (tok_neg_short, model_neg_short),
    }
    tok, model = generators.get(
        (sentiment, trigger), (tok_neg_long, model_neg_long)
    )
    final_input = _build_generator_prompt(sentiment, trigger, review_text, context)

    # STEP 5 — generate
    enc = tok(final_input, return_tensors="pt", truncation=True).to(device)
    output = model.generate(
        **enc,
        max_length=150,
        num_beams=4,
        early_stopping=True
    )
    reply = tok.decode(output[0], skip_special_tokens=True)

    return sentiment, trigger, context, reply


In [None]:
# ===============================================================
# 9. RUN INFERENCE ON TEST SET ONLY
# ===============================================================
# Each entry is the (sentiment, trigger, context, reply) tuple for one test row.
pipeline_outputs = []

for i, row in test_df.iterrows():
    print(f"Processing {i+1}/{len(test_df)} ...")
    pipeline_outputs.append(
        full_pipeline(row["cleaned_review2"], row["review_sentiment"])
    )

# Split the per-row tuples into one list per output field.
sentiments = [result[0] for result in pipeline_outputs]
triggers = [result[1] for result in pipeline_outputs]
retrieved_ctx_list = [result[2] for result in pipeline_outputs]
final_replies = [result[3] for result in pipeline_outputs]

# Attach the results as new columns, in this order.
new_columns = {
    "sentiment_label": sentiments,
    "trigger_label": triggers,
    "retrieved_context": retrieved_ctx_list,
    "generated_reply": final_replies,
}
for column_name, column_values in new_columns.items():
    test_df[column_name] = column_values


Processing 1/1493 ...
Processing 2/1493 ...
Processing 3/1493 ...
Processing 4/1493 ...
Processing 5/1493 ...
Processing 6/1493 ...
Processing 7/1493 ...
Processing 8/1493 ...
Processing 9/1493 ...
Processing 10/1493 ...
Processing 11/1493 ...
Processing 12/1493 ...
Processing 13/1493 ...
Processing 14/1493 ...
Processing 15/1493 ...
Processing 16/1493 ...
Processing 17/1493 ...
Processing 18/1493 ...
Processing 19/1493 ...
Processing 20/1493 ...
Processing 21/1493 ...
Processing 22/1493 ...
Processing 23/1493 ...
Processing 24/1493 ...
Processing 25/1493 ...
Processing 26/1493 ...
Processing 27/1493 ...
Processing 28/1493 ...
Processing 29/1493 ...
Processing 30/1493 ...
Processing 31/1493 ...
Processing 32/1493 ...
Processing 33/1493 ...
Processing 34/1493 ...
Processing 35/1493 ...
Processing 36/1493 ...
Processing 37/1493 ...
Processing 38/1493 ...
Processing 39/1493 ...
Processing 40/1493 ...
Processing 41/1493 ...
Processing 42/1493 ...
Processing 43/1493 ...
Processing 44/1493 .

In [None]:
# ===============================================================
# 10. SAVE FINAL INFERENCE FILE
# ===============================================================
# Write the test rows together with the generated columns, without the index.
output_csv = "/content/live_demo_inference.csv"
test_df.to_csv(output_csv, index=False)
print(f"Saved → {output_csv}")

Saved → /content/live_demo_inference.csv
