In [None]:
!pip install streamlit pyngrok faiss-cpu sentence-transformers 
!pip install "bitsandbytes>=0.43.1" --quiet
!pip install sentence-transformers bert-score scikit-learn pandas nltk --quiet

In [32]:
import faiss
import pickle
import os
import numpy as np
from sentence_transformers import SentenceTransformer
import traceback
import json
import os
import subprocess
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
from bert_score import score as bert_score
import nltk


In [2]:
def load_user_profiles(path: str = "user_profiles.json") -> dict:
    """Load the user-profile definitions from a JSON file.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The parsed JSON content, or an empty dict when ``path`` does not
        exist (a message is printed in that case).

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    if not os.path.exists(path):
        # Report with print(): streamlit (`st`) is not imported in this
        # notebook, so calling st.error here raised NameError instead of
        # falling back to an empty profile set.
        print(f"Could not find user profiles file at {path}")
        return {}
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

# Build one FAISS index (plus a pickled list of the source documents) per user
# profile, so that retrieval can later be restricted to a single persona.
USER_PROFILES = load_user_profiles("/kaggle/input/user-profiles/user_profiles.json")

output_rag_dir = "/kaggle/working/rag_indices"
embedding_model_name = "all-MiniLM-L6-v2"
os.makedirs(output_rag_dir, exist_ok=True)

try:
    print(f"Loading embedding model: {embedding_model_name}")
    embedding_model = SentenceTransformer(embedding_model_name)
    embedding_dim = embedding_model.get_sentence_embedding_dimension()
    print(f"Embedding model loaded. Dimension: {embedding_dim}")

    for profile_key, profile_data in USER_PROFILES.items():
        print(f"\nProcessing profile: {profile_key}")
        docs_to_index = profile_data.get("personal_docs", [])

        # Profiles without documents get no index files at all.
        if not docs_to_index:
            print(f"  No personal_docs found for {profile_key}. Skipping index creation.")
            continue

        faiss_index_path = os.path.join(output_rag_dir, f"index_{profile_key}.faiss")
        narratives_path = os.path.join(output_rag_dir, f"docs_{profile_key}.pkl")

        print(f"  Generating {len(docs_to_index)} embeddings for {profile_key}")
        # FAISS expects a contiguous float32 matrix; this is a no-op when the
        # encoder already returns one.
        narrative_embeddings = np.ascontiguousarray(
            embedding_model.encode(docs_to_index, convert_to_numpy=True, show_progress_bar=False),
            dtype=np.float32,
        )
        print("  Embeddings generated.")

        print(f"  Building FAISS index for {profile_key}")
        index = faiss.IndexFlatL2(embedding_dim)
        index.add(narrative_embeddings)
        print(f"  Saving FAISS index ({index.ntotal} vectors) to: {faiss_index_path}")
        faiss.write_index(index, faiss_index_path)

        # The documents are stored in the same order as the vectors were added,
        # so a FAISS result id can be used as an index into this list.
        print(f"  Saving narratives list to: {narratives_path}")
        with open(narratives_path, "wb") as f:
            pickle.dump(docs_to_index, f)

        print(f"  Profile {profile_key} RAG components saved.")

except Exception as e:
    print(f"ERROR during RAG setup: {e}")
    traceback.print_exc()
else:
    # Only announce completion when no error interrupted the build; previously
    # this banner was printed even right after the ERROR message above.
    print("\n Per-Persona RAG Index Creation Complete ")

Loading embedding model: all-MiniLM-L6-v2
Embedding model loaded. Dimension: 384

Processing profile: Default
  No personal_docs found for Default. Skipping index creation.

Processing profile: Alex
  Generating 10 embeddings for Alex
  Embeddings generated.
  Building FAISS index for Alex
  Saving FAISS index (10 vectors) to: /kaggle/working/rag_indices/index_Alex.faiss
  Saving narratives list to: /kaggle/working/rag_indices/docs_Alex.pkl
  Profile Alex RAG components saved.

Processing profile: Sam
  Generating 8 embeddings for Sam
  Embeddings generated.
  Building FAISS index for Sam
  Saving FAISS index (8 vectors) to: /kaggle/working/rag_indices/index_Sam.faiss
  Saving narratives list to: /kaggle/working/rag_indices/docs_Sam.pkl
  Profile Sam RAG components saved.

Processing profile: AlexCarter
  Generating 24 embeddings for AlexCarter
  Embeddings generated.
  Building FAISS index for AlexCarter
  Saving FAISS index (24 vectors) to: /kaggle/working/rag_indices/index_AlexCarte

In [28]:
# Score generated responses against reference answers with two metrics:
# sentence-embedding cosine similarity and BERTScore (precision / recall / F1).
# NOTE(review): nothing in this cell uses the punkt tokenizer; the download is
# kept in case another part of the project relies on it.
nltk.download("punkt")

# Load embedding model
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# Sample dataset
df = pd.DataFrame([
    {
        "prompt": "Do you have any pets?",
        "reference": "I have a pet golden retriever named Max.",
        "generated_response": "Yes! Max, my golden retriever, is my loyal sidekick and best friend."
    },
    {
        "prompt": "What kind of movies do you enjoy watching?",
        "reference": "I love sci-fi movies set in space with warp speed and cool tech. Star Trek is my favorite!",
        "generated_response": "I love watching Sci-fi preferably set in space, with a dash of warp speed and a pinch of futuristic tech! (Wink) Star Trek Movies"
    }
])

prompts = df["prompt"].tolist()
references = df["reference"].tolist()
generated_responses = df["generated_response"].tolist()

# Cosine similarity: encode each column once as a batch; the diagonal of the
# pairwise matrix holds the similarity of reference i with response i.
ref_embs = embed_model.encode(references, convert_to_numpy=True)
gen_embs = embed_model.encode(generated_responses, convert_to_numpy=True)
cos_sims = cosine_similarity(ref_embs, gen_embs).diagonal()

# BERTScore: a single batched call. Calling bert_score once per row reloads
# the underlying roberta-large model on every iteration.
P, R, F1 = bert_score(generated_responses, references, lang="en", rescale_with_baseline=True)

results = [
    {
        "prompt": prompt,
        "reference": reference,
        "generated_response": generated,
        "cosine_similarity": round(float(cos_sim), 4),
        "bertscore_precision": round(precision.item(), 4),
        "bertscore_recall": round(recall.item(), 4),
        "bertscore_f1": round(f1.item(), 4)
    }
    for prompt, reference, generated, cos_sim, precision, recall, f1
    in zip(prompts, references, generated_responses, cos_sims, P, R, F1)
]

# Save results
results_df = pd.DataFrame(results)
results_df.to_csv("rag_model_scores_no_bleu.csv", index=False)
results_df.head()


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Unnamed: 0,prompt,reference,generated_response,cosine_similarity,bertscore_precision,bertscore_recall,bertscore_f1
0,Do you have any pets?,I have a pet golden retriever named Max.,"Yes! Max, my golden retriever, is my loyal sid...",0.6962,0.4074,0.5213,0.4646
1,What kind of movies do you enjoy watching?,I love sci-fi movies set in space with warp sp...,I love watching Sci-fi preferably set in space...,0.9202,0.4811,0.6034,0.5423


In [29]:
#Installing ngrok
!wget -q -c -nc https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.zip
!unzip -qq -n ngrok-v3-stable-linux-amd64.zip

!./ngrok config add-authtoken 2wVBgrUhAnFME3W6pdW6D7C7eYO_4ezkVq2JnLrVHSryLTXPY

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [30]:
# Start an ngrok tunnel to the Streamlit port (8501) in the background and
# report its public HTTPS URL.
import time

import requests
from IPython.display import display, HTML

NGROK_STARTUP_TIMEOUT_S = 15   # total time to wait for the tunnel to appear
NGROK_POLL_INTERVAL_S = 1      # pause between two queries of the local API
NGROK_REQUEST_TIMEOUT_S = 5    # per-request timeout so a stuck API cannot hang the cell


def _print_ngrok_log(path="ngrok.log"):
    """Print the ngrok log file, if it exists, to help diagnose startup problems."""
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as log_file:
            print(log_file.read())
    except OSError as log_error:
        print(f"Could not read {path}: {log_error}")


# Make sure only one ngrok agent is running before starting a new one.
try:
    subprocess.run(["killall", "ngrok"], check=True, capture_output=True)
    print("Existing ngrok processes killed.")
except (subprocess.CalledProcessError, FileNotFoundError):
    print("No existing ngrok process found or killall not available.")

print("Starting ngrok tunnel")
nohup_cmd = "nohup ./ngrok http 8501 > ngrok.log 2>&1 &"
os.system(nohup_cmd)

localhost_url = "http://localhost:4040/api/tunnels"  # Ngrok's local API
public_url = None
last_error = None

# Poll the local API until an https tunnel shows up instead of sleeping a fixed
# amount and trying exactly once: ngrok's startup time varies.
deadline = time.monotonic() + NGROK_STARTUP_TIMEOUT_S
while public_url is None and time.monotonic() < deadline:
    time.sleep(NGROK_POLL_INTERVAL_S)
    try:
        tunnel_info = requests.get(localhost_url, timeout=NGROK_REQUEST_TIMEOUT_S).json()
        last_error = None
    except (requests.RequestException, ValueError) as e:
        # API not reachable yet (or returned a non-JSON body); try again.
        last_error = e
        continue
    # Find the https tunnel URL
    public_url = next(
        (
            tunnel.get("public_url")
            for tunnel in tunnel_info.get("tunnels", [])
            if tunnel.get("proto") == "https"
        ),
        None,
    )

if public_url:
    print(f"Ngrok tunnel active. Public URL: {public_url}")
    display(HTML(f'<a href="{public_url}" target="_blank">{public_url}</a>'))
elif last_error is not None:
    print(f"Could not connect to ngrok API to get URL automatically: {last_error}")
    print("Check ngrok status and log file ('ngrok.log'). The tunnel might still be active.")
    _print_ngrok_log()
else:
    print("Could not automatically fetch ngrok URL. Check ngrok.log or manually check http://localhost:4040 if API is running.")
    print("Sometimes ngrok takes longer to start. Try checking the log file:")
    _print_ngrok_log()

print("Ngrok setup complete running in background.")

No existing ngrok process found or killall not available.
Starting ngrok tunnel
Ngrok tunnel active. Public URL: https://79dd-34-171-229-14.ngrok-free.app


Ngrok setup complete running in background.


In [31]:
# Launch the Streamlit app. No port is specified, and the captured output shows it serving on
# localhost:8501, which is the port the ngrok tunnel started earlier forwards to.
!streamlit run /kaggle/input/stream/fast_app.py      

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.19.2.2:8501[0m
[34m  External URL: [0m[1mhttp://34.171.229.14:8501[0m
[0m
2025-05-11 00:02:01.805179: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746921721.833072     213 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746921721.841367     213 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Loading models from path: /kaggle/input/finetuned/
Loading checkpoint shards: 100%|████████████████