In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import json
from transformers import pipeline

  warn(





In [None]:
# -----------------------------
# 1. Build FAISS Index in rag_pipeline
# -----------------------------
def build_faiss_index(chunks_path):
    """Load pre-embedded chunks from a JSONL file and index them with FAISS.

    Args:
        chunks_path: Path to a UTF-8 JSON Lines file. Every non-blank line is
            parsed with ``json.loads`` and must provide an ``'embedding'``
            entry; all embeddings must have the same length.

    Returns:
        A ``(model, index, chunks)`` tuple: the SentenceTransformer used to
        embed queries, a ``faiss.IndexFlatL2`` holding one vector per chunk
        (in file order), and the list of parsed chunk records.

    Raises:
        ValueError: If the file holds no chunks, or the embeddings do not form
            a 2-D (n_chunks, dim) array.
    """
    # Read and validate the data first so that a bad file fails fast, before
    # the (comparatively expensive) embedding model is loaded.
    with open(chunks_path, "r", encoding="utf-8") as f:
        # Blank lines (e.g. a trailing newline at end of file) are not records.
        chunks = [json.loads(line) for line in f if line.strip()]

    if not chunks:
        raise ValueError(f"No chunks found in {chunks_path!r}")

    embeddings = np.asarray([chunk['embedding'] for chunk in chunks], dtype='float32')
    if embeddings.ndim != 2:
        raise ValueError(
            f"Expected one fixed-length embedding per chunk, got array of shape {embeddings.shape}"
        )

    # Create FAISS index sized to the stored embedding dimension
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # Load model used to embed queries at retrieval time
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    return model, index, chunks


In [None]:
# -----------------------------
# 2. Retrieve function
# -----------------------------
def retrieve_chunks(query, model, index, chunks, top_k=5):
    """Return the chunks whose stored embeddings are closest to the query.

    Args:
        query: Text to search for; embedded with ``model.encode``.
        model: Object exposing ``encode(text)`` that returns a 1-D vector.
        index: Object exposing ``search(vectors, k)`` that returns
            ``(distances, labels)``; labels are positions into ``chunks``.
        chunks: Sequence of chunk records, in the order they were indexed.
        top_k: Maximum number of chunks to return.

    Returns:
        A list of at most ``top_k`` chunk records, in the order the index
        ranked them. Empty when ``top_k`` is not positive or ``chunks`` is
        empty.
    """
    if top_k <= 0 or not chunks:
        return []

    # Never request more neighbours than there are chunks to return.
    k = min(top_k, len(chunks))

    query_embedding = np.asarray(model.encode(query), dtype='float32').reshape(1, -1)
    _, labels = index.search(query_embedding, k)

    # FAISS pads missing results with label -1; indexing a list with -1 would
    # silently return the *last* chunk, so discard anything out of range.
    return [chunks[i] for i in labels[0] if 0 <= i < len(chunks)]


In [None]:
# -----------------------------
# 3. Load generator model
# -----------------------------
# Text-to-text generation pipeline; the model weights and the tokenizer are
# both taken from the same "google/flan-t5-base" checkpoint.
generator = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
    tokenizer="google/flan-t5-base",
    device=-1,  # -1 selects the CPU; use 0 for GPU
)


Device set to use cpu


In [None]:
# -----------------------------
# 4. Answer generation
# -----------------------------
def _build_prompt(context, query):
    """Fill the fixed instruction template with the context and the question."""
    return f"""
    You are an expert eligibility officer.
    Using only the context below, answer the question truthfully.
    If the answer is not in the context, say "I cannot find relevant information."

    Context:
    {context}

    Question: {query}
    Answer:
    """


def _shrink_context_to_fit(context, query, tokenizer, max_tokens, max_rounds=5):
    """Trim the tail of `context` until the full prompt fits in `max_tokens`."""
    for _ in range(max_rounds):
        overflow = len(tokenizer.encode(_build_prompt(context, query))) - max_tokens
        if overflow <= 0 or not context:
            break
        context_ids = tokenizer.encode(context, add_special_tokens=False)
        keep = max(len(context_ids) - overflow, 0)
        # Decoding and re-encoding may not round-trip to exactly the same
        # token count, hence the bounded re-check loop.
        context = tokenizer.decode(context_ids[:keep], skip_special_tokens=True)
    return context


def generate_answer(query, model, index, chunks, top_k=5):
    """Answer `query` from retrieved chunks using the module-level `generator`.

    The chunks returned by `retrieve_chunks` are joined into the "Context"
    section of the prompt. If `generator` exposes a tokenizer with a usable
    `model_max_length`, the context is shortened from the end so that the
    whole prompt (instructions, context and question) stays within that
    limit; otherwise the instructions and question at the end of the prompt
    would overrun the model's input length.

    Args:
        query: The user's question.
        model: Embedding model, forwarded to `retrieve_chunks`.
        index: Vector index, forwarded to `retrieve_chunks`.
        chunks: Chunk records, forwarded to `retrieve_chunks`; each retrieved
            record must have a ``'text'`` entry and may have a ``'source'``.
        top_k: Number of chunks to retrieve.

    Returns:
        ``(answer, citations)``: the generated text and one
        ``"<source> — <text>"`` string per retrieved chunk. When nothing is
        retrieved, the fallback sentence from the prompt and an empty list.
    """
    # Step 1: Retrieve
    retrieved = retrieve_chunks(query, model, index, chunks, top_k=top_k)
    if not retrieved:
        # Nothing to ground an answer on; do not ask the model to guess.
        return "I cannot find relevant information.", []

    # Step 2: Prepare context
    context = "\n\n".join([f"Source: {r.get('source', 'Unknown')}\n{r['text']}" for r in retrieved])

    # Step 3: Keep the prompt within the generator's input limit
    tokenizer = getattr(generator, "tokenizer", None)
    max_tokens = getattr(tokenizer, "model_max_length", None)
    # Skip trimming when the limit is missing or an "unbounded" sentinel value.
    if isinstance(max_tokens, int) and 0 < max_tokens < 1_000_000:
        context = _shrink_context_to_fit(context, query, tokenizer, max_tokens)

    # Step 4: Build prompt
    prompt = _build_prompt(context, query)

    # Step 5: Generate
    output = generator(prompt, max_new_tokens=200)

    # Step 6: Collect citations
    citations = [f"{r.get('source', 'Unknown')} — {r['text']}" for r in retrieved]

    return output[0]["generated_text"], citations



In [None]:
# -----------------------------
# 5. Run everything
# -----------------------------
# JSONL file of chunk records that already carry their embeddings.
chunks_path = r"E:\Info_Srping\swiftvisa\index\chunks_with_embeddings.jsonl"

# Build the retrieval components: embedding model, FAISS index, chunk records.
model, index, chunks = build_faiss_index(chunks_path=chunks_path)

query = "Is the student eligible for UK Student Visa?"
answer, citations = generate_answer(
    query=query,
    model=model,
    index=index,
    chunks=chunks,
    top_k=5,
)

print("\nFinal Answer:\n", answer)
print("\nCitations:\n", citations)

Token indices sequence length is longer than the specified maximum sequence length for this model (1763 > 512). Running this sequence through the model will result in indexing errors



Final Answer:
 Yes

Citations:
 ['Student and Child Student']


In [None]:
# `model`, `index` and `chunks` were already built by the "Run everything"
# cell; rebuilding them here would reload the embedding model and re-read the
# JSONL file without changing the result, so they are reused as-is.
query = "What are the eligibility requirements for a UK Student Visa?"
answer, citations = generate_answer(query, model, index, chunks, top_k=5)

print("\nFinal Answer:\n", answer)
print("\nCitations:\n", citations)


Final Answer:
 validity requirements for the Student route, contained in Appendix Student ST 1.1 to 1.5 • the validity requirements for the Child Student route, contained in Appendix Child Student CS 1.1 to 1.5 • the validity requirements for dependants of a Student, contained in Appendix Student ST 28.1 to 28.4 Detailed guidance on how to assess the validity requirements can be found in the Validation, variation, voiding and withdrawing of applications guidance. The caseworker must conduct verification checks if they have any doubts about whether the supporting documents an applicant has submitted are genuine. If an applicant has previously had official financial sponsorship from a government or international sponsorship agency, covering both course fees and living costs, it is a validation requirement that they must obtain consent from the financial sponsor to a further application for permission to study in the UK as a Student being made within 12 months of completing that course, 

In [None]:
# `model`, `index` and `chunks` were already built by the "Run everything"
# cell; rebuilding them here would reload the embedding model and re-read the
# JSONL file without changing the result, so they are reused as-is.
query = "I am from Canada and applying for a UK Student Visa. Do I need to prove my English language ability?"
answer, citations = generate_answer(query, model, index, chunks, top_k=5)

print("\nFinal Answer:\n", answer)
print("\nCitations:\n", citations)


Final Answer:
 Yes

Citations:
 ['Student and Child Student — Page 48 of 107  Published for Home Office staff on 16 July 2025 \nStudent: English language requirement \nThis page tells caseworkers about the English language requirement for applicants \non the Student route. \n \nApplicants on the Student route must meet a required level of English language \nability depending on the level of their course.  \n \nThe caseworker must check the applicant’s Confirmation of Acceptance for Studies \nto determine what level of English is required and what assessment was carried out \nor what evidence has been used to demonstrate that the applicant meets the \nrequirement. \n \nIf applying to study a course below degree level, the applicant must have the \nequivalent of level B1 of the Common European Framework of References for \nEnglish language.  \n \nIf applying to study a course at degree level or above, the applicant must have the \nequivalent of level B2 of the Common European Framework 