In [1]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss
import json
from transformers import pipeline

  warn(





In [2]:
# -----------------------------
# 1. Build FAISS Index in rag_pipeline
# -----------------------------
def build_faiss_index(chunks_path):
    # Load model
    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

    # Load chunks from file
    chunks = []
    with open(chunks_path, "r", encoding="utf-8") as f:
        for line in f:
            chunks.append(json.loads(line))

    # Create FAISS index
    embedding_dim = len(chunks[0]['embedding'])
    index = faiss.IndexFlatL2(embedding_dim)
    embeddings = np.array([chunk['embedding'] for chunk in chunks]).astype('float32')
    index.add(embeddings)

    return model, index, chunks


In [3]:
# -----------------------------
# 2. Retrieve function
# -----------------------------
def retrieve_chunks(query, model, index, chunks, top_k=5):
    query_embedding = model.encode(query).astype('float32')
    D, I = index.search(np.array([query_embedding]), top_k)
    results = [chunks[i] for i in I[0]]
    return results


In [4]:
# -----------------------------
# 3. Load generator model
# -----------------------------
generator = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    tokenizer="google/flan-t5-base",
    device=-1  # CPU; use 0 for GPU
)


Device set to use cpu


In [5]:
# -----------------------------
# 4. Answer generation
# -----------------------------
def generate_answer(query, model, index, chunks, top_k=5):
    
    # Step 1: Retrieve
    retrieved = retrieve_chunks(query, model, index, chunks, top_k=top_k)

    # Step 2: Prepare context
    context = "\n\n".join([f"Source: {r.get('source', 'Unknown')}\n{r['text']}" for r in retrieved])

    # Step 3: Build prompt
    prompt = f"""
    You are an expert eligibility officer.
    Using only the context below, answer the question truthfully.
    If the answer is not in the context, say "I cannot find relevant information."

    Context:
    {context}

    Question: {query}
    Answer:
    """

    # Step 4: Generate
    output = generator(prompt, max_new_tokens=200)

    # Step 5: Collect citations
    #citations = list({r.get('source', 'Unknown') for r in retrieved})
    citations = [f"{r.get('source', 'Unknown')} — {r['text']}" for r in retrieved]

    return output[0]["generated_text"], citations



In [6]:
# -----------------------------
# 5. Run everything
# -----------------------------
chunks_path = r"E:\Info_Srping\swiftvisa\index\chunks_with_embeddings_new.jsonl"
model, index, chunks = build_faiss_index(chunks_path)

query = "Is the student eligible for UK Student Visa?"
answer, citations = generate_answer(query, model, index, chunks, top_k=5)

print("\nFinal Answer:\n", answer)
print("\nCitations:\n", citations)

Token indices sequence length is longer than the specified maximum sequence length for this model (1561 > 512). Running this sequence through the model will result in indexing errors



Final Answer:
 Yes

Citations:
 ['Student and Child Student — page 94 of 107 published for home office staff on 16 july 2025 work conditions this page tells caseworkers what employment a student or child student can undertake in the uk. employment conditions are dependent on the type of sponsor the applicant will be studying at and the level of course they are studying, information on the amount of hours a student can work and the type of employment that is permitted can be found in appendix student st 26. periods of permission after a student has completed their course are considered to be outside of term - time for the purposes of any work conditions. type of sponsor course type level work permitted if study is at : • a higher education provider ( hep ) with a track record of compliance • overseas higher education institution and the student is on a short - term study - abroad programme in the uk full - time course at degree level or above then the following work is permitted : • pa

In [7]:
chunks_path = r"E:\Info_Srping\swiftvisa\index\chunks_with_embeddings_new.jsonl"
model, index, chunks = build_faiss_index(chunks_path)

query = "What are the eligibility requirements for a UK Student Visa?"
answer, citations = generate_answer(query, model, index, chunks, top_k=5)

print("\nFinal Answer:\n", answer)
print("\nCitations:\n", citations)


Final Answer:
 I cannot find relevant information.

Citations:
 ['Student and Child Student — page 9 of 107 published for home office staff on 16 july 2025 validity for entry clearance and permission to stay applications this page tells caseworkers where to find the validity requirements that an applicant must meet when they apply for entry clearance or permission to stay as a student, a child student or a dependant of a student. before considering any application, the caseworker must check the application is valid by referring to : • the validity requirements for the student route, contained in appendix student st 1. 1 to 1. 5 • the validity requirements for the child student route, contained in appendix child student cs 1. 1 to 1. 5 • the validity requirements for dependants of a student, contained in appendix student st 28. 1 to 28. 4 detailed guidance on how to assess the validity requirements can be found in the validation, variation, voiding and withdrawing of applications guida

In [8]:
chunks_path = r"E:\Info_Srping\swiftvisa\index\chunks_with_embeddings_new.jsonl"
model, index, chunks = build_faiss_index(chunks_path)

query = "I am from Canada and applying for a UK Student Visa. Do I need to prove my English language ability?"
answer, citations = generate_answer(query, model, index, chunks, top_k=5)

print("\nFinal Answer:\n", answer)
print("\nCitations:\n", citations)


Final Answer:
 Yes

Citations:
 ['Student and Child Student — • an interview • the sponsor ’ s own test or entrance exam the sponsor must, however, provide details of how they assessed the applicant on the cas. evidence of english language ability all sponsors must assess their prospective students ’ english language ability. the level of english required and the documents to be submitted depends on : • the level of course the applicant is studying • the type of institution at which the applicant is studying verifying documents the caseworker must carry out a verification check if : • they have reasonable doubts that a specified document is not genuine', 'Student and Child Student — page 48 of 107 published for home office staff on 16 july 2025 student : english language requirement this page tells caseworkers about the english language requirement for applicants on the student route. applicants on the student route must meet a required level of english language ability depending on t