In [2]:
pip install PyPDF2 transformers sentence_transformers faiss-cpu requests

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting sentence_transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m37.7 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manyli

In [9]:
import os
import google.generativeai as genai
import PyPDF2
from sentence_transformers import SentenceTransformer
import faiss

# Configure the Gemini API key.
# Prefer the GOOGLE_API_KEY environment variable so a real secret never has to
# be typed into (and saved with) the notebook. "APIKEY" is kept only as an
# inert placeholder for when the variable is not set.
api_key = os.environ.get("GOOGLE_API_KEY", "APIKEY")
genai.configure(api_key=api_key)

# Generation settings handed to the Gemini model below.
generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=64,
    max_output_tokens=8192,
    response_mime_type="text/plain",
)

# Gemini model instance used by generate_response(); the system instruction
# scopes it to UNT International Student Services questions.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    system_instruction="You are a helpful assistant designed to provide accurate and detailed information about international student visas, work authorizations, and related queries of International Student Services at the University of North Texas. Use the provided context to generate relevant and helpful responses to user queries.",
    generation_config=generation_config,
)

def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path to the PDF file to read.

    Returns:
        A single string with the text of all pages, in page order, separated
        by newlines. Pages that yield no text contribute an empty string.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Extraction must happen while the file is still open.
        # `or ""` guards against a page whose extraction yields nothing.
        page_texts = [page.extract_text() or "" for page in reader.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next (which would corrupt the text used for retrieval).
    return "\n".join(page_texts)

def preprocess_text(text, chunk_size=512, overlap=0):
    """Split text into fixed-size character chunks.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk. Must be positive.
        overlap: Number of characters shared between consecutive chunks.
            The default of 0 produces non-overlapping chunks. Must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        A list of string chunks in document order; empty if ``text`` is empty.
        The final chunk may be shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(text), step):
        chunks.append(text[start:start + chunk_size])
        # Stop once a chunk reaches the end of the text; with overlap > 0 any
        # further window would only repeat characters already emitted.
        if start + chunk_size >= len(text):
            break
    return chunks

# Sentence-embedding model shared by the indexing and retrieval helpers below.
retriever_model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

def create_faiss_index(chunks):
    """Embed text chunks and store the embeddings in a flat L2 FAISS index.

    Args:
        chunks: Non-empty list of text chunks to embed with ``retriever_model``.

    Returns:
        A ``(index, chunks)`` tuple: the populated ``faiss.IndexFlatL2`` and
        the same ``chunks`` list, whose positions match the index ids.

    Raises:
        ValueError: If ``chunks`` is empty (for example, a PDF with no
            extractable text), since no embedding dimension can be derived.
    """
    if not chunks:
        raise ValueError("chunks is empty: nothing to index")

    # Ask for a NumPy array directly; FAISS consumes NumPy, so there is no
    # need to round-trip through a torch tensor.
    embeddings = retriever_model.encode(chunks, convert_to_numpy=True)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, chunks

def retrieve_relevant_chunks(query, index, chunks, top_k=5):
    """Return the chunks whose embeddings are nearest to the query.

    Args:
        query: The user question to embed with ``retriever_model``.
        index: FAISS index built over the embeddings of ``chunks``.
        chunks: The text chunks, positionally aligned with the index ids.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks, in the order returned by ``index.search``.
        Fewer are returned when fewer chunks are available; an empty list is
        returned when there are no chunks or ``top_k`` is not positive.
    """
    k = min(top_k, len(chunks))
    if k <= 0:
        return []

    query_embedding = retriever_model.encode([query], convert_to_numpy=True)
    _, indices = index.search(query_embedding, k)
    # FAISS pads missing results with -1; indexing chunks[-1] would silently
    # return the last chunk, so drop anything outside the valid id range.
    return [chunks[int(i)] for i in indices[0] if 0 <= i < len(chunks)]

def generate_response(query, retrieved_chunks):
    """Answer a question with Gemini, grounded in the retrieved chunks.

    Args:
        query: The user question.
        retrieved_chunks: Text chunks to supply as context; they are joined
            with single spaces.

    Returns:
        The text of the model's response.
    """
    context = " ".join(retrieved_chunks)
    input_text = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"

    # Send the context and question as one single-turn request. The prompt
    # already contains the question, so seeding a chat history with it and
    # then sending the query again would duplicate the question and leave two
    # consecutive user turns with no model reply in between.
    response = model.generate_content(input_text)
    return response.text

# End-to-end run: read the PDF, chunk and index it, then answer one question.
pdf_path = '/content/ISSS information.pdf'
text = extract_text_from_pdf(pdf_path)
index, chunks = create_faiss_index(preprocess_text(text))

# Retrieve the most relevant chunks for the question and let Gemini answer.
query = "What are the work authorization options for international students?"
retrieved_chunks = retrieve_relevant_chunks(query, index, chunks)
response = generate_response(query, retrieved_chunks)
print(response)

International students on F-1 and J-1 visas have the following work authorization options:

**On-Campus Work:**

* **F-1 Students:** F-1 students are automatically authorized to work on-campus up to 20 hours per week while classes are in session.
* **J-1 Students:** J-1 students must be authorized by ISSS (International Student & Scholar Services) before they can start any on-campus employment.

**Off-Campus Work:**

* **F-1 Students:**  F-1 students need specific written authorization from ISSS or USCIS (US Citizenship and Immigration Services) to work off-campus. 
* **J-1 Students:** J-1 students may have limited off-campus work options depending on their program and visa requirements. You should contact ISSS to inquire about your specific situation.

**Other Work Options:**

* **Optional Practical Training (OPT):** This allows F-1 students to work in their field of study for up to 12 months after graduation. 
* **Curricular Practical Training (CPT):** This allows F-1 students to wor