In [9]:
# Install required packages
!pip install transformers torch accelerate bitsandbytes langchain-community sentence-transformers chromadb tiktoken pypdf



In [10]:
import pandas as pd
import numpy as np
from typing import Dict, List, Any
import json
from tqdm import tqdm
import os
import pickle
import warnings
warnings.filterwarnings('ignore')

# LangChain imports
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter  # ✅ Correct
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

In [11]:
import os
import json
from typing import Dict, List
import pandas as pd
import numpy as np
from tqdm import tqdm

# Document and embedding imports
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter  # ✅ Correct import
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

# Cell 3: Setup paths and load PDF
pdf_file = "/content/Dataset_Flykite_Airlines_HRP.pdf"

# Verify file exists
if os.path.exists(pdf_file):
    print(f"✓ File '{pdf_file}' found.")
else:
    print(f"✗ File '{pdf_file}' NOT found. Please upload it.")

# Initialize PDF loader
pdf_loader = PyPDFLoader(pdf_file)

# Initialize embedding model
embedding_model = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')

# Cell 4: RAG Hyperparameter Tuner Class (your existing class remains the same)
class RAGHyperparameterTuner:
    def __init__(self, base_config: Dict):
        self.base_config = base_config
        self.results = []

    def test_text_splitter_params(self, test_cases: List[Dict]):
        """Test different text splitting configurations"""
        print("Testing Text Splitter Parameters...")

        for params in tqdm(test_cases):
            try:
                # Create text splitter with current params
                text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
                    encoding_name=params.get('encoding_name', 'cl100k_base'),
                    chunk_size=params['chunk_size'],
                    chunk_overlap=params['chunk_overlap']
                )

                # Reload and split documents
                document_chunks = pdf_loader.load_and_split(text_splitter)

                # Create vector store
                vectorstore = Chroma.from_documents(
                    document_chunks,
                    embedding_model,
                    collection_name=f"test_{params['chunk_size']}_{params['chunk_overlap']}",
                    persist_directory="./chroma_db_test"
                )

                retriever = vectorstore.as_retriever(
                    search_type=params.get('search_type', 'similarity'),
                    search_kwargs={'k': params.get('k', 6)}
                )

                # Store configuration
                config = {
                    'test_type': 'text_splitter',
                    'chunk_size': params['chunk_size'],
                    'chunk_overlap': params['chunk_overlap'],
                    'num_chunks': len(document_chunks),
                    'avg_chunk_length': np.mean([len(chunk.page_content) for chunk in document_chunks]),
                    'retriever_config': {
                        'search_type': params.get('search_type', 'similarity'),
                        'k': params.get('k', 6)
                    }
                }

                self.results.append(config)
                vectorstore.delete_collection()

            except Exception as e:
                print(f"Error with params {params}: {e}")

    def test_retrieval_params(self, base_chunk_size: int = 512, base_overlap: int = 16):
        """Test different retrieval configurations"""
        print("Testing Retrieval Parameters...")

        # Create base text splitter
        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            encoding_name='cl100k_base',
            chunk_size=base_chunk_size,
            chunk_overlap=base_overlap
        )

        document_chunks = pdf_loader.load_and_split(text_splitter)

        retrieval_test_cases = [
            {'search_type': 'similarity', 'k': 3},
            {'search_type': 'similarity', 'k': 6},
            {'search_type': 'similarity', 'k': 10},
            {'search_type': 'mmr', 'k': 6, 'fetch_k': 20},
            {'search_type': 'mmr', 'k': 6, 'fetch_k': 30},
            {'search_type': 'similarity_score_threshold', 'k': 6, 'score_threshold': 0.5},
            {'search_type': 'similarity_score_threshold', 'k': 6, 'score_threshold': 0.7},
        ]

        for params in tqdm(retrieval_test_cases):
            try:
                vectorstore = Chroma.from_documents(
                    document_chunks,
                    embedding_model,
                    collection_name=f"retrieval_test",
                    persist_directory="./chroma_db_test"
                )

                search_kwargs = {'k': params['k']}
                if params['search_type'] == 'mmr':
                    search_kwargs['fetch_k'] = params['fetch_k']
                elif params['search_type'] == 'similarity_score_threshold':
                    search_kwargs['score_threshold'] = params['score_threshold']

                retriever = vectorstore.as_retriever(
                    search_type=params['search_type'],
                    search_kwargs=search_kwargs
                )

                config = {
                    'test_type': 'retrieval',
                    'chunk_size': base_chunk_size,
                    'chunk_overlap': base_overlap,
                    'search_type': params['search_type'],
                    'search_kwargs': search_kwargs
                }

                self.results.append(config)
                vectorstore.delete_collection()

            except Exception as e:
                print(f"Error with params {params}: {e}")

    def test_embedding_models(self, document_chunks: List[Document]):
        """Test different embedding models"""
        print("Testing Embedding Models...")

        embedding_models = [
            {'name': 'all-MiniLM-L6-v2', 'dimensions': 384},
            {'name': 'all-mpnet-base-v2', 'dimensions': 768},
            {'name': 'multi-qa-MiniLM-L6-cos-v1', 'dimensions': 384},
            {'name': 'paraphrase-MiniLM-L3-v2', 'dimensions': 384},
        ]

        for model_config in tqdm(embedding_models):
            try:
                test_embedding_model = SentenceTransformerEmbeddings(
                    model_name=model_config['name']
                )

                vectorstore = Chroma.from_documents(
                    document_chunks,
                    test_embedding_model,
                    collection_name=f"embedding_test_{model_config['name']}",
                    persist_directory="./chroma_db_test"
                )

                retriever = vectorstore.as_retriever(
                    search_type='similarity',
                    search_kwargs={'k': 6}
                )

                config = {
                    'test_type': 'embedding',
                    'embedding_model': model_config['name'],
                    'dimensions': model_config['dimensions'],
                    'chunk_size': 512,
                    'chunk_overlap': 16
                }

                self.results.append(config)
                vectorstore.delete_collection()

            except Exception as e:
                print(f"Error with model {model_config['name']}: {e}")

    def save_results(self, filename: str = "rag_hyperparameter_results.json"):
        """Save all results to JSON file"""
        with open(filename, 'w') as f:
            json.dump(self.results, f, indent=2)
        print(f"✓ Results saved to {filename}")

    def generate_report(self):
        """Generate a comprehensive report"""
        df = pd.DataFrame(self.results)

        print("\n=== RAG Hyperparameter Tuning Report ===")
        print(f"Total configurations tested: {len(df)}")

        if not df.empty:
            # Group by test type
            for test_type in df['test_type'].unique():
                test_df = df[df['test_type'] == test_type]
                print(f"\n--- {test_type.upper()} Results ---")
                print(test_df.to_string(index=False))

        return df

# Cell 5: Setup test cases (your existing function remains the same)
def setup_test_cases():
    """Setup comprehensive test cases for RAG tuning"""

    text_splitter_cases = [
        # Small chunks
        {'chunk_size': 256, 'chunk_overlap': 32},
        {'chunk_size': 256, 'chunk_overlap': 64},

        # Medium chunks (default range)
        {'chunk_size': 512, 'chunk_overlap': 64},
        {'chunk_size': 512, 'chunk_overlap': 128},

        # Large chunks
        {'chunk_size': 1024, 'chunk_overlap': 128},
        {'chunk_size': 1024, 'chunk_overlap': 256},

        # Very large chunks
        {'chunk_size': 2048, 'chunk_overlap': 256},
    ]

    return text_splitter_cases

# Sample questions for evaluation
test_questions = [
    "What are the effects on the benefits I receive if my probation is extended?",
    "There has been a demise in my family last night, and I need to attend the last rites. How should I inform the office, and will I be granted leave?",
    "What should I do if I notice suspected harassment with my female colleague?"
]

# Cell 6: Run the tuning experiments
# Initialize the tuner
tuner = RAGHyperparameterTuner(base_config={})

# Get test cases
test_cases = setup_test_cases()

# Run text splitter tests
tuner.test_text_splitter_params(test_cases)

# Run retrieval tests
tuner.test_retrieval_params(base_chunk_size=512, base_overlap=64)

# Run embedding model tests (with a standard chunk size)
standard_text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    encoding_name='cl100k_base',
    chunk_size=512,
    chunk_overlap=64
)
standard_chunks = pdf_loader.load_and_split(standard_text_splitter)
tuner.test_embedding_models(standard_chunks)

# Generate report
report_df = tuner.generate_report()

# Save results
tuner.save_results("rag_hyperparameter_tuning_results.json")

print("\n✓ All hyperparameter tuning completed!")
print(f"✓ Tested {len(tuner.results)} configurations")

✓ File '/content/Dataset_Flykite_Airlines_HRP.pdf' found.
Testing Text Splitter Parameters...


100%|██████████| 7/7 [00:15<00:00,  2.15s/it]


Testing Retrieval Parameters...


100%|██████████| 7/7 [00:01<00:00,  4.73it/s]


Testing Embedding Models...


 25%|██▌       | 1/4 [00:03<00:10,  3.40s/it]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

 50%|█████     | 2/4 [00:18<00:20, 10.45s/it]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/383 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

 75%|███████▌  | 3/4 [00:30<00:11, 11.01s/it]

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/69.6M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

100%|██████████| 4/4 [00:42<00:00, 10.53s/it]


=== RAG Hyperparameter Tuning Report ===
Total configurations tested: 18

--- TEXT_SPLITTER Results ---
    test_type  chunk_size  chunk_overlap  num_chunks  avg_chunk_length                      retriever_config search_type search_kwargs embedding_model  dimensions
text_splitter         256             32        35.0        551.685714 {'search_type': 'similarity', 'k': 6}         NaN           NaN             NaN         NaN
text_splitter         256             64        36.0        587.861111 {'search_type': 'similarity', 'k': 6}         NaN           NaN             NaN         NaN
text_splitter         512             64        19.0        991.684211 {'search_type': 'similarity', 'k': 6}         NaN           NaN             NaN         NaN
text_splitter         512            128        19.0       1042.947368 {'search_type': 'similarity', 'k': 6}         NaN           NaN             NaN         NaN
text_splitter        1024            128        14.0       1300.714286 {'search_




In [12]:
# Load the results
results_file = "/content/rag_hyperparameter_tuning_results.json"
with open(results_file, 'r') as f:
    results = json.load(f)

# Convert to DataFrame for analysis
df = pd.DataFrame(results)

print("=== RAG Hyperparameter Tuning Results Summary ===")
print(f"Total configurations tested: {len(df)}")
print("\nText Splitter Configurations:")
text_splitter_df = df[df['test_type'] == 'text_splitter']
print(text_splitter_df[['chunk_size', 'chunk_overlap', 'num_chunks', 'avg_chunk_length']].to_string(index=False))

print("\nRetrieval Configurations:")
retrieval_df = df[df['test_type'] == 'retrieval']
print(retrieval_df[['search_type', 'search_kwargs']].to_string(index=False))

print("\nEmbedding Model Configurations:")
embedding_df = df[df['test_type'] == 'embedding']
print(embedding_df[['embedding_model', 'dimensions']].to_string(index=False))

=== RAG Hyperparameter Tuning Results Summary ===
Total configurations tested: 18

Text Splitter Configurations:
 chunk_size  chunk_overlap  num_chunks  avg_chunk_length
        256             32        35.0        551.685714
        256             64        36.0        587.861111
        512             64        19.0        991.684211
        512            128        19.0       1042.947368
       1024            128        14.0       1300.714286
       1024            256        14.0       1300.714286
       2048            256        14.0       1300.714286

Retrieval Configurations:
               search_type                    search_kwargs
                similarity                         {'k': 3}
                similarity                         {'k': 6}
                similarity                        {'k': 10}
                       mmr          {'k': 6, 'fetch_k': 20}
                       mmr          {'k': 6, 'fetch_k': 30}
similarity_score_threshold {'k': 6, 'score_t

In [13]:
# Initialize the best configuration components
best_chunk_size = 512
best_chunk_overlap = 64
best_search_type = 'similarity'
best_k = 6
best_embedding_model = 'all-MiniLM-L6-v2'  # or 'all-mpnet-base-v2' for higher quality

# Load PDF and create vectorstore with best configuration
pdf_loader = PyPDFLoader("/content/Dataset_Flykite_Airlines_HRP.pdf")
best_text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    encoding_name='cl100k_base',
    chunk_size=best_chunk_size,
    chunk_overlap=best_chunk_overlap
)

# Load and split documents
documents = pdf_loader.load_and_split(best_text_splitter)

# Initialize embedding model
embedding_model = SentenceTransformerEmbeddings(model_name=best_embedding_model)

# Create vectorstore
vectorstore = Chroma.from_documents(
    documents,
    embedding_model,
    collection_name="best_rag_config",
    persist_directory="./chroma_db_best"
)

# Create retriever
retriever = vectorstore.as_retriever(
    search_type=best_search_type,
    search_kwargs={'k': best_k}
)

print(f"✓ Vectorstore created with {len(documents)} chunks")
print(f"✓ Best configuration: chunk_size={best_chunk_size}, overlap={best_chunk_overlap}, search_type={best_search_type}, k={best_k}, embedding={best_embedding_model}")

✓ Vectorstore created with 19 chunks
✓ Best configuration: chunk_size=512, overlap=64, search_type=similarity, k=6, embedding=all-MiniLM-L6-v2


In [14]:
# Import transformers and torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from transformers import BitsAndBytesConfig

In [20]:
# Import all necessary libraries
import json
import pandas as pd
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from transformers import BitsAndBytesConfig

# Load the results from your hyperparameter tuning
results_file = "/content/rag_hyperparameter_tuning_results.json"
with open(results_file, 'r') as f:
    results = json.load(f)

# Define the best configuration based on analysis
best_chunk_size = 512
best_chunk_overlap = 64
best_search_type = 'similarity'
best_k = 6
best_embedding_model = 'all-MiniLM-L6-v2'

print("="*60)
print("RAG HYPERPARAMETER TUNING RESULTS")
print("="*60)
print(f"Best Configuration:")
print(f"  • Chunk Size: {best_chunk_size}")
print(f"  • Chunk Overlap: {best_chunk_overlap}")
print(f"  • Search Type: {best_search_type}")
print(f"  • k (retrieved docs): {best_k}")
print(f"  • Embedding Model: {best_embedding_model}")
print("="*60)

# Load PDF and create vectorstore with best configuration
pdf_file = "/content/Dataset_Flykite_Airlines_HRP.pdf"
pdf_loader = PyPDFLoader(pdf_file)
best_text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    encoding_name='cl100k_base',
    chunk_size=best_chunk_size,
    chunk_overlap=best_chunk_overlap
)

# Load and split documents
documents = pdf_loader.load_and_split(best_text_splitter)

# Initialize embedding model
embedding_model = SentenceTransformerEmbeddings(model_name=best_embedding_model)

# Create vectorstore
vectorstore = Chroma.from_documents(
    documents,
    embedding_model,
    collection_name="best_rag_config",
    persist_directory="./chroma_db_best"
)

# Create retriever
retriever = vectorstore.as_retriever(
    search_type=best_search_type,
    search_kwargs={'k': best_k}
)

print(f"✓ Vectorstore created with {len(documents)} chunks")
print(f"✓ Best configuration implemented successfully")

# Test questions
test_questions = [
    "What are the effects on the benefits I receive if my probation is extended?",
    "There has been a demise in my family last night, and I need to attend the last rites. How should I inform the office, and will I be granted leave?",
    "What should I do if I notice suspected harassment with my female colleague?"
]

# Load Qwen 7B Instruct model with 4-bit quantization for memory efficiency
model_name = "Qwen/Qwen2-7B-Instruct"

print("\n" + "="*60)
print("LOADING Qwen 7B Instruct Model")
print("="*60)

# Configure 4-bit quantization to save memory
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.float16  # Use float16 for efficiency
    )
    print(f"✓ {model_name} model loaded successfully with 4-bit quantization")
except Exception as e:
    print(f"⚠️ Error loading 7B model: {e}")
    print("Falling back to 1.5B model...")
    model_name = "Qwen/Qwen2-1.5B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    print(f"✓ Loaded fallback model: {model_name}")

def get_rag_response(question, retriever, model, tokenizer):
    """
    Get RAG response using retrieved documents and LLM
    """
    # Use invoke method to retrieve documents
    retrieved_docs = retriever.invoke(question)

    # Combine retrieved documents into context
    context = "\n\n".join([doc.page_content for doc in retrieved_docs])

    # Create prompt for Qwen model
    prompt = f"<|system|>\nYou are an HR assistant for Flykite Airlines. Use the provided context to answer questions accurately.\n</|system|>\n<|user|>\nContext: {context}\n\nQuestion: {question}\n</|user|>\n<|assistant|>\n"

    # Tokenize and generate response
    inputs = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=3072)

    # Move to GPU if available
    if torch.cuda.is_available():
        inputs = inputs.to(model.device)

    # Generate response with appropriate parameters for Qwen
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=512,
            temperature=0.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            max_length=min(len(inputs[0]) + 512, 4096)  # Prevent exceeding max length
        )

    # Decode response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract just the answer part (after the assistant tag)
    if "<|assistant|>" in response:
        answer = response.split("<|assistant|>")[-1].strip()
    else:
        answer = response[len(prompt):].strip()

    # Clean up the answer by removing any model-specific tags that might remain
    answer = answer.split("<|user|>")[0].split("</s>")[0].strip()

    return answer, retrieved_docs

# Test all questions with improved formatting
print("\n" + "="*60)
print("RAG TESTING WITH Qwen MODEL")
print("="*60)

for i, question in enumerate(test_questions, 1):
    print(f"\n📋 QUESTION {i}:")
    print("-" * 40)
    print(f"{question}")
    print("-" * 40)

    try:
        answer, retrieved_docs = get_rag_response(question, retriever, model, tokenizer)
        print(f"\n✅ RAG ANSWER:")
        print(f"   {answer}")

        print(f"\n📚 RETRIEVED DOCUMENTS ({len(retrieved_docs)}):")
        print("   " + "-" * 35)
        for j, doc in enumerate(retrieved_docs[:2], 1):  # Show first 2 documents
            print(f"   [{j}] {doc.page_content[:150]}...")
            if len(doc.page_content) > 150:
                print(f"       ... (truncated)")
        if len(retrieved_docs) > 2:
            print(f"   ... and {len(retrieved_docs) - 2} more documents")
        print("   " + "-" * 35)

    except Exception as e:
        print(f"\n❌ Error processing question {i}: {e}")

    print("\n" + "="*60)

# Clean up vectorstore
vectorstore.delete_collection()
print("\n🎉 RAG TESTING COMPLETED SUCCESSFULLY!")
print("="*60)

RAG HYPERPARAMETER TUNING RESULTS
Best Configuration:
  • Chunk Size: 512
  • Chunk Overlap: 64
  • Search Type: similarity
  • k (retrieved docs): 6
  • Embedding Model: all-MiniLM-L6-v2
✓ Vectorstore created with 19 chunks
✓ Best configuration implemented successfully

LOADING Qwen 7B Instruct Model


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Both `max_new_tokens` (=512) and `max_length`(=2472) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


✓ Qwen/Qwen2-7B-Instruct model loaded successfully with 4-bit quantization

RAG TESTING WITH Qwen MODEL

📋 QUESTION 1:
----------------------------------------
What are the effects on the benefits I receive if my probation is extended?
----------------------------------------


Both `max_new_tokens` (=512) and `max_length`(=2988) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



✅ RAG ANSWER:
   If your probation is extended, you will not be eligible for certain benefits while on probation, including:

- Annual leave encashment
- Internal role transfers
- Performance bonuses

Seniority accrual also starts only after successful probation completion. If you have two or more extensions in different roles due to internal transfers, HR will assess contract renewal eligibility based on your performance history; no automatic carry-over is granted.

📚 RETRIEVED DOCUMENTS (6):
   -----------------------------------
   [1] their
 
oﬃcial
 
start
 
date.
 
 ●  For  technical,  safety-critical,  or  senior  management  roles,  probation  is  120  
calendar
 
days
.
 
 ●  A...
       ... (truncated)
   [2] ●  Extensions  are  granted  only  if :  
 
a.
 
The
 
employee
 
has
 
achieved
 
at
 
least
 
60%
 
of
 
their
 
probationary
 
objectives.
 
 
b.
 ...
       ... (truncated)
   ... and 4 more documents
   -----------------------------------


📋 QUESTION 2:
----------

Both `max_new_tokens` (=512) and `max_length`(=2108) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



✅ RAG ANSWER:
   To inform the office about the situation, you should follow these steps:

1. **Inform Your Supervisor**: As per the notification process outlined in the policy, you must inform your direct supervisor at least 48 hours before planned leave, except for emergencies. Since this is a sudden emergency, you can make a verbal notification within 6 hours of the incident.

2. **Submit Required Documentation**: You will need to provide documentation upon your return to work. This includes a death certificate, funeral notice, or obituary (bereavement).

3. **Follow the Leave Duration Limit**: The policy allows up to 5 consecutive working days of bereavement leave per incident.

4. **Notify HR**: Ensure that you also notify Human Resources about the situation so they can approve your leave according to the company's policies.

5. **Compliance with Additional Conditions**: If you are on probation, your leave request might be limited to a maximum of 2 consecutive working days for be

### create app.py file and copy below cells code to app.py

In [None]:
import streamlit as st
import json
import pandas as pd
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from transformers import BitsAndBytesConfig
import os

# Set page config
st.set_page_config(
    page_title="Flykite Airlines HR Assistant",
    page_icon="✈️",
    layout="wide"
)

# Initialize session state
if 'vectorstore' not in st.session_state:
    st.session_state.vectorstore = None
if 'retriever' not in st.session_state:
    st.session_state.retriever = None
if 'model' not in st.session_state:
    st.session_state.model = None
if 'tokenizer' not in st.session_state:
    st.session_state.tokenizer = None
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []

# App title and description
st.title("✈️ Flykite Airlines HR Assistant")
st.markdown("""
This AI assistant helps answer HR-related questions based on Flykite Airlines' HR policies.
Ask questions about benefits, leave policies, harassment procedures, and more.
""")

# Sidebar configuration
st.sidebar.header("Configuration")
st.sidebar.markdown("**Model Configuration:**")
model_option = st.sidebar.selectbox(
    "Select Model Size",
    ["Qwen2-1.5B-Instruct", "Qwen2-7B-Instruct"],
    index=1  # Default to 7B
)

st.sidebar.markdown("**RAG Configuration:**")
chunk_size = st.sidebar.slider("Chunk Size", 256, 2048, 512, step=128)
chunk_overlap = st.sidebar.slider("Chunk Overlap", 16, 256, 64, step=16)
k_retrieved = st.sidebar.slider("Number of Retrieved Documents", 1, 10, 6)

# Initialize or update RAG system
@st.cache_resource
def initialize_rag_system():
    """Initialize the RAG system with best configuration"""
    st.info("Loading RAG system...")

    # Load PDF
    pdf_file = "Dataset_Flykite_Airlines_HRP.pdf"  # Update with your PDF path

    # If PDF doesn't exist, show error
    if not os.path.exists(pdf_file):
        st.error(f"PDF file '{pdf_file}' not found!")
        return None, None, None, None

    pdf_loader = PyPDFLoader(pdf_file)

    # Create text splitter with selected configuration
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        encoding_name='cl100k_base',
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )

    # Load and split documents
    documents = pdf_loader.load_and_split(text_splitter)

    # Initialize embedding model
    embedding_model = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')

    # Create vectorstore
    vectorstore = Chroma.from_documents(
        documents,
        embedding_model,
        collection_name="flykite_hr_docs"
    )

    # Create retriever
    retriever = vectorstore.as_retriever(
        search_type='similarity',
        search_kwargs={'k': k_retrieved}
    )

    # Load model based on selection
    model_name = f"Qwen/Qwen2-{model_option}"

    # Configure 4-bit quantization for memory efficiency
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4"
    )

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.float16
        )
    except Exception as e:
        st.error(f"Error loading model: {e}")
        st.info("Falling back to smaller model...")
        model_name = "Qwen/Qwen2-1.5B-Instruct"
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )

    st.success("RAG system loaded successfully!")
    return vectorstore, retriever, model, tokenizer

# Function to get RAG response
def get_rag_response(question, retriever, model, tokenizer):
    """Get RAG response using retrieved documents and LLM"""
    try:
        # Retrieve relevant documents
        retrieved_docs = retriever.invoke(question)

        # Combine retrieved documents into context
        context = "\n\n".join([doc.page_content for doc in retrieved_docs])

        # Create prompt for Qwen model
        prompt = f"<|system|>\nYou are an HR assistant for Flykite Airlines. Use the provided context to answer questions accurately.\n</|system|>\n<|user|>\nContext: {context}\n\nQuestion: {question}\n</|user|>\n<|assistant|>\n"

        # Tokenize and generate response
        inputs = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=3072)

        # Move to GPU if available
        if torch.cuda.is_available():
            inputs = inputs.to(model.device)

        # Generate response with appropriate parameters for Qwen
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=512,
                temperature=0.1,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
                max_length=min(len(inputs[0]) + 512, 4096)
            )

        # Decode response
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract just the answer part (after the assistant tag)
        if "<|assistant|>" in response:
            answer = response.split("<|assistant|>")[-1].strip()
        else:
            answer = response[len(prompt):].strip()

        # Clean up the answer
        answer = answer.split("<|user|>")[0].split("</s>")[0].strip()

        return answer, retrieved_docs
    except Exception as e:
        return f"Error processing request: {str(e)}", []

# Initialize RAG system
if st.session_state.vectorstore is None:
    with st.spinner("Initializing RAG system... This may take a few minutes."):
        st.session_state.vectorstore, st.session_state.retriever, st.session_state.model, st.session_state.tokenizer = initialize_rag_system()

# Main interface
col1, col2 = st.columns([3, 1])

with col1:
    # Chat interface
    st.subheader("Ask a Question")

    # Example questions
    example_questions = [
        "What are the effects on the benefits I receive if my probation is extended?",
        "There has been a demise in my family last night, and I need to attend the last rites. How should I inform the office, and will I be granted leave?",
        "What should I do if I notice suspected harassment with my female colleague?"
    ]

    st.markdown("**Try these example questions:**")
    for i, question in enumerate(example_questions):
        if st.button(f"Example {i+1}", key=f"example_{i}"):
            st.session_state.current_question = question

    # User input
    user_input = st.text_input("Enter your HR question:", key="user_input")

    if st.button("Ask", key="ask_button") or user_input:
        if user_input and st.session_state.retriever and st.session_state.model:
            with st.spinner("Thinking..."):
                answer, retrieved_docs = get_rag_response(
                    user_input,
                    st.session_state.retriever,
                    st.session_state.model,
                    st.session_state.tokenizer
                )

                # Add to chat history
                st.session_state.chat_history.append({
                    "question": user_input,
                    "answer": answer,
                    "docs": retrieved_docs
                })

                # Clear input
                st.session_state.user_input = ""

# Display chat history
with col1:
    st.subheader("Chat History")

    for i, chat in enumerate(st.session_state.chat_history):
        with st.expander(f"Q: {chat['question'][:50]}...", expanded=True):
            st.markdown(f"**Question:** {chat['question']}")
            st.markdown(f"**Answer:** {chat['answer']}")

            with st.popover("View Retrieved Documents"):
                st.markdown("**Retrieved Documents:**")
                for j, doc in enumerate(chat['docs']):
                    st.markdown(f"**Document {j+1}:**")
                    st.text_area("", value=doc.page_content, height=100, key=f"doc_{i}_{j}")

with col2:
    st.subheader("Configuration")
    st.markdown(f"**Model:** {model_option}")
    st.markdown(f"**Chunk Size:** {chunk_size}")
    st.markdown(f"**Chunk Overlap:** {chunk_overlap}")
    st.markdown(f"**Retrieved Docs:** {k_retrieved}")

    st.divider()

    st.subheader("About This System")
    st.info("This RAG system uses:")
    st.markdown("- **Vector Database:** Chroma")
    st.markdown("- **Embeddings:** all-MiniLM-L6-v2")
    st.markdown("- **Model:** Qwen2 (with 4-bit quantization)")
    st.markdown("- **Search:** Similarity search")

    if st.session_state.vectorstore:
        st.success("✅ RAG System Ready!")
    else:
        st.warning("⚠️ RAG System not initialized")

# Footer
st.divider()
st.markdown("*Flykite Airlines HR Assistant - Powered by RAG & Qwen*")

## Create a file called requirements.txt:

In [None]:
streamlit
langchain-community
sentence-transformers
chromadb
transformers
torch
accelerate
bitsandbytes
pypdf
tiktoken

In [21]:
Go to Hugging Face Spaces:
Visit huggingface.co/spaces
Click "Create new Space"
Configure the Space:
Name: Choose a name like your-username/flykite-hr-assistant
SDK: Select "Streamlit"
Hardware: Choose "GPU" (recommended for the 7B model) or "CPU" (for smaller models)
Visibility: Public or Private
Upload Files:
Upload your app.py file
Upload your requirements.txt file
Upload your PDF file: Dataset_Flykite_Airlines_HRP.pdf

SyntaxError: invalid decimal literal (ipython-input-4148323609.py, line 7)

- Go to Hugging Face Spaces:
  - Visit huggingface.co/spaces
  - Click "Create new Space"
- Configure the Space:
  - Name: Choose a name like your-username/flykite-hr-assistant
  - SDK: Select "Streamlit"
  - Hardware: Choose "GPU" (recommended for the 7B model) or "CPU" (for smaller models)
  - Visibility: Public or Private
- Upload Files:
  - Upload your app.py file
  - Upload your requirements.txt file
  - Upload your PDF file: Dataset_Flykite_Airlines_HRP.pdf

#  Alternative Streamlit App (More Lightweight) CPU

In [None]:
import streamlit as st
import json
import pandas as pd
from langchain_core.documents import Document
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from transformers import pipeline
import torch
import os

# Set page config
st.set_page_config(
    page_title="Flykite Airlines HR Assistant",
    page_icon="✈️",
    layout="wide"
)

# Initialize session state
if 'vectorstore' not in st.session_state:
    st.session_state.vectorstore = None
if 'retriever' not in st.session_state:
    st.session_state.retriever = None
if 'qa_pipeline' not in st.session_state:
    st.session_state.qa_pipeline = None

# App title and description
st.title("✈️ Flykite Airlines HR Assistant")
st.markdown("""
This AI assistant helps answer HR-related questions based on Flykite Airlines' HR policies.
Ask questions about benefits, leave policies, harassment procedures, and more.
""")

# Sidebar configuration
st.sidebar.header("Configuration")
model_option = st.sidebar.selectbox(
    "Select Model Size",
    ["Qwen2-1.5B-Instruct", "Qwen2-0.5B-Instruct"],  # Smaller models for CPU
    index=0
)

# Configuration options
chunk_size = st.sidebar.slider("Chunk Size", 256, 1024, 512, step=128)
chunk_overlap = st.sidebar.slider("Chunk Overlap", 16, 128, 64, step=16)
k_retrieved = st.sidebar.slider("Number of Retrieved Documents", 1, 10, 6)

# Initialize or update RAG system
@st.cache_resource
def initialize_rag_system(_model_name, chunk_size, chunk_overlap, k_retrieved):
    """Initialize the RAG system with best configuration"""
    st.info("Loading RAG system...")

    # Load PDF
    pdf_file = "Dataset_Flykite_Airlines_HRP.pdf"  # Update with your PDF path
    if not os.path.exists(pdf_file):
        st.error(f"PDF file '{pdf_file}' not found!")
        return None, None, None

    pdf_loader = PyPDFLoader(pdf_file)

    # Create text splitter
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        encoding_name='cl100k_base',
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )

    # Load and split documents
    documents = pdf_loader.load_and_split(text_splitter)

    # Initialize embedding model
    embedding_model = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')

    # Create vectorstore
    vectorstore = Chroma.from_documents(
        documents,
        embedding_model,
        collection_name="flykite_hr_docs"
    )

    # Create retriever
    retriever = vectorstore.as_retriever(
        search_type='similarity',
        search_kwargs={'k': k_retrieved}
    )

    # Create text generation pipeline (more lightweight)
    from transformers import AutoTokenizer, AutoModelForCausalLM

    tokenizer = AutoTokenizer.from_pretrained(_model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        _model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )

    qa_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map="auto",
        torch_dtype=torch.float16
    )

    st.success("RAG system loaded successfully!")
    return vectorstore, retriever, qa_pipeline

# Function to get RAG response
def get_rag_response(question, retriever, qa_pipeline):
    """Get RAG response using retrieved documents and LLM"""
    try:
        # Retrieve relevant documents
        retrieved_docs = retriever.invoke(question)

        # Combine retrieved documents into context
        context = "\n\n".join([doc.page_content for doc in retrieved_docs])

        # Create prompt
        prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"

        # Generate response using pipeline
        result = qa_pipeline(
            prompt,
            max_new_tokens=256,
            temperature=0.1,
            do_sample=True,
            pad_token_id=qa_pipeline.tokenizer.eos_token_id
        )

        # Extract the answer
        response = result[0]['generated_text']
        answer = response[len(prompt):].strip()

        return answer, retrieved_docs
    except Exception as e:
        return f"Error processing request: {str(e)}", []

# Initialize RAG system (only when needed)
if st.session_state.vectorstore is None:
    model_name = f"Qwen/Qwen2-{model_option}"
    with st.spinner("Initializing RAG system... This may take a few minutes."):
        st.session_state.vectorstore, st.session_state.retriever, st.session_state.qa_pipeline = initialize_rag_system(
            model_name, chunk_size, chunk_overlap, k_retrieved
        )

# Main interface
col1, col2 = st.columns([3, 1])

with col1:
    st.subheader("Ask a Question")

    # Example questions
    example_questions = [
        "What are the effects on the benefits I receive if my probation is extended?",
        "There has been a demise in my family last night, and I need to attend the last rites. How should I inform the office, and will I be granted leave?",
        "What should I do if I notice suspected harassment with my female colleague?"
    ]

    st.markdown("**Try these example questions:**")
    for i, question in enumerate(example_questions):
        if st.button(f"Example {i+1}", key=f"example_{i}"):
            st.session_state.current_question = question

    # User input
    user_input = st.text_input("Enter your HR question:", key="user_input")

    if st.button("Ask", key="ask_button") or user_input:
        if user_input and st.session_state.retriever and st.session_state.qa_pipeline:
            with st.spinner("Thinking..."):
                answer, retrieved_docs = get_rag_response(
                    user_input,
                    st.session_state.retriever,
                    st.session_state.qa_pipeline
                )

                # Display results
                st.markdown(f"**Question:** {user_input}")
                st.markdown(f"**Answer:** {answer}")

                with st.expander("View Retrieved Documents"):
                    for j, doc in enumerate(retrieved_docs):
                        st.markdown(f"**Document {j+1}:**")
                        st.text_area("", value=doc.page_content[:500] + "..." if len(doc.page_content) > 500 else doc.page_content, height=150, key=f"doc_{j}")

with col2:
    st.subheader("Configuration")
    st.markdown(f"**Model:** {model_option}")
    st.markdown(f"**Chunk Size:** {chunk_size}")
    st.markdown(f"**Chunk Overlap:** {chunk_overlap}")
    st.markdown(f"**Retrieved Docs:** {k_retrieved}")

    st.divider()

    st.subheader("About This System")
    st.info("This RAG system uses:")
    st.markdown("- **Vector Database:** Chroma")
    st.markdown("- **Embeddings:** all-MiniLM-L6-v2")
    st.markdown("- **Model:** Qwen2")
    st.markdown("- **Search:** Similarity search")

    if st.session_state.vectorstore:
        st.success("✅ RAG System Ready!")
    else:
        st.warning("⚠️ RAG System not initialized")

# Footer
st.divider()
st.markdown("*Flykite Airlines HR Assistant - Powered by RAG & Qwen*")