In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from google.colab import userdata
HF_TOKEN = userdata.get('HF_Phi2')

print(f"HuggingFace Token Loaded.")

HuggingFace Token Loaded.


# Integrating FAISS Index with LangChain for RAG
***

In [None]:
!pip install langchain langchain-community langchain-huggingface faiss-cpu sentence-transformers transformers torch

In [4]:
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.docstore.document import Document
from langchain.docstore.in_memory import InMemoryDocstore
import faiss
import numpy as np
import pickle

In [5]:
# Load FAISS Index
faiss_index_path = '/content/drive/MyDrive/GenAI-CSA/data/embedded/faiss_index_mpnet.index'
faiss_index = faiss.read_index(faiss_index_path)

# Load Embeddings
path_to_embeddings = '/content/drive/MyDrive/GenAI-CSA/data/embedded/embeddings_mpnet.pkl'
with open(path_to_embeddings, 'rb') as file:
    embeddings = pickle.load(file)

print(f"Loaded FAISS index with {faiss_index.ntotal} embeddings.")

Loaded FAISS index with 501606 embeddings.


### Rebuilding LangChain FAISS Object:

In [6]:
# Extract Documents
documents = embeddings['documents']

# Convert into LangChain Document Objects
langchain_docs = [Document(page_content=doc['page_content'], metadata=doc['metadata']) for doc in documents]

# Create Docstore & Mapping for LangChain
docstore = InMemoryDocstore({str(i): doc for i, doc in enumerate(langchain_docs)})
index_to_docstore_id = {i: str(i) for i in range(len(langchain_docs))}

# Embedding Model
embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/paraphrase-mpnet-base-v2')

# Reconstruct LangChain FAISS Object
faiss_store = FAISS(
    embedding_function=embedding_model,
    index=faiss_index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id
)

print(f"LangChain FAISS store ready with {faiss_store.index.ntotal} embeddings.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/594 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

LangChain FAISS store ready with 501606 embeddings.


### Test Semantic Search w/ LangChain:

In [7]:
# Test Query
query = "Most popular offer."
retrieved_docs = faiss_store.similarity_search(query, k=5)


for i, doc in enumerate(retrieved_docs, 1):
    print(f"Document {i}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")

Document 1:
Content: user in multan with a postpaid plan. currently subscribed to offer: offer 49 (offer id: o046). usage details: data browsing allowance of 4990mb, social data allowance of 1813mb, 681 sms, 181 on-net minutes, and 185 off-net minutes. recent transaction on 2024-06-08 11:20:14 with amount charged: 19 units. resource type: data (value: 15). customer support ticket (id: t40586) logged on 2024-01-19 17:15:03 under category: billing. issue description: issue reported under billing category. resolution provided on 2024-01-22 01:15:03: resolved with detailed explanation for billing category.
Metadata: {'city': 'Multan', 'user_type': 'Postpaid', 'offer': 'Offer 49', 'data_allowance': 4990, 'sms_allowance': 681, 'voice_on_net': 181, 'voice_off_net': 185, 'data_social_allowance': 1813, 'amount': 244, 'resource_type': 'Data', 'category': 'Billing'}
Document 2:
Content: user in multan with a postpaid plan. currently subscribed to offer: offer 49 (offer id: o046). usage details: d

In [9]:
# Save LangChain Compatible FAISS Object
faiss_store.save_local('/content/drive/MyDrive/GenAI-CSA/data/embedded/langchain_faiss_index')

# Integrating Phi-2 in RAG Pipeline
***

In [23]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load Phi-2 Model
model_name = "microsoft/phi-2"
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using device: {device}")

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
torch_dtype = torch.float16 if device == "cuda" else torch.float32

print("Tokenizer loaded successfully.")

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch_dtype,
    device_map="auto"
).to(device)

print(f"Model Loaded Successfully on {device}!")

Using device: cuda
Tokenizer loaded successfully.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model Loaded Successfully on cuda!


In [32]:
from langchain.llms.base import LLM
from pydantic import BaseModel, Field
from typing import Optional, List, Any
import torch

class Phi2LLM(LLM, BaseModel):
    model: Any = Field(...)
    tokenizer: Any = Field(...)
    max_length: int = Field(default=512)
    temperature: float = Field(default=0.7)

    @property
    def _llm_type(self) -> str:
        return "phi-2"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        inputs = self.tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=self.max_length,
            padding=True
        )
        input_ids = inputs["input_ids"].to(self.model.device)
        attention_mask = inputs["attention_mask"].to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=64,
                do_sample=True,
                temperature=self.temperature,
                pad_token_id=self.tokenizer.eos_token_id,
                num_return_sequences=1,
                top_p=0.9,
                top_k=50
            )

        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        if stop:
            for token in stop:
                response = response.split(token)[0]

        return response.strip()

# Instantiate Custom Wrapper
phi2_llm = Phi2LLM(model=model, tokenizer=tokenizer)

In [44]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import ConversationalRetrievalChain

# PromptTemplate Implementation for LangChain
engineered_prompt = """
You are an advanced AI-powered customer support agent for a leading telecommunications provider.
Your primary responsibility is to deliver accurate, personalized, and data-driven responses to customer inquiries.
Leverage retrieval-augmented generation (RAG) pipelines and LangChain workflows to combine user-specific data with external knowledge for comprehensive answers.

**Data Context Includes:**
- User Metadata: City, User Type (Prepaid/Postpaid), Offer Name, Data Browsing Allowance, SMS Allowance,
  Voice On-Net and Off-Net Allowances, Social Data Allowance, Amount, Resource Type, Category.
- Usage and Consumption Patterns: Insights from historical consumption data and call detail records (CDRs).
- Purchase History: Offer usage, purchase frequency, and relevant transaction details.
- Support Tickets: Past issues and resolutions for context-aware troubleshooting.

---

**Response Guidelines:**
1. **Strict Data Dependency:** Only answer queries based on available and retrieved context.
   - If data is insufficient, respond with: *"Insufficient data to provide a complete response."*

2. **Analytical Reasoning:** Aggregate and analyze context before responding.
   - Example: For questions like *"Which city has the highest data consumption?"*, aggregate and rank by total usage per city.
   - For behavioral insights, summarize user segments based on consumption patterns.

3. **Recommendation Logic:** Suggest telecom plans or offers only if data indicates relevance.
   - Tailor recommendations to match consumption trends, user preferences, and segmentation profiles.
   - Provide justification for every suggestion based on explicit data points.

4. **Structured JSON Response:** Format responses in the following JSON structure:
{{
    "Answer": "Direct and precise response derived from data and RAG context.",
    "SupportingData": "Key data points and reasoning steps leading to the answer.",
    "Recommendations": "Tailored recommendations (if applicable) supported by contextual analysis."
}}

5. **No Hallucinations:** Never infer information not present in the context.
   - It is acceptable to return *"No data available"* if context retrieval fails.

6. **Precision & Relevance:** Ensure responses are concise, contextually relevant, and personalized.
   - Avoid generic statements; all responses must fit the telecommunications context.

7. **Advanced Reasoning (Phi-2 Adaptation):** Utilize Phi-2’s reasoning capabilities for:
   - Complex pattern analysis (e.g., identifying high-value customers based on multi-factor criteria).
   - Generating actionable insights using RAG-enhanced context retrieval.

---

**Query:** {query}

**Retrieved Context:**
{context}

**Final Response (JSON Structured):**
"""

prompt_template = PromptTemplate(
    input_variables=["query", "context"],
    template=engineered_prompt
)


# Wrapping Phi-2 LLM w/ Prompt
permanent_prompt_chain = LLMChain(
    llm=phi2_llm,
    prompt=prompt_template
)

# Construct RAG Pipeline
rag_pipeline = ConversationalRetrievalChain.from_llm(
    llm=permanent_prompt_chain,
    retriever=faiss_store.as_retriever(search_kwargs={"k": 10}),
    return_source_documents=True
)

### Testing LangChain & RAG Pipeline Implementation:

In [47]:
query = "Which city has the highest number of postpaid customers?"
retrieved_docs = faiss_store.similarity_search(query, k=10)
retrieved_context = "\n".join([doc.page_content for doc in retrieved_docs])

response = permanent_prompt_chain.invoke({
    "query": query,
    "context": retrieved_context
})


print("Generated Response:\n", response)

Generated Response:
 {'query': 'Which city has the highest number of postpaid customers?', 'context': 'user in islamabad with a postpaid plan. currently subscribed to offer: offer 45 (offer id: o013). usage details: data browsing allowance of 2288mb, social data allowance of 1736mb, 268 sms, 289 on-net minutes, and 75 off-net minutes. recent transaction on 2024-05-07 02:33:59 with amount charged: 12 units. resource type: data (value: 43). customer support ticket (id: t45773) logged on 2024-06-01 18:30:27 under category: billing. issue description: issue reported under billing category. resolution provided on 2024-06-01 22:30:27: resolved with detailed explanation for billing category.\nuser in islamabad with a postpaid plan. currently subscribed to offer: offer 45 (offer id: o013). usage details: data browsing allowance of 2288mb, social data allowance of 1736mb, 268 sms, 289 on-net minutes, and 75 off-net minutes. recent transaction on 2024-01-08 21:05:24 with amount charged: 0 units.