In [None]:
# Install necessary packages (if not already installed)
!pip install bitsandbytes accelerate transformers sentence-transformers

In [None]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
import transformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline

In [None]:
# Summarization pipeline used to condense each retrieved document.
# - Built via `transformers.pipeline` instead of the bare `pipeline` name,
#   because a later cell rebinds `pipeline` to a text-generation pipeline
#   object; the qualified call keeps this cell safe to re-run.
# - The checkpoint is named explicitly so results do not silently change if
#   the library's default summarization model changes.
# - Runs on the first GPU when one is available, otherwise on CPU.
summarizer = transformers.pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=0 if torch.cuda.is_available() else -1,
)

In [None]:
# Load the chat model and its tokenizer.
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)

# 4-bit quantization settings, expressed with the dedicated config class
# rather than a raw dict. The compute dtype is set to float16 to match
# `torch_dtype` below, so quantized layers do not fall back to float32 math.
quantization_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# `trust_remote_code` is intentionally not enabled: it allows code shipped in
# the model repository to execute locally and is not required to load this
# architecture through AutoModelForCausalLM.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    quantization_config=quantization_config,
    low_cpu_mem_usage=True,
)

# Text-generation pipeline wrapping the already-loaded model.
# No `device` argument is passed: the quantized model is placed on its device
# while loading, and asking the pipeline to move it again is not supported for
# such models.
# NOTE: this rebinds the name `pipeline`, which the imports cell also binds to
# the `transformers.pipeline` factory function. The name is kept so existing
# references keep working; use `transformers.pipeline(...)` to build further
# pipelines after this cell has run.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

In [None]:
# Load the Sentence-BERT model used for semantic search.
# The device is chosen at runtime so this cell also works on machines without
# CUDA instead of raising when moving the model to 'cuda'.
sbert_device = 'cuda' if torch.cuda.is_available() else 'cpu'
sbert_model = SentenceTransformer('all-MiniLM-L6-v2', device=sbert_device)

In [None]:
# Load the CSV dataset and build the list of documents to search over.
csv_path = '/content/merged_file.csv'  # Update this with the path to your CSV file
df = pd.read_csv(csv_path)

if 'Merged' not in df.columns:
    raise KeyError(
        f"Expected a 'Merged' column in {csv_path}; found columns: {list(df.columns)}"
    )

# Missing cells are read as NaN (a float) and would reach the sentence encoder
# as non-string input, so drop them, coerce everything to str, and discard
# entries that are empty after stripping whitespace.
merged_text = df['Merged'].dropna().astype(str).str.strip()
corpus = merged_text[merged_text != ''].tolist()

if not corpus:
    raise ValueError(f"No non-empty documents found in column 'Merged' of {csv_path}")


In [None]:

# Embed every corpus document once with Sentence-BERT; the resulting tensor is
# what incoming queries are compared against.
corpus_embeddings = sbert_model.encode(
    corpus,
    convert_to_tensor=True,
)

In [None]:
def retrieve_documents(query, top_n=5):
    """Retrieve the most relevant corpus documents for ``query`` and summarize them.

    The query is embedded with ``sbert_model``, scored against the precomputed
    ``corpus_embeddings`` by cosine similarity, and each of the best-scoring
    documents is passed through ``summarizer``.

    Args:
        query: Text to search the corpus for.
        top_n: Maximum number of documents to return. Values larger than the
            number of scored documents are clamped; values <= 0 yield ``[]``.

    Returns:
        A list of summary strings, ordered from most to least similar.
    """
    query_embedding = sbert_model.encode(query, convert_to_tensor=True)
    cos_scores = util.pytorch_cos_sim(query_embedding, corpus_embeddings)[0]

    # torch.topk raises when k exceeds the number of scores, so clamp it to
    # what is actually available (small corpora, large top_n).
    k = min(top_n, cos_scores.shape[0])
    if k <= 0:
        return []

    # Convert to plain ints so the Python list is indexed with integers
    # rather than 0-dim tensors.
    top_indices = torch.topk(cos_scores, k=k).indices.tolist()

    # Retrieve and summarize documents
    summaries = []
    for index in top_indices:
        document = corpus[index]
        # truncation=True cuts documents that exceed the summarizer's maximum
        # input length instead of letting the call fail on long inputs.
        summary = summarizer(document, truncation=True)[0]['summary_text']
        summaries.append(summary)
    return summaries

In [None]:
def chat_with_model(max_prompt_tokens=1024, max_new_tokens=1000):
    """Run an interactive, retrieval-augmented chat loop on stdin/stdout.

    For every user message the loop retrieves summarized documents with
    ``retrieve_documents``, formats the conversation with the tokenizer's chat
    template, samples a reply from ``model`` and prints it. Typing ``exit`` or
    ``quit`` (or sending EOF / interrupting the prompt) ends the conversation.

    Args:
        max_prompt_tokens: Token budget for the prompt. When the formatted
            conversation exceeds it, the oldest user/assistant exchange is
            dropped from the history until it fits or only the system message
            and the current turn remain.
        max_new_tokens: Maximum number of tokens to generate per reply.

    Returns:
        None.
    """
    print("You can start chatting now. Type 'exit' or 'quit' to end the conversation.")
    messages = [
        {"role": "system", "content": "You are an expert in all fields"}
    ]

    # Fall back to EOS for padding when the tokenizer defines no pad token.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    while True:
        try:
            user_input = input("User: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt ends the chat cleanly.
            break

        if user_input.strip().lower() in ["exit", "quit"]:
            break
        if not user_input.strip():
            # Nothing to retrieve or answer for a blank line.
            continue

        # Retrieve and summarize documents
        retrieved_summaries = retrieve_documents(user_input)

        # Concatenate retrieved summaries into a single context
        context = ' '.join(retrieved_summaries)

        # The retrieved context is attached to the current turn only; the
        # stored history keeps the user's original text so old contexts do not
        # accumulate in later prompts.
        current_turn = {
            "role": "user",
            "content": f"Context: {context}\n\n{user_input}",
        }

        # Format with the model's chat template. If the prompt is over budget,
        # drop the oldest user/assistant pair (indices 1-2, right after the
        # system message) rather than truncating the newest text.
        while True:
            inputs = tokenizer.apply_chat_template(
                messages + [current_turn],
                add_generation_prompt=True,
                return_tensors="pt",
                return_dict=True,
            )
            prompt_length = inputs["input_ids"].shape[-1]
            if prompt_length <= max_prompt_tokens or len(messages) <= 1:
                break
            del messages[1:3]

        # Move inputs to the same device as the model
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Generate response
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
            pad_token_id=pad_token_id,
        )

        # The generated sequence starts with the prompt tokens; decode only
        # what comes after them so the reply (and the stored history) does not
        # contain a copy of the whole prompt.
        response = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()
        print(f"Assistant: {response}")

        # Record the completed exchange in the conversation history
        messages.append({"role": "user", "content": user_input})
        messages.append({"role": "assistant", "content": response})


In [None]:
# Start the interactive chat loop. This call blocks, reading user messages
# from the input prompt until 'exit' or 'quit' is entered.
chat_with_model()