<a href="https://colab.research.google.com/github/tiaratatyanadewi/UNGP_Chatbot/blob/main/REVISED_INTEGRATED_RAG_GEN_AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Workaround to avoid the following error in the notebook:
#   NotImplementedError: A UTF-8 locale is required. Got ANSI_X3.4-1968
# The preferred encoding reported by `locale` is forced to UTF-8.
# The replacement keeps the optional `do_setlocale` parameter of the real
# function, so callers that use `locale.getpreferredencoding(False)` do not
# fail with a TypeError.
import locale
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

In [None]:
# Attach Google Drive to this Colab runtime under /content/drive
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Huggingface libraries to run LLM.
!pip install -q -U transformers==4.40.2
!pip install -q -U accelerate==0.30.1
!pip install -q -U bitsandbytes==0.43.1
!pip install -q -U huggingface_hub==0.23.0

#LangChain related libraries
!pip install -q -U langchain==0.1.2

#Open-source pure-python PDF library capable of splitting, merging, cropping,
#and transforming the pages of PDF files
!pip install -q -U pypdf==4.2.0

#Python framework for state-of-the-art sentence, text and image embeddings.
!pip install -q -U sentence-transformers==2.7.0

# FAISS Vector Databses specific Libraries
!pip install -q -U faiss-gpu==1.7.2

In [None]:
#from typing import List
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, BitsAndBytesConfig
import torch
from langchain.llms import HuggingFacePipeline

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain


# Select the compute device: the GPU when CUDA is available, the CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print("Device:", device)
if device == 'cuda':
    # Show the name of the first CUDA device
    print(torch.cuda.get_device_name(0))

Device: cuda
Tesla T4


In [None]:
import google.generativeai as genai
from getpass import getpass

# Set up your API key.
# getpass() is used instead of input() so the key is not echoed into the cell
# output, where it would be saved (and shared) together with the notebook.
api_key = getpass("Please enter your Google Generative AI API key: ")
genai.configure(api_key=api_key)

In [None]:
import pandas as pd
import os
from langchain_community.document_loaders.csv_loader import CSVLoader

# Folder that contains the CSV files
folder_path = '/content/drive/MyDrive/UNGP'

all_data = []

# Iterate over the files in the folder and load every CSV file.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the order of the loaded documents identical from run to run. The
# extension check is case-insensitive so files named "*.CSV" are not skipped.
for file_name in sorted(os.listdir(folder_path)):
    if file_name.lower().endswith('.csv'):
        file_path = os.path.join(folder_path, file_name)
        loader = CSVLoader(file_path=file_path)
        data = loader.load()
        all_data.extend(data)

# Fail here with a clear message instead of later, when the empty document
# list reaches the splitting / indexing cells.
if not all_data:
    raise ValueError(f"No CSV data could be loaded from {folder_path!r}")

In [None]:
# Search function to find relevant data in the CSV files
def search_csv(query, data):
    """Return the rows of ``data`` that contain ``query`` (case-insensitive).

    Args:
        query: Text to look for; matched as a case-insensitive substring.
        data: Iterable of rows. A row may be a mapping (every value is
            searched) or a document-like object exposing a ``page_content``
            attribute (its text is searched). NOTE(review): the latter is
            presumably what the CSV loader in this notebook returns — confirm.

    Returns:
        list: The matching rows, in their original order.
    """
    needle = query.lower()

    def _searchable_values(row):
        # Document-like rows have no .values(); search their text instead.
        if hasattr(row, "page_content"):
            return [row.page_content]
        # Mapping rows (e.g. dicts): search every value.
        if callable(getattr(row, "values", None)):
            return list(row.values())
        # Anything else: fall back to the row's string form.
        return [row]

    return [
        row for row in data
        if any(needle in str(value).lower() for value in _searchable_values(row))
    ]

# Split the loaded CSV documents with a character splitter
# (chunk size 1000, no overlap between consecutive chunks)
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 0
text_splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
chunked_docs = text_splitter.split_documents(all_data)


In [None]:
import os
import pandas as pd
import google.generativeai as genai
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import torch

In [None]:
# Embed the chunks with a sentence-transformers model and index them in FAISS
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
db = FAISS.from_documents(documents=chunked_docs, embedding=embeddings)

# Expose the index as a similarity retriever that returns 4 chunks per query
retriever_options = dict(search_type="similarity", search_kwargs={'k': 4})
retriever = db.as_retriever(**retriever_options)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from getpass import getpass

# Load the Mistral model
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# getpass() keeps the token out of the cell output; input() would echo it and
# the token would be saved together with the notebook. An empty answer is
# normalised to None so that no (invalid) empty token is sent.
hf_token = getpass("Please enter your Hugging Face token: ").strip() or None
model_path = "filipealmeida/Mistral-7B-Instruct-v0.1-sharded"

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    # NOTE(review): trust_remote_code=True allows code shipped in the model
    # repository to be executed; confirm it is really needed for this checkpoint.
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto",
    token=hf_token,  # `token` replaces the deprecated `use_auth_token` argument
)
tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token)




Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]



In [None]:
# Decoding settings handed to the text-generation pipeline
generation_settings = dict(
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,  # the EOS token id doubles as the padding id
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=300,
    temperature=0.3,
    do_sample=True,
)

text_generation_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_settings,
)

# Wrap the pipeline in LangChain's HuggingFacePipeline for use in the QA chain
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [None]:
# Build the conversational retrieval chain from the Mistral LLM and the FAISS
# retriever; the retrieved source documents are returned with each answer.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=mistral_llm,
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
# Function to generate a response
def generate_response(query, data):
    """Answer ``query`` with Gemini, using rows of ``data`` that match it as context.

    Args:
        query: The user's question.
        data: Rows to search; handed to ``search_csv`` together with ``query``.

    Returns:
        The ``text`` of the Gemini reply.
    """
    hits = search_csv(query, data)

    if not hits:
        prompt = f"The user asked: '{query}', but no relevant information was found in the data."
    else:
        # One line per matching row, using each row's string form
        matched_text = "\n".join(map(str, hits))
        prompt = f"The user asked: '{query}'. Here is the related information from the CSV data:\n\n{matched_text}\n\nPlease provide a summary or answer."

    # A new chat session with an empty history is opened on every call
    gemini = genai.GenerativeModel(model_name="gemini-1.5-flash")
    reply = gemini.start_chat(history=[]).send_message(prompt)
    return reply.text

In [None]:
!pip install gradio



In [None]:
import gradio as gr

# Gradio interface function
def chatbot_interface(query):
    """Answer a user question for the Gradio interface.

    The question is sent to the retrieval QA chain first. When the chain
    returns a non-empty answer, that answer is returned followed by the source
    files of the retrieved documents. Otherwise the question is passed to
    ``generate_response`` together with all loaded CSV documents.

    Args:
        query: Question text entered by the user.

    Returns:
        str: The text to display in the interface.
    """
    # Nothing to answer: skip the (expensive) model calls entirely.
    if not query or not query.strip():
        return "Please enter a question."

    # Every request is answered on its own: an empty chat history is passed.
    result = qa_chain.invoke({'question': query, 'chat_history': []})

    answer = result.get('answer')
    if answer and answer.strip():
        # Several retrieved chunks can come from the same file, so each source
        # is listed once, in retrieval order. A missing 'source' entry no
        # longer raises KeyError.
        sources = list(dict.fromkeys(
            str(doc.metadata.get('source', 'unknown'))
            for doc in result.get('source_documents', [])
        ))
        source_files = ', '.join(sources)
        return f"{answer}\n\nSource files: {source_files}"

    # Fall back to the keyword search over ALL loaded documents. The global
    # `data` only holds the documents of the last CSV file read by the loading
    # loop (and is undefined when no CSV file was found).
    return generate_response(query, all_data)

# Assemble the Gradio UI: a single text box as input, plain text as output
question_box = gr.Textbox(label="Input")

iface = gr.Interface(
    fn=chatbot_interface,
    inputs=question_box,
    outputs="text",
    title="UNGP DEMO CHATBOT",
    description="Ask questions about the data and get responses using the Gemini API and Mistral model.",
)

# Start the web app
iface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://06aabd00c47a180c73.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


