In [None]:
# Installing dependencies
!pip install langchain
!pip install langchain-community
!pip install torch
!pip install transformers
!pip install huggingface-hub
!pip install sentence_transformers
!pip install faiss-cpu
!pip install pypdf
!pip install kagglehub
!pip install -U langchain-huggingface bitsandbytes accelerate
!pip install autoawq
#Using Streamlit Interface : Created a simple web interface
!pip install streamlit 
!npm install localtunnel

In [77]:
# Langchain with Qwen-0.5B model on Kaggle
# importing dependencies
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFDirectoryLoader
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import kagglehub
import torch
torch.cuda.empty_cache()
# Load PDF files
loader = PyPDFDirectoryLoader('/kaggle/input/resumepdf')
data = loader.load()

# Dividing into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
splits = splitter.split_documents(documents=data)

# Creating embeddings
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

# Create vector stores of chunks by converting it to embeddings
vector_stores = FAISS.from_documents(splits, embedding=embeddings)

# Download and load the different Qwen model and tokenizer
#model_name = kagglehub.model_download("qwen-lm/qwq-32b/transformers/qwq-32b")
model_name = "/kaggle/input/qwen2.5/transformers/0.5b/1"
#model_name = "/kaggle/input/qwen2.5/transformers/3b/1"
#model_name = "/kaggle/input/qwen2.5/transformers/3b-instruct/1"
#model_name = "/kaggle/input/qwen2.5/transformers/1.5b-instruct/1"
#model_name = kagglehub.model_download("qwen-lm/qwq-32b/transformers/qwq-32b-awq")


model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Create a text generation pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    #depending on maxlength the response may differ- more the token more the context
    max_length=2000,
    num_return_sequences=1,
    do_sample=True,
    temperature=0.01,
    top_p=1
)

# Wrap the pipeline with HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=generator)

#Retrieve relevant documents
retriever = vector_stores.as_retriever(search_kwargs={"k": 2})
query = ":summarize in one paragraph"
docs = retriever.get_relevant_documents(query)

#Combine context into dense format
context = " ".join(doc.page_content.replace("\n", " ").strip() for doc in docs)

#Prepare custom prompt
custom_prompt = f"""
You are a helpful assistant. Given the following context, please answer the question concisely.
Context:{context}
Question: {query}
Answer: """

# Generate the answer
response = generator(custom_prompt, return_full_text=False)[0]["generated_text"]

print(f"context: {context}")
print("--------------------------------")
print(f"question: {query}")
print(f"answer: {response.strip()}")

Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Both `max_new_tokens` (=2048) and `max_length`(=2000) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


context: cycles.  Software  Intern                                                                                                                 Biratnagar,  Nepal   AIT  Technology                                                                                                                          Dec  2020  –  Feb  2021   ●  Completed  a  Bachelor's  thesis  as  part  of  academic  research.  ●  Received  training  in  Python,  JavaScript,  and  web  technologies.  ICT  Trainer         Biratnagar,  Nepal  Biratnagar  Metropolitan  Dec  2018  –  May  2019   ●  Organize  and  clean  collected  data  for  distribution  of  Social  Security  Fund.   ●  Provided  a  3-month  ICT  training  program  for  metropolitan  and  ward  employees.   EDUCATION Santosh  Premi  Adhikari   Wurzburg,  Germany  |  Mob.  015754394063   |   Email |  LinkedIn |  Website  |  Github  An  enthusiastic  software  developer  with  a  background  in  React.js,  Node.js,  and  Python,  who  is  constantly  d

In [91]:
%%writefile app.py
#Using Streamlit Interface : Created a simple web interface
# Importing dependencies
import os
import logging
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFDirectoryLoader
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import kagglehub
import streamlit as st

# Set up logging
logging.basicConfig(level=logging.INFO)

# Load PDF files
def load_pdfs(directory):
    try:
        loader = PyPDFDirectoryLoader(directory)
        data = loader.load()
        logging.info(f"Loaded {len(data)} documents.")
        return data
    except Exception as e:
        logging.error(f"Failed to load PDFs: {e}")
        return []

# Dividing into chunks
def split_documents(documents):
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    splits = splitter.split_documents(documents=documents)
    logging.info(f"Split into {len(splits)} chunks.")
    return splits

# Creating embeddings
def create_embeddings(splits):
    embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
    vector_stores = FAISS.from_documents(splits, embedding=embeddings)
    logging.info("Created vector stores.")
    return vector_stores

# Download and load the Qwen-2.5B model and tokenizer
def load_model():
    try:
        model_name = "/kaggle/input/qwen2.5/transformers/0.5b/1"
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype="auto",
            device_map="auto"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        logging.info("Model and tokenizer loaded successfully.")
        return model, tokenizer
    except Exception as e:
        logging.error(f"Failed to load model and tokenizer: {e}")
        return None, None

# Create a text generation pipeline
def create_pipeline(model, tokenizer):
    try:
        generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_length=268,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.01,
            top_p=1
        )
        logging.info("Text generation pipeline created.")
        return generator
    except Exception as e:
        logging.error(f"Failed to create pipeline: {e}")
        return None

# Wrap the pipeline with HuggingFacePipeline
def wrap_pipeline(generator):
    llm = HuggingFacePipeline(pipeline=generator)
    logging.info("Pipeline wrapped with HuggingFacePipeline.")
    return llm

# Create the QA chain
def create_qa_chain(llm, vector_stores):
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vector_stores.as_retriever(search_kwargs={"k": 2}))
    logging.info("QA chain created.")
    return qa

# Run the query
def run_query(qa, query):
    try:
        result = qa.run(query)
        logging.info(f"Query processed: {query}")
        return result
    except Exception as e:
        logging.error(f"Failed to process query: {e}")
        return str(e)

# Streamlit app
def main():
    st.title("PDF Q/A with Qwen-2.5B")
    
    # File upload
    uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)
    
    if uploaded_files:
        # Save uploaded files to a temporary directory
        temp_dir = "/tmp/pdfs"
        os.makedirs(temp_dir, exist_ok=True)
        for file in uploaded_files:
            file_path = os.path.join(temp_dir, file.name)
            with open(file_path, "wb") as f:
                f.write(file.getbuffer())
        
        # Load PDFs
        documents = load_pdfs(temp_dir)
        if not documents:
            st.error("Failed to load PDFs.")
            return
        
        # Split documents
        splits = split_documents(documents)
        if not splits:
            st.error("Failed to split documents.")
            return
        
        # Create embeddings
        vector_stores = create_embeddings(splits)
        if not vector_stores:
            st.error("Failed to create embeddings.")
            return
        
        # Load model and tokenizer
        model, tokenizer = load_model()
        if not model or not tokenizer:
            st.error("Failed to load model and tokenizer.")
            return
        
        # Create pipeline
        generator = create_pipeline(model, tokenizer)
        if not generator:
            st.error("Failed to create pipeline.")
            return
        
        # Wrap pipeline
        llm = wrap_pipeline(generator)
        if not llm:
            st.error("Failed to wrap pipeline.")
            return
        
        # Create QA chain
        qa = create_qa_chain(llm, vector_stores)
        if not qa:
            st.error("Failed to create QA chain.")
            return
        
        # Query input
        query = st.text_input("Enter your query:")
        if st.button("Submit"):
            if query:
                result = run_query(qa, query)
                st.success("Answer:")
                st.markdown(result)
            else:
                st.warning("Please enter a query.")

if __name__ == "__main__":
    main()

Overwriting app.py


In [92]:
print('Use this as a tunnel password:') 
!curl ipv4.icanhazip.com


Use this as a tunnel password:
34.41.103.80


In [None]:
!streamlit run app.py &>./logs.txt & npx localtunnel --port 8501

# --browser.gatherUsageStats false --server.port 8501 --server.address 0.0.0.0

[1G[0K⠙[1G[0Kyour url is: https://long-tools-shake.loca.lt
