In [17]:
# Environment setup: package installs for this notebook, left commented out.
# Uncomment the lines below to (re)install the dependencies.
# NOTE(review): every package is version-pinned except auto_gptq (last line) — consider pinning it too.
# !pip install -Uqqq pip --progress-bar off
# !pip install -qqq torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116 --progress-bar off
# !pip install -qqq transformers==4.35.2 --progress-bar off
# !pip install -qqq langchain==0.0.266 --progress-bar off
# !pip install -qqq openai==0.27.4 --progress-bar off
# !pip install -Uqqq watermark==2.3.1 --progress-bar off
# !pip install -Uqqq chromadb==0.4.5 --progress-bar off
# !pip install -Uqqq tiktoken==0.3.3 --progress-bar off
# !pip install -Uqqq youtube-transcript-api==0.5.0 --progress-bar off
# !pip install -Uqqq pytube==12.1.3 --progress-bar off
# !pip install -qqq sentence_transformers==2.2.2 --progress-bar off
# !pip install -qqq InstructorEmbedding==1.0.1  --progress-bar off
# !pip install -qqq xformers==0.0.20  --progress-bar off
# !pip install -Uqqq unstructured[local-inference]==0.5.12 --progress-bar off
# !pip install -Uqqq auto_gptq --progress-bar off

In [18]:
import os
import textwrap
import torch
import chromadb
import langchain
import openai
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader, UnstructuredPDFLoader, YoutubeLoader
from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.llms import OpenAI, HuggingFacePipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer, pipeline, logging, TextStreamer
from langchain.document_loaders.image import UnstructuredImageLoader

In [19]:
def print_response(response: str):
    """Print ``response`` re-flowed into lines of at most 100 characters.

    Args:
        response: The text to wrap and print.
    """
    wrapped = textwrap.fill(response, width=100)
    print(wrapped)

In [20]:
# Loader for the candidate's CV (PDF); the path is relative to the notebook's working directory.
pdf_loader = UnstructuredPDFLoader(
    file_path="../data/cv/CV _ Đinh Tuấn Nam_BE Developer(.NET)_HCM.pdf",
)

In [21]:
# Read the PDF into LangChain documents.
# NOTE(review): load_and_split() presumably already applies a default text splitter,
# and the result is split again by the explicit RecursiveCharacterTextSplitter later
# in the notebook — confirm whether plain load() was intended.
pdf_pages = pdf_loader.load_and_split()

In [22]:
# Inspect the loaded documents.
# NOTE(review): the rendered output includes personal data from the CV
# (phone number, e-mail, address) — clear or redact outputs before sharing.
pdf_pages

[Document(page_content='Dinh Tuan Nam B A C K E N D D E V E L O P E R\n\nPERSONAL DETAILS\n\n0799738634\n\nnamdinhtuan8@gmail.com\n\n26/09/2002\n\nHuynh Man Dat, District 5, TP Hồ Chí Minh\n\ngithub.com/dinhtuannam\n\nABOUT ME\n\nI am college student with a strong passion for technology and programming. My near goal is to become a junior backend developer. I have a good understanding of backend development using ASP.NET core , and I can also work effectively with the frontend development using React Framework with Typescript\n\nEDUCATION\n\nSaiGon University\n\nInformation Technology\n\n10/2020 - NOW\n\nSKILL\n\nIntermediate JWT .NET Core Database\n\nRedis RESTful\n\nSQL server\n\nPostgreSql\n\nC# ReactJS\n\nTypescript\n\nWORK EXPERIENCE\n\nBACK END DEVELOPER\n\n07/2023 - 02/2024\n\nSTVG Co.Ltd\n\nServerWater - Tech : ASP.NET Core , PostgreSQL , SSE - Role : Backend developer - Description: New installation, repair, replacement of water meters. Receive requests and assign tasks. Report

In [23]:
# Re-chunk the loaded documents into overlapping pieces for embedding;
# the last expression displays how many chunks were produced.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=512,
    chunk_size=2000,
)
texts = text_splitter.split_documents(pdf_pages)
len(texts)

2

In [24]:
# Embedding model used to vectorise the CV chunks; weights are cached under ../cache.
model_name = "hkunlp/instructor-large"

hf_embeddings = HuggingFaceInstructEmbeddings(
    cache_folder="../cache",
    model_name=model_name,
)  # optional extra argument: model_kwargs = {'device': 'cuda'}

load INSTRUCTOR_Transformer


max_seq_length  512


In [25]:
# Embed the chunks and index them in a Chroma vector store.
db = Chroma.from_documents(documents=texts, embedding=hf_embeddings)

In [26]:
# Checkpoint used as the answering LLM.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"
# model_name_or_path = "mistralai/Mistral-7B-v0.1"
# Only referenced by the commented-out AutoGPTQ loader below.
model_basename = "gpt_model-4bit-128g"


# Only referenced by the commented-out AutoGPTQ loader below.
use_triton = False

# `from_pretrained` takes `cache_dir`; the previous `cache_folder` keyword is not the
# cache argument, so the tokenizer was not stored in ../cache alongside the model.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True, cache_dir="../cache")

# Alternative loader via auto_gptq, kept for reference:
# model = AutoGPTQForCausalLM.from_quantized(model_name_or_path, cache_folder="../cache",
#         # model_basename=model_basename,
#         use_safetensors=True,
#         trust_remote_code=True,
#         device='cuda:0',
#         use_triton=use_triton,
#         quantize_config=None)

# Load the model onto the GPU, using the same cache directory as the tokenizer.
# NOTE(review): trust_remote_code=True executes code shipped with the checkpoint —
# confirm it is actually required for this model.
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             use_safetensors=True,
                                             device_map = 'cuda',
                                             trust_remote_code=True,
                                             cache_dir="../cache")

In [27]:
# Wrap the model in a text-generation pipeline that streams its output while
# generating, then expose that pipeline to LangChain as an LLM.
streamer = TextStreamer(tokenizer, skip_special_tokens=True, skip_prompt=True)
text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    streamer=streamer,
)
llm = HuggingFacePipeline(pipeline=text_pipeline)

In [28]:
def generate_prompt(prompt, sys_prompt):
    """Wrap a system prompt and a user prompt in ``[INST]`` / ``<<SYS>>`` markers.

    Args:
        prompt: The user-facing part of the prompt.
        sys_prompt: The system instruction placed between ``<<SYS>>`` and ``<</SYS>>``.

    Returns:
        A single string of the form
        ``[INST] <<SYS>> {sys_prompt} <</SYS>> {prompt} [/INST]``.
    """
    layout = "[INST] <<SYS>> {} <</SYS>> {} [/INST]"
    return layout.format(sys_prompt, prompt)

In [29]:
# Build the prompt template: a system instruction plus a user section that carries
# the {context} and {question} placeholders filled in by PromptTemplate.
sys_prompt = "Use following piece of context to answer the question in less than 30 words"
template = generate_prompt(
    prompt="""
    {context}

    Question : {question}
    """,
    sys_prompt=sys_prompt,
)

prompt = PromptTemplate(input_variables=["context", "question"], template=template)

In [30]:
# Retrieval-QA chain: fetch the top-2 chunks from the vector store and pass them,
# together with the question, through the custom prompt to the LLM.
chain_type_kwargs = dict(prompt=prompt)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [31]:
# Ask about the candidate's projects; the answer is printed as it is generated.
result = qa_chain({"query": "what projects candidate worked on ?"})



 Sure! Here's the answer in less than 30 words:

The candidate worked on two projects: ServerWater and ServerGIS, both using ASP.NET Core and PostgreSQL.


In [32]:
# Ask about the candidate's education.
result = qa_chain({"query": "where did candidate study?"})

 Sure! Here's the answer in less than 30 words:

Candidate studied at SaiGon University, majoring in Information Technology.


In [33]:
# Ask about the candidate's skills.
result = qa_chain({"query": "what skillset candidate has?"})

 Sure! Here is the answer to the question in less than 30 words:

Candidate has skills in ASP.NET Core, PostgreSQL, SSE, Redis, RESTful, SQL Server, C#, ReactJS, and Typescript.


In [34]:
# Sanity check: display the device the loaded model is on.
model.device

device(type='cuda', index=0)

In [35]:
# Ask for the candidate's contact details.
# NOTE(review): the answer contains personal data — clear outputs before sharing.
result = qa_chain({"query": "what's the contact detail of candidate?"})

 Sure! Here's the answer in less than 30 words:

Contact details: 0799738634 (phone number), namdinhtuan8@gmail.com (email address), Huynh Man Dat, District 5, TP Hồ Chí Minh (address).


In [36]:
# Ask for total work experience, including how it was calculated.
# NOTE(review): the system prompt caps answers at 30 words, which may leave no room
# for the requested calculation — verify the returned figure against the CV.
result = qa_chain(
    {"query": "how many years of work experience candidate has? Include internships and the way you calculate that"}
)

 Sure! Here's the answer in less than 30 words:

The candidate has 1 year and 6 months of work experience, including internships.


In [37]:
# Ask which companies the candidate worked for and under which job titles.
result = qa_chain({"query": "In which companies with what jobtitle did candidate work?"})

 Sure! Here is the answer to your question in less than 30 words:

Candidate worked at STVG Co.Ltd as a Backend Developer and at ServerWater and ServerGIS projects.
