<a href="https://colab.research.google.com/github/kellyshreeve/QA-Chatbot/blob/main/Copy_of_QA_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Question Answer Models**

[Llama 2](#llama)

## **Import Packages and Data**

In [None]:
! pip install transformers
! pip install sentence_transformers
! pip install accelerate
! pip install langchain
! pip install ctransformers
! pip install faiss-gpu
! pip install chromadb

Collecting transformers
  Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m37.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m59.9 MB/s[0m eta [36m0:00:00[0m
Col

In [6]:
import time
from tqdm.auto import tqdm
import pickle
import accelerate
import json
import textwrap

import math
import numpy as np
import pandas as pd

from sklearn.metrics.pairwise import cosine_similarity

from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords as nltk_stopwords

import torch
import transformers
from transformers import BertForQuestionAnswering, BertTokenizer
from transformers import LlamaForCausalLM, LlamaTokenizer, AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
from sentence_transformers import SentenceTransformer

from langchain import HuggingFacePipeline, PromptTemplate, LLMChain
from langchain.memory import ConversationBufferMemory

from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader

import modules.config

In [7]:
from huggingface_hub import login

# `import modules.config` binds only the top-level name `modules`, so the token
# has to be reached through the full dotted path; a bare `config` is undefined
# on a fresh kernel.
login(token=modules.config.hugging_face_token)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /Users/kellyshreeve/.cache/huggingface/token
Login successful


In [None]:
start = time.time()

# Try the local copy of the dataset first; when that file cannot be opened
# (e.g. when running on Colab), mount Google Drive and read the copy stored there.
try:
  df_QA = pd.read_csv('/Users/kellyshreeve/Desktop/Data-Sets/Externship/qa_merged_clean.csv',
                    parse_dates=True)
except OSError:
  # Only a missing/unreadable local file triggers the fallback, so parse
  # errors and keyboard interrupts are no longer silently swallowed.
  from google.colab import drive
  drive.mount('/content/drive')
  df_QA = pd.read_csv('/content/drive/MyDrive/Externship/qa_merged_clean.csv')

end = time.time()

print(f'Computation Time: {end - start:.2f}')

Mounted at /content/drive
Computation Time: 18.50


In [None]:
# NOTE(review): this repeats the Google Drive read from the loading cell above
# and presumably only works once /content/drive has been mounted; confirm
# whether this cell is still needed.
df_QA = pd.read_csv('/content/drive/MyDrive/Externship/qa_merged_clean.csv')

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts).
df_QA.info()

In [None]:
# Report how many missing values each column contains.
print(df_QA.isnull().sum())

In [None]:
# Swap the current index for a fresh 0..n-1 index and discard the old one.
df_QA = df_QA.reset_index(drop=True)

## **QA Models**

### Create Context

In [None]:
# Build one long context string: take the 'body_with_sentences_a' column for
# index labels 0 through 100000 (`.loc` label slices include both ends), drop
# missing entries, and join what remains with single spaces.
context_short = " ".join(
    df_QA.loc[0:100000, 'body_with_sentences_a'].dropna()
)

In [None]:
# Show how many characters the joined context string contains.
print(f'Context Length: {len(context_short)} characters')

Context Length: 583419461 characters


In [None]:
# Write the context text to Google Drive so it can be reloaded from the file later.
# NOTE(review): `context_shortest` is not defined in any cell above (only
# `context_short` is), so this raises NameError on a fresh kernel. It looks like
# a smaller context built in a since-removed cell; restore that definition or
# confirm which variable should be written here.
with open('/content/drive/MyDrive/Externship/context_short.txt', 'w') as file:
   file.write(context_shortest)

### Llama 2 Generative Chatbot from Context with Memory

In [None]:
start = time.time()

model_id ='TheBloke/Llama-2-7B-chat-GGML'

# Generation settings must be passed through `config`: the LangChain
# CTransformers wrapper forwards only that dict to ctransformers, so the same
# values given as top-level keyword arguments never reach the model.
# NOTE(review): max_new_tokens=512 takes most of the 700-token context_length;
# confirm the retrieved context plus prompt still fits, or raise context_length.
llm = CTransformers(
  model=model_id,
  model_type='llama',
  config={'max_new_tokens': 512,
          'temperature': 0.5,
          'repetition_penalty': 1.1,  # was misspelled `repitition_penalty`
          'top_k': 5,
          'context_length': 700}
  )

# Load the context text that was saved to Drive.
with open('/content/drive/MyDrive/Externship/context_short.txt') as f:
    text = f.read()

# Split the context into non-overlapping chunks of at most 400 characters.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=0)
texts = text_splitter.create_documents([text])

# Embed on the GPU when one is available; otherwise fall back to the CPU so the
# cell also runs on machines without CUDA.
embedding_device = 'cuda' if torch.cuda.is_available() else 'cpu'
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',
                                   model_kwargs={'device': embedding_device})

# Index the chunk embeddings for similarity search.
vectorstore = FAISS.from_documents(texts, embeddings)

custom_prompt_template='''Use the following pieces of information to answer the users question.
If you don't know the answer, please just say you don't know the answer. Don't make up an answer.
You are a helpful assistant, you always only answer for the assistant, then you stop.

Context:{context}
History:{history}
question:{question}

Only returns the helpful answer below and nothing else.
Helpful answer
'''

prompt = PromptTemplate(template=custom_prompt_template, input_variables=['history', 'context', 'question'])

retriever = vectorstore.as_retriever()

# 'stuff' chain: retrieved chunks are inserted into {context}; the buffer
# memory fills {history} and records each incoming {question}.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever = retriever,
    chain_type_kwargs={'prompt':prompt,
                       "memory": ConversationBufferMemory(
                                memory_key="history",
                                input_key="question")}
)

end = time.time()

print(f'Computation Time: {end - start:.2f}')

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading (…)ab11864b/config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading (…)chat.ggmlv3.q2_K.bin:   0%|          | 0.00/2.87G [00:00<?, ?B/s]

Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

Computation Time: 31.17


In [None]:
# 7B-chat Model, top_k = 5, context_shortest.txt
# NOTE(review): the chain-building cell reads context_short.txt; confirm which
# context file this run actually used.
start = time.time()

# Send one question through the retrieval QA chain and keep the returned text.
response = qa_chain.run('what is python?')

end = time.time()

print(response)
print()
# Wall-clock seconds spent in the qa_chain.run call above.
print(f'Computation Time: {end - start:.2f}')

Python is a high-level, interpreted programming language with dynamic semantics, created by Guido van Rijn in 1991 at the University of Copenhagen.  It is known for its ease of use, flexibility, and a large and active development community. Python is often used as an all-purpose language, easily scaling up to meet the demands of larger projects, while still being easy to use for simple scripting tasks.

Computation Time: 390.86
