# GPT-3.5-Turbo Model
Creating a question answering chatbot using GPT-3.5. Adapted from: https://github.com/openai/openai-cookbook/blob/main/examples/Question_answering_using_embeddings.ipynb

In [223]:
# Preamble
import PyPDF2 # For parsing PDF documents
import ast  # covert embeddings saved as strings back to arrays
import openai  # OpenAI API
import pandas as pd  # for storing text and embeddings data
import numpy as np # for df manipulations
import tiktoken  # for counting tokens
from scipy import spatial  # for calculating vector similarities for search
import wikipedia # For sourcing Wikipedia article text
import re  # for cutting <ref> links out of Wikipedia articles
import mwparserfromhell  # for splitting Wikipedia articles into sections
from copy import deepcopy # for copying dataframes
import torch # for BERT's argmax and tensors
from transformers import BertForQuestionAnswering # Not used
from transformers import BertTokenizer # For BERT's tokeniser

In [250]:
# Config
GPT_EMBEDDING_MODEL = "text-embedding-ada-002"
BERT_EMBEDDING_MODEL = 'bert-base-nli-mean-tokens'
GPT_MODEL = "gpt-3.5-turbo"
BERT_MODEL = "deepset/bert-base-cased-squad2"
GPT_KNOWLEDGE_FILENAME = "CompVisionGPT.csv"
BERT_KNOWLEDGE_FILENAME = "CompVisionBERT.csv"
bert_model = BertForQuestionAnswering.from_pretrained(BERT_MODEL)
BERT_ENCODING = BertTokenizer.from_pretrained(BERT_MODEL)
GPT_ENCODING = tiktoken.encoding_for_model(GPT_MODEL)
BATCH_SIZE = 1000  # you can submit up to 2048 embedding inputs per request
GPT_MAX_TOKENS = 1600 # max number of tokens per section
BERT_MAX_TOKENS = 510 # max tokens per section
MIN_LENGTH = 50 # min character length for each section
ANSWER_NOT_FOUND_MSG = "I could not find an answer in the text I\'ve been provided, sorry! Please try again."
WIKI_PAGE = "Computer vision"
SECTIONS_TO_IGNORE = [
    "See also",
    "References",
    "External links",
    "Further reading",
    "Footnotes",
    "Bibliography",
    "Sources",
    "Citations",
    "Literature",
    "Footnotes",
    "Notes and references",
    "Photo gallery",
    "Works cited",
    "Photos",
    "Gallery",
    "Notes",
    "References and sources",
    "References and notes",
]

In [13]:
# creating a pdf reader instance
reader = PyPDF2.PdfReader('assets/online_notes.pdf')

# print the number of pages in pdf file
print(len(reader.pages))

# print the text of the first page
print(reader.pages[5].extract_text())

115
2.Veryfewvisual taskscanbesuccessfully performed inapurely data-driv en
way(\bottom-up" image analysis). Consider thenextimage example:
aged bytheirtextured backgrounds; thefoxes
occlude eachother; theyappearinseveraldieren tposesandperspective
angles; etc.Howcanthere possibly existmathematical operators forsuch
animage thatcan:
perform thegure-ground segmen tation ofthescene (intoitsobjects
andbackground)
inferthe3Darrangemen tsofobjectsfromtheirmutual occlusions
infersurface properties (texture, colour) fromthe2Dimage statistics
infervolumetric objectproperties fromtheir2Dimage projections
anddoallofthisin\real time?" (This matters quite alotinthe
natural world\redintoothandclaw,"sincesurviv aldependsonit.)
5


In [251]:
# Used throughout
def num_tokens(
        text: str,
        token_model = GPT_ENCODING
) -> int:
    """Returns the number of tokens in a string."""
    if token_model == GPT_ENCODING:
        return len(token_model.encode(text))
    elif token_model == BERT_ENCODING:
        return len(token_model.tokenize(text))

from sentence_transformers import SentenceTransformer
def get_embedding(content: list or str, embedding_model: str = GPT_EMBEDDING_MODEL):
    if embedding_model == GPT_EMBEDDING_MODEL:
        return openai.Embedding.create(input=content, model=embedding_model)
    else:
        similarity_model = SentenceTransformer(embedding_model)
        return similarity_model.encode(content)

In [None]:
class Knowledge:
    def __init__(self, topic, model):
        self.topic: str = topic
        self.model: str = model
        self.token_model = self.get_token_model()
        self.embedding_model: str = self.get_embedding_model()
        self.df: pd.DataFrame = self.get_blank_knowledge_df() # need to add code to remove small sections (<16 chars?)
        self.max_tokens: int = self.get_max_tokens() # max number of tokens per section
        self.min_section_length = MIN_LENGTH # min character length for each section

    def get_token_model(self):
        return GPT_ENCODING if self.model=='GPT' else BERT_ENCODING

    def get_max_tokens(self):
        return GPT_MAX_TOKENS if self.model=='GPT' else BERT_MAX_TOKENS

    def get_embedding_model(self):
        return GPT_EMBEDDING_MODEL if self.model=='GPT' else BERT_EMBEDDING_MODEL

    def get_blank_knowledge_df(self) -> pd.DataFrame:
        return pd.DataFrame(columns=['Source', 'Heading', 'Subheading', 'Content'])

    def extract_wiki_sections(self,
                              page_name: str,
                              content: mwparserfromhell.wikicode.Wikicode,
                              sections_to_ignore: list = SECTIONS_TO_IGNORE
                              ) -> pd.DataFrame:
        """Creates a df of sections by extracting section content from a Wikicode"""

        knowledge = self.get_blank_knowledge_df()
        for section in content.get_sections(levels=[2]):
            section_headings = section.filter_headings()
            section_header = str(section_headings[0])
            if len(section_headings)==1:# therefore a section title, not a subsection
                section = section.strip(section_header)
                if section_header.strip("=" + " ") not in sections_to_ignore: # append to df
                    new_row = {'Source': f'Wikipedia ({page_name})', 'Heading': section_header.strip("=" + " "), 'Content': section}
                    knowledge = pd.concat([knowledge, pd.DataFrame.from_records([new_row])])
            elif len(section_headings)>1 and section_header.strip("=" + " ") not in sections_to_ignore: # therefore subsections
                # Append the text before the first subsection
                initial_text = section.split(str(section_headings[1]))[0]
                initial_text = initial_text.strip(section_header)
                new_row = {'Source': f'Wikipedia ({page_name})', 'Heading': section_header.strip("=" + " "), 'Content': initial_text}
                knowledge = pd.concat([knowledge, pd.DataFrame.from_records([new_row])])
                for subsection in section.get_sections(levels=[3]):
                    subsection_sections = subsection.get_sections(levels=[3])[0]
                    subsection_headings = subsection_sections.filter_headings()
                    subsection_header = str(subsection_headings[0])
                    subsection = subsection.strip(subsection_header)
                    if subsection_header.strip("=" + " ") not in sections_to_ignore: # append to df
                        new_row = {'Source': f'Wikipedia ({page_name})', 'Heading': section_header.strip("=" + " "), 'Subheading': subsection_header.strip("=" + " "), 'Content': subsection}
                        knowledge = pd.concat([knowledge, pd.DataFrame.from_records([new_row])])
        return knowledge

    def generate_source_column(self, df: pd.DataFrame) -> pd.DataFrame:
        """Creates a new column in the df which contains a summary of the source location"""

        df.fillna('', inplace=True)
        df['Section'] = df['Source'] + '->' + df['Heading'] + '->' + df['Subheading']
        df['Section'] = df['Section'].str.replace('->->', '')
        df['Section'] = df['Section'].str.rstrip('_->')
        return df

    def clean_section_contents(self, df: pd.DataFrame) -> pd.DataFrame:
        """Returns a cleaned up section with <ref>xyz</ref> patterns and leading/trailing whitespace removed"""

        # text = re.sub(r"<ref.*?</ref>", "", text)
        df['Content'] = df['Content'].str.replace(r"<ref.*?</ref>", "", regex=True)
        df['Content'] = df['Content'].str.strip() # removes whitespace
        df['Content'] = '\n' + df['Content'] # need to add the \n back to the start of each title
        return df

    def merge_elements_of_list(self, list_of_strings: list, delimiter: str = "\n"):
        potential_for_more_merging = False
        merged_list = []
        skip_item = False
        for i in range(len(list_of_strings)):
            if not skip_item:
                if i == len(list_of_strings)-1:
                    merged_list.append(list_of_strings[i])
                else:
                    merged_strings = list_of_strings[i] + delimiter + list_of_strings[i+1]
                    if num_tokens(merged_strings)<self.max_tokens:
                        merged_list.append(merged_strings)
                        skip_item = True # make it skip the element we just merged
                        potential_for_more_merging = True
                    else:
                        merged_list.append(list_of_strings[i])
            else:
                skip_item = False # set the default back to False unless otherwise specified
        return merged_list, potential_for_more_merging

    def force_split_string(self,
                           string: str,
                           encoding = GPT_ENCODING) -> list:
        """Force a section to be split into 2 (to be used if it has no delimiter)"""

        list_of_strings = []
        if num_tokens(string) <= self.max_tokens:
            return [string]
        else:
            needs_truncating = True
            while needs_truncating:
                encoded_string = encoding.encode(string)
                truncated_string = encoding.decode(encoded_string[:self.max_tokens])
                remainder_of_string = encoding.decode(encoded_string[self.max_tokens:])
                list_of_strings.append(truncated_string)
                string = remainder_of_string
                if num_tokens(remainder_of_string)<self.max_tokens:
                    needs_truncating=False
                    list_of_strings.append(remainder_of_string)
        return list_of_strings

    def split_long_sections(self, df: pd.DataFrame, delimiter: str = '\n'):
        """Splits long sections of text into smaller ones"""

        new_dict_of_shorter_sections = self.get_blank_knowledge_df().to_dict('records')
        df_as_dict = df.to_dict('records')
        for section in df_as_dict:
            # for delimiter in delimiters:
            if section['Tokens']<=self.max_tokens:
                new_dict_of_shorter_sections.append(section)
            else:
                # needs to be split up
                if delimiter == '': # meaning that we just need to truncate it.
                    text = self.force_split_string(section['Content'])
                else:
                    text = section['Content'].split(delimiter)
                    if delimiter == '. ':
                        for i in range(len(text)-1):
                            text[i] += delimiter
                potential_for_more_merging = True
                i = 0
                while potential_for_more_merging:
                    if i>20:
                        break
                    else:
                        text, potential_for_more_merging = self.merge_elements_of_list(text)

                # The sections should be merged into acceptable sizes:
                if len(text)>1:
                    for string in text:
                        item_to_append = {'Source': section['Source'], 'Heading': section['Heading'], 'Subheading': section['Subheading'], 'Content': string, 'Section': section['Section'], 'Tokens': num_tokens(string)}

                        new_dict_of_shorter_sections.append(item_to_append)
                else:
                    item_to_append = {'Source': section['Source'], 'Heading': section['Heading'], 'Subheading': section['Subheading'], 'Content': text[0], 'Section': section['Section'], 'Tokens': num_tokens(text[0])}
                    new_dict_of_shorter_sections.append(item_to_append) # we shouldn't have this because the text should be more than the acceptable number of tokens
        return pd.DataFrame(new_dict_of_shorter_sections)

    def append_wikipedia_page(self, page_name: str,
                              sections_to_ignore: list = SECTIONS_TO_IGNORE):
        """Takes a wikipedia page and appends the sections to the knowledge df"""

        site = wikipedia.page(page_name, auto_suggest=False)
        text = site.content
        parsed_text = mwparserfromhell.parse(text)

        # Creating initial df and appending the introduction paragraph (the text up to the first heading)
        intro = str(parsed_text).split(str(parsed_text.filter_headings()[0]))[0]
        knowledge = self.get_blank_knowledge_df()
        new_row = {'Source': f'Wikipedia ({page_name})', 'Content': '\n'+intro}
        knowledge = pd.concat([knowledge, pd.DataFrame.from_records([new_row])])

        section_content = self.extract_wiki_sections(page_name=page_name, content=parsed_text, sections_to_ignore=sections_to_ignore)
        knowledge = pd.concat([knowledge, section_content])

        # Generate succinct heading information
        knowledge = self.generate_source_column(knowledge)
        self.df = pd.concat([self.df, knowledge])

        # Remove unwanted strings and whitespace
        self.df = self.clean_section_contents(self.df)

        # Generate number of tokens in each section
        self.df['Tokens'] = self.df["Content"].apply(lambda x: num_tokens(x, token_model=self.token_model))

        # Split long sections
        for delim in ["\n\n", "\n", ". ", '']:
            self.df = self.split_long_sections(self.df, delimiter=delim)

        # Remove short sections
        self.df = self.df.loc[self.df['Content'].str.len()>self.min_section_length]

        # Append '\n' to the start if it doesn't already have one
        self.df.loc[~self.df['Content'].str.startswith('\n'), 'Content'] = '\n' + self.df.loc[~self.df['Content'].str.startswith('\n'), 'Content']

        # Get embeddings
        if self.model == 'GPT':
            response = get_embedding(list(self.df['Content']), embedding_model=self.embedding_model)
            for i, be in enumerate(response["data"]):
                assert i == be["index"]  # double check embeddings are in same order as input
            batch_embeddings = [e["embedding"] for e in response["data"]]
            CompVisionKnowledge.df['Embedding'] = batch_embeddings
        else:
            CompVisionKnowledge.df['Embedding'] = get_embedding(list(self.df['Content']), embedding_model=self.embedding_model).tolist()

    def export_to_csv(self, filename):
        """Saves the knowledge df to a CSV file"""

        self.df.to_csv('assets/' + filename, index=False)

CompVisionKnowledgeBERT = Knowledge(WIKI_PAGE, 'BERT')
CompVisionKnowledgeBERT.append_wikipedia_page(WIKI_PAGE)

In [253]:
CompVisionKnowledge = Knowledge(WIKI_PAGE, 'GPT')
CompVisionKnowledge.append_wikipedia_page(WIKI_PAGE)
# save document chunks and embeddings
CompVisionKnowledge.df.to_csv(GPT_KNOWLEDGE_FILENAME, index=False)
CompVisionKnowledge.df

Unnamed: 0,Source,Heading,Subheading,Content,Section,Tokens,Embedding
0,Wikipedia (Computer vision),,,\nComputer vision tasks include methods for ac...,Wikipedia (Computer vision),286,"[-0.01913553662598133, 0.002932898933067918, 0..."
1,Wikipedia (Computer vision),Definition,,\nComputer vision is an interdisciplinary fiel...,Wikipedia (Computer vision)->Definition,158,"[-0.021093836054205894, 0.0049119978211820126,..."
2,Wikipedia (Computer vision),History,,"\nIn the late 1960s, computer vision began at ...",Wikipedia (Computer vision)->History,507,"[-0.011549791321158409, -0.004044382367283106,..."
4,Wikipedia (Computer vision),Related fields,Solid-state physics,\nSolid-state physics is another field that is...,Wikipedia (Computer vision)->Related fields->S...,120,"[0.0018743288237601519, 0.011324070394039154, ..."
5,Wikipedia (Computer vision),Related fields,Neurobiology,\nNeurobiology has greatly influenced the deve...,Wikipedia (Computer vision)->Related fields->N...,293,"[-0.009132628329098225, 0.0011366719845682383,..."
6,Wikipedia (Computer vision),Related fields,Signal processing,\nYet another field related to computer vision...,Wikipedia (Computer vision)->Related fields->S...,103,"[-0.027298789471387863, 0.007510432507842779, ..."
7,Wikipedia (Computer vision),Related fields,Robotic navigation,\nRobot navigation sometimes deals with autono...,Wikipedia (Computer vision)->Related fields->R...,64,"[0.0034529592376202345, -0.014102335087954998,..."
8,Wikipedia (Computer vision),Related fields,Other fields,\nBesides the above-mentioned views on compute...,Wikipedia (Computer vision)->Related fields->O...,119,"[0.002435609931126237, -0.003915637265890837, ..."
9,Wikipedia (Computer vision),Related fields,Distinctions,\nThe fields most closely related to computer ...,Wikipedia (Computer vision)->Related fields->D...,639,"[-0.017207426950335503, 0.005905073136091232, ..."
10,Wikipedia (Computer vision),Applications,,\nApplications range from tasks such as indust...,Wikipedia (Computer vision)->Applications,272,"[-0.022458024322986603, 0.005672922823578119, ..."


In [254]:
CompVisionKnowledgeBERT = Knowledge(WIKI_PAGE, 'BERT')
CompVisionKnowledgeBERT.append_wikipedia_page(WIKI_PAGE)
# save document chunks and embeddings
CompVisionKnowledge.df.to_csv(BERT_KNOWLEDGE_FILENAME, index=False)
CompVisionKnowledgeBERT.df

ValueError: Length of values (27) does not match length of index (23)

# Search
Now we'll define a search function that:

Takes a user query and a dataframe with text & embedding columns
Embeds the user query with the OpenAI API
Uses distance between query embedding and text embeddings to rank the texts
Returns two lists:
The top N texts, ranked by relevance
Their corresponding relevance scores

In [238]:
class ChatBot:
    def __init__(self, chatbot_topic:str, knowledge_path: str):
        self.knowledge = None
        self.load_data(knowledge_path)
        self.chatbot_topic = chatbot_topic

    def load_data(self, path: str):
        """Loads the knowledge df, appends a prefix, and calculates the number of tokens per section of knowledge"""

        # load data from csv
        self.knowledge = pd.read_csv(path)
        # convert embeddings from CSV str type back to list type
        self.knowledge['Embedding'] = self.knowledge['Embedding'].apply(ast.literal_eval)

        # Format the knowledge df by adding section prefix and token sizes
        # self.knowledge['Content'] = 'Article section:\n\n' + self.knowledge['Content']
        # self.knowledge['Tokens'] = self.knowledge["text"].apply(lambda x: num_tokens(x))
        # self.knowledge['Section'] = 'Wikipedia'

In [243]:
class Query:
    def __init__(self, query_text: str, chatbot_instance: ChatBot):
        self.content: str = query_text
        self.model: str = GPT_MODEL
        self.knowledge: pd.DataFrame = chatbot_instance.knowledge
        self.token_limit: int = 4096 - 500 # Allows 500 for the response
        self.gpt_message = None
        self.knowledge_used = None

    # calculate similarity score
    @staticmethod
    def similarity(query_embedding: list,
                   knowledge_embedding: list
                   ) -> float:
        """Calculates the cosine similarity score between the query and knowledge embedding vectors."""

        return 1- spatial.distance.cosine(query_embedding, knowledge_embedding)

    # find the most similar sections of knowledge to the query
    def knowledge_ranked_by_similarity(self,
                                       max_num_sections: int = 5,
                                       embedding_model: str = GPT_EMBEDDING_MODEL
                                       ):
        """Take the raw knowledge dataframe, calculates similarity scores between the query and the sections, and returns a dataframe ordered from highest to lowest in terms of similarity."""

        knowledge_with_similarities = deepcopy(self.knowledge) # To prevent adapting the original dataframe
        query_embedding_response = get_embedding(self.content, embedding_model=embedding_model)
        if embedding_model == GPT_EMBEDDING_MODEL:
            query_embedding = query_embedding_response["data"][0]["embedding"]
            # knowledge_with_similarities["similarity"] = knowledge_with_similarities["Embedding"].apply(lambda x: self.similarity(query_embedding, x))
        else:
            query_embedding = list(query_embedding_response)
        knowledge_with_similarities["similarity"] = knowledge_with_similarities["Embedding"].apply(lambda x: self.similarity(query_embedding, x))

        knowledge_with_similarities.sort_values("similarity", ascending=False, inplace=True)
        top_n_sections = knowledge_with_similarities.head(max_num_sections)
        self.knowledge_used = top_n_sections

    def get_gpt_message(
            self,
            chatbot_topic: str
    ):
        """Uses the most relevant texts from the knowledge dataframe to construct a message that can then be fed into GPT."""

        self.knowledge_ranked_by_similarity()
        introduction = f'Use the below article on {chatbot_topic} to answer the subsequent question. If the answer cannot be found in the articles, write "{ANSWER_NOT_FOUND_MSG}". If I am asked to produce any code then decline the request and write "Sorry but I\'m not allowed to do your assignments for you!"' # The longer this is, the more tokens it uses!
        question = f"\n\nQuestion: {self.content}"

        # Ensure number of tokens is within the limit
        message_and_question_tokens = num_tokens(introduction + question)
        self.knowledge_used['Cumulative_tokens'] = self.knowledge_used['Tokens'].cumsum()
        self.knowledge_used['Cumulative_tokens'] += message_and_question_tokens # add the inital number of tokens
        self.knowledge_used= self.knowledge_used.loc[self.knowledge_used['Cumulative_tokens']<self.token_limit]

        # Construct output
        combined_knowledge_string = ''.join(list(self.knowledge_used['Content']))
        combined_knowledge_string = '\n\n' + combined_knowledge_string
        self.gpt_message = introduction + combined_knowledge_string + question

    def get_bert_output(
            self,
            embedding_model: str,
            model: BertTokenizer = BERT_ENCODING
    ):
        """Uses the most relevant texts from the knowledge dataframe to construct a message that can then be fed into GPT."""
        self.knowledge_ranked_by_similarity(embedding_model=embedding_model)

        found_answer = False
        output = ANSWER_NOT_FOUND_MSG
        for section in self.knowledge_used['Content']:
            if not found_answer:
                encoding = model.encode_plus(text=self.content,text_pair=section)
                inputs = encoding['input_ids']  #Token embeddings
                sentence_embedding = encoding['token_type_ids']  #Segment embeddings
                tokens = model.convert_ids_to_tokens(inputs) #input tokens

                outputs = model(input_ids=torch.tensor([inputs]), token_type_ids=torch.tensor([sentence_embedding]))
                start_scores, end_scores = outputs.start_logits, outputs.end_logits

                # Highlight the answer just by looking at the most probable start and end words
                start_index = torch.argmax(start_scores)
                end_index = torch.argmax(end_scores)
                answer_token_list = tokens[start_index:end_index+1]

                # Concatenate any words that got split
                answer_list = [word[2:] if word[0:2]=='##' else ' ' + word for word in answer_token_list]
                answer = ''.join(answer_list).strip()

                if answer != '[CLS]':
                    found_answer = True
                    output = answer
        return output

    @classmethod
    def ask_bert(cls,
                 query_text: str,
                 chatbot_instance: ChatBot,
                 embedding_model: str = BERT_EMBEDDING_MODEL,
                 model: str = BERT_ENCODING
                 ):
        query = cls(query_text, chatbot_instance)
        output = query.get_bert_output(embedding_model=embedding_model, model=model)
        return output

    @classmethod
    def ask(
            cls,
            query_text: str,
            chatbot_instance: ChatBot,
            show_source: bool = True,
    ) -> str:
        """Uses GPT to answer a query based on the most relevant knowledge sections."""

        query = cls(query_text, chatbot_instance)
        query.get_gpt_message(chatbot_instance.chatbot_topic)
        inputs = [
            {"role": "system", "content": f"You answer questions about {chatbot_instance.chatbot_topic}."},
            {"role": "user", "content": query.gpt_message},
        ]
        response = openai.ChatCompletion.create(
            model=query.model,
            messages=inputs,
            temperature=0 # We don't want any creativity in the answers
        )
        response_message = response["choices"][0]["message"]["content"]
        total_tokens_used = response['usage']['total_tokens']
        if show_source and response_message!=ANSWER_NOT_FOUND_MSG: # Display the sources used:
            query.knowledge_used['Index'] = np.arange(len(query.knowledge_used))+1
            query.knowledge_used['Output'] = '\n\n' + query.knowledge_used['Index'].astype(str) + '. ' + query.knowledge_used['Section'] + ':' + query.knowledge_used['Content'].str[:100] + '...'
            sources_string = ''.join(list(query.knowledge_used['Output']))
            response_message += f'\n\nTo construct this answer, I used the following documents: {sources_string}'
        response_message += f"\n\nTotal tokens used: {total_tokens_used}"
        return response_message

CompVisionGPT = ChatBot("Computer Vision", SAVE_PATH)
print(Query.ask_bert('When did universities begin teaching Computer Vision?', CompVisionGPT))
print(Query.ask('When did universities begin teaching Computer Vision?', CompVisionGPT, show_source=True))

# Todo:
# I need to make it more efficient on the number of tokens.
# Adapt it for more sources (e.g. PDF)

Token indices sequence length is longer than the specified maximum sequence length for this model (546 > 512). Running this sequence through the model will result in indexing errors


ValueError: You need to specify either `text` or `text_target`.

In [192]:
print(Query.ask_bert('When did universities begin teaching Computer Vision?', CompVisionGPT))

ValueError: Input vector should be 1-D.

In [7]:
print(Query.ask('Who is Boris Johnson', CompVisionGPT, show_source=True))

I could not find an answer in the text I've been provided, sorry! Please try again.

Total tokens used: 1460
