In [1]:
import json
from openai import OpenAI
import time

import sqlalchemy as sa
from sqlalchemy import create_engine

from s2_openai_info import USERNAME, PASSWORD, CONN_STR, PORT, DATABASE, API_KEY, EMBEDDING_MODEL, GPT_MODEL

In [2]:
# OpenAI client used for the embedding and chat-completion requests in this notebook.
client = OpenAI(api_key=API_KEY)

# Build the database URL from its components instead of interpolating them
# into a string: URL.create() escapes every field, so credentials containing
# characters such as '@', ':' or '/' no longer produce a malformed URL.
engine = create_engine(
    sa.engine.URL.create(
        drivername='mysql+pymysql',
        username=USERNAME,
        password=PASSWORD,
        host=CONN_STR,
        port=PORT,
        database=DATABASE,
    )
)
# Module-level connection shared by the query code in the cells below.
conn = engine.connect()
print('Connected to SingleStore')

Connected to SingleStore


In [3]:
def get_embedding(text, model=EMBEDDING_MODEL):
    '''Return the OpenAI embedding of `text`, serialized as a JSON string.

    `model` selects the embedding model. Input that is not a `str` is not
    sent to the API; the function returns None for it.
    '''
    if not isinstance(text, str):
        return None
    api_response = client.embeddings.create(model=model, input=[text])
    # Only one input was sent, so the first data entry holds its vector.
    return json.dumps(api_response.data[0].embedding)

In [5]:
def search_wiki_page(query, limit=5):
    '''Return the stored paragraphs most similar to `query`.

    Args:
        query: Search phrase or question; it is embedded with `get_embedding`.
        limit: Maximum number of rows to return.

    Returns:
        A list of result rows (the output of `fetchall()`), each with a
        `paragraph` and a `similarity` column, ordered by descending
        similarity. Note that this is a list of rows, not a DataFrame.

    Raises:
        TypeError: If no embedding could be produced for `query` because it
            is not a string.
    '''
    query_embedding_vec = get_embedding(query)
    if query_embedding_vec is None:
        # Without this guard the None would be bound as SQL NULL and the
        # query would come back with meaningless "matches".
        raise TypeError(
            f"query must be a string, got {type(query).__name__}"
        )
    # The embedding is a JSON string; `:> vector(1536)` casts it to the
    # vector type inside the query. Values are passed as bound parameters,
    # so the SQL text itself is a plain (non-f) string literal.
    statement = sa.text(
        '''SELECT paragraph, v <*> :query_embedding :> vector(1536) AS similarity
        FROM vecs
        ORDER BY similarity USE INDEX (ivfpq) DESC
        LIMIT :limit;'''
    )
    print("Searching for matches...")
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    results = conn.execute(statement, {"query_embedding": query_embedding_vec, "limit": limit})
    execution_time = time.perf_counter() - start_time
    print(f"Search complete in {execution_time} seconds.")
    return results.fetchall()

In [6]:
def ask_wiki_page(query, limit=5, temp=0.0):
    '''Answer `query` using retrieval-augmented generation.

    Up to `limit` matches from `search_wiki_page` are placed in the prompt
    together with the question, the prompt is sent to the chat model with
    sampling temperature `temp`, and the text of the first reply is returned.
    '''
    matches = search_wiki_page(query, limit)
    print("Asking Chatbot...")
    # The prompt is sent to the model verbatim, indentation included.
    prompt = f'''Excerpt from the conversation history: 
        {matches}
        Question: {query}
        
        Based on the conversation history, try to provide the most accurate answer to the question. 
        Consider the details mentioned in the conversation history to formulate a response that is as 
        helpful and precise as possible. please provide links to WIKIPEDIA ARTICLES TO LOOK AT FOR MORE INFORMATION.

        Most importantly, IF THE INFORMATION IS NOT PRESENT IN THE CONVERSATION HISTORY, DO NOT MAKE UP AN ANSWER.'''
    system_message = {
        "role": "system",
        "content": "You are a helpful assistant who is answering questions about an article.",
    }
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=GPT_MODEL,
        messages=[system_message, user_message],
        temperature=temp,
    )
    return completion.choices[0].message.content

In [8]:
# Example question; the 15 closest paragraphs are supplied to the model as context.
print(
    ask_wiki_page(
        "Tell me about the Mario video game franchise and its history. What is it known for?",
        limit=15,
    )
)

Searching for matches...
Search complete in 0.18797683715820312 seconds.
Asking Chatbot...
The Mario video game franchise is known for its iconic platforming games featuring the character Mario, created by Nintendo. The franchise began with the release of the arcade game "Mario Bros." in 1983, designed by Shigeru Miyamoto and Gunpei Yokoi. It was followed by the critically acclaimed "Super Mario RPG: Legend of the Seven Stars," which inspired other role-playing games in the series such as "Paper Mario" and "Mario & Luigi." Another notable entry is "Super Mario Bros. 3," regarded as one of the greatest video games of all time. The franchise has expanded to include various spin-offs and adaptations, including animated television series and films. For more detailed information, you can refer to the Wikipedia article on the Mario franchise: [Mario Franchise - Wikipedia](https://en.wikipedia.org/wiki/Mario_franchise)
