In [None]:
import json
from openai import OpenAI
import time

import sqlalchemy as sa
from sqlalchemy import create_engine

from s2_openai_info import USERNAME, PASSWORD, CONN_STR, PORT, DATABASE, API_KEY, EMBEDDING_MODEL, GPT_MODEL

In [None]:
# OpenAI client used by the embedding and chat-completion helpers in later cells.
client = OpenAI(api_key=API_KEY)

# Build the database URL with URL.create instead of interpolating into a
# string: credentials containing URL-reserved characters ('@', ':', '/', '#')
# would otherwise produce a malformed or mis-parsed DSN. The values are
# expected in raw (not percent-encoded) form.
engine = create_engine(
    sa.engine.URL.create(
        'mysql+pymysql',
        username=USERNAME,
        password=PASSWORD,
        host=CONN_STR,
        port=int(PORT),
        database=DATABASE,
    )
)

# Kept open on purpose: the search helper in a later cell executes its query
# on this module-level connection.
conn = engine.connect()
print('Connected to SingleStore')

In [None]:
def get_embedding(text, model=EMBEDDING_MODEL):
    '''Return the OpenAI embedding of `text` serialized as a JSON array string.

    Args:
        text: The string to embed.
        model: Name of the embedding model to request; defaults to
            `EMBEDDING_MODEL`.

    Returns:
        The embedding encoded with `json.dumps`, or `None` when `text` is not
        a `str` (no API request is made in that case).
    '''
    # Non-string inputs are skipped rather than rejected; callers get None.
    if not isinstance(text, str):
        return None
    api_result = client.embeddings.create(input=[text], model=model)
    return json.dumps(api_result.data[0].embedding)

In [None]:
def search_wiki_page(query, limit=5):
    '''Fetch the rows of `vecs` that best match `query`, best match first.

    The query text is embedded with `get_embedding`, then compared against
    column `v` using the SQL `<*>` expression; rows are ordered by that score
    in descending order via the `ivfpq_nlist` index.

    Args:
        query: Text to search for.
        limit: Maximum number of rows to return.

    Returns:
        The list of rows produced by `fetchall()`, each carrying the
        `paragraph` and `similarity` columns selected by the statement.
    '''
    embedded_query = get_embedding(query)
    sql = sa.text(
        '''SELECT paragraph, v <*> :query_embedding :> vector(1536) AS similarity
        FROM vecs
        ORDER BY similarity USE INDEX (ivfpq_nlist) DESC
        LIMIT :limit;'''
    )
    bind_params = {"query_embedding": embedded_query, "limit": limit}

    print("Searching for matches...")
    # Only the execute() call is timed; fetching the rows happens afterwards.
    started = time.time()
    cursor = conn.execute(sql, bind_params)
    elapsed = time.time() - started
    print(f"Search complete in {elapsed} seconds.")

    return cursor.fetchall()

In [None]:
def ask_wiki_page(query, limit=5, temp=0.0):
    '''Answer `query` with the chat model, grounded on retrieved paragraphs.

    The top `limit` matches from `search_wiki_page` are embedded verbatim
    (as their Python repr) into the user prompt, followed by the question and
    instructions not to answer from information missing in the excerpts.

    Args:
        query: The question to answer.
        limit: Number of matching rows to retrieve and include in the prompt.
        temp: Sampling temperature passed to the chat completion request.

    Returns:
        The text content of the first choice in the chat completion response.
    '''
    excerpts = search_wiki_page(query, limit)
    print("Asking Chatbot...")
    user_prompt = f'''Excerpt from the conversation history: 
        {excerpts}
        Question: {query}
        
        Based on the conversation history, try to provide the most accurate answer to the question. 
        Consider the details mentioned in the conversation history to formulate a response that is as 
        helpful and precise as possible. please provide links to WIKIPEDIA ARTICLES TO LOOK AT FOR MORE INFORMATION.

        Most importantly, IF THE INFORMATION IS NOT PRESENT IN THE CONVERSATION HISTORY, DO NOT MAKE UP AN ANSWER.'''
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant who is answering questions about an article."},
        {"role": "user", "content": user_prompt},
    ]
    completion = client.chat.completions.create(
        model=GPT_MODEL,
        messages=chat_messages,
        temperature=temp,
    )
    return completion.choices[0].message.content

In [27]:
# Demo: run the full retrieve-then-answer pipeline on a sample question and
# print the model's reply (the search/chat progress messages print first).
print(
    ask_wiki_page(
        "Tell me about the Mario video game franchise and its history. What is it known for?"
    )
)

Searching for matches...
Search complete in 0.1532001495361328 seconds.
Asking Chatbot...
The Mario video game franchise is known for its iconic platform games and puzzle games. It was developed and published by Nintendo, starting with the arcade game Mario Bros. in 1983. The franchise was designed by Shigeru Miyamoto and Gunpei Yokoi. Mario Bros. was the first game produced by Intelligent Systems and originally began as a spin-off from the Donkey Kong series. 

One of the notable games in the franchise is Super Mario Bros., which was released for the Nintendo Entertainment System in 1985. It introduced elements such as spinning bonus coins, turtles that can be flipped onto their backs, and Luigi, which became staples of the series. The development of Super Mario Bros. showcased early specialization in the video game industry, made possible by the Famicom's hardware capabilities.

Another popular game in the franchise is Dr. Mario, a puzzle video game released in 1990. It was developed