In [None]:
import concurrent.futures
import os
from typing import List, Optional

import openai

In [None]:
def set_model_key():
    """Configure the module-level ``openai`` settings for Azure OpenAI.

    The API key is read from the ``AZURE_OPENAI_API_KEY`` environment variable
    so that no secret is stored in the notebook. The endpoint and API version
    can be overridden with ``AZURE_OPENAI_ENDPOINT`` and ``OPENAI_API_VERSION``;
    when unset they fall back to the values this notebook has always used.

    Raises:
        RuntimeError: if ``AZURE_OPENAI_API_KEY`` is missing or empty.
    """
    # NOTE(review): a key was previously committed in this cell; treat that key
    # as leaked and rotate it.
    api_key = os.environ.get("AZURE_OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "AZURE_OPENAI_API_KEY is not set; export it before calling set_model_key()."
        )

    openai.api_type = "azure"
    openai.api_base = os.environ.get(
        "AZURE_OPENAI_ENDPOINT", "https://shu-audrey-joey.openai.azure.com/"
    )
    openai.api_version = os.environ.get("OPENAI_API_VERSION", "2023-07-01-preview")
    openai.api_key = api_key

# Example of DuckDB UDF and List

In [1]:
import duckdb
from typing import List 

def my_function(x: List) -> str:
    """Return the text form of a list, for use as a DuckDB scalar UDF.

    The ``-> str`` annotation is what the UDF registration uses for the SQL
    return type (the rendered result column is ``varchar``), so the value is
    converted explicitly instead of handing back the list itself.

    Args:
        x: The list received from SQL.

    Returns:
        ``str(x)``, e.g. ``"[32, 42]"`` for ``[32, 42]``.
    """
    return str(x)

# Register the Python function with DuckDB so SQL can call it as "my_func".
duckdb.create_function("my_func", my_function)
sample_list = [32, 42]
# The list's text form "[32, 42]" is interpolated into the SQL string, where
# DuckDB reads it as a list literal (the result header shows list_value(32, 42)).
duckdb.sql(f"SELECT my_func({sample_list})")

┌──────────────────────────────────┐
│ my_func(main.list_value(32, 42)) │
│             varchar              │
├──────────────────────────────────┤
│ [32, 42]                         │
└──────────────────────────────────┘

In [2]:
import duckdb

# Open a throwaway in-memory database. Using the connection as a context
# manager closes it even if one of the statements below raises.
with duckdb.connect(database=':memory:', read_only=False) as conn:
    # Create a small sample table to stand in for real data.
    conn.execute("CREATE TABLE example_table (example_column INT)")
    conn.execute("INSERT INTO example_table VALUES (1), (2), (3), (4), (5)")

    # fetchall() returns one tuple per row, so keep the first (only) element
    # of each tuple to get the column as a flat Python list.
    query_result = conn.execute("SELECT example_column FROM example_table").fetchall()
    example_list = [row[0] for row in query_result]

# The column data is now available as a plain Python list.
print(example_list)

[1, 2, 3, 4, 5]


# Read Dataset

In [9]:
# Path to the DuckDB database file, relative to the notebook's working directory.
duckdb_db_path = 'chinook.duckdb'
# Open a connection to that file; `conn` is reused by the next cell.
conn = duckdb.connect(duckdb_db_path)
# Select every row of the Albums table and fetch the result as a DataFrame via .df().
query = "SELECT * FROM Albums;"
df = conn.execute(query).df()
# Preview only the first rows instead of printing the whole table.
print(df.head())

   album_id                                  title  artist_id
0         1  For Those About To Rock We Salute You          1
1         2                      Balls to the Wall          2
2         3                      Restless and Wild          2
3         4                      Let There Be Rock          1
4         5                               Big Ones          3


In [10]:
# Same pattern for the Artists table, using the `conn` opened in the previous cell.
# Note that this rebinds `query` and `df`, so the Albums frame is no longer reachable.
query = "SELECT * FROM Artists;"
df = conn.execute(query).df()
print(df.head())

   artist_id               name
0          1              AC/DC
1          2             Accept
2          3          Aerosmith
3          4  Alanis Morissette
4          5    Alice In Chains


# UDF

In [None]:
def _field_text(value) -> str:
    """Render one context value as prompt text.

    A non-empty list contributes only its first element. Missing or empty
    values (None, "", empty list/tuple) become the literal "None". Anything
    else is converted with str() so non-string values can be concatenated.
    """
    if isinstance(value, list) and value:
        # Multi-valued fields are represented by their first element only.
        value = value[0]
    if value is None or (isinstance(value, (str, list, tuple)) and len(value) == 0):
        return "None"
    return str(value)


def _build_prompt(query: str, row, fields: Optional[List[str]] = None,
                  max_chars: int = 16000) -> str:
    """Combine the instruction with one row of context into the user prompt."""
    if fields is None:
        # No field names: the row is one piece of text placed under the query.
        prompt = query + "\n" + _field_text(row) + "\n"
    else:
        # A bare string is treated as a one-field row; indexing it directly
        # would pick out single characters instead of the field value.
        values = [row] if isinstance(row, str) else row
        prompt = query + "".join(
            fields[i] + ": " + _field_text(values[i]) + "\n"
            for i in range(len(fields))
        )
    if len(prompt) > max_chars:
        # Oversized prompt (measured in characters): send a placeholder
        # instead of the row content.
        prompt = query + "N/A"
    return prompt


def llm(query: str, context: List[str], fields: Optional[List[str]] = None,
        max_workers: int = 4) -> List[Optional[str]]:
    """Run one chat completion per context row and return the answers in row order.

    Args:
        query: Instruction sent with every row, e.g. "find the artist
            mentioned in the review".
        context: The rows to process. Without ``fields`` each row is a single
            text (e.g. one review). With ``fields`` each row is a sequence of
            values, one per field name.
        fields: Optional field names. When given, each value is added to the
            prompt as a "<name>: <value>" line.
        max_workers: Number of worker threads issuing requests concurrently.

    Returns:
        A list with one entry per row, in the same order as ``context``. A row
        whose request raised is reported on stdout and left as ``None`` so the
        remaining results stay aligned with their inputs.

    NOTE(review): ``DEPLOYMENT_NAME`` is read as a module-level name but is not
    defined in the visible cells; it must exist before this is called.
    """
    rows = list(context) if context is not None else []
    if not rows:
        return []

    # The credentials are process-wide module settings, so configure them once
    # here instead of once per row inside the worker threads.
    set_model_key()
    # Resolve the deployment once so a missing name fails immediately instead
    # of being reported separately for every row.
    deployment = DEPLOYMENT_NAME

    # TODO UPDATE THIS SYSTEM PROMPT
    response_format = """
            You are a data analysis assistant who will respond with the artist name mentioned on reviews inputted. Answer with only the name.
            For instance:
            - If the review is 'I love the songs by Taylor Swift.', the answer is 'Taylor Swift'.
            - If the review is 'This album reminds me of the Beatles', the answer is 'the Beatles'.
        """

    def invoke_llm(row):
        """Send one row to the chat deployment and return the reply text."""
        prompt = _build_prompt(query, row, fields)
        response = openai.ChatCompletion.create(
            engine=deployment,  # engine = "deployment_name".
            messages=[
                {"role": "system", "content": response_format},
                {"role": "user", "content": prompt}
            ]
        )
        return response['choices'][0]['message']['content']

    responses: List[Optional[str]] = [None] * len(rows)
    # The with statement ensures the worker threads are cleaned up promptly.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Remember each future's row index: futures finish in arbitrary order,
        # but the caller needs the answers aligned with the input rows.
        future_to_index = {
            executor.submit(invoke_llm, row): index
            for index, row in enumerate(rows)
        }
        for future in concurrent.futures.as_completed(future_to_index):
            index = future_to_index[future]
            try:
                responses[index] = future.result()
            except Exception as exc:
                # Best effort: report the failing row and keep its slot as None.
                print('%r generated an exception: %s' % (rows[index], exc))

    return responses

In [None]:
# Build a DuckDB STRUCT type with one member per field name.
fields = ["movie_info", "review_type"]
types = ["VARCHAR", "VARCHAR"]
# Pair each field name with its SQL type. The previous `for i in len(fields)`
# loop raised TypeError because an int is not iterable.
context = dict(zip(fields, types))
duckdb.struct_type(context)