In [None]:
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import utils
import slm


# Name of the chat model handed to utils.get_chat_completionprompt by the
# prompt-engineering and RAG cells of this notebook.
MODEL = "gpt-4o"

## Model Training


In [None]:
# Bracket alphabet: opening[i] is closed by closing[i].
opening = ["("]
closing = [")"]
# Model vocabulary: the brackets, the "E" token appended to every corpus entry,
# and the empty-string token (its index is ignored by the training loss).
vocalulary = [*opening, *closing, "E", ""]


def sequence_verifyer(sequence):
    """Tell whether the brackets in ``sequence`` are balanced.

    Symbols found in neither ``opening`` nor ``closing`` are skipped, so a
    trailing "E" does not influence the verdict.

    Args:
        sequence: Iterable of symbols (for example a string).

    Returns:
        True when every closing bracket matches the most recent unmatched
        opening bracket and nothing is left open at the end, False otherwise.
    """
    pending = []
    for symbol in sequence:
        if symbol in opening:
            pending.append(symbol)
            continue
        if symbol not in closing:
            continue
        # A closing bracket with nothing left to close.
        if not pending:
            return False
        # The bracket being closed must be of the same kind as the closer.
        expected_kind = opening.index(pending.pop())
        if closing.index(symbol) != expected_kind:
            return False
    return not pending


def gen_parenthesis_util(n, open, close, s, ans):
    """Append to ``ans`` every balanced completion of the prefix ``s``.

    Args:
        n: Number of bracket pairs in a finished expression.
        open: Count of "(" already placed in ``s``.
        close: Count of ")" already placed in ``s``.
        s: Prefix built so far.
        ans: List that receives the finished expressions (mutated in place).

    Returns:
        None. Results are delivered through ``ans``; completions that extend
        the prefix with "(" are emitted before those that extend it with ")".
    """
    # Explicit depth-first traversal; each entry is (opened, closed, prefix).
    frontier = [(open, close, s)]
    while frontier:
        opened, closed, prefix = frontier.pop()

        if opened == n and closed == n:
            ans.append(prefix)
            continue

        # The stack is LIFO: push the ")" branch first so that the "(" branch
        # is explored first.
        if closed < opened:
            frontier.append((opened, closed + 1, prefix + ")"))
        if opened < n:
            frontier.append((opened + 1, closed, prefix + "("))


def generate_balanced_expressions(n):
    """Return every balanced parenthesis string made of ``n`` bracket pairs.

    Args:
        n: Number of "(" / ")" pairs per expression.

    Returns:
        A list of strings; the list is empty when ``n`` is not positive.
    """
    expressions = []
    if not n > 0:
        return expressions
    gen_parenthesis_util(n, 0, 0, "", expressions)
    return expressions


# Every balanced expression made of 10 bracket pairs.
nice_one = generate_balanced_expressions(10)
# Terminate each expression with the "E" token; generate_text stops sampling
# as soon as it produces that token.
corpus = [item + "E" for item in nice_one]
# Alias used by the dataset, the loss and the text generator.
tokens = vocalulary

In [3]:
# Model instance exposed by the project-local `slm` module.
small_transformer_model = slm.model
# Last expression of the cell, so its return value is displayed as the output.
# NOTE(review): presumably the number of trainable parameters - confirm in slm.
slm.count_parameters(small_transformer_model)

12124

In [None]:
# Training configuration.
batch_size = 1
vocab_size = len(tokens)  # NOTE(review): not used anywhere else in the visible notebook
epochs = 5
# File names for the checkpoints written by the training loop.
model_best_name = "model_best.pkl"
model_last_name = "model_last.pkl"

# Project-local dataset; each batch is unpacked as (input_indices, target_indices)
# by the training loop.
dataset = slm.ExpressionDataset(corpus, tokens)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Targets equal to the index of the empty-string token are ignored by the loss.
# NOTE(review): NLLLoss expects log-probabilities as input - confirm that
# slm.model ends with a log-softmax.
criterion = nn.NLLLoss(ignore_index=tokens.index(""))
optimizer = optim.Adam(small_transformer_model.parameters(), lr=0.001)

# Training runs on the CPU.
device = torch.device("cpu")
model = small_transformer_model.to(device)

# Lowest summed epoch loss seen so far. It has to be initialised once, before
# the epoch loop: resetting it at the start of every epoch would make each
# epoch look like the best one and overwrite the "best" checkpoint every time.
current_loss = float("inf")

for epoch in range(epochs):
    model.train()
    total_loss = 0

    for batch in tqdm(dataloader):
        input_indices, target_indices = batch
        # Swap the first two axes before feeding the model.
        # NOTE(review): presumably (batch, sequence) -> (sequence, batch) - confirm
        # against slm.ExpressionDataset.
        input_indices = input_indices.transpose(0, 1).to(device).long()
        target_indices = target_indices.transpose(0, 1).to(device).long()

        optimizer.zero_grad()
        output = model(input_indices)

        # Flatten every position into one row so the loss sees
        # (positions, vocabulary) scores against (positions,) targets.
        loss = criterion(
            output.view(-1, output.size(-1)),
            target_indices.reshape(-1),
        )

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Always keep the most recent weights...
    torch.save(model, model_last_name)
    # ...and keep a separate copy only when this epoch improved on the best one.
    if total_loss < current_loss:
        current_loss = total_loss
        torch.save(model, model_best_name)
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}")

100%|██████████| 16796/16796 [01:29<00:00, 187.60it/s]


Epoch 1/5, Loss: 0.5162


100%|██████████| 16796/16796 [01:21<00:00, 207.24it/s]


Epoch 2/5, Loss: 0.5146


100%|██████████| 16796/16796 [01:20<00:00, 207.78it/s]


Epoch 3/5, Loss: 0.5145


100%|██████████| 16796/16796 [01:19<00:00, 210.45it/s]


Epoch 4/5, Loss: 0.5141


100%|██████████| 16796/16796 [01:16<00:00, 219.34it/s]

Epoch 5/5, Loss: 0.5137





In [None]:
def generate_text(model, seed_text, max_length, tokens, temperature=1.0):
    """Sample a continuation of ``seed_text`` from ``model``, token by token.

    Args:
        model: Network exposing ``eval()``. It is called with a long tensor of
            shape ``(current_length, 1)`` and the scores of the next token are
            read from ``output[-1, 0, :]``.
        seed_text: Sequence of symbols that starts the generation; every
            symbol must be present in ``tokens``.
        max_length: Upper bound on the total number of tokens, seed included.
        tokens: Vocabulary list mapping an index to its symbol.
        temperature: Strictly positive divisor applied to the scores before
            the softmax; values below 1 sharpen the distribution.

    Returns:
        The seed followed by the sampled symbols, joined into one string.
        Generation stops early right after the "E" token is sampled, in which
        case "E" is the last character of the result.

    Raises:
        ValueError: If ``temperature`` is not strictly positive, or if a seed
            symbol is missing from ``tokens``.
    """
    # A negative temperature would silently invert the distribution and a zero
    # one would turn the probabilities into NaNs, so reject both up front.
    if not temperature > 0:
        raise ValueError(
            f"temperature must be a positive number, got {temperature!r}"
        )

    model.eval()

    device = torch.device("cpu")
    generated_sequence = [tokens.index(char) for char in seed_text]

    for _ in range(max_length - len(seed_text)):
        # Column vector: one sequence, fed as (current_length, 1).
        input_tensor = (
            torch.tensor(generated_sequence).unsqueeze(1).to(device).long()
        )

        with torch.no_grad():
            output = model(input_tensor)
            # Only the scores at the last position predict the next token.
            logits = output[-1, 0, :] / temperature
            probabilities = F.softmax(logits, dim=-1).cpu().numpy()

        next_token = int(np.random.choice(len(tokens), p=probabilities))
        generated_sequence.append(next_token)

        if tokens[next_token] == "E":
            break

    return "".join(tokens[idx] for idx in generated_sequence)


# Sample at most 24 tokens that continue the seed "(())".
example = generate_text(model, "(())", 24, tokens, temperature=0.8)
# Strip the end marker only when it is really there: if sampling stopped
# because max_length was reached, the last character is a genuine bracket and
# must be kept for the balance check.
if example.endswith("E"):
    example = example[:-1]
print(example)
print(sequence_verifyer(example))

(())()(((()))()()())
True


## Prompt Engineering

Good practices:
1) Clearly explain the task
2) Provide examples
3) Provide a baseline
4) Explain the expected output
5) Provide very few options
6) Make the model adopt a persona
7) Clearly delimit the instructions, the context, and the prompt
8) Provide steps to follow while completing the task
9) Ask the model to cite the references provided
10) Use intent classification models to classify the user's intent and select the appropriate prompt
11) Summarize previous conversations
12) Ask the model to use an inner monologue or to show each step of the solution
13) Ask the model to evaluate its solution and check whether anything was missed
14) Evaluate the output of the model

In [None]:
# Practices 1, 2 and 3 (explain the task, provide examples, provide a baseline)
# What NOT to do: ask the question with no worked example.

system_message = "You are a helpful assistant. Here is a question, now provide a correct and quick answer."
# No example is given, so the model has to guess the expected label set and
# answer format.
question = """Evaluate the sentiment of the following statement: "I am feeling great today." """

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": question},
]

response = utils.get_chat_completionprompt(MODEL, messages)
print(response)

In [None]:
# Practices 1, 2 and 3 (explain the task, provide examples, provide a baseline)
# What to do: show a few solved examples before asking the real question.
system_message = "You are a helpful assistant. Here is a question, now provide a correct and quick answer."
# Few-shot examples, replayed below as alternating user / assistant turns.
example_question_1 = "Evaluate the sentiment of the following statement: 'I am feeling great today.'"
example_response_1 = "Positive"
example_question_2 = "Evaluate the sentiment of the following statement: 'I am feeling terrible today.'"
example_answer_2 = "Negative"
# NOTE(review): "okay" is labelled "Positive" here, which restricts the label
# set to two classes - confirm this is intended rather than "Neutral".
example_question_3 = "Evaluate the sentiment of the following statement: 'I am feeling okay today.'"
example_answer_3 = "Positive"
# The actual question to be answered in the style of the examples.
question = "Evaluate the sentiment of the following statement: 'I am feeling outstanding today.'"

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": example_question_1},
    {"role": "assistant", "content": example_response_1},
    {"role": "user", "content": example_question_2},
    {"role": "assistant", "content": example_answer_2},
    {"role": "user", "content": example_question_3},
    {"role": "assistant", "content": example_answer_3},
    {"role": "user", "content": question},
]

response = utils.get_chat_completionprompt(MODEL, messages)
print(response)

In [None]:
# Example for practice 4 (explain the expected output)
# What NOT to do:
system_message = "You are a helpful assistant. Here is a question, now provide a correct and quick answer."  # no instructions about the expected output format
question = "What is the capital of France?"

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": question},
]

response = utils.get_chat_completionprompt(MODEL, messages)
print(response)

In [None]:
# Example for practice 4 (explain the expected output)
# What to do: state the answer format and illustrate it inside the system message.
system_message = "You are a helpful assistant. Here is a question, now provide a correct and quick answer. We expect a single word answer, for example: Question: What is the capital of France? Answer: Paris."
question = "What is the capital of Brazil?"

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": question},
]

response = utils.get_chat_completionprompt(MODEL, messages)
print(response)

In [None]:
# Example for practice 5 (provide very few options)
# What NOT to do: offer seven labels, several of which overlap
# (Status / Progress / Summary Report, Blocker / Issue Report).
system_message = "You are a helpful assistant. You're given text and you must choose the most appropriate classification from the options provided and only them: Status Report, Progress Report, Incident Report, Summary Report, Blocker Report, Personal Report, Issue Report."
text = "The project is going well. We have completed the first phase and are now moving on to the second phase. We are on track to meet the deadline."

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": text},
]

response = utils.get_chat_completionprompt(MODEL, messages)
print(response)

In [None]:
# Example for practice 5 (provide very few options)
# What to do: provide very few options that are clearly distinct from each other.
system_message = "You are a helpful assistant. You're given text and you must choose the most appropriate classification from the options provided and only them: Project Report, Personal Report, Incident Report."
text = "The project is going well. We have completed the first phase and are now moving on to the second phase. We are on track to meet the deadline."

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": text},
]

response = utils.get_chat_completionprompt(MODEL, messages)
print(response)

'Project Report'

In [None]:
# Example for practices 6, 8, 12 and 13 (persona, steps, inner monologue, self-evaluation)
# What NOT to do: generic persona, no steps, no review of the answer.
system_message = "You are a helpful assistant."
# NOTE(review): the prompt text contains typos ("que amount", "'male'or"); it is
# left untouched here because it is sent to the model verbatim.
query = "You have a table called 'my_table' with 4 columns: 'decade', 'name', 'gender', 'quantity', which represents que amount of babies born in each decade that have each name, containing also the gender information related to the name (in this case, either 'male'or 'female'). I want you to retrieve a table containing each decade and the most popular male and female names in the following columns: 'decade', 'most_popular_female_name', and 'most_popular_male_name'. This table must also be ordered by decade."

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": query},
]

response = utils.get_chat_completionprompt(MODEL, messages)
print(response)

To achieve your goal, you can use SQL to query the `my_table` and identify the most popular male and female names for each decade. Below is an example SQL query that does this:

```sql
WITH FemaleRanks AS (
    SELECT
        decade,
        name AS most_popular_female_name,
        RANK() OVER (PARTITION BY decade ORDER BY quantity DESC) AS rank_female
    FROM
        my_table
    WHERE
        gender = 'female'
),
MaleRanks AS (
    SELECT
        decade,
        name AS most_popular_male_name,
        RANK() OVER (PARTITION BY decade ORDER BY quantity DESC) AS rank_male
    FROM
        my_table
    WHERE
        gender = 'male'
),
MostPopularNames AS (
    SELECT
        F.decade,
        F.most_popular_female_name,
        M.most_popular_male_name
    FROM
        FemaleRanks F
    JOIN
        MaleRanks M
    ON
        F.decade = M.decade
    WHERE
        F.rank_female = 1 AND M.rank_male = 1
)
SELECT
    decade,
    most_popular_female_name,
    most_popular_male_name
FROM
  

In [22]:
# Example for practices 6, 8, 12 and 13 (persona, steps, inner monologue, self-evaluation)
# What to do: detailed persona, explicit working steps, then a second pass that
# reviews the first answer.
system_message = "You are a MySQL database specialist, having graduated in Computer Science in MIT, a MsC Degree in Stanford and having worked with databases for 5 years in Facebook/Meta. You are given a query in english together with a schema and you must provide the correct MySQL query to retrieve the data as requested. Your query is at the same time very elegant and optimized to be able to run spending the least amount possible of resources. From your experience, you first write the basic idea of the query to only, after some reflection, give the final answer. If necessary, write the query in multiple steps, but always in a clear and concise way, and then the final answer."
# NOTE(review): the prompt text contains typos ("que amount", "'male'or"); it is
# left untouched here because it is sent to the model verbatim.
query = "Given a table called 'my_table' with 4 columns: 'decade', 'name', 'gender', 'quantity', which represents que amount of babies born in each decade that have each name, containing also the gender information related to the name (in this case, either 'male'or 'female'). I want you to retrieve a table containing each decade and the most popular male and female names in the following columns: 'decade', 'most_popular_female_name', and 'most_popular_male_name'. This table must also be ordered by decade."
# Instruction for the review pass.
second_system_message = "The following is an answer by your colleague, a junior developer, to the same question. Please review it, providing feedback on how it could be improved. Then give a final answer."

# First pass: draft an answer.
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": query},
]

first_response_response = utils.get_chat_completionprompt(MODEL, messages)
# Second pass: replay the question, then submit the draft as a user message
# right after the review instruction so the model critiques and improves it.
messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": query},
    {"role": "system", "content": second_system_message},
    {"role": "user", "content": first_response_response},
]
final_response = utils.get_chat_completionprompt(MODEL, messages)
print(final_response)

Your solution effectively breaks down the problem into manageable parts and steps through the process with clarity. However, there are some areas for potential optimization and clearer understanding. Let's go through the feedback and then provide a more optimized solution.

### Feedback
1. **Subquery Approach**: Using `(decade, quantity) IN` is a good approach for maximum quantity determination, but it can often lead to issues if there are ties (i.e., more than one name with the same quantity). A better approach can be achieved using `JOIN` with subqueries or `ROW_NUMBER` techniques if available.
  
2. **Efficiency**: The use of multiple subqueries and joins might be improved with a cleaner single step, especially if the database system supports Common Table Expressions (CTE) or other window functions.

3. **Clarity and Maintenance**: Complex queries should be as clear as possible, highlighting how they logically construct the desired output. Using aliases more consistently and methodi

## RAG (Retrieval Augmented Generation)

In [1]:
import utils

In [2]:
# chunking example
text = """Minneapolis

Article
Talk
Read
View source
View history

Tools
Appearance hide
Text

Small

Standard

Large
Width

Standard

Wide
Color (beta)

Automatic

Light

Dark
Coordinates: 44°58′55″N 93°16′09″W
Featured article
Page extended-protected
From Wikipedia, the free encyclopedia
This article is about the city in Minnesota. For other uses, see Minneapolis (disambiguation).
For the Twin Cities region, see Minneapolis–Saint Paul.
"Mpls." redirects here. For other topics, see mpls (disambiguation).
Minneapolis
City
Downtown Minneapolis (from the Mississippi River)
Downtown Minneapolis (from the Mississippi River)
Bde Maka Ska
Bde Maka Ska
Mill City Museum
Mill City Museum
First Avenue, a nightclub
First Avenue
Minnehaha Falls
Minnehaha Falls
Flag of Minneapolis
Flag
Official seal of Minneapolis
Seal
Official logo of Minneapolis
Logo
Etymology: Dakota mni 'water' with Greek polis 'city'
Nicknames: "City of Lakes",[1] "Mill City",[1] "Twin Cities"[2] (with Saint Paul), "Mini Apple"[1]
Motto: En Avant (French: 'Forward')[3]
MapWikimedia | © OpenStreetMap
Show Minneapolis
Show Hennepin County
Show Minnesota
Show the United States
Show all
Coordinates: 44°58′55″N 93°16′09″W[4]
Country	United States
State	Minnesota
County	Hennepin
Incorporated	1867
Founded by	Franklin Steele and John H. Stevens
Government
 • Type	Mayor–council (strong mayor)[5]
 • Body	Minneapolis City Council
 • Mayor	Jacob Frey (DFL)
Area[6]
 • City
57.51 sq mi (148.94 km2)
 • Land	54.00 sq mi (139.86 km2)
 • Water	3.51 sq mi (9.08 km2)
Elevation[4]	830 ft (250 m)
Population (2020)[7]
 • City
429,954
 • Estimate (2023)[8]
425,115
 • Rank	
46th (US)
1st (Minnesota)
 • Density	7,962.11/sq mi (3,074.21/km2)
 • Urban[9]	2,914,866
 • Urban density	2,872.4/sq mi (1,109/km2)
 • Metro[10]	3,693,729
Demonym	Minneapolitan
GDP[11]
 • MSA	$323.9 billion (2022) ($337 billion in 2023)[12]
Time zone	UTC–6 (Central)
 • Summer (DST)	UTC–5 (CDT)
ZIP Codes	
55401-55419, 55423, 55429-55430, 55450, 55454-55455, 55484-55488
Area code	612
FIPS code	27-43000[4]
GNIS ID	655030[4]
Website	minneapolismn.gov
Minneapolis[a] is a city in and the county seat of Hennepin County, Minnesota, United States.[4] With a population of 429,954, it is the state's most populous city as of the 2020 census.[7] Located in the state's center near the eastern border, it occupies both banks of the Upper Mississippi River and adjoins Saint Paul, the state capital of Minnesota. Minneapolis, Saint Paul, and the surrounding area are collectively known as the Twin Cities, a metropolitan area with 3.69 million residents.[14] Minneapolis is built on an artesian aquifer on flat terrain and is known for cold, snowy winters and hot, humid summers. Nicknamed the "City of Lakes",[15] Minneapolis is abundant in water, with thirteen lakes, wetlands, the Mississippi River, creeks, and waterfalls. The city's public park system is connected by the Grand Rounds National Scenic Byway.

Dakota people originally inhabited the site of today's Minneapolis. European colonization and settlement began north of Fort Snelling along Saint Anthony Falls—the only natural waterfall on the Mississippi River.[16] Location near the fort and the falls' power—with its potential for industrial activity—fostered the city's early growth. For a time in the 19th century, Minneapolis was the lumber and flour milling capital of the world, and as home to the Federal Reserve Bank of Minneapolis, it has preserved its financial clout into the 21st century. A Minneapolis Depression-era labor strike brought about federal worker protections. Work in Minneapolis contributed to the computing industry, and the city is the birthplace of General Mills, the Pillsbury brand, Target Corporation, and Thermo King mobile refrigeration.

The city's major arts institutions include the Minneapolis Institute of Art, the Walker Art Center, and the Guthrie Theater. Four professional sports teams play downtown. Prince is survived by his favorite venue, the First Avenue nightclub. Minneapolis is home to the University of Minnesota's main campus. The city's public transport is provided by Metro Transit, and the international airport, serving the Twin Cities region, is located towards the south on the city limits.

Residents adhere to more than fifty religions. Despite its well-regarded quality of life,[17] Minneapolis has stark disparities among its residents—arguably the most critical issue confronting the city in the 21st century.[18] Governed by a mayor-council system, Minneapolis has a political landscape dominated by the Minnesota Democratic–Farmer–Labor Party (DFL), with Jacob Frey serving as mayor since 2018.

History
Main article: History of Minneapolis
Dakota homeland
Further information: Dakota people, Ojibwe, Bdóte, and US–Dakota War of 1862
Two Indigenous nations inhabited the area now called Minneapolis.[19] Archaeologists have evidence that since 1000 A.D.,[20] they were the Dakota (one half of the Sioux nation),[21] and, after the 1700s,[22] the Ojibwe (also known as Chippewa, members of the Anishinaabe nations).[23] Dakota people have different stories to explain their creation.[24] One widely accepted story says the Dakota emerged from Bdóte,[24] the confluence of the Minnesota and Mississippi rivers. Dakota are the only inhabitants of the Minneapolis area who claimed no other land;[25] they have no traditions of having immigrated.[26] In 1680, cleric Louis Hennepin, who was probably the first European to see the Minneapolis waterfall the Dakota people call Owámniyomni, renamed it the Falls of St. Anthony of Padua for his patron saint.[27]

Island covered with hundreds of teepees
Dakota non-combatants living in a concentration camp at Fort Snelling during the winter of 1862[28][29]
In the space of sixty years, the US seized all of the Dakota land and forced them out of their homeland.[30] Purchasing most of modern-day Minneapolis, Zebulon Pike made the 1805 Treaty of St. Peter with the Dakota.[b] Pike bought a 9-square-mile (23 km2) strip of land—coinciding with the sacred place of Dakota origin[24]—on the Mississippi south of Saint Anthony Falls,[34] with the agreement the US would build a military fort and trading post there and the Dakota would retain their usufructuary rights.[35] In 1819, the US Army built Fort Snelling[36] to direct Native American trade away from British-Canadian traders and to deter war between the Dakota and Ojibwe in northern Minnesota.[37] Under pressure from US officials[38] in a series of treaties, the Dakota ceded their land first to the east and then to the west of the Mississippi, the river that runs through Minneapolis.[39][c] Dakota leaders twice refused to sign the next treaty until they were paid for the previous one.[51] In the decades following these treaty signings, the federal US government rarely honored their terms.[52] At the beginning of the American Civil War, annuity payments owed in June 1862 to the Dakota by treaty were late, causing acute hunger among the Dakota.[53][d] Facing starvation[55] a faction of the Dakota declared war in August and killed settlers.[56] Serving without any prior military experience, US commander Henry Sibley commanded raw recruits,[57] volunteer mounted troops from Minneapolis and Saint Paul with no military experience.[58] The war went on for six weeks in the Minnesota River valley.[59] After a kangaroo court,[60][e] 38 Dakota men were hanged.[59] [f] The army force-marched 1,700 non-hostile Dakota men, women, children, and elders 150 miles (240 km) to a concentration camp at Fort Snelling.[28][77] Minneapolitans reportedly threatened more than once to attack the camp.[78] In 1863, the US "abrogated and annulled" all treaties with the Dakota.[79] With Governor 
Alexander Ramsey calling for their extermination,[80] most Dakota were exiled from Minnesota.[81]

While the Dakota were being expelled, Franklin Steele laid claim to the east bank of Saint Anthony Falls,[82] and John H. Stevens built a home on the west bank.[83] In the Dakota language, the city's name is Bde Óta Othúŋwe ('Many Lakes Town').[g] Residents had divergent ideas on names for their community. Charles Hoag proposed combining the Dakota word for 'water' (mni [h]) with the Greek word for 'city' (polis), yielding Minneapolis. In 1851, after a meeting of the Minnesota Territorial Legislature, leaders of east bank St. Anthony lost their bid to move the capital from Saint Paul, but they eventually won the state university.[90] In 1856, the territorial legislature authorized Minneapolis as a town on the Mississippi's west bank.[86] Minneapolis was incorporated as a city in 1867, and in 1872, it merged with St. Anthony.[91]

Industries develop
Waterfall surrounded by mills and scaffolding
Saint Anthony Falls c. 1850s
Two men loaded flour
Loading flour, Pillsbury, 1939
Minneapolis originated around a source of energy: Saint Anthony Falls, the only natural waterfall on the Mississippi.[16] Each of the city's two founding industries—flour and lumber milling—developed in the 19th century nearly concurrently, and each came to prominence for about fifty years.[j] In 1884, the value of Minneapolis flour milling was the world's highest.[96] In 1899, Minneapolis outsold every other lumber market in the world.[97] Through its expanding mill industries, Minneapolis earned the nickname "Mill City".[98] Due to the occupational hazards of milling, six companies manufactured artificial limbs.[99]

Disasters struck in the late 19th century: the Eastman tunnel under the river leaked in 1869; twice, fire destroyed the entire row of sawmills on the east bank;[100] an explosion of flour dust at the Washburn A mill killed eighteen people[101] and demolished about half the city's milling capacity;[102] and in 1893, fire spread from Nicollet Island to Boom Island to northeast Minneapolis, destroyed twenty blocks, and killed two people.[103]

The lumber industry was built around forests in northern Minnesota, largely by lumbermen emigrating from Maine's depleting forests.[104][105] The region's waterways were used to transport logs well after railroads developed; the Mississippi River carried logs to St. Louis until the early 20th century.[106] In 1871, of the thirteen mills sawing lumber in St. Anthony, eight ran on water power, and five ran on steam power.[107] Auxiliary businesses on the river's west bank included woolen mills, iron works, a railroad machine shop, and mills for cotton, paper, sashes, and wood-planing.[108] Minneapolis supplied the materials for farmsteads and settlement of rapidly expanding cities on the prairies that lacked wood.[109] White pine milled in Minneapolis built Miles City, Montana; Bismarck, North Dakota; Sioux Falls, South Dakota; Omaha, Nebraska; and Wichita, Kansas.[110] Growing use of steam power freed lumbermen and their sawmills from dependence on the falls.[111] Lumbering's decline began around the turn of the century,[112] and sawmills in the city including the Weyerhauser mill closed by 1919.[113] After depleting Minnesota's white pine,[114] some lumbermen moved on to Douglas fir in the Pacific Northwest.[115]

Large computer terminal
Seymour Cray and colleagues began work on the CDC 6600 (pictured) in downtown Minneapolis and completed the project in Chippewa Falls, Wisconsin, in 1963.[116]
In 1877, Cadwallader C. Washburn co-founded Washburn-Crosby,[117] the company that became General Mills.[118][k] Washburn and partner John Crosby[119] sent Austrian civil engineer William de la Barre to Hungary where he acquired innovations through industrial espionage.[120] De la Barre calculated and managed the power at the falls and encouraged steam for auxiliary power.[121] Charles Alfred Pillsbury and the C. A. Pillsbury Company across the river hired Washburn-Crosby employees and began using the new methods.[120] The hard red spring wheat grown in Minnesota became valuable, and Minnesota "patent" flour was recognized at the time as the best bread flour in the world.[120] In 1900, fourteen percent of America's grain was milled in Minneapolis[120] and about one third of that was shipped overseas.[122] Overall production peaked at 18.5 million barrels in 1916.[123] Decades of soil exhaustion, stem rust, and changes in freight tariffs combined to quash the city's flour industry.[124] In the 1920s, Washburn-Crosby and Pillsbury developed new milling centers in Buffalo, New York, and Kansas City, Missouri, while maintaining their headquarters in Minneapolis.[125] The falls became a national historic district,[126] and the upper St. Anthony lock and dam is permanently closed.[127]

"""

# Split the raw article text into chunks with the project-local helper.
chunks = utils.document_based_chunking(text)
# Bare last expression so the notebook displays the resulting list of chunks.
chunks

["Minneapolis[a] is a city in and the county seat of Hennepin County, Minnesota, United States. [4] With a population of 429,954, it is the state's most populous city as of the 2020 census. [7] Located in the state's center near the eastern border, it occupies both banks of the Upper Mississippi River and adjoins Saint Paul, the state capital of Minnesota.  Minneapolis, Saint Paul, and the surrounding area are collectively known as the Twin Cities, a metropolitan area with 3. 69 million residents.",
 '[14] Minneapolis is built on an artesian aquifer on flat terrain and is known for cold, snowy winters and hot, humid summers.  Nicknamed the "City of Lakes",[15] Minneapolis is abundant in water, with thirteen lakes, wetlands, the Mississippi River, creeks, and waterfalls.  The city\'s public park system is connected by the Grand Rounds National Scenic Byway. .',
 "Dakota people originally inhabited the site of today's Minneapolis.  European colonization and settlement began north of Fort

In [3]:
# Embed a single chunk (index 7) to inspect what the embedding helper returns.
embedding = utils.get_chunk_embeddings(chunks[7], "text-embedding-3-small")
print(embedding)

[0.016855744644999504, -0.008043641224503517, 0.055278778076171875, 0.037616778165102005, -0.08470704406499863, -0.030637646093964577, -0.014890500344336033, -0.01698172092437744, 0.004176142625510693, -0.01457555778324604, -0.00018916257249657065, 0.0036690847482532263, 0.0057603055611252785, -0.028848769143223763, -0.02597649022936821, 0.018228894099593163, 0.018002135679125786, -0.022134186699986458, -0.007004329934716225, 0.03696169704198837, 0.025220626965165138, 0.05225532501935959, -0.03338394686579704, 0.0009802597342059016, 0.02019413933157921, 0.02635442093014717, -0.03681052476167679, 0.040136322379112244, -0.03701208904385567, -0.012056014500558376, -0.03031010366976261, -0.008969573304057121, -0.004576120525598526, 0.012465439736843109, 0.04812327399849892, 0.024389177560806274, -0.0010306506883352995, -0.017498226836323738, -0.036734938621520996, -0.029982563108205795, 0.013819694519042969, 0.013089027255773544, 0.012188290245831013, -0.002258140593767166, -0.036231029778

In [None]:
# In-memory "vector database": one row per chunk, held in a pandas DataFrame.
database = pd.DataFrame({"chunks": chunks})

In [5]:
# Embed every chunk (one helper call per row) and store each result as a NumPy
# array in a new "embeddings" column.
database["embeddings"] = database["chunks"].apply(
    lambda x: np.array(utils.get_chunk_embeddings(x, "text-embedding-3-small"))
)

In [6]:
# Display the table of chunks and their embeddings.
database

Unnamed: 0,chunks,embeddings
0,Minneapolis[a] is a city in and the county sea...,"[-0.03441869467496872, -0.040999993681907654, ..."
1,[14] Minneapolis is built on an artesian aquif...,"[-0.008452524431049824, 0.011000331491231918, ..."
2,Dakota people originally inhabited the site of...,"[0.0007054487941786647, -0.0065779597498476505..."
3,"For a time in the 19th century, Minneapolis wa...","[-0.042372457683086395, -0.002427722094580531,..."
4,The city's major arts institutions include the...,"[-0.002938700607046485, -0.010737665928900242,..."
5,Residents adhere to more than fifty religions....,"[-0.020965415984392166, 0.006626776419579983, ..."
6,Two Indigenous nations inhabited the area now ...,"[0.042408354580402374, -0.020726656541228294, ..."
7,Dakota are the only inhabitants of the Minneap...,"[0.016855744644999504, -0.008043641224503517, ..."
8,"In the space of sixty years, the US seized all...","[0.017029542475938797, -0.010994214564561844, ..."
9,[b] Pike bought a 9-square-mile (23 km2) strip...,"[0.007397182751446962, -0.022310152649879456, ..."


In [7]:
# Question used for both the plain and the retrieval-augmented generation.
query = "what is the terrain and climate characteristics of Minneapolis?"
# Retrieve rows of `database` for the query with the project-local helper.
# NOTE(review): presumably ranked by embedding similarity - confirm in utils.
result = utils.vector_sim_retriever(database, query)

In [11]:
# Show the text of the retrieved chunks.
result["chunks"]

1     [14] Minneapolis is built on an artesian aquif...
5     Residents adhere to more than fifty religions....
2     Dakota people originally inhabited the site of...
4     The city's major arts institutions include the...
16    Minneapolis originated around a source of ener...
Name: chunks, dtype: object

In [16]:
# Generation without RAG: the model answers from its own knowledge only.

system_message_1 = "You are a helpful assistant."

# Send the system message defined in this cell. Using the `system_message`
# variable here would silently reuse whatever an earlier cell left in it.
messages = [
    {"role": "system", "content": system_message_1},
    {"role": "user", "content": query},
]

response = utils.get_chat_completionprompt(MODEL, messages)
print(response)

Minneapolis is located in the Upper Midwest region of the United States and has a relatively flat terrain characterized by rolling plains and numerous lakes. The city is situated along the Mississippi River, and the landscape incorporates a mix of water features, parks, and urban development.

The climate of Minneapolis is classified as humid continental, featuring cold, snowy winters and warm, humid summers. Winters are long and can be frigid, with January average temperatures often below freezing, and snow accumulation is common. Summers are warm, with July temperatures averaging in the mid-70s Fahrenheit (around 24°C), though temperatures can occasionally rise above 90°F (32°C). The city experiences a moderate amount of precipitation, distributed fairly evenly throughout the year, often with thunderstorms during the summer months.


In [None]:
# Generation with RAG: the retrieved chunks are injected as context.

system_message_1 = "You are a helpful assistant. Here is some information that might help with answering questions:"
# Number the retrieved chunks from 1 so the answer can cite them as [1], [2], ...
contextual_information = "\n".join(
    [f"[{index+1}] " + chunk for index, chunk in enumerate(result["chunks"])]
)
system_message_2 = "With this information in mind, please answer the following question, providing the reference information right after the answer as a complete citation:"

# Order matters: introduction, retrieved context, answering instructions and
# finally the question. The introduction must be `system_message_1` from this
# cell; the `system_message` variable holds a prompt left over from an earlier cell.
messages = [
    {"role": "system", "content": system_message_1},
    {"role": "system", "content": contextual_information},
    {"role": "system", "content": system_message_2},
    {"role": "user", "content": query},
]

response = utils.get_chat_completionprompt(MODEL, messages)
print(response)

Minneapolis is built on flat terrain and is characterized by a climate with cold, snowy winters and hot, humid summers. [1]
