In [5]:
# !pip install openai
# !pip install pinecone

In [6]:
from datasets import load_dataset
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec
from tqdm.auto import tqdm
# from DLAIUtils import Utils

import ast
import os
import pandas as pd

In [7]:
# Read the Pinecone API key from the environment so the secret is never saved
# inside the notebook. Falls back to an empty string (the original value) when
# the PINECONE_API_KEY environment variable is not set.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY", "")

In [10]:
# Connect to Pinecone and make sure the serverless index used by this notebook
# exists. Creation is guarded by an existence check so that re-running this
# cell (e.g. Restart Kernel -> Run All) does not fail once "wiki-index" has
# already been created.
pinecone = Pinecone(api_key=PINECONE_API_KEY)
if "wiki-index" not in pinecone.list_indexes().names():
    pinecone.create_index(
        name="wiki-index",
        dimension=1536,  # length of every vector stored in this index
        metric='cosine',
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [11]:
# Open a handle to the index; upserts and queries below go through `index`.
index_name = "wiki-index"
index = pinecone.Index(index_name)

In [13]:
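# Optional one-time dataset download; uncomment to run. Needs the `wget` and
# `unzip` commands to be available on PATH.
# NOTE(review): the archive presumably unpacks to the `wiki.csv` file read in the next cell - confirm.
# !wget -q -O lesson2-wiki.csv.zip "https://www.dropbox.com/scl/fi/yxzmsrv2sgl249zcspeqb/lesson2-wiki.csv.zip?rlkey=paehnoxjl3s5x53d1bedt4pmc&dl=0"
# !unzip lesson2-wiki.csv.zip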

zsh:1: command not found: wget
unzip:  cannot find or open lesson2-wiki.csv.zip, lesson2-wiki.csv.zip.zip or lesson2-wiki.csv.zip.ZIP.


In [16]:
# Read at most `max_articles_num` rows from wiki.csv to keep the demo small,
# then preview the first rows as the cell output.
max_articles_num = 500
df = pd.read_csv(
    'wiki.csv',
    nrows=max_articles_num,
)
df.head()

Unnamed: 0,id,metadata,values
1,1-0,"{'chunk': 0, 'source': 'https://simple.wikiped...","[-0.011254455894231796, -0.01698738895356655, ..."
2,1-1,"{'chunk': 1, 'source': 'https://simple.wikiped...","[-0.0015197008615359664, -0.007858820259571075..."
3,1-2,"{'chunk': 2, 'source': 'https://simple.wikiped...","[-0.009930099360644817, -0.012211072258651257,..."
4,1-3,"{'chunk': 3, 'source': 'https://simple.wikiped...","[-0.011600767262279987, -0.012608098797500134,..."
5,1-4,"{'chunk': 4, 'source': 'https://simple.wikiped...","[-0.026462381705641747, -0.016362832859158516,..."


In [19]:
# Upsert the rows of `df` into the Pinecone index in fixed-size batches.
BATCH_SIZE = 250  # number of vectors sent per upsert request

prepped = []

for i, row in tqdm(df.iterrows(), total=df.shape[0]):
    # 'metadata' and 'values' are stored in the CSV as Python-literal strings
    # (a dict and a list of floats), so parse them back into objects.
    meta = ast.literal_eval(row['metadata'])
    prepped.append({'id': row['id'],
                    'values': ast.literal_eval(row['values']),
                    'metadata': meta})
    if len(prepped) >= BATCH_SIZE:
        index.upsert(prepped)
        prepped = []

# Flush the final partial batch. Without this, any rows left over when the
# row count is not an exact multiple of BATCH_SIZE would never be uploaded.
if prepped:
    index.upsert(prepped)
    prepped = []

  0%|          | 0/500 [00:00<?, ?it/s]

In [20]:
# Display the index statistics as the cell output, as a sanity check on how
# many vectors the index holds after the upsert.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 500}},
 'total_vector_count': 500}

In [21]:
# Read the OpenAI API key from the environment so the secret is never saved
# inside the notebook. Falls back to an empty string (the original value) when
# the OPENAI_API_KEY environment variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
# Client shared by the embedding and completion calls in this notebook.
openai_client = OpenAI(api_key=OPENAI_API_KEY)

def get_embeddings(articles, model="text-embedding-ada-002"):
    """Request embeddings for ``articles`` through ``openai_client``.

    Args:
        articles: Value forwarded as ``input`` to ``embeddings.create``
            (callers in this notebook pass a list of strings).
        model: Name of the embedding model to use.

    Returns:
        The response object returned by ``openai_client.embeddings.create``.
    """
    response = openai_client.embeddings.create(model=model, input=articles)
    return response

In [22]:
# Embed a single question; it is wrapped in a one-element list before being
# handed to get_embeddings.
query = "what is the berlin wall?"

embed = get_embeddings(articles=[query])

In [26]:
# Look up the 3 closest matches for the query embedding and print the text
# stored in each match's metadata, one match per line group.
res = index.query(
    vector=embed.data[0].embedding,
    top_k=3,
    include_metadata=True,
)
text = [match['metadata']['text'] for match in res['matches']]
print('\n'.join(text))


August 13  1961: Building of the Berlin Wall begins.
 August 14  1945: Japan announces its surrender at the end of World War II.
 August 14/15  1947: India is partitioned at independence from the UK, as the new mainly Islamic state of Pakistan is created.
 August 15  1960: The Republic of the Congo becomes independent.
 August 15  1971: Bahrain becomes independent.
 August 16  1977: Elvis Presley dies aged 42, leading to a worldwide outpouring of grief.
 August 17  1945: Indonesia declares independence from the Netherlands.
 August 17  1960: Gabon becomes independent.
 August 17  1962: Peter Fechter becomes the first person to be shot dead at the Berlin Wall.
 August 19  43 BC: Augustus becomes Roman consul.
 August 19  14: Augustus dies.
 August 19  1919: Afghanistan becomes independent.
 August 19  1991: The August Coup against Mikhail Gorbachev, in the Soviet Union, begins.
 August 20  1940: Leon Trotsky is fatally wounded with an ice pick in Mexico.
 August 20  1968: The Prague Spr

In [27]:
# Retrieve context for the new query, then assemble the prompt as:
# instruction header + retrieved passages (separated by '---') + question.
query = "write an article titled: what is the berlin wall?"
embed = get_embeddings([query])
res = index.query(vector=embed.data[0].embedding, top_k=3, include_metadata=True)

# Text of each retrieved match, in the order the index returned them.
contexts = [match['metadata']['text'] for match in res['matches']]

prompt_start = (
    "Answer the question based on the context below.\n\n"
    "Context:\n"
)
prompt_end = f"\n\nQuestion: {query}\nAnswer:"

prompt = "".join([prompt_start, "\n\n---\n\n".join(contexts), prompt_end])

print(prompt)


Answer the question based on the context below.

Context:
August 13  1961: Building of the Berlin Wall begins.
 August 14  1945: Japan announces its surrender at the end of World War II.
 August 14/15  1947: India is partitioned at independence from the UK, as the new mainly Islamic state of Pakistan is created.
 August 15  1960: The Republic of the Congo becomes independent.
 August 15  1971: Bahrain becomes independent.
 August 16  1977: Elvis Presley dies aged 42, leading to a worldwide outpouring of grief.
 August 17  1945: Indonesia declares independence from the Netherlands.
 August 17  1960: Gabon becomes independent.
 August 17  1962: Peter Fechter becomes the first person to be shot dead at the Berlin Wall.
 August 19  43 BC: Augustus becomes Roman consul.
 August 19  14: Augustus dies.
 August 19  1919: Afghanistan becomes independent.
 August 19  1991: The August Coup against Mikhail Gorbachev, in the Soviet Union, begins.
 August 20  1940: Leon Trotsky is fatally wounded wi

In [28]:
# Send the assembled prompt to the completions endpoint and print the
# generated text below a separator line.
completion_params = dict(
    model="gpt-3.5-turbo-instruct",
    prompt=prompt,
    temperature=0,
    max_tokens=636,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None,
)
res = openai_client.completions.create(**completion_params)
print('-' * 80)
print(res.choices[0].text)

--------------------------------------------------------------------------------

The Berlin Wall was a physical barrier that divided the city of Berlin, Germany from 1961 to 1989. It was built by the German Democratic Republic (GDR), also known as East Germany, in an effort to prevent its citizens from fleeing to the democratic West Germany.

The construction of the Berlin Wall began on August 13, 1961, and it consisted of a concrete wall, barbed wire, and guard towers. The wall stretched for 96 miles, dividing the city into two separate parts. The East German government claimed that the wall was built to protect its citizens from Western imperialism and to prevent spies and agents from entering the country.

However, the real purpose of the Berlin Wall was to prevent East Germans from escaping to the West. Before the wall was built, thousands of East Germans had been fleeing to West Germany every day, seeking better economic opportunities and political freedom. The wall effectively c