### Libraries needed for the notebook

In [12]:
# pip install pandas - Library for reading and writing CSV files into data frames
# pip install python-dotenv - Library to load environment variables such as API Keys
# pip install openai - OpenAI library
# ast - Standard-library module for converting string to list (ships with Python, no pip install needed)
# pip install pinecone-client - Pinecone library
# pip install tqdm - For keeping track of upload progress

### Prepare the input for embedding

#### 1. Read the input CSV file

In [1]:
import pandas as pd

# Load the cleaned wikiHow sample into a DataFrame and preview its first two rows.
df = pd.read_csv(filepath_or_buffer='./data/wikihow-cleaned-100-rows.csv')
df.head(n=2)

Unnamed: 0.1,Unnamed: 0,summary,title,text
0,0,keep related supplies in the same area . make ...,how to be an organized artist 1,"if youre a photographer , keep all the necessa..."
1,1,create a sketch in the neopoprealist manner of...,how to create a neopoprealist art work,see the image for how this drawing develops st...


#### 2. Rename the `Unnamed: 0` Column to `id`

In [2]:
# Rebind the renamed frame instead of mutating with inplace=True: the result
# is the same, but the cell reads as a plain "new frame from old frame" step.
df = df.rename(columns={'Unnamed: 0': 'id'})
df.head(2)

Unnamed: 0,id,summary,title,text
0,0,keep related supplies in the same area . make ...,how to be an organized artist 1,"if youre a photographer , keep all the necessa..."
1,1,create a sketch in the neopoprealist manner of...,how to create a neopoprealist art work,see the image for how this drawing develops st...


#### 3. Check the data types of each column 

In [3]:
# Show the dtype pandas assigned to each column.
df.dtypes

id          int64
summary    object
title      object
text       object
dtype: object

#### 4. Change the `id` column from int to string, because Pinecone requires record IDs to be strings

In [4]:
# Turn every id value into its string form, then show the dtypes again so the
# change from int64 to object is visible.
df['id'] = df['id'].map(str)
df.dtypes

id         object
summary    object
title      object
text       object
dtype: object

### Load environment variables and API keys

In [5]:
import os
from dotenv import load_dotenv

# Let python-dotenv populate the process environment, then read both API keys.
# Each lookup yields None when the variable is not set.
load_dotenv()

OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')

### Embed using OpenAI `text-embedding-3-small` model with `1536` dimensions

In [6]:
from openai import OpenAI
# OpenAI client used by the cells below, built with the key read from the environment.
client = OpenAI(api_key=OPENAI_API_KEY)

def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding of ``text`` produced by the OpenAI embeddings API.

    Every newline in ``text`` is replaced by a single space before the request
    is made. ``model`` names the embedding model to use. The value returned is
    the ``embedding`` attribute of the first item in the response's ``data``.
    """
    single_line = " ".join(text.split("\n"))
    response = client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding


# Embed each row's title (get_embedding issues one API request per call) and
# store the vectors in the 'text-embeddings' column. Note that the title, not
# the text column, is what gets embedded. Then write the frame to disk without
# the index so the embeddings can be reloaded later.
df['text-embeddings'] = df['title'].apply(get_embedding, model='text-embedding-3-small')
df.to_csv('./data/embedded_100_wikihow.csv', index=False)

### Prepare the embeddings for uploading to pinecone

#### 1. Load the embeddings file that was created in previous step

In [7]:
# Reload the frame that was written with the embeddings and preview two rows.
df = pd.read_csv(filepath_or_buffer='./data/embedded_100_wikihow.csv')
df.head(n=2)

Unnamed: 0,id,summary,title,text,text-embeddings
0,0,keep related supplies in the same area . make ...,how to be an organized artist 1,"if youre a photographer , keep all the necessa...","[0.01380640733987093, 0.0018019392155110836, 0..."
1,1,create a sketch in the neopoprealist manner of...,how to create a neopoprealist art work,see the image for how this drawing develops st...,"[0.016886457800865173, 0.04798700660467148, 0...."


#### 2. Check the data types for each column

In [8]:
# Show the column dtypes of the frame that was just read back from CSV.
df.dtypes

id                  int64
summary            object
title              object
text               object
text-embeddings    object
dtype: object

#### 3. Change the `id` column from int to string, because Pinecone requires record IDs to be strings

In [9]:
# The reloaded id column is numeric again, so convert each value to a string
# and display the dtypes to confirm.
df['id'] = df['id'].map(str)
df.dtypes

id                 object
summary            object
title              object
text               object
text-embeddings    object
dtype: object

#### 4. Change the data type of `text-embeddings` from `string` to `list`

In [10]:
# `ast` is part of Python's standard library, so there is nothing to install.
import ast

# Each embedding was read back from the CSV as text; parse every cell into a
# Python list.
df['text-embeddings'] = df['text-embeddings'].map(ast.literal_eval)

#### 5. Prepare one sample row to upsert

In [15]:
# Build the record for the first DataFrame row: its id, its embedding as the
# vector values, and title/text/summary carried along as metadata.
first_row = df.iloc[0]
vector0 = {
    "id": first_row['id'],
    "values": first_row['text-embeddings'],
    "metadata": {
        "title": first_row['title'],
        "text": first_row['text'],
        "summary": first_row['summary'],
    },
}

### Upsert to Pinecone

#### 1. Load the index (this assumes you have already created an index in Pinecone)

In [29]:
from pinecone import Pinecone

# Create the Pinecone client with the API key, then get a handle to the
# "wikihow-1" index that the upsert and query cells use.
pc = Pinecone(api_key=PINECONE_API_KEY)
pinecone_index = pc.Index("wikihow-1")

#### 2. Upsert a single row to test if the upsert works

In [30]:
# Smoke test: upsert only the sample record. The call's return value is the
# last expression, so it is shown as the cell output.
pinecone_index.upsert(
  vectors=[vector0]
)

{'upserted_count': 1}

#### 3. Now iterate over all rows and upsert to pinecone index

In [32]:
# Walk the DataFrame row by row and send one upsert request per row. The row
# label returned by iterrows() is not needed, so it is discarded.
for _, row in df.iterrows():
    record = {
        "id": row['id'],
        "values": row['text-embeddings'],
        "metadata": {
            "title": row['title'],
            "text": row['text'],
            "summary": row['summary'],
        },
    }
    pinecone_index.upsert(vectors=[record])

### Query Pinecone Data with a sample query

#### 1. Embed the sample query with the OpenAI model

In [43]:
# Embed the question with get_embedding (using its default model) so it can be
# sent to the index as a query vector.
sample_query = 'how to become artist'
sample_query_embedding = get_embedding(sample_query)

#### 2. Query Pinecone with the sample query to retrieve the top 3 matching documents

In [45]:
# Query the index with the embedded question and keep the top 3 matches.
# Metadata is requested because the following cells read title/summary/text
# from each match; the vector values themselves are not requested.
results = pinecone_index.query(
  vector=sample_query_embedding,
  top_k=3,
  include_values=False,
  include_metadata=True
)

In [63]:
# Print every match as a numbered entry (title, summary, details), each one
# followed by a '---' separator line. Numbering starts at 1.
matches = results['matches']
for idx, row in enumerate(matches, start=1):
    score = row['score']
    metadata = row['metadata']
    summary, title, text = (metadata[key] for key in ('summary', 'title', 'text'))
    print(idx," - ", title,"\n\nSummary: ",summary,"\n\nDetails:",text)
    print('---')
    

1  -  how to become an art investor 

Summary:  start with some experience or interest in art . understand the difference between art collectors , art investors and art speculators . figure out what you are willing to pay for art , before going to an auction house . pay attention to what schools of art are selling well , and which are down . focus art investments on fine art paintings , rather than decorative art . reach out to trusted auction houses and dealers when you are looking to buy art . buy your investment art when you feel confident of its worth , its price and its ability to grow in value . study how art is properly stored . have your art investments appraised occasionally . consider renting out your art investments . understand that selling an art investment can take time . 

Details: the best art investors do their research on the pieces of art that they buy , so someone with some education or interest in the art world is more likely to understand this niche market . as we

### Send the response to LLM to get a summarized response across the top matches

#### 1. Prepare the context

In [65]:
# Collect the full text of every match; these passages form the context.
contexts = [match['metadata']['text'] for match in results['matches']]

# Fixed instruction that precedes the context passages.
prompt_start = (
    "Answer the question based on the context below.\n\n"
    "Context:\n"
)

# The question itself, followed by the cue for the model to answer.
prompt_end = f"\n\nQuestion: {sample_query}\nAnswer:"

# Header, then the passages separated by a '---' rule, then the question.
prompt = "".join([prompt_start, "\n\n---\n\n".join(contexts), prompt_end])

print(prompt)

Answer the question based on the context below.

Context:
the best art investors do their research on the pieces of art that they buy , so someone with some education or interest in the art world is more likely to understand this niche market . as well as personal research , you will need to have contacts with people in the art world , such as auctioneers , gallery directors and dealers , who can give you good investment advice . you may confuse these three terms , if you are not careful . each of them has a slightly different goal in mind when looking to buy art . art collectors do not buy art for investment purposes . they buy it to decorate and display in their home . because they consider them to be an important part of their home or life , most art collectors have a hard time parting with pieces of their collection . while many collectors do end up selling some pieces of art , it may be done because of necessity . collectors often loan their works out to museums and occasionally d

#### 2. Get the summary from OpenAI

In [67]:
# Send the assembled prompt to the completions endpoint, then print an
# 80-character rule followed by the text of the first returned choice.
res = client.completions.create(
    model="gpt-3.5-turbo-instruct",
    prompt=prompt,
    temperature=0,
    max_tokens=636,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
    stop=None
)
print('-' * 80)
print(res.choices[0].text)

--------------------------------------------------------------------------------
 To become a successful artist, it is important to do thorough research on the art market and have contacts within the art world. It is also important to keep a well-organized workspace and to cultivate a diverse network of other artists, curators, and gallery assistants. Additionally, treating your art practice like a business and keeping track of expenses and pricing your work appropriately is crucial.
