## Loading data into Pinecone

Make sure you have your OpenAI API key, your Pinecone API key, and your Pinecone environment name handy; the cells below need all three.

In [1]:
## Install the client library into your Jupyter environment
!pip3 install pinecone-client
!pip3 install pandas
!pip3 install openai

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [1]:
# Import dependencies
import os

import numpy as np
import openai
import pandas as pd
import pinecone

  from tqdm.autonotebook import tqdm


In [2]:
# Init the openai client
## IMPORTANT: YOUR API KEY IS SENSITIVE INFORMATION -- never paste it into
## this notebook. Export it in the shell that launches Jupyter instead:
##     export OPENAI_API_KEY="sk-..."
## If the variable is unset the key falls back to an empty string, so the
## embedding requests further down are expected to fail authentication.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

In [3]:
# Read the sample CSV into the DataFrame used by the rest of the notebook
df = pd.read_csv(filepath_or_buffer='Project_Management_1687408330.csv')


In [4]:
# Preview only the first rows so a large CSV does not flood the notebook output
df.head()

Unnamed: 0,Name,Subitems,Person,Status,Date,Accounts,Deals
0,Gong.io Integration,,,Working on it,,,
1,SFDC Integration,,,,,,
2,Zoom Integration,,,,,,
3,Fix the Multi Column Error,,,,,,
4,Monday Context Hook Doesn't Work,,,,,,


In [5]:
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector for a single value.

    Takes one value at a time so it can be handed to ``Series.apply`` and
    run once per DataFrame row.

    Args:
        text: The value to embed; it is sent to the API as a one-element list.
        model: Name of the OpenAI embedding model to request.

    Returns:
        The ``embedding`` entry of the first item in the response's
        ``data`` list.
    """
    response = openai.Embedding.create(model=model, input=[text])
    first_item = response['data'][0]
    return first_item['embedding']

In [6]:
## Embed every value in the Name column and store the resulting vectors in a
## new column called ada_embedding (get_embedding is invoked once per row).
df['ada_embedding'] = df['Name'].apply(get_embedding, model='text-embedding-ada-002')

In [7]:
# Preview only the first rows; each ada_embedding cell holds a full vector
df.head()

Unnamed: 0,Name,Subitems,Person,Status,Date,Accounts,Deals,ada_embedding
0,Gong.io Integration,,,Working on it,,,,"[0.000319324346492067, -0.014362137764692307, ..."
1,SFDC Integration,,,,,,,"[-0.00421494897454977, -0.003558073891326785, ..."
2,Zoom Integration,,,,,,,"[0.003471081843599677, -0.011688053607940674, ..."
3,Fix the Multi Column Error,,,,,,,"[-0.025359127670526505, 0.030919456854462624, ..."
4,Monday Context Hook Doesn't Work,,,,,,,"[-0.03184713050723076, 0.005752094555646181, -..."


In [8]:
# Tag every row as a feature request. Build a separate dict per row:
# `[{...}] * len(df)` would place the *same* dict object in every cell, so
# editing one row's metadata would silently change all of them.
df["metadata"] = [{"type": "feature_request"} for _ in range(len(df))]

In [9]:
# Preview only the first rows to confirm the metadata column was added
df.head()

Unnamed: 0,Name,Subitems,Person,Status,Date,Accounts,Deals,ada_embedding,metadata
0,Gong.io Integration,,,Working on it,,,,"[0.000319324346492067, -0.014362137764692307, ...",{'type': 'feature_request'}
1,SFDC Integration,,,,,,,"[-0.00421494897454977, -0.003558073891326785, ...",{'type': 'feature_request'}
2,Zoom Integration,,,,,,,"[0.003471081843599677, -0.011688053607940674, ...",{'type': 'feature_request'}
3,Fix the Multi Column Error,,,,,,,"[-0.025359127670526505, 0.030919456854462624, ...",{'type': 'feature_request'}
4,Monday Context Hook Doesn't Work,,,,,,,"[-0.03184713050723076, 0.005752094555646181, -...",{'type': 'feature_request'}


In [10]:
# Init the pinecone client
## IMPORTANT: YOUR API KEY IS SENSITIVE INFORMATION -- never paste it into
## this notebook. Export both settings in the shell that launches Jupyter:
##     export PINECONE_API_KEY="..."
##     export PINECONE_ENVIRONMENT="..."
## Unset variables fall back to empty strings.
## NOTE(review): `pinecone.init` appears to configure the library globally and
## return None, so `client` is likely None -- the name is kept only so any
## later reference to it keeps working. Confirm against the client docs.
client = pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY", ""),
    environment=os.environ.get("PINECONE_ENVIRONMENT", ""),
)

In [11]:
# Get a handle to the "cohesive-ai" index; the upsert below goes through it.
# NOTE(review): presumably this index must already exist in the Pinecone
# project -- confirm before running.
index = pinecone.Index("cohesive-ai")

In [12]:
## Pinecone takes each vector as a tuple: the vector's key first, the
## embedding second, and the metadata dict third. Zip the three columns of
## our Pandas dataframe together to build that list of tuples.
index.upsert(list(zip(df["Name"], df["ada_embedding"], df["metadata"])))

{'upserted_count': 5}