In [48]:
!pip install -qU \
pinecone-client==3.1.0\
tenacity==8.2.3




In [13]:
import json
import os

# Folder that holds nodes.json and the per-node sub-folders read further down.
baseFolder = '/Users/shota/github/content-ge/startup-school'
nodesJSONFile = os.path.join(baseFolder, 'nodes.json')

# Load nodes.json. The context manager closes the handle even when json.load
# raises on malformed input, and the explicit encoding keeps decoding
# independent of the machine's locale.
with open(nodesJSONFile, encoding='utf-8') as f:
    nodes = json.load(f)


In [46]:
# Build one record per entry in `nodes`: the entry key as 'id', the entry's
# 'name' field, and the contents of <baseFolder>/nodes/<key>/text1.md as 'text'.
data = []
for key, node in nodes.items():
    textFileName = os.path.join(baseFolder, 'nodes', key, "text1.md")
    # Decode explicitly as UTF-8 so the text sent for embedding does not depend
    # on the platform's default encoding.
    with open(textFileName, 'r', encoding='utf-8') as file:
        text = file.read()
    # The file is already closed here; only the in-memory text is needed.
    data.append({
        'id': key,
        'name': node['name'],
        'text': text
    })


In [56]:
from openai import OpenAI
# Read the OpenAI credentials with plain Python instead of the IPython-only
# `%env` magic, so the cell also runs outside IPython and matches the way
# PINECONE_API_KEY is read later in this notebook. A missing variable still
# stops the cell (KeyError) rather than continuing with no credentials.
orgID = os.environ['CHAT_GPT_ORGANIZATION_ID']
api_key = os.environ['CHAT_GPT_API_KEY']

# Client shared by the embedding helper defined below.
openai_client = OpenAI(
    api_key=api_key,
    organization=orgID,
)



In [61]:
from tenacity import retry, wait_random_exponential, stop_after_attempt

# At most 6 attempts, separated by a randomized exponential backoff whose wait
# is bounded between 1 and 20 seconds.
@retry(stop=stop_after_attempt(6), wait=wait_random_exponential(min=1, max=20))
def get_embedding(text: str, model="text-embedding-3-small") -> list[float]:
    """Request an embedding for a single piece of text.

    Args:
        text: The string to embed; it is sent as a one-element input list.
        model: Name of the embedding model passed to the API.

    Returns:
        The ``embedding`` attribute of the first item in the response data.
    """
    result = openai_client.embeddings.create(model=model, input=[text])
    first_item = result.data[0]
    return first_item.embedding

def get_vector(d):
    """Convert one node record into a vector dict.

    Args:
        d: Mapping with 'id', 'name' and 'text' keys.

    Returns:
        A dict with 'id', 'values' (the embedding of ``d['text']``) and
        'metadata' (``{"name": d['name']}``).
    """
    # Compute the embedding first, exactly as before, so a failing embedding
    # call surfaces before the remaining keys are read.
    embedding = get_embedding(d['text'])
    metadata = {"name": d['name']}
    return dict(id=d['id'], values=embedding, metadata=metadata)

def create_vector_json(data, path="vectors.json"):
    """Embed every record in ``data`` and write the vectors to a JSON file.

    Args:
        data: Iterable of records accepted by ``get_vector`` (each needs
            'id', 'name' and 'text').
        path: Destination file. Defaults to "vectors.json", the name that was
            previously hard-coded, so existing calls behave the same.

    Returns:
        None. The vectors are only written to ``path``.
    """
    # Build all vectors before opening the file so a failed embedding call
    # does not leave a truncated or empty output file behind.
    vectors = [get_vector(d) for d in data]
    with open(path, "w") as f:
        json.dump(vectors, f)
        
# Embed every record in `data` and write the resulting vectors to vectors.json.
create_vector_json(data)


In [81]:
from pinecone import Pinecone

# Connect to the Pinecone index used by the rest of this notebook.
index_name = 'vitsi-ai-first-index'

# An unset (or empty) PINECONE_API_KEY is passed on as the empty string.
pinecone_api_key = os.getenv('PINECONE_API_KEY') or ''
pc = Pinecone(api_key=pinecone_api_key)
index = pc.Index(index_name)

# Last expression of the cell: displays the index statistics.
index.describe_index_stats()



{'dimension': 1536,
 'index_fullness': 0.00244,
 'namespaces': {'': {'vector_count': 244}},
 'total_vector_count': 244}

In [69]:

# Reload the vectors that create_vector_json wrote to disk. The context manager
# closes the file even if json.load raises.
with open("vectors.json") as f:
    vectors = json.load(f)

# Upsert the loaded vectors; the response is displayed as the cell output.
index.upsert(vectors)





{'upserted_count': 202}

In [71]:
# Load the intent examples. The context manager closes the file even if
# json.load raises.
with open("intents.json") as f:
    intents = json.load(f)

# Last expression of the cell: displays the loaded intents.
intents

[{'id': 'show_video_1', 'message': 'show me a video', 'intent': 'show_video'},
 {'id': 'show_video_2',
  'message': 'Could you play the video?',
  'intent': 'show_video'},
 {'id': 'show_video_3',
  'message': "I'd like to see the video, please.",
  'intent': 'show_video'},
 {'id': 'show_video_4',
  'message': 'Can you put on the video?',
  'intent': 'show_video'},
 {'id': 'show_video_5',
  'message': "Let's watch the video.",
  'intent': 'show_video'},
 {'id': 'show_video_6',
  'message': 'Would you mind displaying the video?',
  'intent': 'show_video'},
 {'id': 'show_video_7',
  'message': "I'm interested in viewing the video.",
  'intent': 'show_video'},
 {'id': 'show_video_8',
  'message': 'Could we check out the video?',
  'intent': 'show_video'},
 {'id': 'show_video_9',
  'message': 'Please, present the video.',
  'intent': 'show_video'},
 {'id': 'show_video_10',
  'message': "I'm keen to watch the video.",
  'intent': 'show_video'},
 {'id': 'show_video_11',
  'message': 'Can we h

In [74]:
def get_intents_vector(d):
    """Convert one intent example into a vector dict.

    Args:
        d: Mapping with 'id', 'message' and 'intent' keys.

    Returns:
        A dict with 'id', 'values' (the embedding of ``d['message']``) and
        'metadata' (``{"intent": d['intent']}``).
    """
    # The embedding is computed before the other keys are read, as before.
    embedding = get_embedding(d['message'])
    metadata = {"intent": d['intent']}
    return dict(id=d['id'], values=embedding, metadata=metadata)

def create_intents_vector_json(data, path="intents_vectors.json"):
    """Embed every intent example in ``data`` and write the vectors to JSON.

    Args:
        data: Iterable of records accepted by ``get_intents_vector`` (each
            needs 'id', 'message' and 'intent').
        path: Destination file. Defaults to "intents_vectors.json", the name
            that was previously hard-coded.

    Returns:
        None. The vectors are only written to ``path``.
    """
    # Iterate over the argument. The previous version ignored `data` and always
    # read the global `intents`.
    vectors = [get_intents_vector(d) for d in data]
    with open(path, "w") as f:
        json.dump(vectors, f)


# Pass the intent examples explicitly. `data` holds the node records, which have
# no 'message' or 'intent' keys; the old call only worked because the function
# ignored its argument.
create_intents_vector_json(intents)

In [78]:
# Reload the intent vectors that create_intents_vector_json wrote to disk. The
# context manager closes the file even if json.load raises.
with open("intents_vectors.json") as f:
    vectors = json.load(f)

# Upsert the loaded vectors; the response is displayed as the cell output.
index.upsert(vectors)


{'upserted_count': 42}