# In [3]:
import openai
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import os
import requests

# Load variables from a .env file (via python-dotenv) into the process
# environment so the os.getenv('EMAIL') lookup below can find them.
load_dotenv()

def get_api_key(email, timeout=10):
    """Fetch the API key registered for *email* from the key service.

    Args:
        email: Address sent to the service as the ``"email"`` JSON field.
        timeout: Seconds to wait for the service before giving up. Without
            a timeout, ``requests`` would block indefinitely on a stalled
            connection.

    Returns:
        The value of the ``"key"`` field in the JSON response, or ``None``
        when the response carries no such field.

    Raises:
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # NOTE(review): the endpoint is plain HTTP, so the key travels
    # unencrypted -- confirm whether an HTTPS endpoint is available.
    url = "http://52.66.239.27:8504/get_keys"
    payload = {"email": email}
    response = requests.post(url, json=payload, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error
    # page as JSON or quietly returning None.
    response.raise_for_status()
    return response.json().get('key')

# Read the account e-mail from the environment, exchange it for an API key
# through get_api_key, and install that key on the openai module.
email = os.environ.get('EMAIL')
openai.api_key = get_api_key(email)

def load_data(filename='scraped_data.csv'):
    """Read the scraped-data CSV at *filename* into a pandas DataFrame.

    Args:
        filename: Path of the CSV file to read; defaults to
            ``'scraped_data.csv'``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(filename)
    return frame

def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector for *text* from the OpenAI embeddings API.

    The previous implementation called the chat-completion endpoint and
    returned the model's generated reply, which is not an embedding. This
    version calls the embeddings endpoint of the same legacy (pre-1.0)
    ``openai`` SDK interface that the rest of the file uses.

    Args:
        text: The string to embed.
        model: Name of the embedding model to request.

    Returns:
        The embedding as a list of floats.
    """
    response = openai.Embedding.create(model=model, input=text)
    # One input string yields exactly one entry in response['data'].
    return response['data'][0]['embedding']

def process_data(df):
    """Add an 'embedding' column computed from the 'summary' column.

    Each value of ``df['summary']`` is passed to ``get_embedding`` and the
    results are stored in ``df['embedding']``. The frame is modified in
    place and the same object is returned.

    Args:
        df: DataFrame that contains a ``'summary'`` column.

    Returns:
        *df*, now carrying the ``'embedding'`` column.
    """
    summaries = df['summary']
    embeddings = summaries.apply(get_embedding)
    df['embedding'] = embeddings
    return df

def save_embeddings(df, filename='embeddings.csv'):
    """Write *df* to *filename* as CSV and print a confirmation line.

    Args:
        df: DataFrame to persist; the index is not written.
        filename: Destination path; defaults to ``'embeddings.csv'``.
    """
    df.to_csv(path_or_buf=filename, index=False)
    confirmation = f"Embeddings saved to {filename}"
    print(confirmation)

if __name__ == '__main__':
    # Load scraped data from the default 'scraped_data.csv'
    data = load_data()

    # Add an 'embedding' column computed from each row's 'summary'.
    # process_data modifies the frame in place and returns it, so `data`
    # and `data_with_embeddings` refer to the same object.
    data_with_embeddings = process_data(data)

    # Write the result to the default 'embeddings.csv' (index omitted)
    save_embeddings(data_with_embeddings)


# Output: Embeddings saved to embeddings.csv
