In [1]:
#!/usr/bin/env python
# coding: utf-8

import os
import pandas as pd
from dotenv import load_dotenv
import openai
from IPython.display import Markdown, display, update_display

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif not api_key.startswith("sk-"):
    print("An API key was found, but it doesn't start with 'sk-'; please check you're using the right key - see troubleshooting notebook")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

openai.api_key = api_key
model = 'gpt-4o-mini'

# CSV file path
file_path = 'sitemap.csv'  # Replace with your file path

def read_urls_from_csv(file_path, urls_column='URL'):
    try:
        df = pd.read_csv(file_path)
        urls_list = {index: row[urls_column] for index, row in df.iterrows()}
        return urls_list
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

urls_list = read_urls_from_csv(file_path, 'URL')
print(urls_list)

system_prompt = """You are a web application developer and security consultant. 
Your role is to analyze URLs and determine which URLs might be similar in function. Respond in markdown. Group similar URLs into groups."""

def get_url_user_prompt(urls_list):
    truncated_urls = []
    total_length = 0
    for url in urls_list.values():
        if total_length + len(url) + 1 > 5000:
            break
        truncated_urls.append(url)
        total_length += len(url) + 1
    return "Here is the list of URLs to analyze:\n" + "\n".join(truncated_urls)

def analyse_urls(urls_list):
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_url_user_prompt(urls_list)}
        ]
    )
    display(Markdown(response.choices[0].message.content))

def stream_url(urls_list):
    stream = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_url_user_prompt(urls_list)}
        ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.get('content', '')
        response = response.replace("```", "").replace("markdown", "")
        update_display(Markdown(response), display_id=display_handle.display_id)

# Example usage
analyse_urls(urls_list)
# stream_url(urls_list)  # Uncomment to use streaming

API key found and looks good so far!
{0: 'https://google-gruyere.appspot.com/662787643234587727633733504808217804147/', 1: 'https://google-gruyere.appspot.com/662787643234587727633733504808217804147/snippets.gtl', 2: 'https://google-gruyere.appspot.com/662787643234587727633733504808217804147/newsnippet.gtl', 3: 'https://google-gruyere.appspot.com/662787643234587727633733504808217804147/upload.gtl', 4: 'https://google-gruyere.appspot.com/662787643234587727633733504808217804147/editprofile.gtl', 5: 'https://google-gruyere.appspot.com/662787643234587727633733504808217804147/logout', 6: 'https://google-gruyere.appspot.com/662787643234587727633733504808217804147/deletesnippet?index=0', 7: 'https://google-gruyere.appspot.com/662787643234587727633733504808217804147/deletesnippet?index=1', 8: 'https://google-gruyere.appspot.com/662787643234587727633733504808217804147/cheese.png', 9: 'https://google-gruyere.appspot.com/662787643234587727633733504808217804147/feed.gtl?uid=admin', 10: 'https://go

APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742
