# In [10]:
import tkinter as tk
from tkinter import scrolledtext, messagebox
import requests
from bs4 import BeautifulSoup
import openai
import json

def scrape_website(url):
    """Fetch *url* and collect the visible text of every page it links to.

    Every ``<a href>`` on the start page is resolved to an absolute URL
    against the (post-redirect) address of the start page, stripped of its
    ``#fragment``, de-duplicated, and fetched once. Links that are not
    http(s) (``mailto:``, ``javascript:``, ...) are skipped.

    Args:
        url: Address of the start page.

    Returns:
        A dict mapping each absolute link URL to the text extracted from
        that page. Pages that fail to download are reported on stdout and
        left out. If the start page itself cannot be fetched or parsed, an
        error dialog is shown and an empty dict is returned.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    from urllib.parse import urldefrag, urljoin, urlsplit

    # Without a timeout a single stalled server would hang the GUI forever.
    timeout_seconds = 10

    try:
        response = requests.get(url, timeout=timeout_seconds)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        links = []
        seen = set()
        for anchor in soup.find_all('a', href=True):
            # urljoin handles "/abs", "relative.html", "../up" and "//host"
            # forms; plain string concatenation only got "/abs" right, and
            # only when the start URL had no path of its own.
            absolute, _fragment = urldefrag(
                urljoin(response.url, anchor['href'].strip())
            )
            if urlsplit(absolute).scheme not in ('http', 'https'):
                continue
            if absolute in seen:
                continue
            seen.add(absolute)
            links.append(absolute)

        content = {}
        for link in links:
            try:
                page = requests.get(link, timeout=timeout_seconds)
                # Do not store the body of a 404/500 page as real content.
                page.raise_for_status()
                page_soup = BeautifulSoup(page.text, 'html.parser')
                # A separator keeps words from adjacent elements apart so
                # later whitespace splitting sees real words.
                page_text = page_soup.get_text(separator=' ', strip=True)
                content[link] = page_text
            except Exception as e:
                # Best effort: one bad link must not abort the whole crawl.
                print(f"Failed to retrieve content from {link}: {e}")

        return content
    except Exception as e:
        messagebox.showerror("Scraping Error", str(e))
        return {}

def generate_questions(content, num_questions=10):
    """Ask the OpenAI chat completion API for short questions about *content*.

    Args:
        content: Text the questions should be based on; it is interpolated
            into the user prompt as-is.
        num_questions: How many questions the prompt asks for.

    Returns:
        The non-empty, whitespace-stripped lines of the model's reply, one
        list element per line. On any exception an error dialog is shown
        and an empty list is returned.
    """
    try:
        system_message = {
            "role": "system",
            "content": "You are a helpful assistant that generates concise questions.",
        }
        user_message = {
            "role": "user",
            "content": f"Generate {num_questions} concise questions (each under 80 characters) based on the following content:\n\n{content}\n\nQuestions:",
        }

        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[system_message, user_message],
            max_tokens=300,
            n=1,
            temperature=0.7,
        )

        # Only the first (and, with n=1, only) choice is read.
        reply = completion['choices'][0]['message']['content']
        stripped_lines = (line.strip() for line in reply.strip().split("\n"))
        return [line for line in stripped_lines if line]
    except Exception as e:
        messagebox.showerror("Generation Error", str(e))
        return []

def get_relevant_links(main_link, all_links, main_content, num_links=5):
    """Rank the other pages by word overlap with *main_content*.

    A page's score is the number of its whitespace-separated words
    (repeats included) that also occur in *main_content*.

    Args:
        main_link: URL of the reference page; it is excluded from the result.
        all_links: Mapping of URL to page text.
        main_content: Text of the reference page.
        num_links: Maximum number of URLs to return.

    Returns:
        Up to *num_links* URLs, highest score first; equal scores are
        ordered by URL, descending. On any exception an error dialog is
        shown and an empty list is returned.
    """
    try:
        vocabulary = set(main_content.split())
        scored = [
            (sum(1 for token in text.split() if token in vocabulary), candidate)
            for candidate, text in all_links.items()
            if candidate != main_link
        ]
        # Tuples compare by score first, then by URL, so ties fall back to URL.
        ranked = sorted(scored, reverse=True)
        return [candidate for _, candidate in ranked[:num_links]]
    except Exception as e:
        messagebox.showerror("Link Extraction Error", str(e))
        return []

def process_website():
    """Handle a click on "Process": scrape, analyse and display the result.

    Reads the API key and URL from the entry widgets, validates that both
    are non-empty, scrapes the site, and for every scraped page collects
    generated questions, the most similar other pages and a handful of
    topic words. The result is shown as indented JSON in the result box.

    The status label ends as "Processing complete!" only when the run
    succeeded; if an exception was raised it reads "Processing failed."
    after the error dialog.
    """
    api_key = api_key_entry.get().strip()
    url = url_entry.get().strip()

    if not api_key:
        messagebox.showerror("Input Error", "Please enter your OpenAI API key.")
        return

    if not url:
        messagebox.showerror("Input Error", "Please enter a website URL.")
        return

    openai.api_key = api_key

    # Display the wait message
    wait_message_label.config(text="Please wait, this might take 2 to 5 minutes...")
    app.update_idletasks()  # Ensure the message is updated before processing begins

    try:
        scraped_content = scrape_website(url)

        output = {"webpages": []}

        for link, content in scraped_content.items():
            questions = generate_questions(content)
            relevant_links = get_relevant_links(link, scraped_content, content)
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the same page always yields the same topics (a set's iteration
            # order for strings changes from run to run).
            topics = list(dict.fromkeys(content.split()))[:5]

            output["webpages"].append({
                "url": link,
                "questions": questions[:10],
                "relevant_links": relevant_links[:5],
                "topics": topics,
            })

        # Tk text indices are "line.column" strings.
        result_text.delete("1.0", tk.END)
        result_text.insert(tk.END, json.dumps(output, indent=4))

    except Exception as e:
        messagebox.showerror("Processing Error", str(e))
        # Do not claim success after a failure.
        wait_message_label.config(text="Processing failed.")
    else:
        wait_message_label.config(text="Processing complete!")

# Root window of the analyzer.
app = tk.Tk()
app.title("Website Content Analyzer: GEN AI PS")
app.geometry("800x600")

# Padded container that holds every other widget and grows with the window.
frame = tk.Frame(app)
frame.pack(padx=10, pady=10, fill=tk.BOTH, expand=True)

# Instantiate the widgets top to bottom, in the order they appear on screen.
api_key_label = tk.Label(frame, text="OpenAI API Key:")
api_key_entry = tk.Entry(frame, show='*', width=80)  # show='*' masks the key
url_label = tk.Label(frame, text="Website URL and wait for 2 to 5 minutes:")
url_entry = tk.Entry(frame, width=80)
process_button = tk.Button(frame, command=process_website, text="Process")
result_text = scrolledtext.ScrolledText(frame, width=100, height=25, wrap=tk.WORD)
# Status line updated by process_website while it runs.
wait_message_label = tk.Label(frame, text="")

# Lay the widgets out in that same order; only the result box stretches.
for _widget, _pack_options in (
    (api_key_label, {"pady": 5}),
    (api_key_entry, {"pady": 5}),
    (url_label, {"pady": 5}),
    (url_entry, {"pady": 5}),
    (process_button, {"pady": 10}),
    (result_text, {"expand": True, "fill": tk.BOTH, "pady": 5}),
    (wait_message_label, {"pady": 5}),
):
    _widget.pack(**_pack_options)

# Hand control to Tk; this call blocks until the window is closed.
app.mainloop()
