In [1]:
from splinter import Browser
from bs4 import BeautifulSoup
import os
import pandas as pd

ahrefs = pd.read_csv('ahref_urls.csv')
ahref_urls = ahrefs['URL'].tolist()
ahref_urls

 'https://help.ahrefs.com/en/articles/2754358-what-does-the-noindex-page-in-sitemap-issue-in-site-audit-mean',
 'https://help.ahrefs.com/en/articles/2427853-what-does-canonical-points-to-4xx-issue-in-site-audit-mean',
 'https://help.ahrefs.com/en/articles/2754354-what-does-the-redirect-loop-issue-in-site-audit-mean',
 'https://help.ahrefs.com/en/articles/2754350-what-does-the-redirected-page-has-no-incoming-internal-links-issue-in-site-audit-mean',
 'https://help.ahrefs.com/en/articles/2754344-what-does-the-self-referencing-hreflang-annotation-missing-issue-in-site-audit-mean',
 'https://help.ahrefs.com/en/articles/2754309-what-does-the-timed-out-issue-in-site-audit-mean',
 'https://help.ahrefs.com/en/articles/2747339-page-referenced-for-more-than-one-language-in-hreflang-error-in-site-audit',
 'https://help.ahrefs.com/en/articles/2753767-canonical-points-to-redirect-error-in-site-audit',
 'https://help.ahrefs.com/en/articles/2753767-canonical-points-to-redirect-error-in-site-audit',
 

In [2]:
data = {}

browser = Browser('chrome', headless=True)
for url in ahref_urls:
    browser.visit(url)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    title = str(soup.find('title'))
    h_tags = ' '.join([str(tag) for tag in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])])
    p_tags = ' '.join([str(tag) for tag in soup.find_all('p')])
    print('URL:', url)
    print('Title:', title)
    print('H tags:', h_tags)
    print('Paragraphs:', p_tags)
    
    # Concatenate the title, h_tags, and p_tags and store them in the dictionary
    data[url] = {'Content': ' '.join([title, h_tags, p_tags])}

# Convert the dictionary to a DataFrame
df = pd.DataFrame.from_dict(data, orient='index')

# Reset the index to move the URLs into a column
df.reset_index(inplace=True)

# Rename the columns
df.columns = ['URL', 'Content']

df


H tags: <h2 id="what-triggers-this-issue">What triggers this issue?</h2> <h2 id="why-is-it-important">Why is it important?</h2> <h2 id="how-to-fix-it">How to fix it?</h2>
Paragraphs: <p>Learn more about the "CSS broken" issue in Ahrefs' Site Audit and how to fix it on your website.</p> <p>This issue reports CSS files that return one of the 4xx or 5xx status codes.</p> <p> </p> <p>CSS files are plain-text files used for formatting content on web pages.</p> <p> </p> <p>If a CSS file cannot be accessed, the content on your web page will not be rendered the way it was supposed to, damaging the user experience on your website.</p> <p> </p> <p>These CSS files could have been deleted, moved or renamed. Also, the external website hosting the file could have been unavailable during the crawl.</p> <p> </p> <p>Replace, fix or remove links to the broken CSS files on your pages.</p> <p> </p> <p>To get the list of pages that reference the broken internal or external CSS file, click on the number in 

Unnamed: 0,URL,Content
0,https://help.ahrefs.com/en/articles/2405181-3x...,"<title>""3xx redirect in sitemap"" error in Site..."
1,https://help.ahrefs.com/en/articles/2427783-30...,"<title>""302 redirect"" warning in Site Audit | ..."
2,https://help.ahrefs.com/en/articles/2427853-wh...,<title>What does 'Canonical points to 4XX' iss...
3,https://help.ahrefs.com/en/articles/2456799-br...,"<title>""Broken redirect"" error in Site Audit |..."
4,https://help.ahrefs.com/en/articles/2491113-ca...,"<title>""Canonical points to 5XX"" error in Site..."
5,https://help.ahrefs.com/en/articles/2582744-3x...,"<title>""3xx page receives organic traffic"" err..."
6,https://help.ahrefs.com/en/articles/2585460-40...,"<title>""403 page in sitemap"" error in Site Aud..."
7,https://help.ahrefs.com/en/articles/2586152-40...,"<title>""403 page receives organic traffic"" err..."
8,https://help.ahrefs.com/en/articles/2586375-4x...,"<title>""4xx page in sitemap"" error in Site Aud..."
9,https://help.ahrefs.com/en/articles/2586436-4x...,"<title>""4xx page receives organic traffic"" err..."


In [16]:
# df.to_csv('content.csv', index=False)

In [4]:
from openai import OpenAI
from config import openai_api_key

client = OpenAI(
  api_key= openai_api_key,  
)

# Define the prompt
prompt = ("rewrite this content maintaining the technical detail but answering the following questions well: an explanation of the issue, "
"why does it matter, how to fix or correct it, and further resources with links to places where the client/reader can learn more. "
"This content will be posted on the Loud Interactive blog located on the Loud.us domain and Brent D. Payne will be listed as the author. "
"Write in Brent D. Payne's style using the pronouns of we/us however. Write from the perspective of Loud Interactive doing the SEO audit. "
"Maintain the resource links if provided. Output it in plain text maintaining formats of bold, bullets, italics, etc. "
"AND you MUST maintain the links to further resources or additional information (if they exist). Here is the page to rewrite:")

# Assuming df is your DataFrame and it's already defined
df['Summary'] = ''

for index, row in df.iterrows():
    content = row['Content']
    
    # Generate the summary with the appropriate engine for GPT-3.5 Turbo
    response = openai.ChatCompletion.create(
      model="gpt-3.5-turbo",  # Adjust as necessary for the model you're using
      messages=[
          {"role": "system", "content": "You are a helpful assistant."},
          {"role": "user", "content": f"{prompt}\n{content}"}
      ],
      temperature=0.3,
      max_tokens=2000,  # Adjust max_tokens if needed
      top_p=1,
      frequency_penalty=0,
      presence_penalty=0
    )
    
    # Store the summary in the DataFrame
    df.at[index, 'Summary'] = response['choices'][0]['message']['content'].strip()


NameError: name 'openai' is not defined