In [None]:
%pip install requests beautifulsoup4 openai

In [None]:
import logging
import os
import time

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from openai import OpenAI

In [None]:
# Load environment variables from .env file.
# This must run before the cell that reads the API keys with os.getenv.
load_dotenv()

In [None]:
# Read the OpenAI and SERP API keys from the process environment.
# Each name is bound to None when its environment variable is not set.
serp_api_key = os.environ.get('serp_api_key')
openai_api_key = os.environ.get('openai_api_key')

In [None]:
# Initialize the OpenAI client with the API key read from the environment.
# This module-level `client` is the object summarize_search_results calls
# for chat completions.
client = OpenAI(api_key=openai_api_key)

In [None]:
# Configure logging once for the notebook: records at INFO level and above,
# each rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [None]:
# Define scraping function to extract content for a given URL
def scrape_targeted_content(url, timeout=10):
    """Download a web page and return the text of its <p> elements.

    Args:
        url: Address of the page to fetch. A missing/empty URL is rejected
            without making a request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the notebook indefinitely.

    Returns:
        dict: ``{"body_text": <str>}`` where the string is the text of every
        ``<p>`` tag joined by single spaces, or ``{}`` if the URL is missing
        or the page could not be fetched/parsed (the error is logged).
    """
    if not url:
        logging.error("Error scraping: no URL provided")
        return {}

    # Browser-like User-Agent sent with the request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception
        soup = BeautifulSoup(response.text, 'html.parser')
        body_text = ' '.join(p.get_text() for p in soup.find_all('p'))
        return {"body_text": body_text}
    except Exception as e:
        # Deliberately best-effort: one bad page must not abort the whole run,
        # so log the failure and let the caller skip this URL.
        logging.error(f"Error scraping {url}: {e}")
        return {}

In [None]:
# Define Search function using SERP API

def search_company(company_name, max_results=3, timeout=10):
    """Search Google through SERP API and return the top organic results.

    Args:
        company_name: Query string sent to the search engine.
        max_results: Maximum number of organic results to return.
        timeout: Seconds to wait for the SERP API before giving up.

    Returns:
        list[dict] | None: One ``{'url': ..., 'text': ...}`` dict per organic
        result (``url`` from the result's ``link``, ``text`` from its
        ``snippet`` or ``''``), or ``None`` when the request fails, returns a
        non-200 status, or the body is not valid JSON. Failures are logged.
    """
    search_url = "https://serpapi.com/search"
    params = {
        "engine": "google",
        "q": company_name,
        "api_key": serp_api_key  # Use the SERP API key here
    }
    try:
        response = requests.get(search_url, params=params, timeout=timeout)
    except requests.RequestException as e:
        # Log only the exception type: the exception message can contain the
        # full request URL, which includes the api_key query parameter.
        logging.error(f"Request failed: {type(e).__name__}")
        return None

    if response.status_code != 200:
        logging.error(f"Request failed with status code: {response.status_code}")
        return None

    try:
        search_results = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        logging.error("Request succeeded but the response body was not valid JSON")
        return None

    return [
        {
            'url': result.get('link'),
            'text': result.get('snippet', '')
        }
        for result in search_results.get('organic_results', [])[:max_results]
    ]

In [None]:
# Define a function to summarize search results' content extracted from scraping using Open AI

def summarize_search_results(search_results, max_chars=1000):
    """Scrape each search result and summarize the combined text with OpenAI.

    Args:
        search_results: Iterable of dicts carrying a ``'url'`` key (the shape
            returned by search_company). ``None`` is treated as empty.
        max_chars: Maximum number of *characters* (not tokens) of scraped
            text sent to the model.

    Returns:
        str: The model's summary, or a fixed notice when no text could be
        scraped (in which case no OpenAI request is made).
    """
    all_texts = []
    for index, result in enumerate(search_results or []):
        if index:
            time.sleep(1)  # Respectful delay between consecutive requests
        content = scrape_targeted_content(result.get('url'))
        text = content.get("body_text", "")
        if text:
            all_texts.append(text)

    # Character-based cap to keep the prompt small; this is not a token count.
    combined_text = ' '.join(all_texts)[:max_chars]

    # Nothing usable was scraped: skip the API call rather than asking the
    # model to summarize an empty prompt.
    if not combined_text.strip():
        return "No content could be scraped from the search results."

    # Preview at most the first 1000 characters of what is sent to the model.
    print(f"Combined text for summarization:\n{combined_text[:1000]}...")

    response = client.chat.completions.create(
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Summarize the following text: {combined_text}"}
        ],
        model="gpt-4",
        max_tokens=300,
        temperature=0.7
    )

    # message.content may be None; always hand callers a string.
    return response.choices[0].message.content or ""

In [None]:
# Combine the functions to create an AI sales agent

def ai_sales_agent(company_name):
    """Search for a company, summarize the top results, and build a report.

    Args:
        company_name: Name of the company to look up.

    Returns:
        str: A report starting with ``"Summary for <company_name>:"`` followed
        by the summary and the URL/snippet of every search result, or a
        "no results" message when the search fails or returns nothing.
    """
    search_results = search_company(company_name)
    if not search_results:
        return "No results found for the company name."

    # summarize_search_results scrapes every URL itself, so there is no
    # separate scraping pass here: that fetched each page a second time and
    # threw the content away.
    summary = summarize_search_results(search_results)
    results_output = "\n\n".join(
        f"URL: {result['url']}\nText: {result['text']}" for result in search_results
    )
    return f"Summary for {company_name}:\n{summary}\n\nURLs identified:\n{results_output}"

In [None]:
# Run the AI sales agent for a given company name

company_name = "Shell"  # Replace with the company name you want to search for
summary = ai_sales_agent(company_name)
# ai_sales_agent already prefixes its report with "Summary for <company>:"
# (or returns a "no results" message), so print it as-is instead of adding
# a second header.
print(summary)