<a href="https://colab.research.google.com/github/riya2003-star/demo/blob/main/mozilor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os

import requests
from bs4 import BeautifulSoup
import google.generativeai as genai # Import Gemini library
import pandas as pd
from google.colab import files

# Set Gemini API key.
# The key is read from the GEMINI_API_KEY environment variable rather than
# being hard-coded: a key committed to a notebook is exposed to anyone who can
# read the file and should be treated as compromised (revoke and rotate it).
_gemini_api_key = os.environ.get("GEMINI_API_KEY")
if not _gemini_api_key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; export it before running this notebook."
    )
genai.configure(api_key=_gemini_api_key)

# Define website scraping function
def scrape_website(url):
    """Download *url* and return the text content of the page.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text, with each text fragment stripped and the fragments
        joined by single spaces. If anything goes wrong (network error,
        non-2xx status, parse failure), no exception is raised; instead a
        string of the form ``"Error scraping <url>: <reason>"`` is returned,
        which callers use to detect failure.
    """
    try:
        # Bound the request so one unresponsive site cannot stall the run.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Use an explicit space separator: with strip=True and the default
        # empty separator, text from adjacent elements is glued together
        # (e.g. "Web Design" + "Agency" -> "Web DesignAgency"), which garbles
        # the text handed to downstream analysis.
        return soup.get_text(separator=" ", strip=True)
    except Exception as e:
        # Best-effort by design: report the failure in-band to the caller.
        return f"Error scraping {url}: {str(e)}"

# Define keyword analysis function
def analyze_keywords(content, keywords=None):
    """Return the keywords that occur in *content*, ignoring case.

    Matching is a plain case-insensitive substring test, so a keyword also
    matches when it appears inside a longer word.

    Args:
        content: Text to search.
        keywords: Optional iterable of keywords to look for. When omitted,
            the default agency-related list is used.

    Returns:
        A list of the matching keywords, in the order they appear in
        *keywords* and spelled exactly as given there.
    """
    if keywords is None:
        keywords = ["services", "web design", "agency", "marketing", "branding"]
    # Lower-case the (potentially large) page text once, not once per keyword.
    lowered = content.lower()
    return [kw for kw in keywords if kw.lower() in lowered]

# Ask Gemini whether a piece of text looks like it comes from an agency
def analyze_with_gemini(content):
    """Send *content* to the "gemini-pro" model and return its verdict text.

    The text is embedded in a fixed prompt asking whether it belongs to an
    agency. Any exception raised while creating the model, generating the
    response, or reading its text is caught and reported in-band as
    ``"Error analyzing content with Gemini: <reason>"``.
    """
    try:
        gemini = genai.GenerativeModel("gemini-pro")
        prompt = f"Analyze the following content to determine if it belongs to an agency:\n\n{content}"
        reply = gemini.generate_content(prompt)
        return reply.text
    except Exception as err:
        return f"Error analyzing content with Gemini: {err}"

# Define the main processing function
def process_websites(websites):
    """Scrape, keyword-scan and AI-analyze each site, returning one record per site.

    Args:
        websites: Iterable of URLs to evaluate.

    Returns:
        A list of dicts, one per URL in input order, with the keys
        ``"Website"``, ``"Keywords"``, ``"AI Analysis"`` and ``"Decision"``.
        The decision is driven by the keyword scan only; the Gemini output is
        recorded for the reader but does not influence it.
    """
    results = []
    for website in websites:
        print(f"Processing: {website}")
        content = scrape_website(website)

        # scrape_website reports failure in-band with a message that *begins*
        # with "Error scraping". Test the prefix rather than any occurrence so
        # a successfully scraped page that merely mentions the phrase is not
        # discarded as a failure.
        if content.startswith("Error scraping"):
            results.append({
                "Website": website,
                "Keywords": "N/A",
                "AI Analysis": content,
                "Decision": "Reject (Unable to Analyze)"
            })
            continue

        found_keywords = analyze_keywords(content)
        keyword_result = ", ".join(found_keywords) if found_keywords else "No keywords found"
        ai_analysis = analyze_with_gemini(content[:1000])  # Limit to 1000 characters

        # Decision logic: test membership in the keyword list itself instead
        # of substring-searching the joined display string.
        if "agency" in found_keywords:
            decision = "Onboard (Agency Identified)"
        elif found_keywords:
            decision = "Review Further (Relevant Keywords Found)"
        else:
            decision = "Reject (No Relevant Keywords)"

        results.append({
            "Website": website,
            "Keywords": keyword_result,
            "AI Analysis": ai_analysis,
            "Decision": decision
        })

    return results

# Candidate sites to evaluate
websites = [
    "https://www.digitalsilk.com/",
    "https://www.baunfire.com/",
    "https://fourbynorth.com/",
]

# Run the scrape / keyword / Gemini pipeline over every site
results = process_websites(websites)

# Tabulate the per-site records and persist them as CSV (no index column)
results_df = pd.DataFrame(results)
results_df.to_csv("agency_analysis_results.csv", index=False)

# Show the table under a heading
print("\nAnalysis Results:", results_df, sep="\n")

Processing: https://www.digitalsilk.com/
Processing: https://www.baunfire.com/
Processing: https://fourbynorth.com/

Analysis Results:
                        Website                                 Keywords  \
0  https://www.digitalsilk.com/                                      N/A   
1     https://www.baunfire.com/  services, web design, agency, marketing   
2      https://fourbynorth.com/                                 services   

                                         AI Analysis  \
0  Error scraping https://www.digitalsilk.com/: 4...   
1  Yes, the content belongs to an agency.\n\nThe ...   
2  Yes, the content belongs to an agency.\n\nThe ...   

                                   Decision  
0                Reject (Unable to Analyze)  
1               Onboard (Agency Identified)  
2  Review Further (Relevant Keywords Found)  
