<a href="https://colab.research.google.com/github/k-dinakaran/automation-of-wordpress-post-publication-using-AI-tools/blob/main/AI_driven_content_audit_tool_for_SEO_health.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install requests beautifulsoup4 nltk textstat




In [None]:
import requests
from bs4 import BeautifulSoup
import nltk
from collections import Counter
import re
import textstat  # Importing textstat for readability

# Fetch the tokenizer data that nltk.word_tokenize needs at runtime.
# 'punkt' is what older NLTK releases load; newer releases (around 3.9 and
# later) look for the 'punkt_tab' resource instead and raise LookupError
# without it. Requesting both keeps the script working on either version;
# an unknown resource name is only reported by the downloader, not raised.
nltk.download('punkt')
nltk.download('punkt_tab')

def fetch_content(url, timeout=10):
    """Fetch the raw HTML of a page.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        The response body as text.

    Raises:
        Exception: If the server answers with any status code other than 200.
        requests.exceptions.RequestException: Propagated from ``requests`` on
            connection failures or when the timeout expires.
    """
    # A browser-like User-Agent is sent with every request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    # Without an explicit timeout requests waits indefinitely on a server
    # that accepts the connection but never responds.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch content from {url} with status code: {response.status_code}")
    return response.text

def extract_text_and_metadata(html_content):
    """Extract the visible text, title and meta description from HTML.

    Args:
        html_content: HTML document as a string.

    Returns:
        A ``(text, title, meta_description)`` tuple. ``title`` falls back to
        ``'No title'`` and ``meta_description`` to ``'No description'`` when
        the page does not provide a usable value.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    text = soup.get_text(separator=' ', strip=True)

    title = None
    title_tag = soup.title
    if title_tag is not None:
        # .string is None when <title> is empty or contains nested markup;
        # fall back to the tag's combined text in that case.
        title = title_tag.string or title_tag.get_text(strip=True)
    title = title or 'No title'

    meta_tag = soup.find('meta', attrs={'name': 'description'})
    # A <meta name="description"> tag may lack the content attribute (or have
    # it empty); .get() avoids the KeyError that direct indexing would raise.
    meta_description = meta_tag.get('content') if meta_tag is not None else None
    meta_description = meta_description or 'No description'

    return text, title, meta_description

def analyze_keywords(text, keywords):
    """Compute the density of each keyword in the text.

    Args:
        text: Text to analyze; it is lower-cased before tokenizing.
        keywords: Iterable of keywords or multi-word phrases. Matching is
            case-insensitive.

    Returns:
        A ``(density_report, word_count)`` tuple. ``density_report`` maps each
        keyword (as given) to its occurrences as a percentage of the total
        token count; ``word_count`` is the number of tokens produced by
        ``nltk.word_tokenize``. All densities are 0.0 when the text yields no
        tokens.
    """
    words = nltk.word_tokenize(text.lower())
    word_count = len(words)
    keyword_count = Counter(words)

    density_report = {}
    for keyword in keywords:
        phrase = keyword.lower().split()
        if len(phrase) > 1:
            # A phrase can never equal a single token, so count the positions
            # where the token sequence matches the phrase word for word.
            span = len(phrase)
            occurrences = sum(
                1
                for start in range(word_count - span + 1)
                if words[start:start + span] == phrase
            )
        else:
            occurrences = keyword_count[keyword.lower()]

        # Guard against empty text: there is nothing to divide by.
        density_report[keyword] = (occurrences / word_count) * 100 if word_count else 0.0

    return density_report, word_count

def analyze_content_structure(soup):
    """Count heading and list elements in a parsed document.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        Dict with the number of ``h1``, ``h2`` and ``h3`` elements under the
        keys ``'H1'``, ``'H2'``, ``'H3'``, and the combined number of ``ul``
        and ``ol`` elements under ``'Bullets'``.
    """
    # Report label -> selector handed to find_all; order fixes the report order.
    selectors = (
        ('H1', 'h1'),
        ('H2', 'h2'),
        ('H3', 'h3'),
        ('Bullets', ['ul', 'ol']),
    )
    return {label: len(soup.find_all(selector)) for label, selector in selectors}

def calculate_readability(text):
    """Return the Flesch-Kincaid grade level that textstat computes for *text*."""
    grade_level = textstat.flesch_kincaid_grade(text)
    return grade_level

def generate_report(url, text, keyword_density, word_count, title, meta_description, headings, readability_score):
    """Print the SEO audit report to stdout.

    Args:
        url: Address of the audited page.
        text: Extracted page text (accepted for interface compatibility; not
            used when rendering the report).
        keyword_density: Mapping of keyword -> density percentage.
        word_count: Total number of words/tokens in the page text.
        title: Page title.
        meta_description: Page meta description.
        headings: Mapping of structure label -> element count.
        readability_score: Flesch-Kincaid grade level of the text.
    """
    separator = "=" * 40

    print(f"SEO Audit Report for: {url}")
    print(separator)
    print(f"Total Word Count: {word_count}")
    print(f"Readability Score (Flesch-Kincaid Grade Level): {readability_score:.2f}")
    print(f"\nTitle: {title}")
    print(f"Meta Description: {meta_description}\n")

    print("Keyword Density Report:")
    for keyword, density in keyword_density.items():
        print(f" - {keyword}: {density:.2f}%")

    print("\nContent Structure:")
    for heading, count in headings.items():
        print(f" - {heading}: {count}")

    print(separator)

    # Collect recommendations first so the header is printed whenever at
    # least one applies, not only when the readability rule fires.
    recommendations = []
    if readability_score > 12:
        recommendations.append("Consider simplifying your language to improve readability.")
    # all() is True for an empty mapping, which covers "no keywords given".
    if all(density == 0 for density in keyword_density.values()):
        recommendations.append("Consider adding relevant keywords to the content.")

    if recommendations:
        print("Recommendations:")
        for recommendation in recommendations:
            print(f" - {recommendation}")

    print(separator)

def main():
    """Prompt for a URL and keywords, run the audit and print the report.

    Any error raised while fetching or analyzing the page is caught and its
    message printed instead of a traceback.
    """
    # Input: URL and target keywords. Surrounding whitespace is stripped from
    # the URL, and blank entries (e.g. from a trailing or doubled comma) are
    # dropped so they do not show up as an empty keyword in the report.
    url = input("Enter the URL of the content to audit: ").strip()
    raw_keywords = input("Enter the keywords to analyze (comma-separated): ").split(',')
    keywords = [keyword.strip() for keyword in raw_keywords if keyword.strip()]

    # Fetch and analyze content
    try:
        html_content = fetch_content(url)
        text, title, meta_description = extract_text_and_metadata(html_content)
        keyword_density, word_count = analyze_keywords(text, keywords)
        # The structure analysis needs the parsed tree, which
        # extract_text_and_metadata does not return, so parse again here.
        soup = BeautifulSoup(html_content, 'html.parser')
        headings = analyze_content_structure(soup)
        readability_score = calculate_readability(text)
        generate_report(url, text, keyword_density, word_count, title, meta_description, headings, readability_score)
    except Exception as e:
        # Top-level boundary of the script: report the failure and exit quietly.
        print(str(e))

if __name__ == "__main__":
    main()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Enter the URL of the content to audit: https://temstechsolutions.com/
Enter the keywords to analyze (comma-separated): hiring,AI,jobs
SEO Audit Report for: https://temstechsolutions.com/
Total Word Count: 576
Readability Score (Flesch-Kincaid Grade Level): 32.90

Title: Home - TEMS Tech Solutions
Meta Description: No description

Keyword Density Report:
 - hiring: 0.00%
 - AI: 0.35%
 - jobs: 0.35%

Content Structure:
 - H1: 0
 - H2: 7
 - H3: 0
 - Bullets: 28
Recommendations:
 - Consider simplifying your language to improve readability.
