In [1]:
import requests
from bs4 import BeautifulSoup

def get_urls_from_sitemap(sitemap_url):
    """Return the URLs listed in the ``<loc>`` elements of an XML sitemap.

    Args:
        sitemap_url: Address of the sitemap document to download.

    Returns:
        A list of URL strings in document order, with surrounding whitespace
        removed. ``<loc>`` elements that are empty after stripping are
        skipped.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout an unresponsive server would hang the script forever.
    response = requests.get(sitemap_url, timeout=30)
    # An error page is not a sitemap; fail loudly instead of returning [].
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'xml')
    # Pretty-printed sitemaps may wrap the URL in newlines and indentation.
    stripped = (loc.text.strip() for loc in soup.find_all('loc'))
    return [url for url in stripped if url]

def keywords_in_url(url, keywords):
    """Return the keywords that occur in the response body fetched from ``url``.

    Matching is a case-insensitive substring test against the full response
    text exactly as served (markup included). The HTTP status is not
    inspected, so the body of an error page is searched like any other.

    Args:
        url: Address of the page to download.
        keywords: Iterable of keyword strings to look for.

    Returns:
        A list of the keywords found, in the order they appear in
        ``keywords``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Without a timeout an unresponsive server would hang the script forever.
    response = requests.get(url, timeout=30)
    # Decode and lower-case the body once, not once per keyword.
    page_text = response.text.lower()
    return [keyword for keyword in keywords if keyword.lower() in page_text]

def load_keywords_from_file(filename):
    """Read keywords from a UTF-8 text file, one keyword per line.

    Args:
        filename: Path of the keyword file.

    Returns:
        A list of the non-blank lines in file order, each with leading and
        trailing whitespace removed.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark
    # (written by some Windows editors). str.strip() does not remove a BOM,
    # so it would otherwise stay glued to the first keyword and prevent it
    # from ever matching. An explicit encoding also makes the result
    # independent of the platform's default locale.
    with open(filename, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [keyword for keyword in stripped_lines if keyword]

def main():
    """Count, for each keyword, how many sitemap pages mention it.

    Loads keywords from ``keywords.txt``, fetches every URL listed in the
    sitemap, and prints one ``keyword: count`` line per keyword, where the
    count is the number of pages containing the keyword. Keywords found on
    no page are then listed under an "Exceptions" heading, in the same order
    as the keyword file.

    A page whose request fails is reported on stderr and skipped so that a
    single bad URL does not discard the results for all other pages. A
    failure to load the keyword file or the sitemap itself still propagates.
    """
    import sys

    sitemap_url = 'https://www.davidkolbconsultancy.com/pages-sitemap.xml'  # Replace with the actual sitemap URL
    # dict.fromkeys drops repeated lines while keeping file order; a repeated
    # keyword would otherwise be counted once per repetition on every page.
    keywords = list(dict.fromkeys(load_keywords_from_file('keywords.txt')))
    keyword_counts = {keyword: 0 for keyword in keywords}

    urls = get_urls_from_sitemap(sitemap_url)
    for url in urls:
        try:
            found_keywords = keywords_in_url(url, keywords)
        except requests.RequestException as error:
            print(f"Skipping {url}: {error}", file=sys.stderr)
            continue
        for keyword in found_keywords:
            keyword_counts[keyword] += 1

    print("Keyword counts:")
    for keyword, count in keyword_counts.items():
        print(f"{keyword}: {count}")

    # Derive the misses from the counts (instead of tracking a set) so the
    # report order is deterministic and follows the keyword file.
    keywords_not_found = [
        keyword for keyword, count in keyword_counts.items() if count == 0
    ]
    if keywords_not_found:
        print("\nExceptions:")
        for keyword in keywords_not_found:
            print(f"Keyword '{keyword}' was not found in any URL.")

# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


Keyword counts:
creativity: 7
digital strategy: 0
banks with online banking: 0
fixed mindset: 0
fintech: 1
growth mindset: 1
innovation: 7
leaders in transformation: 0
leadership and transformation: 0
learning in machine learning: 0
machine learning: 0
online banks: 0
problem solving: 1
problem solving skills: 0
prototyping: 1
transformational leadership: 0

Exceptions:
Keyword 'learning in machine learning' was not found in any URL.
Keyword 'banks with online banking' was not found in any URL.
Keyword 'fixed mindset' was not found in any URL.
Keyword 'leaders in transformation' was not found in any URL.
Keyword 'leadership and transformation' was not found in any URL.
Keyword 'machine learning' was not found in any URL.
Keyword 'transformational leadership' was not found in any URL.
Keyword 'online banks' was not found in any URL.
Keyword 'problem solving skills' was not found in any URL.
Keyword 'digital strategy' was not found in any URL.
