In [11]:
pip install requests beautifulsoup4



In [3]:
import requests
from bs4 import BeautifulSoup
import json
from getpass import getpass
import time

# Credly login URL and dashboard URL
login_url = "https://www.credly.com/users/sign_in"
dashboard_url = "https://www.credly.com/users/earned"

# Create a session to maintain cookies
session = requests.Session()

# Set headers to mimic a browser
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Referer": "https://www.credly.com/",
    "Connection": "keep-alive"
}

def login_to_credly(email, password):
    try:
        # Get the login page to retrieve CSRF token
        login_page = session.get(login_url, headers=headers, allow_redirects=True)
        if not login_page.ok:
            print(f"Failed to load login page. Status code: {login_page.status_code}")
            return False

        soup = BeautifulSoup(login_page.text, "html.parser")

        # Try to find CSRF token
        csrf_token_input = soup.find("input", {"name": "authenticity_token"})
        if csrf_token_input is None:
            print("CSRF token not found. Inspecting page HTML...")
            # Save HTML for debugging
            with open("login_page.html", "w", encoding="utf-8") as f:
                f.write(login_page.text)
            print("Login page HTML saved to 'login_page.html'. Please inspect it for the CSRF token field.")
            # Try alternative token names or meta tags
            meta_token = soup.find("meta", {"name": "csrf-token"})
            if meta_token and "content" in meta_token.attrs:
                csrf_token = meta_token["content"]
                print("Found CSRF token in meta tag.")
            else:
                print("No CSRF token found in input or meta tags.")
                return False
        else:
            csrf_token = csrf_token_input["value"]
            print("CSRF token found.")

        # Create payload for login
        payload = {
            "user[email]": email,
            "user[password]": password,
            "authenticity_token": csrf_token,
            "commit": "Sign in"
        }

        # Add delay to avoid rate limiting
        time.sleep(1)

        # Perform login
        response = session.post(login_url, data=payload, headers=headers, allow_redirects=True)

        # Check if login was successful
        if response.ok and "sign_out" in response.text.lower():
            print("Login successful!")
            return True
        else:
            print(f"Login failed. Status code: {response.status_code}. Check credentials or inspect 'login_page.html'.")
            return False

    except Exception as e:
        print(f"Error during login: {str(e)}")
        return False

def scrape_badges():
    try:
        # Add delay to ensure session is stable
        time.sleep(1)

        # Access the dashboard
        dashboard_page = session.get(dashboard_url, headers=headers, allow_redirects=True)

        if not dashboard_page.ok:
            print(f"Failed to access dashboard. Status code: {dashboard_page.status_code}")
            return []

        # Parse the dashboard HTML
        soup = BeautifulSoup(dashboard_page.text, "html.parser")

        # Find badge elements (adjust selector based on Credly's HTML structure)
        badge_elements = soup.find_all("div", class_="cr-public-earned-badge-grid-item")

        if not badge_elements:
            print("No badges found. Saving dashboard HTML to 'dashboard_page.html' for inspection.")
            with open("dashboard_page.html", "w", encoding="utf-8") as f:
                f.write(dashboard_page.text)
            return []

        badges = []
        for badge in badge_elements:
            # Extract badge name
            badge_name_tag = badge.find("h3") or badge.find("div", class_="cr-public-earned-badge-grid-item__title")
            badge_name = badge_name_tag.text.strip() if badge_name_tag else "Unknown Badge"

            # Extract badge URL
            badge_url_tag = badge.find("a", href=True)
            badge_url = badge_url_tag["href"] if badge_url_tag else None
            if badge_url and not badge_url.startswith("http"):
                badge_url = "https://www.credly.com" + badge_url

            badges.append({"name": badge_name, "url": badge_url})

        return badges

    except Exception as e:
        print(f"Error during badge scraping: {str(e)}")
        return []

def main():
    # Prompt for credentials
    print("Enter your Credly login credentials:")
    email = input("Email: ")
    password = getpass("Password: ")

    # Log in to Credly
    if not login_to_credly(email, password):
        return

    # Scrape badge data
    badges = scrape_badges()

    if not badges:
        print("No badges found or error occurred.")
        return

    # Print badges and save to JSON
    print("\nFound Badges:")
    for badge in badges:
        print(f"Badge: {badge['name']}, URL: {badge['url']}")

    # Save to JSON file
    with open("credly_badges.json", "w", encoding="utf-8") as f:
        json.dump(badges, f, indent=4)
    print("\nBadges saved to credly_badges.json")

if __name__ == "__main__":
    main()

Enter your Credly login credentials:
Email: rk98991439@gmail.com
Password: ··········
CSRF token not found. Inspecting page HTML...
Login page HTML saved to 'login_page.html'. Please inspect it for the CSRF token field.
Found CSRF token in meta tag.
Login successful!
No badges found. Saving dashboard HTML to 'dashboard_page.html' for inspection.
No badges found or error occurred.


In [1]:
pip install selenium beautifulsoup4 requests



In [29]:
from bs4 import BeautifulSoup
import json

# Read the HTML file
with open('badges.html', 'r', encoding='utf-8') as file:
    html_content = file.read()

# Parse HTML with BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

# Find all badge cards
badge_cards = soup.find_all('div', role='button', attrs={'data-testid': 'desktop-badge-card'})

badges = []
for card in badge_cards:
    # Extract badge name
    name_tag = card.find('span', class_='Typographystyles__Container-fredly__sc-1jldzrm-0 enJnLg EarnedBadgeCardstyles__BadgeNameText-fredly__sc-gsqjwh-7 hqrelZ')
    badge_name = name_tag.text.strip() if name_tag else 'Unknown Badge'

    # Extract relative URL
    relative_url = card.get('href')

    # Construct full URL with correct base URL
    full_url = f"https://www.credly.com{relative_url}" if relative_url else None

    badges.append({'name': badge_name, 'url': full_url})

# Print badges
print("\nFound Badges:")
for badge in badges:
    print(f"Badge: {badge['name']}, URL: {badge['url']}")

# Save to JSON file
with open("credly_badges.csv", "w", encoding="utf-8") as f:
    json.dump(badges, f, indent=4)
print("\nBadges saved to credly_badges.csv")


Found Badges:
Badge: Generative AI Essentials, URL: https://www.credly.com/badges/088c28ae-9e57-427e-8a3e-adb439d59a40
Badge: Generative AI: Prompt Engineering, URL: https://www.credly.com/badges/da5b7c6a-f231-42fc-aec4-59c136739467
Badge: Statistics Essentials Using Excel, URL: https://www.credly.com/badges/e6d30d6c-e66e-47ab-872f-16650741315c
Badge: Generative AI Essentials for Data Analytics, URL: https://www.credly.com/badges/368502f7-d95f-46eb-b327-1027dc98aeb9
Badge: IBM Data Analyst Professional Certificate (V3), URL: https://www.credly.com/badges/31a81f62-640a-4d65-ad88-78cf56c15584
Badge: Data Analysis with Python, URL: https://www.credly.com/badges/3eff670d-8d11-4b52-93f4-e3f38356eaf6
Badge: Data Analyst Capstone Project, URL: https://www.credly.com/badges/7fa23fc7-d7a8-48d8-b418-14474e5856f7
Badge: Databases and SQL for Data Science, URL: https://www.credly.com/badges/01f5ed90-fe23-48dc-b57a-6c0b4bb1c97c
Badge: Python for Data Science and AI, URL: https://www.credly.com/bad