In [13]:
import requests
import pandas as pd
import time

def fetch_courses(limit_per_page=100, max_pages=5):
    base_url = "https://api.coursera.org/api/courses.v1"
    
    fields = "name,description,slug,level,primaryLanguages,workload,domainTypes,certificates"
    
    all_courses = []
    start = 0
    page_count = 0
    
    print(f"Starting fetch process...")
    
    while page_count < max_pages:
        params = {
            "start": start,
            "limit": limit_per_page,
            "fields": fields
        }
        
        try:
            response = requests.get(base_url, params=params)
            
            if response.status_code == 200:
                data = response.json()
                elements = data.get('elements', [])
                
                if not elements:
                    print("No more data available.")
                    break
                
                filtered_count = 0
                for item in elements:
                    languages = item.get("primaryLanguages", [])
                    if 'en' not in languages:
                        continue

                    domains = item.get("domainTypes", [])
                    if domains:
                        category = domains[0].get("subdomainId") or domains[0].get("domainId") or "General"
                    else:
                        category = "General"

                    certs = item.get("certificates", [])
                    cert_str = ", ".join(certs) if certs else "Standard Course Certificate"

                    course_info = {
                        "id": item.get("id"),
                        "title": item.get("name"),
                        "description": item.get("description"),
                        "level": item.get("level", "Not Specified"),
                        "duration": item.get("workload", "Self-paced"),
                        "category": category,
                        "certificate_type": cert_str,
                        "url": f"https://www.coursera.org/learn/{item.get('slug')}"
                    }
                    all_courses.append(course_info)
                    filtered_count += 1
                
                print(f"Page {page_count + 1}: Kept {filtered_count} courses.")
                
                if 'paging' in data and 'next' in data['paging']:
                    start = int(data['paging']['next'])
                    page_count += 1
                else:
                    break
                
                time.sleep(1)
                
            else:
                print(f"Error: {response.status_code}")
                break
                
        except Exception as e:
            print(f"Exception: {e}")
            break

    return all_courses

def save_to_csv(courses, filename="coursera_dataset.csv"):
    if not courses:
        print("No data to save.")
        return

    df = pd.DataFrame(courses)
    df = df.dropna(subset=['description']) 
    
    if 'category' in df.columns:
        df['category'] = df['category'].str.replace('-', ' ').str.title()

    df.to_csv(filename, index=False, encoding='utf-8-sig')
    print(f"Saved {len(df)} courses to {filename}")

if __name__ == "__main__":
    courses = fetch_courses(max_pages=5)
    save_to_csv(courses)

Starting fetch process...
Page 1: Kept 79 courses.
Page 2: Kept 76 courses.
Page 3: Kept 70 courses.
Page 4: Kept 78 courses.
Page 5: Kept 86 courses.
Saved 389 courses to coursera_dataset.csv


In [None]:
import pandas as pd

# 1. ‡∏≠‡πà‡∏≤‡∏ô‡πÑ‡∏ü‡∏•‡πå CSV
df = pd.read_csv("coursera_dataset.csv")

pd.set_option('display.max_colwidth', 50)  # ‡πÉ‡∏´‡πâ‡πÇ‡∏ä‡∏ß‡πå‡πÅ‡∏Ñ‡πà 50 ‡∏ï‡∏±‡∏ß‡∏≠‡∏±‡∏Å‡∏©‡∏£‡∏û‡∏≠ ‡πÄ‡∏î‡∏µ‡πã‡∏¢‡∏ß‡∏•‡πâ‡∏ô‡∏à‡∏≠
df

In [None]:
pip install langchain-chroma langchain-huggingface chromadb

In [None]:
import pandas as pd
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document
import os

def build_database(csv_path="coursera_dataset.csv", db_path="./vector_store"):
    if not os.path.exists(csv_path):
        print(f"File not found: {csv_path}")
        return

    df = pd.read_csv(csv_path)
    
    # ‡πÉ‡∏ä‡πâ Model ‡∏ü‡∏£‡∏µ‡∏Ç‡∏≠‡∏á HuggingFace ‡∏ó‡∏≥‡∏á‡∏≤‡∏ô‡∏ö‡∏ô CPU ‡πÑ‡∏î‡πâ ‡πÑ‡∏°‡πà‡∏ï‡πâ‡∏≠‡∏á‡πÉ‡∏ä‡πâ API Key
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

    documents = []
    print(f"Processing {len(df)} courses...")

    for _, row in df.iterrows():
        # ‡∏£‡∏ß‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏à‡∏∞‡πÉ‡∏´‡πâ AI ‡πÉ‡∏ä‡πâ‡∏Ñ‡πâ‡∏ô‡∏´‡∏≤‡∏Ñ‡∏ß‡∏≤‡∏°‡∏´‡∏°‡∏≤‡∏¢
        content = f"""
        Title: {row['title']}
        Category: {row['category']}
        Level: {row['level']}
        Description: {row['description']}
        """
        
        # ‡πÄ‡∏Å‡πá‡∏ö‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏ô‡∏≥‡πÑ‡∏õ‡πÅ‡∏™‡∏î‡∏á‡∏ú‡∏• (AI ‡πÑ‡∏°‡πà‡πÄ‡∏≠‡∏≤‡πÑ‡∏õ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì ‡πÅ‡∏ï‡πà‡πÄ‡∏Å‡πá‡∏ö‡πÑ‡∏ß‡πâ‡πÉ‡∏´‡πâ)
        metadata = {
            "id": str(row['id']),
            "title": row['title'],
            "url": row['url'],
            "duration": str(row['duration']),
            "certificate": str(row['certificate_type']),
            "level": str(row['level'])
        }

        doc = Document(page_content=content.strip(), metadata=metadata)
        documents.append(doc)

    print("Creating Vector Database...")
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á DB ‡πÅ‡∏•‡∏∞‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡∏•‡∏á‡πÇ‡∏ü‡∏•‡πÄ‡∏î‡∏≠‡∏£‡πå (Persist)
    Chroma.from_documents(
        documents=documents,
        embedding=embedding_model,
        persist_directory=db_path
    )
    
    print(f"Database saved to {db_path}")

def test_search(query, db_path="./vector_store"):
    print(f"\nTesting Search: '{query}'")
    
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    db = Chroma(persist_directory=db_path, embedding_function=embedding_model)
    
    # ‡∏Ñ‡πâ‡∏ô‡∏´‡∏≤ 3 ‡∏≠‡∏±‡∏ô‡∏î‡∏±‡∏ö‡πÅ‡∏£‡∏Å‡∏ó‡∏µ‡πà‡πÉ‡∏Å‡∏•‡πâ‡πÄ‡∏Ñ‡∏µ‡∏¢‡∏á‡∏ó‡∏µ‡πà‡∏™‡∏∏‡∏î
    results = db.similarity_search(query, k=3)
    
    for i, doc in enumerate(results):
        print(f"{i+1}. {doc.metadata['title']} ({doc.metadata['level']})")
        print(f"   Link: {doc.metadata['url']}")
        print("-" * 20)

if __name__ == "__main__":
    # 1. ‡∏™‡∏£‡πâ‡∏≤‡∏á DB
    build_database()
    
    # 2. ‡∏•‡∏≠‡∏á‡∏Ñ‡πâ‡∏ô‡∏´‡∏≤‡∏î‡∏π
    test_search("I want to learn about Python for Data Science")

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


Processing 389 courses...
Creating Vector Database...
Database saved to ./vector_store

Testing Search: 'I want to learn about Python for Data Science'
1. Python Programming Fundamentals (BEGINNER)
   Link: https://www.coursera.org/learn/microsoft-python-programming-fundamentals
--------------------
2. Intermediate Python ‚Äì Libraries, Tools & Practical Projects (INTERMEDIATE)
   Link: https://www.coursera.org/learn/packt-intermediate-python-libraries-tools-and-practical-projects-0d9as
--------------------
3. Data Visualization and Modeling in Python (INTERMEDIATE)
   Link: https://www.coursera.org/learn/python-data-modeling
--------------------


In [4]:
test_search("i want to learn TailwindCSS")


Testing Search: 'i want to learn TailwindCSS'
1. Tailwind CSS Practice Project: Build a Product Card (INTERMEDIATE)
   Link: https://www.coursera.org/learn/build-a-product-card-with-tailwind-css
--------------------
2. Create a Dark Moody Atmospheric 2D Game with Unity and C# (BEGINNER)
   Link: https://www.coursera.org/learn/packt-create-a-dark-moody-atmospheric-2d-game-with-unity-and-c-2qqm9
--------------------
3. Unix System Overview and Command (INTERMEDIATE)
   Link: https://www.coursera.org/learn/unix-system-overview-and-command
--------------------


In [11]:
import os

try:
    current_path = os.path.dirname(os.path.abspath(__file__))
except NameError:
    current_path = os.getcwd() # ‡∏Å‡∏±‡∏ô‡πÄ‡∏´‡∏ô‡∏µ‡∏¢‡∏ß‡πÄ‡∏ú‡∏∑‡πà‡∏≠‡∏£‡∏±‡∏ô‡πÉ‡∏ô Notebook

project_root = current_path
    
    # 2. ‡∏ß‡∏ô‡∏•‡∏π‡∏õ‡πÄ‡∏î‡∏¥‡∏ô‡∏ñ‡∏≠‡∏¢‡∏´‡∏•‡∏±‡∏á‡∏Ç‡∏∂‡πâ‡∏ô‡πÑ‡∏õ‡πÄ‡∏£‡∏∑‡πà‡∏≠‡∏¢‡πÜ ‡∏à‡∏ô‡∏Å‡∏ß‡πà‡∏≤‡∏à‡∏∞‡πÄ‡∏à‡∏≠‡πÇ‡∏ü‡∏•‡πÄ‡∏î‡∏≠‡∏£‡πå‡∏ä‡∏∑‡πà‡∏≠ 'data'
while True:
    possible_data_path = os.path.join(project_root, 'data')
    
    if os.path.exists(possible_data_path):
        # ‡πÄ‡∏à‡∏≠‡πÅ‡∏•‡πâ‡∏ß! ‡∏´‡∏¢‡∏∏‡∏î‡∏ï‡∏£‡∏á‡∏ô‡∏µ‡πâ‡πÅ‡∏´‡∏•‡∏∞‡∏Ñ‡∏∑‡∏≠ Root
        print(f"‚úÖ Found project root at: {project_root}")
        break
    
    # ‡∏ñ‡πâ‡∏≤‡πÑ‡∏°‡πà‡πÄ‡∏à‡∏≠ ‡πÉ‡∏´‡πâ‡∏ñ‡∏≠‡∏¢‡∏´‡∏•‡∏±‡∏á‡∏Ç‡∏∂‡πâ‡∏ô‡πÑ‡∏õ‡∏≠‡∏µ‡∏Å 1 ‡∏ä‡∏±‡πâ‡∏ô
    parent_dir = os.path.dirname(project_root)
    
    if parent_dir == project_root:
        # ‡∏ñ‡πâ‡∏≤‡∏ñ‡∏≠‡∏¢‡∏à‡∏ô‡∏™‡∏∏‡∏î‡∏ó‡∏≤‡∏á (C:\) ‡πÅ‡∏•‡πâ‡∏ß‡∏¢‡∏±‡∏á‡πÑ‡∏°‡πà‡πÄ‡∏à‡∏≠ ‡πÅ‡∏™‡∏î‡∏á‡∏ß‡πà‡∏≤‡∏ß‡∏≤‡∏á‡πÑ‡∏ü‡∏•‡πå‡∏ú‡∏¥‡∏î‡∏ó‡∏µ‡πà
        print("‚ùå Error: Could not find 'data' folder.")
        print("Please check if 'data' folder exists inside your project.")
        
    project_root = parent_dir

    # 3. ‡∏Å‡∏≥‡∏´‡∏ô‡∏î path ‡∏ó‡∏µ‡πà‡∏ñ‡∏π‡∏Å‡∏ï‡πâ‡∏≠‡∏á‡∏à‡∏≤‡∏Å Root ‡∏ó‡∏µ‡πà‡∏´‡∏≤‡πÄ‡∏à‡∏≠
csv_path = os.path.join(project_root, 'data', 'coursera_dataset.csv')
db_path = os.path.join(project_root, 'vector_store')

‚úÖ Found project root at: c:\SUPERPROJECT


In [None]:
import requests
import pandas as pd
import time
import re
import random # ‡πÄ‡∏û‡∏¥‡πà‡∏° random ‡∏°‡∏≤‡∏ä‡πà‡∏ß‡∏¢‡∏™‡∏∏‡πà‡∏°‡πÄ‡∏ß‡∏•‡∏≤

def remove_html_tags(text):
    if not text:
        return ""
    clean = re.compile('<.*?>')
    return re.sub(clean, ' ', text).strip()

def fetch_skilllane_data(limit=100):
    all_courses = []
    
    # User-Agent ‡∏™‡∏≥‡∏Ñ‡∏±‡∏ç‡∏°‡∏≤‡∏Å ‡∏ï‡πâ‡∏≠‡∏á‡πÉ‡∏™‡πà‡πÉ‡∏´‡πâ‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô Browser ‡∏à‡∏£‡∏¥‡∏á
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Referer": "https://www.skilllane.com/" # ‡πÉ‡∏™‡πà Referer ‡∏´‡∏•‡∏≠‡∏Å‡∏ß‡πà‡∏≤‡∏°‡∏≤‡∏à‡∏≤‡∏Å‡∏´‡∏ô‡πâ‡∏≤‡πÄ‡∏ß‡πá‡∏ö‡πÄ‡∏Ç‡∏≤
    }

    print(f"üöÄ Fetching SkillLane Single Courses (Limit: {limit})...")
    
    offset = 0
    batch_size = 15 # API ‡∏ï‡∏±‡∏ß‡∏ô‡∏µ‡πâ‡∏ö‡∏±‡∏á‡∏Ñ‡∏±‡∏ö‡∏ó‡∏µ‡∏•‡∏∞ 15
    
    while len(all_courses) < limit:
        url = "https://b2c-api.skilllane.com/v1/courses"
        params = {
            "limit": batch_size, 
            "offset": offset
        }
        
        try:
            resp = requests.get(url, params=params, headers=headers)
            
            # --- Anti-Ban Logic ---
            if resp.status_code == 429:
                print("‚ö†Ô∏è ‡πÇ‡∏î‡∏ô Rate Limit (‡∏¢‡∏¥‡∏á‡πÄ‡∏£‡πá‡∏ß‡πÑ‡∏õ)! ‡∏´‡∏¢‡∏∏‡∏î‡∏û‡∏±‡∏Å 60 ‡∏ß‡∏¥‡∏ô‡∏≤‡∏ó‡∏µ...")
                time.sleep(60)
                continue # ‡∏•‡∏≠‡∏á‡πÉ‡∏´‡∏°‡πà
                
            if resp.status_code == 403:
                print("‚ùå ‡πÇ‡∏î‡∏ô‡∏ö‡∏•‡πá‡∏≠‡∏Å IP (Forbidden)! ‡∏à‡∏ö‡∏Å‡∏≤‡∏£‡∏ó‡∏≥‡∏á‡∏≤‡∏ô‡∏ó‡∏±‡∏ô‡∏ó‡∏µ")
                break
                
            if resp.status_code != 200: 
                print(f"‚ùå Error: {resp.status_code}")
                break
            # ----------------------
            
            items = resp.json().get('items', [])
            if not items: 
                print("‚úÖ ‡∏´‡∏°‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÅ‡∏•‡πâ‡∏ß")
                break
            
            for item in items:
                # Logic ‡πÄ‡∏î‡∏¥‡∏°
                instructors = item.get('instructors_info', [])
                inst_name = "SkillLane Instructor"
                inst_title = ""
                if instructors:
                    fname = instructors[0].get('firstname', '').strip()
                    lname = instructors[0].get('lastname', '').strip()
                    inst_name = f"{fname} {lname}"
                    inst_title = instructors[0].get('highlight_desc', '')

                desc = remove_html_tags(item.get('description', ''))
                
                course_info = {
                    "id": f"skilllane_{item.get('id')}",
                    "title": item.get('name'),
                    "description": desc,
                    "instructor": f"{inst_name} ({inst_title})".strip(),
                    "price": f"{item.get('price', 0)} THB",
                    "duration": f"{item.get('duration', 0) // 60} mins",
                    "category": "General", # SkillLane ‡πÑ‡∏°‡πà‡∏™‡πà‡∏á category ‡∏°‡∏≤‡πÉ‡∏ô‡∏´‡∏ô‡πâ‡∏≤‡∏ô‡∏µ‡πâ ‡∏ï‡πâ‡∏≠‡∏á‡∏ó‡∏≥‡πÉ‡∏à
                    "url": f"https://www.skilllane.com/courses/{item.get('id_name')}",
                    "source": "SkillLane"
                }
                all_courses.append(course_info)
            
            print(f"   Fetched {len(items)} courses... (Total: {len(all_courses)})")
            
            offset += batch_size
            
            # [Safe Mode] ‡∏™‡∏∏‡πà‡∏°‡πÄ‡∏ß‡∏•‡∏≤‡∏û‡∏±‡∏Å ‡πÄ‡∏û‡∏∑‡πà‡∏≠‡πÉ‡∏´‡πâ‡∏î‡∏π‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô‡∏Ñ‡∏ô‡∏Å‡∏î‡πÄ‡∏õ‡∏•‡∏µ‡πà‡∏¢‡∏ô‡∏´‡∏ô‡πâ‡∏≤
            sleep_time = random.uniform(1.5, 3.5) 
            time.sleep(sleep_time)
            
        except Exception as e:
            print(f"‚ùå Exception: {e}")
            break

    return all_courses[:limit] # ‡∏ï‡∏±‡∏î‡πÉ‡∏´‡πâ‡∏û‡∏≠‡∏î‡∏µ‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏ó‡∏µ‡πà‡∏Ç‡∏≠

def save_to_csv(courses, filename="skilllane_dataset.csv"):
    if not courses: return
    df = pd.DataFrame(courses)
    df = df.dropna(subset=['description'])
    df.to_csv(filename, index=False, encoding='utf-8-sig')
    print(f"‚úÖ Saved {len(df)} items to {filename}")

if __name__ == "__main__":
    data = fetch_skilllane_data(limit=100)
    save_to_csv(data)

üöÄ Fetching SkillLane Single Courses (Limit: 100)...
   Fetched 15 courses... (Total: 15)
   Fetched 15 courses... (Total: 30)
   Fetched 15 courses... (Total: 45)
   Fetched 15 courses... (Total: 60)
   Fetched 15 courses... (Total: 75)
   Fetched 15 courses... (Total: 90)
   Fetched 15 courses... (Total: 105)
‚úÖ Saved 100 items to skilllane_dataset.csv


In [7]:
pd.read_csv("skilllane_dataset.csv")

Unnamed: 0,id,title,description,instructor,price,duration,category,url,source
0,skilllane_7743,HPT : High-Performing Team ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏ó‡∏µ‡∏°‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à‡∏™‡∏π‡∏á,‡∏Ñ‡∏≥‡∏≠‡∏ò‡∏¥‡∏ö‡∏≤‡∏¢‡∏Ñ‡∏≠‡∏£‡πå‡∏™‡∏≠‡∏≠‡∏ô‡πÑ‡∏•‡∏ô‡πå ‡πÉ‡∏ô‡πÇ‡∏•‡∏Å‡∏Å‡∏≤‡∏£‡∏ó‡∏≥‡∏á‡∏≤‡∏ô‡∏ó‡∏∏‡∏Å‡∏ß‡∏±‡∏ô‡∏ô‡∏µ‡πâ ‡∏´...,‡∏≠‡∏¥‡∏®‡∏£‡∏≤ ‡∏™‡∏°‡∏¥‡∏ï‡∏∞‡∏û‡∏¥‡∏ô‡∏ó‡∏∏ (‡∏≠.‡πÄ‡∏≠‡πâ) (‡∏ú‡∏π‡πâ‡∏Å‡πà‡∏≠‡∏ï‡∏±‡πâ‡∏á‡∏ö‡∏£‡∏¥‡∏©‡∏±‡∏ó ‡∏≠‡∏¥‡∏ô...,4990 THB,84 mins,General,https://www.skilllane.com/courses/high-perform...,SkillLane
1,skilllane_7860,n8n AI Agent & OCR Automation ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏ù‡πà‡∏≤‡∏¢‡∏ö‡∏±‡∏ç‡∏ä‡∏µ ...,‡∏Ñ‡∏≠‡∏£‡πå‡∏™‡∏ô‡∏µ‡πâ‡∏≠‡∏≠‡∏Å‡πÅ‡∏ö‡∏ö‡∏°‡∏≤‡πÄ‡∏û‡∏∑‡πà‡∏≠‡πÉ‡∏´‡πâ‡∏Ñ‡∏∏‡∏ì‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏£‡∏∞‡∏ö‡∏ö‡∏≠‡∏±‡∏ï‡πÇ‡∏ô‡∏°‡∏±‡∏ï‡∏¥‡∏ó...,‡∏î‡∏£.‡∏≠‡∏°‡∏£‡πÄ‡∏ó‡∏û ‡∏ó‡∏≠‡∏á‡∏ä‡∏¥‡∏ß (‡∏ô‡∏±‡∏Å‡∏ß‡∏¥‡∏à‡∏±‡∏¢‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• ‡∏ß‡∏¥‡∏ó‡∏¢‡∏≤‡∏Å‡∏£‡∏î‡πâ‡∏≤‡∏ô‡∏Å‡∏≤...,0 THB,243 mins,General,https://www.skilllane.com/courses/n8n-OCR-Auto...,SkillLane
2,skilllane_3981,AutoCAD ‡πÄ‡∏Ç‡∏µ‡∏¢‡∏ô‡πÅ‡∏ö‡∏ö‡πÄ‡∏Ñ‡∏£‡∏∑‡πà‡∏≠‡∏á‡∏Å‡∏•‡πÄ‡∏ö‡∏∑‡πâ‡∏≠‡∏á‡∏ï‡πâ‡∏ô,‡∏Ñ‡∏≥‡∏≠‡∏ò‡∏¥‡∏ö‡∏≤‡∏¢‡∏Ñ‡∏≠‡∏£‡πå‡∏™‡∏≠‡∏≠‡∏ô‡πÑ‡∏•‡∏ô‡πå AutoCAD ‡πÄ‡∏õ‡πá‡∏ô‡πÇ‡∏õ‡∏£‡πÅ‡∏Å‡∏£‡∏°‡∏ó‡∏µ‡πà‡∏™‡∏≤...,‡∏£‡∏±‡∏ê‡∏ß‡∏¥‡∏™‡∏∏‡∏ó‡∏ò‡∏¥‡πå ‡∏Å‡∏•‡πà‡∏≠‡∏°‡∏à‡∏¥‡∏ï‡∏ï‡πå (‡∏ß‡∏¥‡∏ó‡∏¢‡∏≤‡∏Å‡∏£‡πÅ‡∏•‡∏∞‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∂‡∏Å‡∏©‡∏≤‡∏ó‡∏≤‡∏á...,2990 THB,842 mins,General,https://www.skilllane.com/courses/AutoCAD-for-...,SkillLane
3,skilllane_7726,‡∏™‡∏£‡πâ‡∏≤‡∏á‡πÇ‡∏°‡πÄ‡∏î‡∏• 3D ‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏°‡∏∑‡∏≠‡∏≠‡∏≤‡∏ä‡∏µ‡∏û ‡∏î‡πâ‡∏ß‡∏¢ AutoCAD ‡∏â‡∏ö‡∏±‡∏ö‡πÄ...,‡∏Ñ‡∏≥‡∏≠‡∏ò‡∏¥‡∏ö‡∏≤‡∏¢‡∏Ñ‡∏≠‡∏£‡πå‡∏™‡∏≠‡∏≠‡∏ô‡πÑ‡∏•‡∏ô‡πå ‡∏Ñ‡∏≠‡∏£‡πå‡∏™ ‡∏™‡∏£‡πâ‡∏≤‡∏á‡πÇ‡∏°‡πÄ‡∏î‡∏• 3 ‡∏°‡∏¥‡∏ï‡∏¥ ...,‡∏£‡∏±‡∏ê‡∏ß‡∏¥‡∏™‡∏∏‡∏ó‡∏ò‡∏¥‡πå ‡∏Å‡∏•‡πà‡∏≠‡∏°‡∏à‡∏¥‡∏ï‡∏ï‡πå (‡∏ß‡∏¥‡∏ó‡∏¢‡∏≤‡∏Å‡∏£‡πÅ‡∏•‡∏∞‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∂‡∏Å‡∏©‡∏≤‡∏ó‡∏≤‡∏á...,1490 THB,470 mins,General,https://www.skilllane.com/courses/AutoCAD-3D-b...,SkillLane
4,skilllane_7221,‡πÄ‡∏Ñ‡∏•‡πá‡∏î‡∏•‡∏±‡∏ö‡∏≠‡∏≠‡∏°‡πÄ‡∏á‡∏¥‡∏ô ‡∏â‡∏ö‡∏±‡∏ö‡∏Ñ‡∏ô‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ó‡∏≥‡∏á‡∏≤‡∏ô,‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏Ñ‡∏ß‡∏≤‡∏°‡∏£‡∏π‡πâ‡∏Ñ‡∏ß‡∏≤‡∏°‡πÄ‡∏Ç‡πâ‡∏≤‡πÉ‡∏à‡πÄ‡∏Å‡∏µ‡πà‡∏¢‡∏ß‡∏Å‡∏±‡∏ö‡πÅ‡∏ô‡∏ß‡∏ó‡∏≤‡∏á‡∏Å‡∏≤‡∏£‡∏ß‡∏≤‡∏á‡πÅ‡∏ú‡∏ô...,SET Instructor (None),0 THB,66 mins,General,https://www.skilllane.com/courses/SET-WMD1012,SkillLane
...,...,...,...,...,...,...,...,...,...
95,skilllane_7674,"‡∏™‡∏≠‡∏ô‡∏ó‡∏≥ ""‡∏Å‡πã‡∏ß‡∏¢‡πÄ‡∏ï‡∏µ‡πã‡∏¢‡∏ß‡∏£‡∏≤‡∏î‡∏´‡∏ô‡πâ‡∏≤‡∏´‡∏°‡∏π‡∏´‡∏°‡∏±‡∏Å"" ‡∏™‡∏π‡∏ï‡∏£‡∏°‡∏≤‡∏ï‡∏£‡∏ê‡∏≤‡∏ô-‡∏™...",‡∏Ñ‡∏≥‡∏≠‡∏ò‡∏¥‡∏ö‡∏≤‡∏¢‡∏Ñ‡∏≠‡∏£‡πå‡∏™‡∏≠‡∏≠‡∏ô‡πÑ‡∏•‡∏ô‡πå ‡∏´‡πâ‡∏≤‡∏°‡∏û‡∏•‡∏≤‡∏î!! ‡∏Ñ‡∏≠‡∏£‡πå‡∏™ ‡∏™‡∏≠‡∏ô‡∏ó‡∏≥ ...,‡∏ò‡∏ô‡∏∞‡∏®‡∏±‡∏Å‡∏î‡∏¥‡πå ‡∏û‡∏≤‡∏´‡∏∏‡∏à‡∏¥‡∏ô‡∏î‡∏≤ (‡πÄ‡∏ä‡∏ü‡πÅ‡∏î‡∏ô) (Food Consultant ...,999 THB,29 mins,General,https://www.skilllane.com/courses/stir-fried-n...,SkillLane
96,skilllane_7669,‡∏™‡∏≠‡∏ô‡∏™‡∏£‡πâ‡∏≤‡∏á Template ‡∏£‡∏∞‡∏ö‡∏ö‡∏™‡∏ï‡πä‡∏≠‡∏Å‡∏™‡∏¥‡∏ô‡∏Ñ‡πâ‡∏≤‡πÉ‡∏ô Google Sheets,‡∏Ñ‡∏≥‡∏≠‡∏ò‡∏¥‡∏ö‡∏≤‡∏¢‡∏Ñ‡∏≠‡∏£‡πå‡∏™ ‡∏Ñ‡∏≠‡∏£‡πå‡∏™‡∏ô‡∏µ‡πâ‡∏™‡∏≠‡∏ô‡∏Å‡∏≤‡∏£‡∏™‡∏£‡πâ‡∏≤‡∏á ‚Äú‡∏£‡∏∞‡∏ö‡∏ö‡∏™‡∏ï‡πä‡∏≠‡∏Å‡∏™...,"‡∏ß‡∏á‡∏®‡∏Å‡∏£ ‡∏ô‡∏¥‡∏°‡∏°‡∏¥‡∏ï (‡πÄ‡∏à‡πâ‡∏≤‡∏Ç‡∏≠‡∏á‡πÄ‡∏û‡∏à ""‡∏ö‡∏£‡∏¥‡∏´‡∏≤‡∏£‡∏á‡∏≤‡∏ô‡πÅ‡∏ö‡∏ö‡∏Ñ‡∏ô‡πÑ‡∏≠‡∏ó‡∏µ"" ...",0 THB,166 mins,General,https://www.skilllane.com/courses/GoogleSheets...,SkillLane
97,skilllane_3879,‡∏Ñ‡∏≠‡∏£‡πå‡∏™‡πÄ‡∏£‡∏µ‡∏¢‡∏ô‡πÑ‡∏ß‡∏¢‡∏≤‡∏Å‡∏£‡∏ì‡πå‡πÄ‡∏Å‡∏≤‡∏´‡∏•‡∏µ ‡∏£‡∏∞‡∏î‡∏±‡∏ö 4 ‡∏°.‡πÇ‡∏ã‡∏• (‡πÄ‡∏•‡πà‡∏°Sn...,‡∏Ñ‡∏≥‡∏≠‡∏ò‡∏¥‡∏ö‡∏≤‡∏¢‡∏Ñ‡∏≠‡∏£‡πå‡∏™‡∏≠‡∏≠‡∏ô‡πÑ‡∏•‡∏ô‡πå ‡∏Ñ‡∏≠‡∏£‡πå‡∏™‡πÄ‡∏£‡∏µ‡∏¢‡∏ô‡πÑ‡∏ß‡∏¢‡∏≤‡∏Å‡∏£‡∏ì‡πå‡πÄ‡∏Å‡∏≤‡∏´‡∏•‡∏µ...,‡∏ò‡∏¥‡∏î‡∏≤ ‡∏û‡∏±‡∏ä‡∏£‡∏û‡∏£‡∏û‡∏á‡∏®‡πå (‡∏Ñ‡∏£‡∏π‡∏™‡∏≠‡∏ô‡∏†‡∏≤‡∏©‡∏≤‡πÄ‡∏Å‡∏≤‡∏´‡∏•‡∏µ ‡πÅ‡∏•‡∏∞‡∏ô‡∏±‡∏Å‡∏®‡∏∂‡∏Å‡∏©‡∏≤‡∏ó...,6500 THB,239 mins,General,https://www.skilllane.com/courses/Korean-Gramm...,SkillLane
98,skilllane_7622,Notebook LM 101: ‡πÄ‡∏Ñ‡∏•‡πá‡∏î‡∏•‡∏±‡∏ö‡πÉ‡∏ä‡πâ AI ‡πÄ‡∏û‡∏¥‡πà‡∏° Producti...,‡∏Ñ‡∏≥‡∏≠‡∏ò‡∏¥‡∏ö‡∏≤‡∏¢‡∏Ñ‡∏≠‡∏£‡πå‡∏™‡∏≠‡∏≠‡∏ô‡πÑ‡∏•‡∏ô‡πå Notebook LM 101: ‡πÄ‡∏Ñ‡∏•‡πá‡∏î‡∏•‡∏±‡∏ö...,‡∏†‡∏Ñ‡∏ß‡∏î‡∏µ ‡∏™‡∏°‡∏û‡∏á‡∏©‡πå (‡πÄ‡∏à‡πâ‡∏≤‡∏Ç‡∏≠‡∏á‡πÄ‡∏û‡∏à SPEAK SPARK - present...,2500 THB,175 mins,General,https://www.skilllane.com/courses/NotebookLM-101,SkillLane


In [8]:
import pandas as pd

In [11]:
pd.read_csv("../data/coursera_dataset.csv")

Unnamed: 0,id,title,description,level,duration,category,certificate_type,url
0,l31la3mKEe-zFg7heHyXOQ,Getting started with the Vertex AI Gemini 1.5 ...,This is a self-paced lab that takes place in t...,BEGINNER,1 hour 30 minutes,cloud-computing,VerifiedCert,https://www.coursera.org/learn/googlecloud-get...
1,OK4S2mhbEfCCGA7lN4veKQ,SPSS: Apply & Interpret Logistic Regression Mo...,This course provides a practical and applied i...,Not Specified,,data-analysis,VerifiedCert,https://www.coursera.org/learn/spss-apply-inte...
2,mukrVoSKEe-L8RKezlgehQ,Securing Compute Engine Applications and Resou...,This is a self-paced lab that takes place in t...,INTERMEDIATE,1 hour 30 minutes,cloud-computing,VerifiedCert,https://www.coursera.org/learn/googlecloud-sec...
3,ecoAPNowEe22LAo38arT7w,Overcoming Challenges in Self and Society,This course is an personal development and aca...,BEGINNER,"4 weeks of study, 2-4 hours a week",personal-development,"VerifiedCert, Specialization",https://www.coursera.org/learn/overcoming-chal...
4,69Bku0KoEeWZtA4u62x6lQ,Gamification,Gamification is the application of game elemen...,Not Specified,4-8 hours/week,design-and-product,VerifiedCert,https://www.coursera.org/learn/gamification
...,...,...,...,...,...,...,...,...
7848,lLCNc1b4Ee2O6hIM21GqbQ,Mastering Ansible Automation,Learn the essentials to quickly master automat...,BEGINNER,"3 weeks of study, 3-4 hours/week",support-and-operations,"VerifiedCert, Specialization",https://www.coursera.org/learn/mastering-ansib...
7849,gQsElhqJEfCwdwr_1g1Hsw,Effective Hiring: Interview Training for Managers,Hiring the right people is one of the most cri...,BEGINNER,4 Hours,leadership-and-management,"VerifiedCert, Specialization",https://www.coursera.org/learn/effective-hirin...
7850,lruGmUhFEe-xRA6omnSgfQ,Apache Kafka - An Introduction,"Apache Kafka is a powerful, open-source stream...",BEGINNER,1 Hour,software-development,"VerifiedCert, Specialization",https://www.coursera.org/learn/apache-kafka-an...
7851,cmFRA_AOEeqnMQ47wD6eKQ,Design and Develop a Website using Figma and CSS,"In this 2-hour long project-based course, you ...",INTERMEDIATE,2 hours,software-development,"VerifiedCert, Specialization",https://www.coursera.org/learn/design-and-deve...


In [1]:
import requests
import math

def check_total_pages(limit_per_page=100):
    url = "https://api.coursera.org/api/courses.v1"
    
    # ‡∏Ç‡∏≠‡πÅ‡∏Ñ‡πà 1 ‡∏ï‡∏±‡∏ß‡∏û‡∏≠ ‡πÄ‡∏û‡∏∑‡πà‡∏≠‡∏î‡∏π Meta Data (‡∏õ‡∏£‡∏∞‡∏´‡∏¢‡∏±‡∏î‡πÄ‡∏ô‡πá‡∏ï)
    params = {
        "start": 0,
        "limit": 1
    }
    
    try:
        print("üîç Checking Coursera API Total Count...")
        response = requests.get(url, params=params)
        
        if response.status_code == 200:
            data = response.json()
            
            # 1. ‡∏î‡∏∂‡∏á‡∏¢‡∏≠‡∏î‡∏£‡∏ß‡∏° (Total Items)
            total_items = data.get('paging', {}).get('total', 0)
            
            if total_items == 0:
                print("‚ùå ‡πÑ‡∏°‡πà‡∏û‡∏ö‡∏¢‡∏≠‡∏î‡∏£‡∏ß‡∏° (API ‡∏≠‡∏≤‡∏à‡πÄ‡∏õ‡∏•‡∏µ‡πà‡∏¢‡∏ô Format)")
                return

            # 2. ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏´‡∏ô‡πâ‡∏≤
            # ‡∏™‡∏π‡∏ï‡∏£: (‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î / ‡∏à‡∏≥‡∏ô‡∏ß‡∏ô‡∏ï‡πà‡∏≠‡∏´‡∏ô‡πâ‡∏≤) ‡∏õ‡∏±‡∏î‡πÄ‡∏®‡∏©‡∏Ç‡∏∂‡πâ‡∏ô
            total_pages = math.ceil(total_items / limit_per_page)
            
            print(f"‚úÖ ‡∏û‡∏ö‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î: {total_items:,} ‡∏Ñ‡∏≠‡∏£‡πå‡∏™")
            print(f"üìÑ ‡∏ñ‡πâ‡∏≤‡∏î‡∏∂‡∏á‡∏ó‡∏µ‡∏•‡∏∞ {limit_per_page} ‡∏Ñ‡∏≠‡∏£‡πå‡∏™ ‡∏à‡∏∞‡∏°‡∏µ‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î: {total_pages:,} ‡∏´‡∏ô‡πâ‡∏≤")
            print("-" * 30)
            print(f"üí° ‡πÅ‡∏ô‡∏∞‡∏ô‡∏≥‡πÉ‡∏´‡πâ‡∏ï‡∏±‡πâ‡∏á max_pages = {total_pages + 1} ‡πÄ‡∏û‡∏∑‡πà‡∏≠‡∏Ñ‡∏ß‡∏≤‡∏°‡∏ä‡∏±‡∏ß‡∏£‡πå")
            
        else:
            print(f"Error: {response.status_code}")
            
    except Exception as e:
        print(f"Exception: {e}")

if __name__ == "__main__":
    # ‡∏•‡∏≠‡∏á‡πÄ‡∏ä‡πá‡∏Ñ‡∏î‡∏π‡∏ß‡πà‡∏≤‡∏ñ‡πâ‡∏≤‡∏î‡∏∂‡∏á‡∏ó‡∏µ‡∏•‡∏∞ 100 ‡∏à‡∏∞‡∏°‡∏µ‡∏Å‡∏µ‡πà‡∏´‡∏ô‡πâ‡∏≤
    check_total_pages(limit_per_page=100)

üîç Checking Coursera API Total Count...
‚úÖ ‡∏û‡∏ö‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î: 16,829 ‡∏Ñ‡∏≠‡∏£‡πå‡∏™
üìÑ ‡∏ñ‡πâ‡∏≤‡∏î‡∏∂‡∏á‡∏ó‡∏µ‡∏•‡∏∞ 100 ‡∏Ñ‡∏≠‡∏£‡πå‡∏™ ‡∏à‡∏∞‡∏°‡∏µ‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î: 169 ‡∏´‡∏ô‡πâ‡∏≤
------------------------------
üí° ‡πÅ‡∏ô‡∏∞‡∏ô‡∏≥‡πÉ‡∏´‡πâ‡∏ï‡∏±‡πâ‡∏á max_pages = 170 ‡πÄ‡∏û‡∏∑‡πà‡∏≠‡∏Ñ‡∏ß‡∏≤‡∏°‡∏ä‡∏±‡∏ß‡∏£‡πå


In [None]:
from curl_cffi import requests
from bs4 import BeautifulSoup
import pandas as pd
import json
import time
import random

def fetch_datacamp_courses():
    # URL ‡∏´‡∏ô‡πâ‡∏≤‡∏à‡∏≤‡∏ô‡∏£‡∏ß‡∏°‡∏Ñ‡∏≠‡∏£‡πå‡∏™
    url = "https://www.datacamp.com/courses-all"
    
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    print(f"üöÄ Fetching DataCamp (Next.js extraction)...")
    
    try:
        # 1. ‡∏î‡∏∂‡∏á HTML ‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏ô‡πâ‡∏≤‡∏°‡∏≤
        response = requests.get(url, headers=headers, impersonate="chrome")
        
        if response.status_code != 200:
            print(f"‚ùå Error: {response.status_code}")
            return []

        # 2. ‡πÉ‡∏ä‡πâ BeautifulSoup ‡∏´‡∏≤‡∏ï‡∏π‡πâ‡πÄ‡∏ã‡∏ü‡∏ó‡∏µ‡πà‡∏ä‡∏∑‡πà‡∏≠ __NEXT_DATA__
        soup = BeautifulSoup(response.text, 'html.parser')
        next_data_tag = soup.find('script', id='__NEXT_DATA__')
        
        if not next_data_tag:
            print("‚ùå ‡πÑ‡∏°‡πà‡πÄ‡∏à‡∏≠ Next.js Data (‡πÄ‡∏ß‡πá‡∏ö‡∏≠‡∏≤‡∏à‡πÄ‡∏õ‡∏•‡∏µ‡πà‡∏¢‡∏ô‡πÇ‡∏Ñ‡∏£‡∏á‡∏™‡∏£‡πâ‡∏≤‡∏á)")
            return []

        # 3. ‡πÅ‡∏õ‡∏•‡∏á‡∏Ç‡πâ‡∏≠‡∏Ñ‡∏ß‡∏≤‡∏°‡πÉ‡∏ô‡∏ï‡∏π‡πâ‡πÄ‡∏ã‡∏ü‡πÉ‡∏´‡πâ‡πÄ‡∏õ‡πá‡∏ô JSON
        json_data = json.loads(next_data_tag.string)
        
        all_courses = []
        
        try:
            # ‡πÄ‡∏Ç‡πâ‡∏≤‡πÑ‡∏õ‡∏ó‡∏µ‡πà props -> pageProps ‡∏Å‡πà‡∏≠‡∏ô
            page_props = json_data.get('props', {}).get('pageProps', {})
            
            # [‡πÅ‡∏Å‡πâ‡∏à‡∏∏‡∏î‡∏ó‡∏µ‡πà 1] ‡πÄ‡∏ä‡πá‡∏Ñ‡∏ß‡πà‡∏≤‡∏°‡∏µ 'hits' ‡πÑ‡∏´‡∏° (‡πÇ‡∏Ñ‡∏£‡∏á‡∏™‡∏£‡πâ‡∏≤‡∏á‡πÅ‡∏ö‡∏ö Search)
            if 'hits' in page_props:
                courses_list = page_props['hits']
                print("‚úÖ Found courses in 'hits' (Search Structure)")
            
            # ‡πÄ‡∏ú‡∏∑‡πà‡∏≠‡πÑ‡∏ß‡πâ: ‡πÄ‡∏ä‡πá‡∏Ñ‡∏ß‡πà‡∏≤‡∏°‡∏µ 'courses' ‡πÑ‡∏´‡∏° (‡πÇ‡∏Ñ‡∏£‡∏á‡∏™‡∏£‡πâ‡∏≤‡∏á‡πÅ‡∏ö‡∏ö Catalog)
            elif 'courses' in page_props:
                courses_list = page_props['courses']
                print("‚úÖ Found courses in 'courses' (Catalog Structure)")
                
            else:
                print("‚ùå ‡∏´‡∏≤ key ‡πÑ‡∏°‡πà‡πÄ‡∏à‡∏≠ (‡∏•‡∏≠‡∏á‡∏î‡∏π keys: ", page_props.keys(), ")")
                return []

        except Exception as e:
            print(f"‚ö†Ô∏è Error extracting list: {e}")
            return []

        print(f"üì¶ Found {len(courses_list)} courses.")

        for item in courses_list:
            # [‡πÅ‡∏Å‡πâ‡∏à‡∏∏‡∏î‡∏ó‡∏µ‡πà 2] ‡∏ä‡∏∑‡πà‡∏≠ Key ‡πÉ‡∏ô hits ‡∏≠‡∏≤‡∏à‡∏à‡∏∞‡∏ï‡πà‡∏≤‡∏á‡∏à‡∏≤‡∏Å‡∏õ‡∏Å‡∏ï‡∏¥‡πÄ‡∏•‡πá‡∏Å‡∏ô‡πâ‡∏≠‡∏¢
            # ‡∏ï‡πâ‡∏≠‡∏á‡πÉ‡∏ä‡πâ .get() ‡∏î‡∏±‡∏Å‡πÑ‡∏ß‡πâ‡πÄ‡∏¢‡∏≠‡∏∞‡πÜ
            
            raw_title = item.get('title')
            # ‡πÄ‡∏ä‡πá‡∏Ñ‡∏ß‡πà‡∏≤‡πÄ‡∏õ‡πá‡∏ô Dict ‡πÑ‡∏´‡∏° ‡∏ñ‡πâ‡∏≤‡πÉ‡∏ä‡πà‡πÉ‡∏´‡πâ‡∏î‡∏∂‡∏á‡∏†‡∏≤‡∏©‡∏≤‡∏≠‡∏±‡∏á‡∏Å‡∏§‡∏© ‡∏ñ‡πâ‡∏≤‡πÑ‡∏°‡πà‡πÉ‡∏ä‡πà‡∏Å‡πá‡πÉ‡∏ä‡πâ‡πÄ‡∏•‡∏¢
            if isinstance(raw_title, dict):
                title = raw_title.get('en-US', 'Untitled')
            else:
                title = raw_title

            # Description: ‡∏ö‡∏≤‡∏á‡∏ó‡∏µ‡πÉ‡∏ä‡πâ 'summary', 'excerpt', 'description'
            desc = item.get('summary') or item.get('excerpt') or item.get('description') or title
            
            # Technology
            technology = item.get('technology') or item.get('topic') or 'Data Science'
            
            # Duration
            # ‡∏ö‡∏≤‡∏á‡∏ó‡∏µ‡∏°‡∏≤‡πÄ‡∏õ‡πá‡∏ô 'duration_hours' (int) ‡∏´‡∏£‡∏∑‡∏≠ 'duration' (string)
            duration_val = item.get('duration_hours') or item.get('duration')
            duration = f"{duration_val} hours" if duration_val else "Self-paced"
            
            # URL
            # ‡πÉ‡∏ô hits ‡∏°‡∏±‡∏Å‡∏à‡∏∞‡∏°‡∏µ 'slug' ‡∏´‡∏£‡∏∑‡∏≠ 'relative_url'
            slug = item.get('slug') or item.get('url')
            if slug and str(slug).startswith('http'):
                course_url = slug
            else:
                course_url = f"https://www.datacamp.com{slug}"
            
            # Image
            image_url = item.get('image_url') or item.get('cap_image_url') or item.get('thumbnail_url')

            course_info = {
                "id": f"dc_{item.get('id') or item.get('objectID')}", # Algolia ‡∏ä‡∏≠‡∏ö‡πÉ‡∏ä‡πâ objectID
                "title": title,
                "description": desc,
                "instructor": "DataCamp Instructor",
                "price": "Subscription",
                "duration": str(duration),
                "category": technology,
                "image_url": image_url,
                "url": course_url,
                "source": "DataCamp"
            }
            all_courses.append(course_info)
            
        return all_courses

    except Exception as e:
        print(f"‚ùå Exception: {e}")
        return []

if __name__ == "__main__":
    courses = fetch_datacamp_courses()
    if courses:
        df = pd.DataFrame(courses)
        print(f"\n‚úÖ Extracted {len(df)} courses successfully!")
        # print(df.head())
        df.to_csv("datacamp_dataset.csv", index=False, encoding='utf-8-sig')

üöÄ Fetching DataCamp (Next.js extraction)...
‚úÖ Found courses in 'hits' (Search Structure)
üì¶ Found 30 courses.

‚úÖ Extracted 30 courses successfully!
