In [None]:
import pandas as pd

# Read the cleaned course table produced by the scraper.
# NOTE(review): this is an absolute path under one user's home directory, so
# the cell presumably only runs unchanged on that machine — confirm before sharing.
df = pd.read_csv(
    "/Users/claudiapastores/Documents/FHTW_MWI/SS-2025-DDP/Master-Thesis/scraper/master_programs_data_cleaned.csv"
)

# Drop rows without a title, then coerce the remaining titles to plain strings.
course_titles = list(df["Course Title"].dropna().astype(str))

# Read the category -> keywords table; the file is semicolon-delimited and is
# resolved relative to the current working directory.
df_mapping = pd.read_csv("category_keywords.csv", sep=";")

# Build {category name: [lower-cased keywords]} from the mapping table.
# Each row supplies one category and a comma-separated keyword list.
category_mapping = {}

for _, row in df_mapping.iterrows():
    raw_category = row["Category"]
    raw_keywords = row["Keywords"]

    # Test for missing cells BEFORE converting to str: str(NaN) is the truthy
    # string "nan", which would otherwise be registered as a real category.
    if pd.isna(raw_category) or pd.isna(raw_keywords):
        continue

    # Skip categories that are blank or whitespace-only.
    category = str(raw_category).strip()
    if not category:
        continue

    # Split on commas, trim and lower-case each keyword, drop empty fragments
    # (e.g. from a trailing comma).
    keywords = [
        kw.strip().lower()
        for kw in str(raw_keywords).split(",")
        if kw.strip()
    ]

    # A later row with the same category name replaces the earlier entry.
    if keywords:
        category_mapping[category] = keywords

# Tag every course title with the categories whose keywords occur in it.
# Matching is a case-insensitive substring test: keywords are compared against
# the lower-cased title, so a keyword also matches inside a longer word.
matches = []

for title in course_titles:
    lowered_title = title.lower()

    # Categories are collected in the mapping's iteration order.
    hit_categories = [
        name
        for name, terms in category_mapping.items()
        if any(term in lowered_title for term in terms)
    ]

    # Titles without any hit get the literal placeholder "None".
    if hit_categories:
        category_label = ", ".join(hit_categories)
    else:
        category_label = "None"

    matches.append({
        "Course Title": title,
        "Matched Categories": category_label,
    })

# Turn the per-title match records into a table (one row per course title).
df_matches = pd.DataFrame(matches)

# Preview the first 20 rows; `display` is supplied by the notebook environment.
preview = df_matches.iloc[:20]
display(preview)

# Write the full result without the index column, then report the row count.
df_matches.to_csv("course_category_matches.csv", index=False)
print(f"\n[OK] Matching completed – {len(df_matches)} courses processed and saved to 'course_category_matches.csv'")

Unnamed: 0,Course Title,Matched Categories
0,Computational Thinking,Computational and Optimization Methods
1,Introduction to Systematic Program Design,Programming
2,Systematic Program Design,Programming
3,"Computation, Programs, and Programming","Programming, Computational and Optimization Me..."
4,Models of Computation,Computational and Optimization Methods
5,"Programming, Problem Solving, and Algorithms","Data Science, Programming"
6,Software Construction,Programming
7,Introduction to Computer Systems,Systems and Architecture
8,Basic Algorithms and Data Structures,Data Science
9,Data Structures and Algorithms for Electrical ...,Data Science



[OK] Matching completed – 363 courses processed and saved to 'course_category_matches.csv'
