# Recommender System Notebook
Deze notebook bouwt een content-based recommender voor de VKM-module dataset.

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the VKM dataset CSV; the relative path is resolved against the current
# working directory.
df = pd.read_csv(
    filepath_or_buffer="Uitgebreide_VKM_dataset_zonder_weird_data.csv",
)
# Preview the first rows to check that the file was parsed as expected.
df.head()

## Data schoonmaken

In [None]:
# Schoonmaken en voorbereiden
def fill_short(row):
    """Return a usable short description for one dataset row.

    When ``short_description`` is the placeholder "ntb" (compared
    case-insensitively, ignoring surrounding whitespace), the text is rebuilt
    from the row's ``description`` and ``content`` fields. Any other value,
    including a missing one, is returned unchanged.

    Args:
        row: Mapping-like row (for example a ``pandas.Series`` passed by
            ``DataFrame.apply(..., axis=1)``) that has a
            ``short_description`` key and optionally ``description`` and
            ``content`` keys.

    Returns:
        The original short description, or the space-joined fallback text.
        The fallback is an empty string when no fallback field has a value.
    """
    short = row["short_description"]
    if not (isinstance(short, str) and short.lower().strip() == "ntb"):
        return short

    parts = []
    for field in ("description", "content"):
        if field not in row:
            continue
        value = row[field]
        # Skip missing cells (None/NaN/NA): str() on them would inject the
        # literal word "nan" or "None" into the rebuilt description.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            continue
        parts.append(str(value))
    return " ".join(parts).strip()

# Replace placeholder short descriptions with fallback text built from the row.
df["short_description"] = df.apply(fill_short, axis=1)

# Lowercase every object-dtype (text) column so later matching ignores case.
for col in df.columns:
    if df[col].dtype == object:
        df[col] = df[col].str.lower()

# Build one text document per row from the descriptive columns. Missing cells
# are replaced by empty strings first: a bare astype(str) would turn NaN into
# the literal token "nan", which rows with missing fields would then share.
df["combined_text"] = (
    df["name"].fillna("").astype(str) + " " +
    df["short_description"].fillna("").astype(str) + " " +
    df["module_tags"].fillna("").astype(str) + " " +
    df["location"].fillna("").astype(str)
)
df.head()

## Vectorisatie

In [None]:
# Bag-of-words counts: fit the vocabulary on the combined text and transform
# it into a document-term matrix in one step.
# NOTE(review): stop_words="english" only filters English stop words; confirm
# the dataset text is English, since the notebook prose is Dutch.
vectorizer = CountVectorizer(stop_words="english")
vectorized = vectorizer.fit_transform(raw_documents=df["combined_text"])
vectorized

## Cosine Similarity

In [None]:
# Pairwise cosine similarity between all rows of the document-term matrix.
similarities = cosine_similarity(X=vectorized)
# Label both axes with the module names so scores can be looked up by name.
similarity_df = pd.DataFrame(
    data=similarities,
    index=df["name"],
    columns=df["name"],
)
similarity_df.head()

## Aanbevelingsfunctie

In [None]:
def recommend(module_name, similarity_df, top_n=5):
    """Print the modules most similar to ``module_name``.

    Args:
        module_name: Column label in ``similarity_df`` of the module to find
            recommendations for.
        similarity_df: Square similarity table whose index and columns are
            module names.
        top_n: Maximum number of recommendations to print. Defaults to 5.

    Returns:
        None. The result is printed; when ``module_name`` is not a column of
        ``similarity_df`` a "not found" message is printed instead.
    """
    # Keep the try body minimal so only the lookup can trigger the message.
    try:
        scores = similarity_df[module_name]
    except KeyError:
        print("Module niet gevonden in dataset.")
        return

    # Duplicate module names select several same-labelled columns at once;
    # use the first one so sorting still operates on a single Series.
    if isinstance(scores, pd.DataFrame):
        scores = scores.iloc[:, 0]

    # Remove the queried module by label instead of assuming it sorts first:
    # with a zero vector (self-similarity 0) or an exact tie, slicing off the
    # first entry would discard a real recommendation.
    recs = (
        scores.drop(labels=module_name, errors="ignore")
        .sort_values(ascending=False)
        .head(top_n)
    )
    print(f"Aanbevolen modules voor '{module_name}':")
    print(recs)

# Example run: print the recommendations for one module name.
recommend(module_name="ai fundamentals", similarity_df=similarity_df)