Datensatz öffnen

In [1]:
import pandas as pd
df = pd.read_csv('movies.csv')
print(df.head())

   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  


Jahr als separates Feature

In [4]:
# Jahr extrahieren in neue Spalte 'year'
df['year'] = df['title'].str.extract(r'\((\d{4})\)')

# Klammerjahr aus dem Titel entfernen
df['title'] = df['title'].str.replace(r'\s*\(\d{4}\)', '', regex=True).str.strip()

# Ergebnis anzeigen
print(df.head(10))


   movieId                        title  \
0        1                    Toy Story   
1        2                      Jumanji   
2        3             Grumpier Old Men   
3        4            Waiting to Exhale   
4        5  Father of the Bride Part II   
5        6                         Heat   
6        7                      Sabrina   
7        8                 Tom and Huck   
8        9                 Sudden Death   
9       10                    GoldenEye   

                                        genres year  
0  Adventure|Animation|Children|Comedy|Fantasy  NaN  
1                   Adventure|Children|Fantasy  NaN  
2                               Comedy|Romance  NaN  
3                         Comedy|Drama|Romance  NaN  
4                                       Comedy  NaN  
5                        Action|Crime|Thriller  NaN  
6                               Comedy|Romance  NaN  
7                           Adventure|Children  NaN  
8                                       

Prototyp

In [5]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Daten vorbereiten
df = pd.read_csv('movies.csv')
df['year'] = df['title'].str.extract(r'\((\d{4})\)')
df['title'] = df['title'].str.replace(r'\s*\(\d{4}\)', '', regex=True).str.strip()
df['year'] = df['year'].astype('Int64')

# Genre-Vektoren (einmalig!)
vectorizer = CountVectorizer(tokenizer=lambda x: x.split('|'))
genre_matrix = vectorizer.fit_transform(df['genres'])

# Mapping von Titel zu Index
title_to_index = {t.lower(): i for i, t in enumerate(df['title'])}

# User Input (3 Titel)
print("Bitte gib 3 Filmtitel ein:")
user_inputs = [input(f"Film {i+1}: ").strip().lower() for i in range(3)]

# Finde die Vektoren der eingegebenen Filme
valid_indices = []
for title in user_inputs:
    if title in title_to_index:
        valid_indices.append(title_to_index[title])
    else:
        print(f"⚠️  Film nicht gefunden: {title}")

if not valid_indices:
    print("❌ Keine gültigen Filme eingegeben. Abbruch.")
else:
    # Durchschnittlicher Genre-Vektor (mit Umwandlung zu dichten Array)
    user_vector = genre_matrix[valid_indices].mean(axis=0).A.flatten()

    # Ähnlichkeit zwischen User-Vektor und allen Filmen berechnen
    similarities = cosine_similarity(user_vector.reshape(1, -1), genre_matrix).flatten()

    # Top-N Empfehlungen (ausgenommen Eingaben)
    top_n = 5
    recommendations = similarities.argsort()[::-1]
    recommendations = [i for i in recommendations if i not in valid_indices][:top_n]

    print("\n🎬 Empfehlungen:")
    print(df.iloc[recommendations][['title', 'genres', 'year']])




Bitte gib 3 Filmtitel ein:

🎬 Empfehlungen:
                                              title  \
50592              Pokémon the Movie: I Choose You!   
29990                                    Inside Out   
66022  Naruto Shippuden the Movie: The Will of Fire   
76684                                 Kung Fu Mulan   
71717           Demon Slayer the Movie: Mugen Train   

                                                  genres  year  
50592  Adventure|Animation|Children|Comedy|Drama|Fantasy  2017  
29990  Adventure|Animation|Children|Comedy|Drama|Fantasy  2015  
66022    Action|Adventure|Animation|Comedy|Drama|Fantasy  2009  
76684  Action|Adventure|Animation|Children|Drama|Fantasy  2020  
71717           Action|Adventure|Animation|Drama|Fantasy  2020  
