<a href="https://colab.research.google.com/github/lucarenz1997/recommender_systems/blob/main/NCF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://colab.research.google.com/github/lucarenz1997/recommender_systems/blob/main/NCF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

** Authors **: Rafaella and Luca
## Provisorischer Data Import

In [23]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout
from tensorflow.keras.models import Model
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split

from google.colab import drive
drive.mount('/content/drive')

# Loading the dataset
import pandas as pd
data = pd.read_csv("/content/drive/MyDrive/Recommender/preprocessed_train.csv")

import warnings
# Suppress all warnings
warnings.filterwarnings("ignore")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Data Prep?

In [24]:
# 2. Umwandlung von 'time_of_day' in numerische Werte
time_mapping = {'MORNING': 0, 'AFTERNOON': 1, 'EVENING': 2, 'NIGHT': 3}
data['time_of_day'] = data['time_of_day'].map(time_mapping)

## Train-Test-Split

In [25]:
# 3. Train-Test-Split
train, test = train_test_split(data, test_size=0.2, random_state=42)
train.head(100)


Unnamed: 0,genre_id,ts_listen,media_id,album_id,context_type,release_date,platform_name,platform_family,media_duration,listen_type,...,last_listen,days_since_release,genre_popularity,media_popularity,artist_popularity,album_popularity,songs_listened,song_popularity_7d,artist_popularity_7d,album_popularity_7d
141061,22,2016-11-11 10:39:29,32358,16590,0.220868,2012-11-19,0.175726,0.804247,0.336207,0.313345,...,0.091299,0.035789,0.072560,0.031216,0.032770,0.005134,0.394231,0.058252,0.033728,0.006652
101826,6,2016-11-08 09:35:14,6230,2976,0.009166,1993-12-31,0.141284,0.141284,0.556034,0.686655,...,0.220489,0.196966,0.252084,0.000000,0.009910,0.012569,0.099359,0.000000,0.007293,0.008869
39223,9,2016-11-03 16:51:50,16158,8058,0.127203,2010-05-07,0.682990,0.804247,0.590517,0.686655,...,0.043068,0.057271,0.074606,0.000000,0.002246,0.000885,0.022436,0.000000,0.003646,0.002217
42170,12,2016-11-03 20:10:01,37820,19297,0.434585,1994-04-12,0.175726,0.804247,0.788793,0.686655,...,0.115495,0.194465,0.048558,0.022605,0.026427,0.004957,0.016026,0.009709,0.021878,0.001109
104449,0,2016-11-08 13:28:38,45865,23651,0.434585,2015-03-30,0.682990,0.054469,0.418103,0.686655,...,0.004206,0.015592,1.000000,0.027987,0.037923,0.015401,0.336538,0.058252,0.041933,0.013304
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22933,0,2016-11-02 15:25:59,55414,28510,0.127203,2016-04-22,0.682990,0.804247,0.780172,0.686655,...,0.070007,0.006358,1.000000,0.059203,0.130814,0.049212,0.214744,0.097087,0.160438,0.052106
20482,169,2016-11-02 12:41:42,10698,5350,0.054653,2006-06-23,0.682990,0.804247,0.452586,0.686655,...,0.162471,0.090301,0.038209,0.000000,0.019688,0.000177,0.278846,0.000000,0.020966,0.000000
238103,3,2016-11-19 17:00:36,22235,11327,0.029810,2011-10-17,0.682990,0.804247,0.366379,0.313345,...,0.095882,0.045303,0.019046,0.022605,0.017178,0.003717,0.044872,0.077670,0.030994,0.008869
74249,0,2016-11-05 23:25:50,46513,23953,0.434585,2015-04-22,0.682990,0.804247,0.534483,0.686655,...,0.075809,0.014984,1.000000,0.023681,0.193050,0.003894,0.035256,0.038835,0.176846,0.004435




## Model architecture

In [31]:
# 5. Model Architektur
num_users = data['user_id'].nunique()
num_media = data['media_id'].nunique()
num_genres = data['genre_id'].nunique()
num_artists = data['artist_id'].nunique()

# Embeddings
user_input = Input(shape=(1,), name='user_input')
user_embedding = Embedding(num_users, 50)(user_input)
user_vec = Flatten()(user_embedding)

media_input = Input(shape=(1,), name='media_input')
media_embedding = Embedding(num_media, 50)(media_input)
media_vec = Flatten()(media_embedding)

# Zusatzfeatures
genre_input = Input(shape=(1,), name='genre_input')
artist_input = Input(shape=(1,), name='artist_input')
is_weekend_input = Input(shape=(1,), name='is_weekend_input')
time_input = Input(shape=(1,), name='time_input')
duration_input = Input(shape=(1,), name='duration_input')
last_listen_input = Input(shape=(1,), name='last_listen_input')

# Feature-Verknüpfung
concat = Concatenate()([user_vec, media_vec, genre_input, artist_input, is_weekend_input, time_input, duration_input, last_listen_input])
dense = Dense(128, activation='relu')(concat)
dense = Dropout(0.3)(dense)
dense = Dense(64, activation='relu')(dense)
dense = Dropout(0.3)(dense)
output = Dense(1, activation='sigmoid')(dense)  # Empfehlungsscore

# Model erstellen
model = Model(inputs=[user_input, media_input, genre_input, artist_input, is_weekend_input, time_input, duration_input, last_listen_input],
              outputs=output)

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

## Train model

In [32]:
# 6. Model Training
history = model.fit(
    [train['user_id'], train['media_id'], train['genre_id'], train['artist_id'], train['is_weekend'], train['time_of_day'],
     train['media_duration'], train['last_listen']],
    train['is_listened'],
    validation_data=([
        test['user_id'], test['media_id'], test['genre_id'], test['artist_id'], test['is_weekend'], test['time_of_day'],
        test['media_duration'], test['last_listen']],
        test['is_listened']),
    epochs=10,
    batch_size=256,
    verbose=1
)

Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node functional_3_1/embedding_7_1/GatherV2 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py", line 37, in <module>

  File "/usr/local/lib/python3.11/dist-packages/traitlets/config/application.py", line 992, in launch_instance

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelapp.py", line 619, in start

  File "/usr/local/lib/python3.11/dist-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.11/asyncio/base_events.py", line 608, in run_forever

  File "/usr/lib/python3.11/asyncio/base_events.py", line 1936, in _run_once

  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run

  File "/usr/local/lib/python3.11/dist-packages/tornado/ioloop.py", line 685, in <lambda>

  File "/usr/local/lib/python3.11/dist-packages/tornado/ioloop.py", line 738, in _run_callback

  File "/usr/local/lib/python3.11/dist-packages/tornado/gen.py", line 825, in inner

  File "/usr/local/lib/python3.11/dist-packages/tornado/gen.py", line 786, in run

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 361, in process_one

  File "/usr/local/lib/python3.11/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell

  File "/usr/local/lib/python3.11/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request

  File "/usr/local/lib/python3.11/dist-packages/tornado/gen.py", line 234, in wrapper

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute

  File "/usr/local/lib/python3.11/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes

  File "/usr/local/lib/python3.11/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "<ipython-input-32-d2d79082a28d>", line 2, in <cell line: 0>

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 371, in fit

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 219, in function

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 132, in multi_step_on_iterator

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 113, in one_step_on_data

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/trainer.py", line 57, in train_step

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/layers/layer.py", line 908, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/models/functional.py", line 182, in call

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/function.py", line 171, in _run_through_graph

  File "/usr/local/lib/python3.11/dist-packages/keras/src/models/functional.py", line 637, in call

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/layers/layer.py", line 908, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/operation.py", line 46, in __call__

  File "/usr/local/lib/python3.11/dist-packages/keras/src/utils/traceback_utils.py", line 156, in error_handler

  File "/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/embedding.py", line 140, in call

  File "/usr/local/lib/python3.11/dist-packages/keras/src/ops/numpy.py", line 5346, in take

  File "/usr/local/lib/python3.11/dist-packages/keras/src/backend/tensorflow/numpy.py", line 2093, in take

indices[3,0] = 66185 is not in [0, 61075)
	 [[{{node functional_3_1/embedding_7_1/GatherV2}}]] [Op:__inference_multi_step_on_iterator_138011]

## Generate predictions

In [8]:
# 6. Vorhersagen generieren
def recommend_songs(user_id, top_n=5):
    user_encoded = user_encoder.transform([user_id])[0]
    media_ids = np.arange(num_media)
    predictions = model.predict([
        np.full(media_ids.shape, user_encoded), media_ids,
        np.zeros_like(media_ids), np.zeros_like(media_ids),
        np.zeros_like(media_ids), np.zeros_like(media_ids),
        np.zeros_like(media_ids)
    ])
    top_indices = predictions.flatten().argsort()[-top_n:][::-1]
    recommended_songs = media_encoder.inverse_transform(media_ids[top_indices])
    return recommended_songs

# Beispiel: Empfehlungen für einen User
print(recommend_songs('12345'))

[1m1909/1909[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step
[42930 40123 40103 40106 40107]


## NCF (Neural Collaborative Filtering) Train model