In [1]:
import pandas as pd
from pathlib import Path
import random

In [5]:
# Load the A/B experiment log (JSON Lines: one record per line) into a DataFrame.
# Later cells read the "user_id", "recommended_tracks" and "model_type" columns.
log_data = pd.read_json(
    Path('../data_v2/ab_experiment_log.jsonl'),
    lines=True,
)

In [6]:
# Evaluate the logged A/B recommendations.
#
# A log entry counts as a hit ("in_session") when at least one of its
# recommended tracks appears among the track ids in that user's session file.
# Produces:
#   results      - one record (dict) per log entry
#   model_counts - per-model tally of hits and misses
results = []
model_counts = {
    "simple": {"in_session": 0, "not_in_session": 0},
    "complex": {"in_session": 0, "not_in_session": 0},
}

# Session track ids per user, so each user's session file is parsed only once
# even when the log contains several entries for the same user.
session_tracks_by_user = {}

for _, log_entry in log_data.iterrows():
    user_id = log_entry["user_id"]
    model_type = log_entry["model_type"]
    recommended_ids = [track["id_track"] for track in log_entry["recommended_tracks"]]

    if user_id not in session_tracks_by_user:
        session_data = pd.read_json(
            Path(f'../data_v2/sessions/sessions_user_{user_id}.jsonl'), lines=True
        )
        session_tracks_by_user[user_id] = set(session_data['track_id'])
    session_track_ids = session_tracks_by_user[user_id]

    matched_track_ids = [
        track_id for track_id in recommended_ids if track_id in session_track_ids
    ]
    is_in_session = bool(matched_track_ids)

    results.append({
        "user_id": user_id,
        # Kept for backward compatibility: this is the LAST recommended id,
        # whether or not it matched. It is None for an empty recommendation
        # list instead of a NameError or a value left over from the previous row.
        "recommended_track_id": recommended_ids[-1] if recommended_ids else None,
        "model_type": model_type,
        "is_in_session": is_in_session,
        # The recommended ids that actually occur in the user's sessions.
        "matched_track_ids": matched_track_ids,
    })

    # setdefault: a model type missing from the initial tally is counted
    # rather than aborting the whole evaluation with a KeyError.
    model_tally = model_counts.setdefault(
        model_type, {"in_session": 0, "not_in_session": 0}
    )
    model_tally["in_session" if is_in_session else "not_in_session"] += 1



In [8]:

# Print, for every model in model_counts, the hit and miss tallies and the
# share of hits as a percentage (0 when the model has no entries at all).
for model_type, counts in model_counts.items():
    hits = counts["in_session"]
    misses = counts["not_in_session"]
    total_recommendations = hits + misses

    if total_recommendations > 0:
        success_rate = (hits / total_recommendations) * 100
    else:
        success_rate = 0

    print(f"  Model: {model_type}")
    print(f"    Successfully recommended tracks: {hits}")
    print(f"    Unsuccessfully recommended tracks: {misses}")
    print(f"    Success rate: {success_rate:.2f}%")

  Model: simple
    Successfully recommended tracks: 131
    Unsuccessfully recommended tracks: 316
    Success rate: 29.31%
  Model: complex
    Successfully recommended tracks: 169
    Unsuccessfully recommended tracks: 286
    Success rate: 37.14%


In [93]:
# Random baseline: recommend uniformly sampled tracks to randomly chosen users
# and count a hit when at least one sampled track appears among the track ids
# in that user's session file.
#
# NOTE: this cell rebinds `results` and `model_counts`, replacing whatever an
# earlier cell stored under those names.
RANDOM_SEED = 42        # fixed seed so the baseline is reproducible on re-run
N_USERS = 400           # number of distinct users to evaluate
N_RECOMMENDATIONS = 5   # tracks sampled per user

tracks = pd.read_json(Path('../data_v2/tracks_artists.jsonl'), lines=True)

results = []
model_type = "random"
model_counts = {
    model_type: {"in_session": 0, "not_in_session": 0}
}

# A local generator keeps the sampling deterministic without touching the
# global `random` state used elsewhere in the notebook.
rng = random.Random(RANDOM_SEED)

# presumably 101..1000 are the user ids that have a session file - TODO confirm
users = rng.sample(range(101, 1001), N_USERS)
for user_id in users:
    # A fresh integer seed per user: deterministic overall, different per user.
    recommended_tracks = tracks.sample(
        N_RECOMMENDATIONS, random_state=rng.randrange(2**32)
    )
    recommended_ids = recommended_tracks["id_track"].tolist()

    session_data = pd.read_json(
        Path(f'../data_v2/sessions/sessions_user_{user_id}.jsonl'), lines=True
    )
    session_track_ids = set(session_data['track_id'])

    matched_track_ids = [
        track_id for track_id in recommended_ids if track_id in session_track_ids
    ]
    is_in_session = bool(matched_track_ids)

    # The original record also carried a "data_type" key, but no such variable
    # is defined in this notebook, so the cell raised NameError on a fresh
    # kernel. The key is dropped; the schema now matches the A/B evaluation.
    results.append({
        "user_id": user_id,
        # Kept for backward compatibility: the LAST sampled id, whether or not
        # it matched; None if nothing was sampled.
        "recommended_track_id": recommended_ids[-1] if recommended_ids else None,
        "model_type": model_type,
        "is_in_session": is_in_session,
        # The sampled ids that actually occur in the user's sessions.
        "matched_track_ids": matched_track_ids,
    })

    model_counts[model_type]["in_session" if is_in_session else "not_in_session"] += 1

# Report hits, misses and hit rate (0 when nothing was evaluated).
for model_type, counts in model_counts.items():
    total_recommendations = counts["in_session"] + counts["not_in_session"]
    success_rate = (counts["in_session"] / total_recommendations) * 100 if total_recommendations > 0 else 0
    print(f"  Model: {model_type}")
    print(f"    Successfully recommended tracks: {counts['in_session']}")
    print(f"    Unsuccessfully recommended tracks: {counts['not_in_session']}")
    print(f"    Success rate: {success_rate:.2f}%")

  Model: random
    Successfully recommended tracks: 128
    Unsuccessfully recommended tracks: 272
    Success rate: 32.00%
