In [1]:
# Pull the DVC-tracked data files into the workspace, invoking dvc through `uv run`.
!uv run dvc pull

A       ..\data\archive.zip
1 file added


In [3]:
import json

from pathlib import Path

import pandas as pd

from anime_recommender.scripts.setup import DatasetLoader, DatasetProcessor
from anime_recommender.scripts.callbacks import EventsCallback

In [4]:
# Both paths are built relative to the current working directory.
# NOTE(review): this presumably assumes the kernel runs from the notebook's own
# folder; the config lives two levels up while the data archive lives one level
# up — confirm that asymmetry is intended.
config_file = Path.cwd().parent.parent / "config" / "log-config.yaml"
archive_path = Path.cwd().parent / "data" / "archive.zip"
# Configure the callback from the YAML file and reuse its logger everywhere below.
callback = EventsCallback()
callback.reconfigure_from_file(config_file=config_file)
log = callback.logger

# Load the anime and ratings frames from the archive, then hand them to the processor.
ds_loader = DatasetLoader(log=log, archive_path=archive_path)
anime_pd, ratings_pd = ds_loader.load_pandas_data_frames()
ds_processor = DatasetProcessor(log=log, anime_pd=anime_pd, ratings_pd=ratings_pd)

# NOTE(review): `_merge` is underscore-prefixed (private by convention); prefer a
# public entry point if DatasetProcessor exposes one — TODO confirm.
tables_merge = ds_processor._merge()

EventsCallback [INFO]: ===== Unpack Archive Job =====
EventsCallback [DEBUG]: Unlinking file animelist.csv
EventsCallback [DEBUG]: Unlinking file anime_with_synopsis.csv
EventsCallback [DEBUG]: Removing tree html folder
EventsCallback [DEBUG]: Unlinking file watching_status.csv
EventsCallback [INFO]: ===== Filter users Job =====
EventsCallback [INFO]: ===== Join Tables Job =====
EventsCallback [DEBUG]: Total Records: 752_850


In [5]:
# Fixed seed so the preview shows the same rows on every Restart & Run All.
tables_merge.sample(10, random_state=42)

Unnamed: 0,user_id,rating,anime_id,name,genres
25592,10367,4.0,21743,Donten ni Warau,"Action, Historical, Shoujo, Supernatural"
646653,316318,7.0,28171,Shokugeki no Souma,"Ecchi, School, Shounen"
324496,173716,6.0,10308,Sengoku Otome: Momoiro Paradox,"Action, Comedy, Historical, Samurai, Sci-Fi, S..."
642471,316154,6.0,23935,Kyutai Panic Adventure Returns!,"Action, Shounen, Super Power"
584437,297524,7.0,6151,Final Fantasy VII: On the Way to a Smile - Epi...,"Action, Fantasy, Super Power"
611515,307456,6.0,40932,Null Peta Special,"Sci-Fi, Comedy"
665984,323567,7.0,21075,Hetalia: The Beautiful World Extra Disc,"Comedy, Historical, Parody"
59240,24990,6.0,33241,Honobono Log,"Romance, Slice of Life"
477649,259790,2.0,31145,Code Geass: Boukoku no Akito 3 - Kagayaku Mono...,"Comedy, Military"
120659,67428,4.0,2442,Zoku Koihime,"Fantasy, Harem, Hentai, Magic, Supernatural"


In [6]:
def get_unique_genres(genre_series: pd.Series) -> list[str]:
    """Collect the distinct genre labels found in a series of genre strings.

    Each entry is expected to be a string of genre names separated by
    ``", "`` (for example ``"Action, Comedy"``). Missing entries
    (``None``/``NaN``) are skipped, and the series may carry any name or
    no name at all.

    Args:
        genre_series: Series whose values are ``", "``-separated genre strings.

    Returns:
        Sorted list of the unique genre labels; empty when the series has no
        non-missing entries.
    """
    # Iterate over the values directly: going through ``to_frame().itertuples()``
    # ties the lookup to a column literally named "genres" and builds a
    # namedtuple per row for no benefit.
    genres = {
        genre
        for entry in genre_series.dropna()
        for genre in entry.split(", ")
    }

    return sorted(genres)

In [7]:
# Gather every distinct genre in the merged table and log them one per line.
unique_genres = get_unique_genres(genre_series=tables_merge["genres"])
unique_genres_str = "\n".join(unique_genres)
log.info(f"\nUnique Genres:\n--------\n{unique_genres_str}")

EventsCallback [INFO]: 
Unique Genres:
--------
Action
Adventure
Cars
Comedy
Dementia
Demons
Drama
Ecchi
Fantasy
Game
Harem
Hentai
Historical
Horror
Josei
Kids
Magic
Martial Arts
Mecha
Military
Music
Mystery
Parody
Police
Psychological
Romance
Samurai
School
Sci-Fi
Seinen
Shoujo
Shoujo Ai
Shounen
Shounen Ai
Slice of Life
Space
Sports
Super Power
Supernatural
Thriller
Unknown
Vampire
Yaoi
Yuri


In [8]:
# Count distinct users and anime titles; their sum is the combined feature dimension.
feature_dim = [len(tables_merge[column].unique()) for column in ("user_id", "anime_id")]
users_filtered, anime_total = feature_dim
dim = {
    "features": {
        "users_filtered": users_filtered,
        "anime_total": anime_total,
        "feature_dim": users_filtered + anime_total,
    }
}
log.info(f"\n{json.dumps(dim, indent=3)}")

EventsCallback [INFO]: 
{
   "features": {
      "users_filtered": 167,
      "anime_total": 16805,
      "feature_dim": 16972
   }
}
