In [None]:
import os
from IPython.display import display, HTML
import polars as pl

# Display limits for polars output, applied through the process environment.
# os.environ only accepts strings, hence the quoted numbers.
for env_name, env_value in {
	"POLARS_FMT_MAX_ROWS": "50",
	"POLARS_FMT_MAX_COLS": "50",
	# NOTE(review): POLARS_FMT_COL_MAX_WIDTH does not appear to be among the variables
	# listed in the polars Config documentation - confirm it has any effect.
	"POLARS_FMT_COL_MAX_WIDTH": "50",
}.items():
	os.environ[env_name] = env_value

# Number of characters shown for string values before truncation.
pl.Config(fmt_str_lengths=50)

def print_df(df: pl.DataFrame, title=None):
	"""Show `df` as an HTML table in the cell output, optionally under a heading.

	Args:
		df: Frame to render; its `_repr_html_()` output is used as the table markup.
		title: Optional heading text wrapped in an <h3> tag. Falsy values
			(None, "") produce no heading. The text is inserted as-is,
			without HTML escaping.
	"""
	table_markup = df._repr_html_()
	heading = f"<h3>{title}</h3>" if title else ""
	display(HTML(heading + table_markup))

# import httpx
# from src.user_list import UserList
# async with httpx.AsyncClient() as client:
# 	user_list = await UserList.from_web("cosmicwaves", client)
# 	user_list.df.sink_parquet("data/user_list.parquet")

# Cached inputs: the user's list is read eagerly, the anime database is only scanned (lazy).
user_list = pl.read_parquet("data/user_list.parquet")
anime_db = pl.scan_parquet("data/anime.parquet")

# The two tables must share the join key and nothing else: any other shared name
# would end up duplicated in the joined frame.
# NOTE(review): newer polars versions prefer `anime_db.collect_schema().names()` for
# LazyFrame column names - confirm against the installed version.
JOIN_KEY = "anime_id"
common_cols = set(user_list.columns).intersection(set(anime_db.columns))
if JOIN_KEY not in common_cols:
	raise ValueError(f"Join column '{JOIN_KEY}' must be present in both user_list and anime_db")
unexpected_common_cols = common_cols - {JOIN_KEY}
if unexpected_common_cols:
	raise ValueError(f"Common columns found between user_list and anime_db: {sorted(unexpected_common_cols)}")

# validate="1:1" makes the join fail if the key is not unique on either side.
user_animes = (
	user_list.lazy()
	.join(anime_db, on=JOIN_KEY, how="inner", validate="1:1")
	.collect()
)

# An inner join drops list entries that have no match in the database; report how many.
missing_count = user_list.height - user_animes.height
if missing_count > 0:
	print(f"Warning: {missing_count} anime not found in database")

print_df(user_animes.head(10), "User Animes")

# Frequency table for each user-specific column, most common value first.
for col in ["user_watch_status", "user_priority", "user_storage", "user_added_to_list", "user_rewatching", "user_scored"]:
	print_df(user_animes[col].value_counts().sort("count", descending=True), f"Value Count '{col}'")


In [None]:
from src.schedule import Schedule
from src.next_releases import NextReleases

# Work on a lazy frame from here on; both queries below are built from it.
user_animes = user_animes.lazy()

# Build both queries first, then execute them together in a single collect_all call.
pending_queries = [Schedule.get(user_animes), NextReleases.get(user_animes)]
schedule, next_releases = pl.collect_all(pending_queries)

# Pass the collected schedule frame through Schedule.from_df before displaying it.
schedule = Schedule.from_df(schedule)

print_df(schedule, "Schedule")
print_df(next_releases, "Next releases")