In [None]:
import pandas as pd
import polars as pl
import numpy as np
import networkx as nx
import json
import time
import os
from rtsvg import *
# Shared RACETrack instance; used below to build the ontology instance and to render
rt = RACETrack()

# Ontology framework instance created from the given base filename
# (presumably reads previously saved ontology files at that path -- TODO confirm)
ofi = rt.ontologyFrameworkInstance(
    base_filename='../../../data/kaggle_imdb_600k/20240519_ontology',
)

In [None]:
# Build a (MovieID, genre, year) table from the ontology triples and render one
# temporal bar chart per genre.

# Columns of df_triples that are dropped once a single verb has been selected
_triple_cols_to_drop_ = ['stype', 'sdisp', 'vrb', 'otype', 'odisp', 'grp', 'gdisp', 'src']

# Extract out the "hasGenre" relationship & drop all non-related columns
_genre_ = (
    ofi.df_triples
       .filter(pl.col('vrb') == 'hasGenre')
       .drop(_triple_cols_to_drop_)
       .rename({'sbj': 'MovieID', 'obj': 'genre'})
)

# Extract out the "yearReleased" relationship & drop all non-related columns
_year_ = (
    ofi.df_triples
       .filter(pl.col('vrb') == 'yearReleased')
       .drop(_triple_cols_to_drop_)
       .rename({'sbj': 'MovieID', 'obj': 'year'})
)

# Join the two tables -- now have "MovieID", "genre", "year"
_genre_year_ = _genre_.join(_year_, on='MovieID', how='inner')

# Convert the "genre" and "year" columns to their corresponding labels
# (the label is taken as element 0 of the ofi.uid_lu entry for each value)
_genre_year_ = _genre_year_.with_columns(
    pl.col('genre').map_elements(lambda x: ofi.uid_lu[x][0], return_dtype=pl.String).alias('genre_str'),
    pl.col('year').map_elements(lambda x: ofi.uid_lu[x][0], return_dtype=pl.String).alias('year_as_dt'),
)

# Fix up the year column -- anything null or not exactly 4 digits gets dropped.
# A length-only check would let 4-character non-numeric labels through, and the
# strict '%Y' parse below would then raise; match on ASCII digits instead.
_genre_year_ = _genre_year_.with_columns(
    pl.when(pl.col('year_as_dt').str.contains(r'^[0-9]{4}$'))
      .then(pl.col('year_as_dt'))
      .otherwise(None)
      .alias('year_as_dt')
)
_genre_year_ = _genre_year_.drop_nulls()

# Convert the validated year strings to a timestamp
_genre_year_ = _genre_year_.with_columns(pl.col('year_as_dt').str.to_datetime('%Y'))

# Render -- one temporal bar chart per genre; x axis shared, y axis independent
rt.smallMultiples(_genre_year_, category_by='genre_str',
                  sm_type='temporalBarChart', sm_params={'ts_field':'year_as_dt'},
                  x_axis_independent=False, y_axis_independent=True,
                  w_sm_override=512, h_sm_override=96, w=1700)