In [2]:
!pip install redis==4.1.4

Collecting redis==4.1.4
  Downloading redis-4.1.4-py3-none-any.whl (175 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.8/175.8 KB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
Collecting deprecated>=1.2.3
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: deprecated, redis
Successfully installed deprecated-1.2.13 redis-4.1.4


In [1]:
import os

import redis
from google.cloud import bigquery
from tqdm import tqdm

In [2]:
# Redis endpoint. The defaults are the values this notebook was written with;
# set REDIS_INSTANCE_HOST / REDIS_INSTANCE_PORT in the environment to target
# another instance without editing the notebook.
REDIS_INSTANCE_HOST = os.environ.get("REDIS_INSTANCE_HOST", "10.69.144.99")
REDIS_INSTANCE_PORT = int(os.environ.get("REDIS_INSTANCE_PORT", "6379"))

In [3]:
# Open a client for the configured Redis instance.
redis_endpoint = dict(host=REDIS_INSTANCE_HOST, port=REDIS_INSTANCE_PORT)
r = redis.Redis(**redis_endpoint)

# Wipe the currently selected database so the loads further down start from
# an empty keyspace. Kept as the last expression so its result is displayed.
r.flushdb()

True

In [4]:
# BigQuery dataset that holds the `anime` table read by ANIME_INFO_QUERY.
ANIME_INFO_DATASET = "processed_area"
# Dataset that holds the `anime_anime_user_anime_ranked` table read by
# ANIME_ANIME_RECS_QUERY.
ANIME_ANIME_DATASET = "final_ml_pipelines_anime_anime_retrieval_true_20220304221756"
# Dataset that holds the `user_anime_list_ranking_infer` table read by
# USER_ANIME_RECS_QUERY.
USER_ANIME_DATASET = "final_ml_pipelines_user_anime_retrieval_true_list_ranking_true_20220304022950"

In [5]:
# How many of the highest-scored recommendations are kept per user.
ANIME_ANIME_RECS_PER_USER = 10
USER_ANIME_RECS_PER_USER = 20

# One row per anime; columns are aliased to name / url / img_url.
ANIME_INFO_QUERY = f"SELECT anime_id, title AS name, anime_url AS url, main_pic AS img_url FROM `anime-rec-dev.{ANIME_INFO_DATASET}.anime`"

# Rows are ranked per user by descending score; only the top
# ANIME_ANIME_RECS_PER_USER are returned, ordered by user and then rank.
ANIME_ANIME_RECS_QUERY = f"""
    WITH ranked_recs AS (
        SELECT user_id, last_watched AS recent_watch, retrieved_anime_id AS anime_id, score, ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY score DESC) AS rnk
        FROM `anime-rec-dev.{ANIME_ANIME_DATASET}.anime_anime_user_anime_ranked`
    )
    SELECT user_id, recent_watch, anime_id
    FROM ranked_recs
    WHERE rnk <= {ANIME_ANIME_RECS_PER_USER}
    ORDER BY user_id, rnk
"""

# Same ranking scheme for the user-anime table, keeping the top
# USER_ANIME_RECS_PER_USER rows per user.
USER_ANIME_RECS_QUERY = f"""
    WITH ranked_recs AS (
        SELECT user_id, anime_id, score, ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY score DESC) AS rnk
        FROM `anime-rec-dev.{USER_ANIME_DATASET}.user_anime_list_ranking_infer`
    )
    SELECT user_id, anime_id
    FROM ranked_recs
    WHERE rnk <= {USER_ANIME_RECS_PER_USER}
    ORDER BY user_id, rnk
"""

In [6]:
def load_big_query_data(dataset_id, query):
    """Run ``query`` on BigQuery and return the result as a DataFrame.

    Args:
        dataset_id: Name of the dataset the query reads from. Accepted so
            existing callers keep working, but not used to build the request:
            the queries in this notebook spell out fully-qualified table names.
        query: SQL text to execute.

    Returns:
        The DataFrame produced by ``QueryJob.to_dataframe()`` for the query.
    """
    client = bigquery.Client(project="anime-rec-dev")
    # No custom job options are needed, so the client's default job
    # configuration is used instead of passing an empty QueryJobConfig.
    query_job = client.query(query)
    return query_job.to_dataframe()

## Anime info

In [7]:
# Load the anime metadata (anime_id, name, url, img_url) from BigQuery.
anime_info = load_big_query_data(ANIME_INFO_DATASET, ANIME_INFO_QUERY)

In [None]:
# Size check: (rows, columns) of the loaded metadata frame.
anime_info.shape

In [8]:
# Preview the first rows to confirm the aliased column names.
anime_info.head()

Unnamed: 0,anime_id,name,url,img_url
0,33341,Time Travel Shoujo: Mari Waka to 8-nin no Kaga...,https://myanimelist.net/anime/33341/Time_Trave...,https://cdn.myanimelist.net/images/anime/11/80...
1,38670,Actors: Songs Connection,https://myanimelist.net/anime/38670/Actors__So...,https://cdn.myanimelist.net/images/anime/1869/...
2,33036,Suki ni Naru Sono Shunkan wo.: Kokuhaku Jikkou...,https://myanimelist.net/anime/33036/Suki_ni_Na...,https://cdn.myanimelist.net/images/anime/4/817...
3,33037,Tsukiuta. The Animation,https://myanimelist.net/anime/33037/Tsukiuta_T...,https://cdn.myanimelist.net/images/anime/7/793...
4,6985,Buddha Saitan,https://myanimelist.net/anime/6985/Buddha_Saitan,https://cdn.myanimelist.net/images/anime/6/163...


In [9]:
# Each anime is stored under three string keys: "<anime_id>_name",
# "<anime_id>_url" and "<anime_id>_img_url".
# The SETs are queued on a non-transactional pipeline and sent in batches, so
# the load costs one network round trip per batch instead of one per key.
rows_per_flush = 1_000
with r.pipeline(transaction=False) as pipe:
    # total= lets tqdm show a real progress bar; iterrows() has no length.
    progress = tqdm(anime_info.iterrows(), total=len(anime_info))
    for rows_queued, (_, row) in enumerate(progress, start=1):
        pipe.set(f"{row['anime_id']}_name", row['name'])
        pipe.set(f"{row['anime_id']}_url", row['url'])
        pipe.set(f"{row['anime_id']}_img_url", row['img_url'])
        if rows_queued % rows_per_flush == 0:
            pipe.execute()
    # Send whatever is left from the final, partial batch.
    pipe.execute()

9792it [00:11, 835.85it/s]


In [10]:
# Spot-check the three keys written for anime_id 1.
for sample_key in ("1_name", "1_url", "1_img_url"):
    print(r.get(sample_key))

b'Cowboy Bebop'
b'https://myanimelist.net/anime/1/Cowboy_Bebop'
b'https://cdn.myanimelist.net/images/anime/4/19644.jpg'


## Anime-to-anime recommendations

In [11]:
# Load the top-ranked anime-anime recommendations per user
# (user_id, recent_watch, anime_id), ordered by user and rank.
anime_anime = load_big_query_data(ANIME_ANIME_DATASET, ANIME_ANIME_RECS_QUERY)

In [None]:
# Size check: (rows, columns) of the recommendation frame.
anime_anime.shape

In [12]:
# Preview the first rows of the recommendation frame.
anime_anime.head()

Unnamed: 0,user_id,recent_watch,anime_id
0,---Adina---,170,245
1,---Adina---,170,22
2,---Adina---,170,114
3,---Adina---,170,265
4,---Adina---,170,12069


In [None]:
# Per user this writes "<user_id>_recent_watch" (a string) and
# "<user_id>_anime_anime_recs" (a list, in the row order of the frame).
# RPUSH appends, so re-running this cell used to duplicate every list; each
# list is now deleted the first time its user is seen in a run.
# Commands go through a non-transactional pipeline and are sent in batches to
# avoid one network round trip per command.
rows_per_flush = 1_000
reset_keys = set()
with r.pipeline(transaction=False) as pipe:
    # total= lets tqdm show a real progress bar; iterrows() has no length.
    progress = tqdm(anime_anime.iterrows(), total=len(anime_anime))
    for rows_queued, (_, row) in enumerate(progress, start=1):
        recs_key = f"{row['user_id']}_anime_anime_recs"
        if recs_key not in reset_keys:
            reset_keys.add(recs_key)
            pipe.delete(recs_key)
        # Written for every row, so the last row of a user wins, as before.
        pipe.set(f"{row['user_id']}_recent_watch", row['recent_watch'])
        pipe.rpush(recs_key, row['anime_id'])
        if rows_queued % rows_per_flush == 0:
            pipe.execute()
    # Send whatever is left from the final, partial batch.
    pipe.execute()

243722it [03:53, 1134.88it/s]

In [22]:
# Spot-check one user: the stored recent watch, then the full rec list.
print(
    r.get("---Adina---_recent_watch"),
    r.lrange("---Adina---_anime_anime_recs", 0, -1),
    sep="\n",
)

b'170'
[b'245', b'22', b'114', b'265', b'12069', b'2403', b'558', b'11771', b'196', b'627']


## User-to-anime recommendations

In [None]:
# Load the top-ranked user-anime recommendations per user
# (user_id, anime_id), ordered by user and rank.
user_anime = load_big_query_data(USER_ANIME_DATASET, USER_ANIME_RECS_QUERY)

In [None]:
# Size check: (rows, columns) of the recommendation frame.
user_anime.shape

In [None]:
# Preview the first rows of the recommendation frame.
user_anime.head()

In [None]:
# Each user's recommendations are stored as the list "<user_id>_user_anime_recs",
# in the row order of the frame.
# RPUSH appends, so re-running this cell used to duplicate every list; each
# list is now deleted the first time its user is seen in a run.
# Commands go through a non-transactional pipeline and are sent in batches to
# avoid one network round trip per command.
rows_per_flush = 1_000
reset_keys = set()
with r.pipeline(transaction=False) as pipe:
    # total= lets tqdm show a real progress bar; iterrows() has no length.
    progress = tqdm(user_anime.iterrows(), total=len(user_anime))
    for rows_queued, (_, row) in enumerate(progress, start=1):
        recs_key = f"{row['user_id']}_user_anime_recs"
        if recs_key not in reset_keys:
            reset_keys.add(recs_key)
            pipe.delete(recs_key)
        pipe.rpush(recs_key, row['anime_id'])
        if rows_queued % rows_per_flush == 0:
            pipe.execute()
    # Send whatever is left from the final, partial batch.
    pipe.execute()

In [None]:
# Spot-check the complete recommendation list stored for one user.
svano_recs = r.lrange("svanO_user_anime_recs", 0, -1)
print(svano_recs)