# Collect feature data on MovieLens movies from Wikidata.

**references**

* internal colab: [QueryWikiData.ipynb](https://source.corp.google.com/piper///depot/google3/learning/laser/experiments/dpwg/movielens/QueryWikiData.ipynb)
* tf recommendations: [movielens utils](https://source.corp.google.com/piper///depot/google3/third_party/tensorflow_models/official/recommendation/movielens.py)

In [21]:
VERSION        = "v2"                       # TODO
PREFIX         = f'rec-bandits-{VERSION}'   # TODO

print(f"PREFIX: {PREFIX}")

PREFIX: rec-bandits-v2


In [22]:
# Resolve the active gcloud project; it is only used to derive the bucket name
# that holds the shared notebook configuration.
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

# Fetch the shared environment file from the bucket, echo it, then execute it
# in this namespace.
# NOTE(review): exec() runs whatever text is stored at
# {BUCKET_URI}/config/notebook_env.py with the notebook's privileges -- only
# safe if write access to that object is trusted. The executed file reassigns
# names set above (the echoed output shows PROJECT_ID, PREFIX, BUCKET_NAME, ...).
# `config.n` is presumably the captured shell output joined with newlines -- confirm.
config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
exec(config.n)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "rec-bandits-v2"
VERSION                  = "v2"

BUCKET_NAME              = "rec-bandits-v2-hybrid-vertex-bucket"
BUCKET_URI               = "gs://rec-bandits-v2-hybrid-vertex-bucket"
DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://rec-bandits-v2-hybrid-vertex-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"
DATA_PATH_KFP_DEMO       = "gs://rec-bandits-v2-hybrid-vertex-bucket/data/kfp_demo_data/u.data"

VPC_NETWORK_FULL         = "projects/934903580331/global/networks/ucaip-haystack-vpc-network"

BIGQUERY_DATASET_NAME    = "mvlens_rec_bandits_v2"
BIGQUERY_TABLE_NA

In [23]:
DATA_PATH

'gs://rec-bandits-v2-hybrid-vertex-bucket/data'

In [24]:
# google cloud
from google.cloud import aiplatform, storage

# cloud storage client
storage_client = storage.Client(project=PROJECT_ID)
# bucket = storage_client.bucket(BUCKET_NAME)

# Vertex client
aiplatform.init(project=PROJECT_ID, location=LOCATION)

In [25]:
bucket = storage_client.bucket(BUCKET_NAME)

In [1]:
%%capture
# !pip install sparqlwrapper
from SPARQLWrapper import SPARQLWrapper, JSON

import sys
import os
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
from tqdm.notebook import tqdm
import json
import urllib.request
import zipfile

In [2]:
df = pd.DataFrame(columns = ['sid'])
df.empty

True

## Query functions

In [3]:
ENDPOINT_URL = "https://query.wikidata.org/sparql"

def make_full_query(id):
    """
    Build the combined cast + genre + topic SPARQL query for one movie.

    Args:
        id: str, IMDb movie identifier without the 'tt' prefix
            (the prefix is added inside the query)

    Returns:
        query : str, SPARQL query text
    """
    # SPARQL matches triples of the form: ?subject ?predicate ?object
    # Query variables:
    #   ?item: item ID
    #   ?itemLabel: item name (e.g. film title)
    #   ?cast: ID of cast member
    #   ?castLabel: name of cast member
    #   ?property: ID of cast member type
    #   ?propLabel: type of cast member (e.g. director)
    #   ?genreID: ID of genre
    #   ?genre: name of genre
    #   ?topicID: ID of topic
    #   ?topic: name of topic
    # Predicates used:
    #   wdt:P31 - instance of (P for property)
    #   wdt:P279 - subclass of
    #   wdt:P345 - IMDb ID
    #   wdt:P136 - genre
    #   wdt:P921 - main subject
    # Objects used:
    #   wd:Q11424 - film (entity)
    #   wd:Q5 - human
    #
    # Labels are restricted to English (label service language "en" and the
    # lang(...) = "en" filters) to avoid duplicate rows for the same entity
    # labelled in several languages.
    #
    # In plain language the query reads:
    # "Find the Wikidata item that is an instance of film (or of any subclass
    # of film) whose IMDb ID is tt<id>. Take every claim on that item whose
    # value is a human, and record which property (role) links them. Also
    # collect the item's genres and main subjects, keeping English labels."
    #
    # NOTE: the cast, genre and topic patterns are all required (none is
    # OPTIONAL), so an item lacking any one of them produces no rows here.

    # One entry per line of query text; whitespace-only entries are kept on
    # purpose so the generated string is unchanged.
    query_lines = [
        "",
        "    SELECT DISTINCT ?item ?itemLabel ?cast ?castLabel ?property ?propLabel ?genreID ?genre ?topicID ?topic WHERE {",
        "    ?item wdt:P31/wdt:P279* wd:Q11424 . ",
        f"    ?item wdt:P345 'tt{id}' .",
        "    ",
        "    # take all claims on this movie",
        "    ?item ?property ?cast.",
        "    # that involve a human",
        "    ?cast wdt:P31 wd:Q5 .",
        "    ?prop wikibase:directClaim ?property .",
        '    ?item wdt:P136 ?genreID filter (lang(?genre) = "en").',
        "    ?genreID rdfs:label ?genre.",
        '    ?item wdt:P921 ?topicID filter (lang(?topic) = "en").',
        "    ?topicID rdfs:label ?topic.",
        '    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }}',
        "    ",
    ]
    return "\n".join(query_lines)

def make_genre_query(id):
    """
    Build the SPARQL query that lists the genres of one movie.

    Args:
        id: str, IMDb movie identifier without the 'tt' prefix

    Returns:
        query : str, SPARQL query selecting ?item ?itemLabel ?genreID ?genre
    """
    # One entry per line of query text; the empty and whitespace-only entries
    # are kept so the generated string is unchanged.
    query_lines = [
        "SELECT DISTINCT ?item ?itemLabel ?genreID ?genre",
        "    WHERE {",
        "    ?item wdt:P31/wdt:P279* wd:Q11424 . ",
        f"    ?item wdt:P345 'tt{id}' .",
        "",
        '    ?item wdt:P136 ?genreID filter (lang(?genre) = "en").',
        "    ?genreID rdfs:label ?genre.",
        '    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }',
        "    }",
        "    ",
    ]
    return "\n".join(query_lines)

def make_topic_query(id):
    """
    Build the SPARQL query that lists the main subjects (topics) of one movie.

    Args:
        id: str, IMDb movie identifier without the 'tt' prefix

    Returns:
        query : str, SPARQL query selecting ?item ?itemLabel ?topicID ?topic
    """
    # One entry per line of query text; the empty and whitespace-only entries
    # are kept so the generated string is unchanged.
    query_lines = [
        "SELECT DISTINCT ?item ?itemLabel ?topicID ?topic",
        "    WHERE {",
        "    ?item wdt:P31/wdt:P279* wd:Q11424 . ",
        f"    ?item wdt:P345 'tt{id}' .",
        "",
        '    ?item wdt:P921 ?topicID filter (lang(?topic) = "en").',
        "    ?topicID rdfs:label ?topic.",
        '    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }',
        "    }",
        "    ",
    ]
    return "\n".join(query_lines)

def make_cast_query(id):
    """
    Build the SPARQL query that lists the people attached to one movie.

    Every claim on the film whose value is a human (wd:Q5) is returned together
    with the property (role) that links the person to the film.

    Args:
        id: str, IMDb movie identifier without the 'tt' prefix

    Returns:
        query : str, SPARQL query selecting
            ?item ?itemLabel ?cast ?castLabel ?property ?propLabel
    """
    # One entry per line of query text; the empty and whitespace-only entries
    # are kept so the generated string is unchanged.
    query_lines = [
        "SELECT DISTINCT ?item ?itemLabel ?cast ?castLabel ?property ?propLabel",
        "    WHERE {",
        "    ?item wdt:P31/wdt:P279* wd:Q11424 . ",
        f"    ?item wdt:P345 'tt{id}' .",
        "",
        "    # take all claims on this movie",
        "    ?item ?property ?cast.",
        "    # that involve a human",
        "    ?cast wdt:P31 wd:Q5 .",
        "    ?prop wikibase:directClaim ?property .",
        '    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }',
        "    }",
        "    ",
    ]
    return "\n".join(query_lines)

def get_results(id, how='full', endpoint_url=ENDPOINT_URL):
    """
    Query the SPARQL endpoint for one movie and return its rows as flat dicts.

    Args:
        id: int or str, IMDb movie identifier without the 'tt' prefix; it is
            left-padded with zeros to 7 characters before querying
        how: str, which query to run: 'full', 'genre', 'topic' or 'cast'
        endpoint_url: str, SPARQL endpoint to send the query to

    Returns:
        list of dict, one per result binding, mapping each query variable name
        to its value string, plus an 'imdbID' key holding the padded id.
        Empty when the endpoint returns no bindings (an INFO line is printed).

    Raises:
        ValueError: if `how` is not one of the supported query kinds.
        Errors raised by SPARQLWrapper while querying are not caught here.
    """
    query_builders = {
        'full': make_full_query,
        'genre': make_genre_query,
        'topic': make_topic_query,
        'cast': make_cast_query,
    }
    # Fail early with a clear message; previously an unknown `how` surfaced
    # later as an UnboundLocalError on the query-builder variable.
    try:
        format_q = query_builders[how]
    except KeyError:
        raise ValueError(
            f"Unknown query type {how!r}; expected one of {sorted(query_builders)}"
        ) from None

    # Left-pad with zeros to 7 characters; longer ids are left untouched.
    padded_id = str(id).rjust(7, '0')

    python_version = f"{sys.version_info[0]}.{sys.version_info[1]}"
    user_agent = f"WDQS-example Python/{python_version}"
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(format_q(padded_id))
    sparql.setReturnFormat(JSON)
    bindings = sparql.query().convert()["results"]["bindings"]
    if not bindings:
        print(f"INFO: Movie ID {padded_id} was not found on WikiData")

    # Keep only each variable's 'value' field and tag every row with the id
    # used in the query ('imdbID' is written last, so it wins on a key clash).
    return [
        {**{k: v['value'] for k, v in item.items()}, 'imdbID': padded_id}
        for item in bindings
    ]

def flatten(list_of_lists):
    """Concatenate the items of every sub-list, in order, into one new list."""
    merged = []
    for chunk in list_of_lists:
        merged.extend(chunk)
    return merged

def extract_entity(uri):
    """Return the part of `uri` after its last '/' (the whole string if none)."""
    _, _, entity_id = uri.rpartition('/')
    return entity_id

## Download MovieLens data

In [8]:
data_size = '20m'
assert data_size in ['100k', '1m', '20m','25m']

LOCAL_OUTPUT_DIRECTORY = "movielens"
print(f"LOCAL_OUTPUT_DIRECTORY : {LOCAL_OUTPUT_DIRECTORY}")

LOCAL_OUTPUT_DIRECTORY : movielens


In [9]:
# ! rm -rf ./$LOCAL_OUTPUT_DIRECTORY
# ! mkdir ./$LOCAL_OUTPUT_DIRECTORY

In [10]:
data_url = f"https://files.grouplens.org/datasets/movielens/ml-{data_size}.zip"

# Download the archive to a local file named "data" in the working directory.
urllib.request.urlretrieve(data_url, "data")

# exist_ok replaces the check-then-create pattern and keeps the cell re-runnable.
os.makedirs(f"{LOCAL_OUTPUT_DIRECTORY}/", exist_ok=True)

# Context manager so the archive handle is closed once extraction finishes.
with zipfile.ZipFile("data", "r") as zip_ref:
    zip_ref.extractall(f"{LOCAL_OUTPUT_DIRECTORY}/ml_{data_size}")

In [13]:
! tree $LOCAL_OUTPUT_DIRECTORY

[01;34mmovielens[00m
└── [01;34mml_20m[00m
    └── [01;34mml-20m[00m
        ├── README.txt
        ├── genome-scores.csv
        ├── genome-tags.csv
        ├── links.csv
        ├── movies.csv
        ├── ratings.csv
        └── tags.csv

2 directories, 7 files


In [14]:
# Read Links dataset
datapath = os.path.join(LOCAL_OUTPUT_DIRECTORY, f"ml_{data_size}", f"ml-{data_size}")
link_df = pd.read_csv(os.path.join(datapath, 'links.csv'))
link_df.head()

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0


In [63]:
DEST_GCS_PATH

'gs://rec-bandits-v2-hybrid-vertex-bucket/data/ml_20m'

In [65]:
!gsutil cp -r $LOCAL_OUTPUT_DIRECTORY $DEST_GCS_PATH

Copying file://movielens/ml_20m/ml-20m/links.csv [Content-Type=text/csv]...
Copying file://movielens/ml_20m/ml-20m/README.txt [Content-Type=text/plain]...  
Copying file://movielens/ml_20m/ml-20m/genome-scores.csv [Content-Type=text/csv]...
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

Copying file://movielens/ml_20m/ml-20m/ratings.csv [Content-Type=text/csv

## Send SPARQL queries to Wikidata

In [18]:
raw_feat_datapath = 'raw_features'
# exist_ok replaces the check-then-create pattern and keeps the cell re-runnable.
os.makedirs(raw_feat_datapath, exist_ok=True)

def save_json(result_dict, fname, save_path = raw_feat_datapath):
    """
    Serialise `result_dict` as JSON into `save_path`/`fname`.

    Args:
        result_dict: JSON-serialisable object (here, a list of row dicts)
        fname: str, file name to create inside `save_path`
        save_path: str, existing directory to write into

    Returns:
        None
    """
    # Write the JSON text directly. The earlier version encoded it to bytes and
    # wrote str(bytes), which put the Python repr b'...' in the file instead
    # of valid JSON.
    with open(os.path.join(save_path, fname), 'w', encoding='utf-8') as f:
        json.dump(result_dict, f)
        
LOCAL_GENRE_FILE = 'genre.json'
LOCAL_TOPIC_FILE = 'topic.json'
LOCAL_CAST_FILE = 'cast.json'
        
DEST_GCS_PATH = f"{DATA_PATH}/ml_{data_size}"

In [1]:
# genre information
result_genre = Parallel(n_jobs=4)(delayed(get_results)(i, 'genre') for i in tqdm(link_df['imdbId']))
result_genre_flat = flatten(result_genre)

In [19]:
LOCAL_GENRE_FILE = 'genre.json'

# result_genre_flat
save_json(result_genre_flat, LOCAL_GENRE_FILE)

In [34]:
# LOCAL_FILE = f"{raw_feat_datapath}/genre.json"
# DEST_BLOB = f"{DATA_PATH}/ml_{data_size}/{LOCAL_FILE}"

# blob = bucket.blob(DEST_BLOB)
# blob.upload_from_filename(LOCAL_FILE)

! gsutil cp $raw_feat_datapath/$LOCAL_GENRE_FILE $DEST_GCS_PATH

'gs://rec-bandits-v2-hybrid-vertex-bucket/data/ml_20m/raw_features/genre.json'

In [2]:
# topic information
result_topic = Parallel(n_jobs=4)(delayed(get_results)(i, 'topic') for i in tqdm(link_df['imdbId']))
result_topic_flat = flatten(result_topic)

In [3]:
# cast information
result_cast = Parallel(n_jobs=4)(delayed(get_results)(i, 'cast') for i in tqdm(link_df['imdbId']))
result_cast_flat = flatten(result_cast)

## Save raw feature data

In [38]:
LOCAL_GENRE_FILE = 'genre.json'
LOCAL_TOPIC_FILE = 'topic.json'
LOCAL_CAST_FILE = 'cast.json'

In [54]:
# save_json(result_genre_flat, LOCAL_GENRE_FILE)

! gsutil cp $raw_feat_datapath/$LOCAL_GENRE_FILE $DEST_GCS_PATH/$raw_feat_datapath/$LOCAL_GENRE_FILE

Copying file://raw_features/genre.json [Content-Type=application/json]...
/ [1 files][ 10.5 MiB/ 10.5 MiB]                                                
Operation completed over 1 objects/10.5 MiB.                                     


In [55]:
save_json(result_topic_flat, LOCAL_TOPIC_FILE)

! gsutil cp $raw_feat_datapath/$LOCAL_TOPIC_FILE $DEST_GCS_PATH/$raw_feat_datapath//$LOCAL_TOPIC_FILE

Copying file://raw_features/topic.json [Content-Type=application/json]...
/ [1 files][  2.1 MiB/  2.1 MiB]                                                
Operation completed over 1 objects/2.1 MiB.                                      


In [56]:
save_json(result_cast_flat, LOCAL_CAST_FILE)

! gsutil cp $raw_feat_datapath/$LOCAL_CAST_FILE $DEST_GCS_PATH/$raw_feat_datapath//$LOCAL_CAST_FILE

Copying file://raw_features/cast.json [Content-Type=application/json]...
- [1 files][113.4 MiB/113.4 MiB]                                                
Operation completed over 1 objects/113.4 MiB.                                    


In [50]:
!gsutil ls $DEST_GCS_PATH/

gs://rec-bandits-v2-hybrid-vertex-bucket/data/ml_20m


## Process data and save it as CSV

In [58]:
feat_datapath = 'processed_features'
# exist_ok replaces the check-then-create pattern and keeps the cell re-runnable.
os.makedirs(feat_datapath, exist_ok=True)

def process_save_csv(
    result_dict, 
    fname, 
    rename_cols = None, 
    extract_entity_cols = None, 
    save_path = feat_datapath
):
    """
    Turn query result rows into a DataFrame, tidy it, and write it as CSV.

    Args:
        result_dict: list of row dicts (anything `pd.DataFrame` accepts)
        fname: str, CSV file name to create inside `save_path`
        rename_cols: optional dict mapping old column names to new ones
        extract_entity_cols: optional list of column names (after renaming)
            whose values are passed through `extract_entity`
        save_path: str, existing directory to write into

    Returns:
        None; the CSV is written without the index column.
    """
    result_df = pd.DataFrame(result_dict)
    if rename_cols:
        result_df = result_df.rename(columns=rename_cols)
    # Map each column directly rather than applying a lambda row by row over
    # the whole frame; same values, without building a Series per row.
    for col in extract_entity_cols or ():
        result_df[col] = result_df[col].map(extract_entity)
    result_df.to_csv(os.path.join(save_path, fname), index=False)
    
LOCAL_GENRE_CSV = 'genre.csv'
LOCAL_TOPIC_CSV = 'topic.csv'
LOCAL_CAST_CSV  = 'cast.csv'

In [45]:
process_save_csv(
    result_genre_flat, 'genre.csv', 
    rename_cols = {
        'item':'wikiItemID', 
        'itemLabel':'wikiItemLabel',
        'genre':'genreLabel'
    },
    extract_entity_cols = ['wikiItemID', 'genreID']
)

In [46]:
process_save_csv(
    result_topic_flat, 'topic.csv',
    rename_cols = {
        'item':'wikiItemID', 
        'itemLabel':'wikiItemLabel',
        'topic':'topicLabel'
    },
    extract_entity_cols = ['wikiItemID', 'topicID']
)

In [47]:
process_save_csv(
    result_cast_flat, 'cast.csv',
    rename_cols = {
        'item':'wikiItemID', 
        'itemLabel':'wikiItemLabel',
        'cast':'personID',
        'castLabel':'personLabel',
        'property':'roleID',
        'propLabel':'roleLabel'
    },
    extract_entity_cols = ['wikiItemID', 'personID', 'roleID']
)

In [48]:
!ls $feat_datapath

cast.csv  genre.csv  topic.csv


In [59]:
! gsutil cp $feat_datapath/$LOCAL_GENRE_CSV $DEST_GCS_PATH/$feat_datapath/$LOCAL_GENRE_CSV

Copying file://processed_features/genre.csv [Content-Type=text/csv]...
/ [1 files][  3.2 MiB/  3.2 MiB]                                                
Operation completed over 1 objects/3.2 MiB.                                      


In [60]:
! gsutil cp $feat_datapath/$LOCAL_TOPIC_CSV $DEST_GCS_PATH/$feat_datapath/$LOCAL_TOPIC_CSV

Copying file://processed_features/topic.csv [Content-Type=text/csv]...
/ [1 files][622.2 KiB/622.2 KiB]                                                
Operation completed over 1 objects/622.2 KiB.                                    


In [61]:
! gsutil cp $feat_datapath/$LOCAL_CAST_CSV $DEST_GCS_PATH/$feat_datapath/$LOCAL_CAST_CSV

Copying file://processed_features/cast.csv [Content-Type=text/csv]...
/ [1 files][ 30.7 MiB/ 30.7 MiB]                                                
Operation completed over 1 objects/30.7 MiB.                                     


# inspect files

In [66]:
!gsutil ls $DEST_GCS_PATH/$feat_datapath

gs://rec-bandits-v2-hybrid-vertex-bucket/data/ml_20m/processed_features/cast.csv
gs://rec-bandits-v2-hybrid-vertex-bucket/data/ml_20m/processed_features/genre.csv
gs://rec-bandits-v2-hybrid-vertex-bucket/data/ml_20m/processed_features/topic.csv


In [68]:
cast_df = pd.read_csv(f"{DEST_GCS_PATH}/{feat_datapath}/{LOCAL_CAST_CSV}")

print(f"cast_df shape : {cast_df.shape}")
cast_df.head()

cast_df shape : (436642, 7)


Unnamed: 0,wikiItemID,roleID,personID,wikiItemLabel,personLabel,roleLabel,imdbID
0,Q171048,P58,Q3397612,Toy Story,Joel Cohen,screenwriter,114709
1,Q171048,P725,Q374093,Toy Story,John Ratzenberger,voice actor,114709
2,Q171048,P58,Q269214,Toy Story,John Lasseter,screenwriter,114709
3,Q171048,P57,Q269214,Toy Story,John Lasseter,director,114709
4,Q171048,P170,Q269214,Toy Story,John Lasseter,creator,114709


In [69]:
topic_df = pd.read_csv(f"{DEST_GCS_PATH}/{feat_datapath}/{LOCAL_TOPIC_CSV}")

print(f"topic_df shape : {topic_df.shape}")
topic_df.head()

topic_df shape : (11773, 5)


Unnamed: 0,wikiItemID,topicID,wikiItemLabel,topicLabel,imdbID
0,Q222939,Q182154,Jumanji,time travel,113497
1,Q222939,Q3244175,Jumanji,tabletop game,113497
2,Q782465,Q191089,Grumpier Old Men,old age,113228
3,Q1304560,Q11995,Father of the Bride Part II,human pregnancy,113041
4,Q42198,Q214126,Heat,Los Angeles Police Department,113277


In [70]:
genre_df = pd.read_csv(f"{DEST_GCS_PATH}/{feat_datapath}/{LOCAL_GENRE_CSV}")

print(f"genre_df shape : {genre_df.shape}")
genre_df.head()

genre_df shape : (58984, 5)


Unnamed: 0,wikiItemID,genreID,wikiItemLabel,genreLabel,imdbID
0,Q171048,Q157394,Toy Story,fantasy film,114709
1,Q171048,Q157443,Toy Story,comedy film,114709
2,Q171048,Q319221,Toy Story,adventure film,114709
3,Q171048,Q663106,Toy Story,buddy film,114709
4,Q222939,Q157394,Jumanji,fantasy film,113497


## Aggregate wiki features

### Cast features

In [73]:
print(f"cast_df shape : {cast_df.shape}")

cast_df.head()

cast_df shape : (436642, 7)


Unnamed: 0,wikiItemID,roleID,personID,wikiItemLabel,personLabel,roleLabel,imdbID
0,Q171048,P58,Q3397612,Toy Story,Joel Cohen,screenwriter,114709
1,Q171048,P725,Q374093,Toy Story,John Ratzenberger,voice actor,114709
2,Q171048,P58,Q269214,Toy Story,John Lasseter,screenwriter,114709
3,Q171048,P57,Q269214,Toy Story,John Lasseter,director,114709
4,Q171048,P170,Q269214,Toy Story,John Lasseter,creator,114709


In [72]:
# Keep one row per (movie, person) so each person appears once in a movie's
# aggregated cast list.
# NOTE: a person credited in several roles on the same movie keeps only the
# last-listed role (in the sample output, John Lasseter's screenwriter and
# director rows are dropped and only 'creator' remains), so roleID/roleLabel
# in cast_df_v1 are not a complete record of roles.
cast_df_v1 = cast_df.drop_duplicates(
    subset = ['imdbID', 'personID'],
    keep = 'last'
).reset_index(drop = True)

print(f"cast_df_v1 shape : {cast_df_v1.shape}")

cast_df_v1.head(3)

cast_df_v1 shape : (408479, 7)


Unnamed: 0,wikiItemID,roleID,personID,wikiItemLabel,personLabel,roleLabel,imdbID
0,Q171048,P58,Q3397612,Toy Story,Joel Cohen,screenwriter,114709
1,Q171048,P725,Q374093,Toy Story,John Ratzenberger,voice actor,114709
2,Q171048,P170,Q269214,Toy Story,John Lasseter,creator,114709


In [74]:
cast_list_agg_df = cast_df_v1.groupby('imdbID')['personLabel'].apply(list).reset_index()
print(f"cast_list_agg_df shape : {cast_list_agg_df.shape}")

cast_list_agg_df.head(3)

cast_list_agg_df shape : (25771, 2)


Unnamed: 0,imdbID,personLabel
0,5,"[William Kennedy Dickson, Charles Kayser, Will..."
1,8,"[Fred Ott, William Kennedy Dickson, William He..."
2,10,[Louis Lumière]


### Topic features

In [75]:
print(f"topic_df shape : {topic_df.shape}")

topic_df.head()

topic_df shape : (11773, 5)


Unnamed: 0,wikiItemID,topicID,wikiItemLabel,topicLabel,imdbID
0,Q222939,Q182154,Jumanji,time travel,113497
1,Q222939,Q3244175,Jumanji,tabletop game,113497
2,Q782465,Q191089,Grumpier Old Men,old age,113228
3,Q1304560,Q11995,Father of the Bride Part II,human pregnancy,113041
4,Q42198,Q214126,Heat,Los Angeles Police Department,113277


In [76]:
topic_list_agg_df = topic_df.groupby('imdbID')['topicLabel'].apply(list).reset_index()
print(f"topic_list_agg_df shape : {topic_list_agg_df.shape}")

topic_list_agg_df.head(3)

topic_list_agg_df shape : (6449, 2)


Unnamed: 0,imdbID,topicLabel
0,1527,[adultery]
1,2381,[organized crime]
2,4008,[dinosaur]


### Genre features

In [79]:
print(f"genre_df shape : {genre_df.shape}")

genre_df.head()

genre_df shape : (58984, 5)


Unnamed: 0,wikiItemID,genreID,wikiItemLabel,genreLabel,imdbID
0,Q171048,Q157394,Toy Story,fantasy film,114709
1,Q171048,Q157443,Toy Story,comedy film,114709
2,Q171048,Q319221,Toy Story,adventure film,114709
3,Q171048,Q663106,Toy Story,buddy film,114709
4,Q222939,Q157394,Jumanji,fantasy film,113497


In [80]:
genre_list_agg_df = genre_df.groupby('imdbID')['genreLabel'].apply(list).reset_index()
print(f"genre_list_agg_df shape : {genre_list_agg_df.shape}")

genre_list_agg_df.head(3)

genre_list_agg_df shape : (25434, 2)


Unnamed: 0,imdbID,genreLabel
0,5,"[docufiction film, documentary film, silent film]"
1,8,[silent film]
2,10,"[documentary film, silent film]"
