In [None]:
# Pull every Wikidata work/director pair (P57) and save each entity as ttl.

import hashlib
import numpy
import pandas
import pathlib
import pydash
import rdflib
import requests
import time
import tqdm

def value_extract(row, column):

    ''' Return the 'value' entry of the cell stored at row[column]. '''

    # Lookup is delegated to pydash.get, as in the rest of this file.
    cell = row[column]
    return pydash.get(cell, 'value')

def sparql_query(query, service):

    ''' Send sparql request, and formulate results into a dataframe.

    query: SPARQL query text, sent as the 'query' URL parameter.
    service: URL of the SPARQL endpoint the GET request is sent to.

    Returns a pandas DataFrame built from the 'results.bindings' entry of the
    JSON response, with every cell reduced to its 'value' entry.

    Raises requests.HTTPError when the endpoint answers with a 4xx/5xx status.
    '''

    response = requests.get(service, params={'format': 'json', 'query': query}, timeout=120)
    # Fail on the HTTP status itself; otherwise an error response surfaces
    # later as an unrelated JSON decoding error or a silently empty dataframe.
    response.raise_for_status()
    results = pydash.get(response.json(), 'results.bindings')
    df = pandas.DataFrame.from_dict(results)
    for column in df.columns:
        # Replace each cell of this column by its 'value' entry.
        df[column] = df.apply(value_extract, column=column, axis=1)

    return df

# Collect every (work, director) pair connected by a P57 statement.
query = ''' select distinct ?work ?director where { ?work wdt:P57 ?director . } '''
wikidata_director = sparql_query(query, 'https://query.wikidata.org/sparql').drop_duplicates()
wikidata_entities = pydash.uniq(list(wikidata_director.director.unique())+list(wikidata_director.work.unique()))

# Download one ttl file per entity. Files that already exist are skipped, so
# the loop can be re-run to pick up whatever failed or was interrupted.
for x in tqdm.tqdm(wikidata_entities):
    qid = pathlib.Path(x).name
    # The md5 digest is only used to shard files into two-character folders.
    qid_hash = hashlib.md5(qid.encode()).hexdigest()
    save_path = pathlib.Path.cwd() / 'data' / 'wikidata' / qid_hash[:2] / f'{qid}.ttl'
    if save_path.exists():
        continue

    save_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        r = requests.get(f'https://www.wikidata.org/wiki/Special:EntityData/{qid}.ttl?flavor=simple', timeout=120)
    except requests.RequestException as error:
        # Treated like a non-200 answer: report it, skip, retry on the next run.
        tqdm.tqdm.write(f'{qid}: {error}')
        continue
    if r.status_code != 200:
        continue

    # Write the raw bytes so the payload is not re-encoded with the platform's
    # default text encoding, and go through a temporary name so an interrupted
    # write never leaves a truncated .ttl that the exists() check would skip.
    partial_path = save_path.with_name(f'{qid}.ttl.part')
    partial_path.write_bytes(r.content)
    partial_path.replace(save_path)