# Query caselaw-explorer API
---
Before loading the dependencies, make sure to paste your user API key into the *.env* file under *COGNITO_USER_KEY*; the same file must also define *COGNITO_CLIENT_ID* and *APPSYNC_ENDPOINT*, which the setup cell reads.
You can find the key in the bottom-left menu of the app under *... > Preferences > Developer > API key*.
### Load dependencies & set up client

In [1]:
import json
from collections.abc import MutableMapping
from datetime import datetime
from os import getenv, makedirs
from os.path import join

import boto3
import pandas as pd
import requests
from dotenv import load_dotenv

from notebooks.api.gql_queries import *

# Load the settings from the local .env file into the process environment.
load_dotenv()
# The export helpers write into ./output, so make sure it exists.
makedirs('output', exist_ok=True)

refresh_token = getenv('COGNITO_USER_KEY')
client_id = getenv('COGNITO_CLIENT_ID')
endpoint = getenv('APPSYNC_ENDPOINT')

# Fail fast with a readable message: getenv() silently returns None for a
# missing variable, which would otherwise only surface later as an obscure
# error in the authentication or request step.
_missing_settings = [
    name
    for name, value in (
        ('COGNITO_USER_KEY', refresh_token),
        ('COGNITO_CLIENT_ID', client_id),
        ('APPSYNC_ENDPOINT', endpoint),
    )
    if not value
]
if _missing_settings:
    raise EnvironmentError(
        f'Missing required setting(s) in .env: {", ".join(_missing_settings)}'
    )

# Cognito client used for authentication; HTTP session reused for all queries.
client = boto3.client('cognito-idp')
session = requests.Session()

### Authenticate (If token expired: re-run cell)

In [2]:
# Request tokens from Cognito using the refresh-token flow.
# `auth` is read by execute(), which sends its access token with every query.
refresh_auth_request = {
    'ClientId': client_id,
    'AuthFlow': 'REFRESH_TOKEN',
    'AuthParameters': {'REFRESH_TOKEN': refresh_token},
}
auth = client.initiate_auth(**refresh_auth_request)

### Function definitions

In [3]:
def execute(qgl_query):
    """Send a GraphQL query to the configured endpoint and return its result.

    Uses the module-level ``session``, ``endpoint`` and ``auth`` objects; the
    access token stored in ``auth`` is sent in the ``authorization`` header.

    Args:
        qgl_query: GraphQL query string to execute.

    Returns:
        Tuple ``(result, query_name)``: ``query_name`` is the first key of the
        response's ``data`` object and ``result`` is the value stored under it.

    Raises:
        requests.HTTPError: if the endpoint answers with an HTTP error status.
        RuntimeError: if the response contains no ``data`` payload.
    """
    response = session.request(
        url=endpoint,
        method='POST',
        headers={'authorization': auth['AuthenticationResult']['AccessToken']},
        json={'query': qgl_query}
    )
    # Surface HTTP-level failures directly instead of failing later on a
    # missing 'data' key.
    response.raise_for_status()
    payload = response.json()
    data = payload.get('data')
    errors = payload.get('errors')
    if not data:
        raise RuntimeError(f'GraphQL query returned no data. Errors: {errors}')
    if errors:
        # Data and errors can arrive together; keep the data but tell the user.
        print(f'Warning: GraphQL query returned errors: {errors}')
    query_name = next(iter(data))
    print(f'Query {query_name} executed.')
    return data[query_name], query_name

def format_network(resp):
    """Return a copy of a network response with every node's data decoded.

    The copy is made with ``resp.copy()``; its ``nodes`` entry is replaced by a
    new list of ``{'id': ..., 'data': ...}`` dicts in which ``data`` has been
    parsed from its JSON string. A warning is printed when the response's
    ``message`` field is anything other than the empty string.

    Args:
        resp: response object with ``nodes`` and ``message`` entries.

    Returns:
        The formatted copy of ``resp``.
    """
    formatted = resp.copy()
    decoded_nodes = []
    for node in formatted['nodes']:
        decoded_nodes.append({'id': node['id'], 'data': json.loads(node['data'])})
    formatted['nodes'] = decoded_nodes

    limit_reached = formatted['message'] != ''
    if limit_reached:
        print('Query limit reached! Only partial result returned.\n')
    print('Network formatted.')
    return formatted

def get_network_statistics(nodes, edges):
    """Fetch node metadata and compute statistics for a network.

    Runs two queries through ``execute``: a batch fetch of the node data with
    ``AttributesList.NETWORKSTATS``, then the statistics computation on the
    fetched nodes and the given edges.

    Args:
        nodes: nodes of the network, passed to the batch-fetch query.
        edges: edges of the network, passed to the statistics query.

    Returns:
        Tuple ``(statistics, query_name)``: the JSON-decoded result of the
        statistics query and that query's name.
    """
    metadata_query = gql_batch_fetch_node_data(nodes, AttributesList.NETWORKSTATS)
    node_metadata, _ = execute(metadata_query)
    print('Required node metadata fetched.')

    stats_query = gql_compute_networkstatistics(node_metadata, edges)
    raw_stats, query_name = execute(stats_query)
    print('Statistics calculated.')

    parsed_stats = json.loads(raw_stats)
    return parsed_stats, query_name

def convert_to_dataframe(list_of_dict, flatten=False, list_columns=('domains', 'legal_provisions')):
    """Convert a list of (possibly nested) dicts into a pandas DataFrame.

    Nested mappings are flattened into their parent, so every leaf key becomes
    a column. If the same key occurs at several nesting levels, the value seen
    last wins.

    Args:
        list_of_dict: iterable of dicts, one per resulting row.
        flatten: if True, list-valued columns named in ``list_columns`` are
            exploded into one row per list element.
        list_columns: names of the columns to explode when ``flatten`` is
            True. Columns that are not present in the data are skipped, so an
            empty result or a reduced attribute selection no longer raises a
            KeyError.

    Returns:
        The resulting DataFrame. Exploded rows keep the index of the row they
        originate from.
    """
    def convert_flatten(d):
        # Merge nested mappings into a single-level dict.
        flat = {}
        for k, v in d.items():
            if isinstance(v, MutableMapping):
                flat.update(convert_flatten(v))
            else:
                flat[k] = v
        return flat

    # flatten dict attributes
    df_flat = pd.DataFrame([convert_flatten(entry) for entry in list_of_dict])
    # flatten list attributes
    if flatten:
        for column in list_columns:
            if column in df_flat.columns:
                df_flat = df_flat.explode(column)
    return df_flat

def export_as_csv(df_response, file_name=''):
    """Write a DataFrame to a timestamped CSV file in the ``output`` directory.

    The file is named ``<file_name>_<YYYY-MM-DD_HH-MM-SS>.csv``.

    Args:
        df_response: pandas DataFrame to export; its index is not written.
        file_name: prefix of the generated file name.
    """
    # Do not rely on an earlier notebook cell having created the directory.
    makedirs('output', exist_ok=True)
    # '-' separates the time fields because ':' is not allowed in Windows file names.
    timestamp = datetime.today().strftime('%Y-%m-%d_%H-%M-%S')
    path = join('output', f'{file_name}_{timestamp}.csv')
    df_response.to_csv(path, index=False)
    print(f'Saved {file_name} to {path}.')

def export_as_json(dict_response, file_name=''):
    """Write a JSON-serialisable object to a timestamped file in ``output``.

    The file is named ``<file_name>_<YYYY-MM-DD_HH-MM-SS>.json``.

    Args:
        dict_response: object to serialise with ``json.dump``.
        file_name: prefix of the generated file name.
    """
    # Do not rely on an earlier notebook cell having created the directory.
    makedirs('output', exist_ok=True)
    # '-' separates the time fields because ':' is not allowed in Windows file names.
    timestamp = datetime.today().strftime('%Y-%m-%d_%H-%M-%S')
    path = join('output', f'{file_name}_{timestamp}.json')
    with open(path, 'w') as f:
        json.dump(dict_response, f)
    print(f'Saved {file_name} to {path}.')

### Query network
At least one of the parameters marked *optional* must be set.
All other parameters fall back to their default value when omitted (see the function definition).

For descriptions of the parameter options see *definitions > terminology > attribute_values.py*.

In [4]:
# construct GraphQL query string from input:
n_query = gql_query_network(
    attributes_to_fetch=AttributesList.ALL,
    data_sources=[DataSource.RS],               #           list of DataSources (options so far: RS (Rechtspraak))
    eclis='ECLI:NL:HR:2004:AP0186',             # optional; string of eclis separated by space
    #keywords='gevaarzetting',                  # optional; string in Simple Query String Syntax (https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html)
    #articles='Artikel 3',                      # optional; string in Simple Query String Syntax, see user guide
    date_start='1900-01-01',                    #           string of format yyyy-mm-dd
    date_end='2021-12-31',                      #           string of format yyyy-mm-dd
    #instances=[Instance.HOGE_RAAD],            # optional; list of Instance options
    #domains=[Domain.STRAFRECHT],               # optional; list of Domain options
    doc_types=[DocType.DEC],                    #           list of DocTypes (options: DEC (decision), OPI (opinion))
    degrees_sources=1,                          #           int: degree of source citations
    degrees_targets=1                           #           int: degree of target citations
)

# execute GraphQL query:
n_response, n_query_name = execute(n_query)

# format response to dict:
network = format_network(n_response)
#print(json.dumps(network, indent=4))

# export response to json:
export_as_json(network, n_query_name)

# convert nodes and edges to dataframe:
df_nodes = convert_to_dataframe(network['nodes'], flatten=True)
df_edges = convert_to_dataframe(network['edges'])

# export nodes and edges as csv:
export_as_csv(df_nodes, f'nodes_{n_query_name}')
export_as_csv(df_edges, f'edges_{n_query_name}')

Query queryNetworkByUserInput executed.
Network formatted.
Saved queryNetworkByUserInput to output/queryNetworkByUserInput_2022-01-05_15:15:58.json.
Saved nodes_queryNetworkByUserInput to output/nodes_queryNetworkByUserInput_2022-01-05_15:15:58.csv.
Saved edges_queryNetworkByUserInput to output/edges_queryNetworkByUserInput_2022-01-05_15:15:58.csv.


### Query network statistics

In [5]:
# Fetch the required node metadata and compute the statistics for the network
# returned by the query cell (uses the unformatted `n_response`).
statistics, s_query_name = get_network_statistics(n_response['nodes'], n_response['edges'])

# Save the raw statistics as a timestamped JSON file in ./output.
export_as_json(statistics, s_query_name)

# The statistics are loaded into a DataFrame and transposed before the CSV export.
# NOTE(review): export_as_csv writes with index=False, so the row labels
# produced by the transpose are not written to the CSV - confirm this is intended.
df_statistics = pd.DataFrame(statistics).transpose()

export_as_csv(df_statistics, s_query_name)

Query batchFetchNodeData executed.
Required node metadata fetched.
Query computeNetworkStatistics executed.
Statistics calculated.
Saved computeNetworkStatistics to output/computeNetworkStatistics_2022-01-05_15:15:59.json.
Saved computeNetworkStatistics to output/computeNetworkStatistics_2022-01-05_15:15:59.csv.
