# Argument Mining API query testing

Load the target data file and explore how the API query results are structured.

In [1]:
# Turn on IPython's autoreload extension (mode 2) so that edits to imported
# modules, such as the local package, are re-loaded before each cell runs,
# without having to restart the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import time

from dotenv import find_dotenv, load_dotenv
import requests
import grequests
import pandas as pd

from arg_mine import DATA_DIR
from arg_mine.data.loaders import get_gdelt_df
from arg_mine.api import classify, auth, session, errors
from arg_mine import utils

  curious_george.patch_all(thread=False, select=False)


In [3]:
# Topic string sent along with the classify requests further down.
topic = "climate change"

# Credentials for the API; per the original note these are read from env
# variables by the package's auth helper.
user_id, api_key = auth.load_auth_tokens()

In [4]:
# Build the path to the snippets CSV under the project's raw-data directory.
csv_datapath = os.path.join(
    DATA_DIR,
    "raw",
    "2020-climate-change-narrative",
)
csv_filepath = os.path.join(
    csv_datapath,
    "WebNewsEnglishSnippets.2020.csv",
)

# Read it with the package loader and preview the first few rows.
url_df = get_gdelt_df(csv_filepath)
url_df.head()

Unnamed: 0,datetime,title,headline_image_url,content_url,snippit,timestamp
0,20200115111500,Liberal MPs back Science Minister Karen Andrew...,https://static.ffx.io/images/$zoom_0.2627%2C$m...,https://www.smh.com.au/politics/federal/libera...,the science in her interview with The Age and...,2020-01-15 11:15:00
1,20200117184500,Several crowd-pullers on day two of KLF,https://www.thehindu.com/news/cities/kozhikode...,https://www.thehindu.com/news/cities/kozhikode...,"Guha, who talked about patriotism and jingois...",2020-01-17 18:45:00
2,20200106233000,Seven arrested after gas rig protest,https://www.stourbridgenews.co.uk/resources/im...,https://www.stourbridgenews.co.uk/news/nationa...,three demands for the Scottish and UK Governm...,2020-01-06 23:30:00
3,20200101111500,Australia sending aid to wildfire towns as dea...,https://bloximages.newyork1.vip.townnews.com/h...,https://www.heraldmailmedia.com/news/nation/au...,this season the worst on record and reignited ...,2020-01-01 11:15:00
4,20200107101500,"A hot, dry country caught between fire and a c...",,http://global.chinadaily.com.cn/a/202001/07/WS...,", which is burned to generate electricity, wit...",2020-01-07 10:15:00


## Collect data for unit tests
Using the tools in the package, run queries that give known responses, and save those responses as JSON fixtures for the unit tests.

In [5]:
import pkg_resources
import json
from tests.fixtures import save_json_request_fixture, load_json_fixture
import logging

# logging.basicConfig() only configures the root logger and returns None, so
# assigning its result would leave `logger` bound to None. Configure logging
# first, then obtain an actual Logger object for use in this notebook.
logging.basicConfig()
logger = logging.getLogger(__name__)

# Set to True to run the save_json_request_fixture(...) calls in the cells
# below; while False those calls are skipped.
SAVE_FIXTURES = False

In [6]:
# Classify the first article URL in the dataset.
url = url_df.content_url.values[0]

# Request body for the classify endpoint (key order kept as before).
payload = dict(
    topic=topic,
    userID=user_id,
    apiKey=api_key,
    targetUrl=url,
    model="default",
    topicRelevance=classify.TopicRelevance.WORD2VEC,
    # NOTE(review): a previous note here said "we don't want to predict stance
    # without context", which conflicts with True -- confirm the intended value.
    predictStance=True,
    # attention doesn't work for BERT-based models (the default model)
    computeAttention=False,
    # only return sentences classified as arguments
    showOnlyArguments=True,
    userMetadata=url,
)

result = session.fetch(session.ApiUrl.CLASSIFY_BASE_URL, payload)

In [7]:
# Display the top-level keys of the response returned by session.fetch.
result.keys()

dict_keys(['metadata', 'sentences'])

In [8]:
# Opt-in: store this request/response pair as a unit-test fixture.
# drop_keys presumably strips the API key from what is saved -- verify in
# tests.fixtures.
if SAVE_FIXTURES:
    save_json_request_fixture(
        "response_classify_only_args.json",
        payload,
        result,
        status_code=200,
        drop_keys=["apiKey"],
    )

### Known bad article request: the article URL gives a 404 on its own server, so the API refuses the request with a 400

In [9]:
# This article URL is known to give a 404 on the article server.
url = url_df.content_url.values[3]
result = None  # remains None if the fetch below raises
print(url)

# Same request body as for a good URL; only the target differs.
payload = dict(
    topic=topic,
    userID=user_id,
    apiKey=api_key,
    targetUrl=url,
    model="default",
    topicRelevance=classify.TopicRelevance.WORD2VEC,
    # NOTE(review): a previous note here said "we don't want to predict stance
    # without context", which conflicts with True -- confirm the intended value.
    predictStance=True,
    # attention doesn't work for BERT-based models (the default model)
    computeAttention=False,
    # only return sentences classified as arguments
    showOnlyArguments=True,
    userMetadata=url,
)

try:
    result = session.fetch(session.ApiUrl.CLASSIFY_BASE_URL, payload)
except errors.Refused as refusal:
    # The assignment never happened, so this shows the None set above,
    # followed by the error raised by the package.
    print(result)
    print(refusal)

https://www.heraldmailmedia.com/news/nation/australia-sending-aid-to-wildfire-towns-as-death-toll-rises/article_883fa793-6c0a-547e-8f77-b5964f1d7182.html


ERROR:arg_mine.api.session:400 : {'error': 'Website could not be crawled or returned an empty result. Please contact an administrator.'}


None
Refused: 400: Website could not be crawled or returned an empty result. Please contact an administrator.


In [10]:
# Repeat the refused request with plain `requests` so the raw error body is
# available (session.fetch raised for this payload instead of returning it).
response = requests.post(session.ApiUrl.CLASSIFY_BASE_URL, json=payload)

# Decode the JSON body once and reuse it for both the printout and the
# fixture, rather than parsing the response twice.
error_response = response.json()
print(error_response)

if SAVE_FIXTURES:
    save_json_request_fixture(
        "response_classify_refused_remote_404.json",
        payload,
        error_response,
        status_code=400,
        drop_keys=["apiKey"],
    )

{'error': 'Website could not be crawled or returned an empty result. Please contact an administrator.'}


### Bad parameters test

In [12]:
# A URL that classifies fine; the deliberately invalid part is the model name.
url = url_df.content_url.values[0]
result = None  # remains None if the fetch below raises
print(url)

payload = dict(
    topic=topic,
    userID=user_id,
    apiKey=api_key,
    targetUrl=url,
    model="i_am_a_ bad_model",
    userMetadata=url,
)

try:
    result = session.fetch(session.ApiUrl.CLASSIFY_BASE_URL, payload)
except errors.ArgumenTextGatewayError as gateway_err:
    # The assignment never happened, so this shows the None set above,
    # followed by the error raised by the package.
    print(result)
    print(gateway_err)

https://www.smh.com.au/politics/federal/liberals-speak-out-to-back-science-minister-on-climate-change-action-20200115-p53rs1.html


ERROR:arg_mine.api.session:400 : {'error': 'Model "i_am_a_ bad_model" does not exist. Please refer to the documentation and choose another model.'}


None
ArgumenTextGatewayError: 400: Model "i_am_a_ bad_model" does not exist. Please refer to the documentation and choose another model.


In [13]:
# Send the bad-model payload with plain `requests` to get the raw status code.
response = requests.post(session.ApiUrl.CLASSIFY_BASE_URL, json=payload)

# NOTE(review): the fixture name says "500" and an empty body is stored, while
# the session log for this payload shows a 400 with a JSON error -- confirm
# which of the two the unit tests expect before regenerating.
if SAVE_FIXTURES:
    save_json_request_fixture(
        "response_classify_500_bad_payload.json",
        payload,
        "",
        status_code=response.status_code,
        drop_keys=["apiKey"],
    )

### timeout test

In [14]:
# A valid request, sent with a 0.1 s timeout to exercise the timeout path.
url = url_df.content_url.values[0]
result = None  # remains None if the fetch below raises
print(url)

payload = dict(
    topic=topic,
    userID=user_id,
    apiKey=api_key,
    targetUrl=url,
    model="default",
    topicRelevance=classify.TopicRelevance.WORD2VEC,
    # NOTE(review): a previous note here said "we don't want to predict stance
    # without context", which conflicts with True -- confirm the intended value.
    predictStance=True,
    # attention doesn't work for BERT-based models (the default model)
    computeAttention=False,
    # only return sentences classified as arguments
    showOnlyArguments=True,
    userMetadata=url,
)

try:
    result = session.fetch(
        session.ApiUrl.CLASSIFY_BASE_URL,
        payload,
        timeout=0.1,
    )
except errors.NotResponding as not_responding:
    # The assignment never happened, so this shows the None set above,
    # followed by the error raised by the package.
    print(result)
    print(not_responding)

https://www.smh.com.au/politics/federal/liberals-speak-out-to-back-science-minister-on-climate-change-action-20200115-p53rs1.html
None
Server not responding, ConnectionError or Timeout (0.1 s)


In [15]:
# Same payload through plain `requests` with the same 0.1 s timeout, to see
# the underlying exception raised by the HTTP library.
try:
    response = requests.post(
        session.ApiUrl.CLASSIFY_BASE_URL,
        json=payload,
        timeout=0.1,
    )
except requests.Timeout as timeout_err:
    print(timeout_err)

HTTPSConnectionPool(host='api.argumentsearch.com', port=443): Max retries exceeded with url: /en/classify (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7ff8613e1190>, 'Connection to api.argumentsearch.com timed out. (connect timeout=0.1)'))
