# Argument Mining API query testing

Load the target data file and explore the results returned by queries to the Argument Mining API.

In [1]:
# Enable IPython's autoreload extension so edits to imported local modules
# are picked up before each cell executes, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [31]:
import os
from dotenv import find_dotenv, load_dotenv
import requests
import pandas as pd

from arg_mine import DATA_DIR
from arg_mine.data.loaders import get_gdelt_df
from arg_mine.api import classify
from arg_mine import utils

In [3]:
# Load the API credentials from the environment (populated from a local .env
# file when one is found) so they are never hardcoded in the notebook.
load_dotenv(find_dotenv())
am_user_id = os.getenv("ARGUMENTEXT_USERID")
am_user_key = os.getenv("ARGUMENTEXT_KEY")

# os.getenv returns None for unset variables; stop here with a clear message
# instead of sending null credentials to the API in a later cell.
_missing_credentials = [
    name
    for name, value in (
        ("ARGUMENTEXT_USERID", am_user_id),
        ("ARGUMENTEXT_KEY", am_user_key),
    )
    if not value
]
if _missing_credentials:
    raise RuntimeError(
        "Missing API credentials: "
        + ", ".join(_missing_credentials)
        + ". Define them in the environment or in a .env file."
    )

In [4]:
# Name the pieces of the raw-data path so they are easy to swap out
DATASET_DIRNAME = "2020-climate-change-narrative"
SNIPPETS_FILENAME = "WebNewsEnglishSnippets.2020.csv"

csv_datapath = os.path.join(DATA_DIR, "raw", DATASET_DIRNAME)
csv_filepath = os.path.join(csv_datapath, SNIPPETS_FILENAME)

# Read the GDELT snippets file with the project loader and preview the first rows
url_df = get_gdelt_df(csv_filepath)
url_df.head()

INFO:2020-06-16 21:45:27,146:arg_mine.data.loaders: reading data from: /opt/workspace/data/raw/2020-climate-change-narrative/WebNewsEnglishSnippets.2020.csv


Unnamed: 0,datetime,title,headline_image_url,content_url,snippit,timestamp
0,20200115111500,Liberal MPs back Science Minister Karen Andrew...,https://static.ffx.io/images/$zoom_0.2627%2C$m...,https://www.smh.com.au/politics/federal/libera...,the science in her interview with The Age and...,2020-01-15 11:15:00
1,20200117184500,Several crowd-pullers on day two of KLF,https://www.thehindu.com/news/cities/kozhikode...,https://www.thehindu.com/news/cities/kozhikode...,"Guha, who talked about patriotism and jingois...",2020-01-17 18:45:00
2,20200106233000,Seven arrested after gas rig protest,https://www.stourbridgenews.co.uk/resources/im...,https://www.stourbridgenews.co.uk/news/nationa...,three demands for the Scottish and UK Governm...,2020-01-06 23:30:00
3,20200101111500,Australia sending aid to wildfire towns as dea...,https://bloximages.newyork1.vip.townnews.com/h...,https://www.heraldmailmedia.com/news/nation/au...,this season the worst on record and reignited ...,2020-01-01 11:15:00
4,20200107101500,"A hot, dry country caught between fire and a c...",,http://global.chinadaily.com.cn/a/202001/07/WS...,", which is burned to generate electricity, wit...",2020-01-07 10:15:00


## Build the `classify` request

In [5]:
# Use the article URL from the first row as the single test target
url = url_df["content_url"].iloc[0]

In [42]:
# Endpoint and request settings for the sentence classifier
CLASSIFY_BASE_URL = "https://api.argumentsearch.com/en/classify"
timeout = 5  # handed to requests as the request timeout
topic = "climate change"

# JSON body of the request: credentials, the target URL, and classifier options
payload = dict(
    topic=topic,
    userID=am_user_id,
    apiKey=am_user_key,
    targetUrl=url,
    topicRelevance="word2vec",
    predictStance=True,
    computeAttention=True,
    showOnlyArguments=False,
)

response = requests.post(CLASSIFY_BASE_URL, json=payload, timeout=timeout)
# Turn HTTP error statuses into an exception rather than continuing silently
response.raise_for_status()

DEBUG:2020-06-16 23:12:21,852:urllib3.connectionpool: Starting new HTTPS connection (1): api.argumentsearch.com:443
DEBUG:2020-06-16 23:12:23,867:urllib3.connectionpool: https://api.argumentsearch.com:443 "POST /en/classify HTTP/1.1" 200 4463


In [8]:
# Decode the JSON body of the classify response.
# NOTE(review): a later cell rebinds `response` to a plain dict, so this cell
# must run right after the raw `requests.post` cell.
out = response.json()

In [9]:
# Inspect the 'metadata' section of the decoded response
out['metadata']

{'computeAttention': True,
 'language': 'en',
 'modelVersion': 0.1,
 'predictStance': True,
 'removeDuplicates': True,
 'showOnlyArguments': False,
 'sortBy': 'argumentConfidence',
 'timeArgumentPrediction': 0.5229649543762207,
 'timeAttentionComputation': -1,
 'timeLogging': 0.12923741340637207,
 'timePreprocessing': 3.552436828613281e-05,
 'timeStancePrediction': -1,
 'timeTotal': 1.2727065086364746,
 'topic': 'climate change',
 'totalArguments': 5,
 'totalClassifiedSentences': 37,
 'totalContraArguments': 3,
 'totalNonArguments': 32,
 'totalProArguments': 2,
 'userMetadata': ''}

In [10]:
# Check which Python type the decoded 'modelVersion' value has
type(out['metadata']['modelVersion'])

float

In [20]:
# Show the URL that is passed to the classifier
url

'https://www.smh.com.au/politics/federal/liberals-speak-out-to-back-science-minister-on-climate-change-action-20200115-p53rs1.html'

In [21]:
# Show the topic string that is passed to the classifier
topic

'climate change'

In [41]:
# Issue the classify request through the project's wrapper function.
# NOTE(review): this rebinds `response`; the displayed result is a dict with
# 'metadata' and 'sentences' keys, not a requests.Response.
response = classify.classify_url_sentences(topic, url, am_user_id, am_user_key)
response

DEBUG:2020-06-16 22:43:11,670:urllib3.connectionpool: Starting new HTTPS connection (1): api.argumentsearch.com:443
DEBUG:2020-06-16 22:43:13,538:urllib3.connectionpool: https://api.argumentsearch.com:443 "POST /en/classify HTTP/1.1" 200 4202


{'metadata': {'computeAttention': False,
  'language': 'en',
  'modelVersion': 0.1,
  'predictStance': True,
  'removeDuplicates': True,
  'showOnlyArguments': False,
  'sortBy': 'argumentConfidence',
  'timeArgumentPrediction': 0.49022936820983887,
  'timeAttentionComputation': -1,
  'timeLogging': 0.036528825759887695,
  'timePreprocessing': 4.482269287109375e-05,
  'timeStancePrediction': -1,
  'timeTotal': 1.1283648014068604,
  'topic': 'climate change',
  'totalArguments': 5,
  'totalClassifiedSentences': 37,
  'totalContraArguments': 3,
  'totalNonArguments': 32,
  'totalProArguments': 2,
  'userMetadata': 'https://www.smh.com.au/politics/federal/liberals-speak-out-to-back-science-minister-on-climate-change-action-20200115-p53rs1.html'},
 'sentences': [{'argumentConfidence': 0.8324097394943237,
   'argumentLabel': 'argument',
   'sentenceOriginal': '"So she\'s right - there is no point wasting time discussing climate change - because we are not going to stop it from happening," h

In [39]:
# Build a ClassifiedSentence from the first sentence entry of the response
classify.ClassifiedSentence.from_dict(url, topic, response['sentences'][0])

ClassifiedSentence(url='https://www.smh.com.au/politics/federal/liberals-speak-out-to-back-science-minister-on-climate-change-action-20200115-p53rs1.html', doc_id='657f9dd95eb97597e34d0c05b5a93ba6', topic='climate change', argument_confidence=0.8324097394943237, argument_label='argument', sentence_original='"So she\'s right - there is no point wasting time discussing climate change - because we are not going to stop it from happening," he said of the minister\'s comments.', sentence_preprocessed='"So she\'s right - there is no point wasting time discussing climate change - because we are not going to stop it from happening," he said of the minister\'s comments.', sort_confidence=0.9131057365821765, stance_confidence=0.9938017336700293, stance_label='pro')

In [40]:
# Raw dict for the first sentence entry, as returned in the response
response['sentences'][0]

{'argumentConfidence': 0.8324097394943237,
 'argumentLabel': 'argument',
 'sentenceOriginal': '"So she\'s right - there is no point wasting time discussing climate change - because we are not going to stop it from happening," he said of the minister\'s comments.',
 'sentencePreprocessed': '"So she\'s right - there is no point wasting time discussing climate change - because we are not going to stop it from happening," he said of the minister\'s comments.',
 'sortConfidence': 0.9131057365821765,
 'stanceConfidence': 0.9938017336700293,
 'stanceLabel': 'pro'}

In [66]:
# Example of a link the API could not crawl when this was run: the request
# for the URL at position 3 came back with an error instead of sentences.
stale_url = url_df.content_url.values[3]
response = classify.classify_url_sentences(topic, stale_url, am_user_id, am_user_key)

DEBUG:2020-06-16 23:49:53,049:urllib3.connectionpool: Starting new HTTPS connection (1): api.argumentsearch.com:443
DEBUG:2020-06-16 23:49:54,187:urllib3.connectionpool: https://api.argumentsearch.com:443 "POST /en/classify HTTP/1.1" 400 103


{'error': 'Website could not be crawled or returned an empty result. Please contact an administrator.'}


Refused: Website could not be crawled or returned an empty result. Please contact an administrator.

## Run a batch of URLs

In [76]:
# Classify the first ten URLs in one call; the result unpacks into three lists
batch_urls = url_df.content_url.values[:10]
doc_list, sentence_list, refused_doc_list = classify.collect_sentences_by_topic(topic, batch_urls)

DEBUG:2020-06-16 23:59:56,794:arg_mine.api.classify: Attempting url 0, try #1
DEBUG:2020-06-16 23:59:56,796:urllib3.connectionpool: Starting new HTTPS connection (1): api.argumentsearch.com:443
DEBUG:2020-06-16 23:59:58,660:urllib3.connectionpool: https://api.argumentsearch.com:443 "POST /en/classify HTTP/1.1" 200 1211
DEBUG:2020-06-16 23:59:58,664:arg_mine.api.classify: Attempting url 1, try #1
DEBUG:2020-06-16 23:59:58,666:urllib3.connectionpool: Starting new HTTPS connection (1): api.argumentsearch.com:443
DEBUG:2020-06-17 00:00:00,154:urllib3.connectionpool: https://api.argumentsearch.com:443 "POST /en/classify HTTP/1.1" 200 432
DEBUG:2020-06-17 00:00:00,158:arg_mine.api.classify: Attempting url 2, try #1
DEBUG:2020-06-17 00:00:00,162:urllib3.connectionpool: Starting new HTTPS connection (1): api.argumentsearch.com:443
DEBUG:2020-06-17 00:00:02,447:urllib3.connectionpool: https://api.argumentsearch.com:443 "POST /en/classify HTTP/1.1" 200 1085
DEBUG:2020-06-17 00:00:02,451:arg_mine

In [78]:
# Print how many entries ended up in refused_doc_list, then display them
print(len(refused_doc_list))
refused_doc_list

4


['https://www.heraldmailmedia.com/news/nation/australia-sending-aid-to-wildfire-towns-as-death-toll-rises/article_883fa793-6c0a-547e-8f77-b5964f1d7182.html',
 'https://www.seattlepi.com/news/world/article/Cooler-weather-brings-respite-in-Australian-14950114.php',
 'https://www.airdrietoday.com/national-business/starbucks-goals-for-sustainability-will-require-significant-consumer-buy-in-2037947',
 'https://www.heraldbulletin.com/news/nation_world/davos-chief-welcomes-views-of-trump-greta-thunberg-at-forum/article_24dbb151-7132-5c48-a7df-c13b67004ec0.html']

In [62]:
# Display the collected ClassifiedSentence objects.
# NOTE(review): this dumps the whole list; consider slicing (e.g. the first
# few items) to keep the saved output small.
sentence_list

[ClassifiedSentence(url='https://www.smh.com.au/politics/federal/liberals-speak-out-to-back-science-minister-on-climate-change-action-20200115-p53rs1.html', doc_id='657f9dd95eb97597e34d0c05b5a93ba6', topic='climate change', sentence_id='c68493474ee7757287eeff11ac1bd1ea', argument_confidence=0.8324097394943237, argument_label='argument', sentence_original='"So she\'s right - there is no point wasting time discussing climate change - because we are not going to stop it from happening," he said of the minister\'s comments.', sentence_preprocessed='"So she\'s right - there is no point wasting time discussing climate change - because we are not going to stop it from happening," he said of the minister\'s comments.', sort_confidence=0.9131057365821765, stance_confidence=0.9938017336700293, stance_label='pro'),
 ClassifiedSentence(url='https://www.smh.com.au/politics/federal/liberals-speak-out-to-back-science-minister-on-climate-change-action-20200115-p53rs1.html', doc_id='657f9dd95eb97597e34