# Querying news articles via Stardog

This notebook works with the news articles that were ingested by the notebook `Demo_Article_Ingest`. The cells below execute queries over those articles and visualize the results.


In [1]:
from dataclasses import dataclass
from dotenv import load_dotenv
import json
import os
import pandas as pd
import panel as pn
import requests
import stardog

# Pull settings from the local .env file into the process environment,
# then enable Panel's 'tabulator' extension.
load_dotenv()
pn.extension('tabulator')

# Stardog database name and connection settings; the endpoint and the
# credentials are read from the environment rather than hard-coded.
sd_dna = 'dna'
sd_conn_details = dict(
    endpoint=os.getenv('STARDOG_ENDPOINT'),
    username=os.getenv('STARDOG_USER'),
    password=os.getenv('STARDOG_PASSWORD'),
)
dna = 'urn:ontoinsights:dna'

# Connection to the 'dna' database
conn = stardog.Connection(sd_dna, **sd_conn_details)

In [2]:
@dataclass
class Metadata:
    """Descriptive details recorded for a single ingested article (narrative)."""

    # Name of the repository the article was retrieved from
    repository: str
    # Characterization assigned to the article's source
    characterization: str
    # Name of the publishing source
    source: str
    # Number of sentences in the article
    numb_sentences: int
    # Article title
    title: str

# Repositories whose articles are analyzed in this notebook
article_collection = ['abortion', 'debate']

# News sources grouped by characterization
international = (
    'Al Jazeera',
    'Economist',
    'Financial Express',
    'Globe and Mail',
    'Guardian',
    'Hindustan Times',
    'Indo-Asian News Service',
    'Irish Times',
    'Namibian',
    'Radio France Internationale',
    'Toronto Star',
)
center = (
    'Christian Science Monitor',
    'Wall Street Journal',
)
conservative = (
    'Breitbart',
    'Fox News',
    'Washington Times',
)
liberal = (
    'Huffington Post',
    'New York Times',
    'USA Today',
    'Washington Post',
)

In [3]:
# Associate graph IRIs with sources

# Endpoint of the local DNA service that lists the narratives held in a repository
NARRATIVES_URL = 'http://127.0.0.1:5000/dna/v1/repositories/narratives'


def characterize_source(source: str) -> str:
    """Return the characterization label for a news source.

    The groups are checked in the order international, center, conservative.
    Any source found in none of them is labelled 'liberal' (this includes
    sources that are not listed anywhere).
    """
    groups = (('international', international),
              ('center', center),
              ('conservative', conservative))
    for label, sources in groups:
        if source in sources:
            return label
    return 'liberal'


# narrativeId -> Metadata for every narrative in every repository of interest
# NOTE(review): entries are keyed by narrativeId alone, so an id that occurs in
# more than one repository keeps only the last one seen - confirm ids are unique.
article_dict = dict()

for repo in article_collection:
    # A timeout keeps the cell from hanging if the service is not running, and
    # raise_for_status() reports an HTTP failure directly instead of as a
    # confusing KeyError/JSON error further down.
    response = requests.get(NARRATIVES_URL, params={'repository': repo}, timeout=30)
    response.raise_for_status()
    narratives = response.json()['narratives']     # Array of narrative metadata dictionaries
    for narr in narratives:
        narr_meta = narr['narrativeMetadata']
        source = narr_meta['source']
        article_dict[narr['narrativeId']] = Metadata(repo, characterize_source(source), source,
                                                     narr['numberOfSentences'], narr_meta['title'])

# One row per narrative, indexed by narrativeId
article_df = pd.DataFrame.from_dict(article_dict, orient='index')

In [4]:
# Show the article table with checkbox row selection, hiding the index column
# and with pagination turned off.
df_widget = pn.widgets.Tabulator(
    article_df,
    selectable='checkbox',
    show_index=False,
    pagination=None,
)
df_widget

## Select two or more articles to analyze from the table above

In [3]:
# Queries

# Article length (number of sentences)
# Lists every :Narrative with its source and sentence count, longest first.
length_query = (
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> '
    'select * where { '
    '    graph ?defGraph { ?narrative a :Narrative ; :source ?source ; :number_sentences ?numbSents} '
    '} ORDER BY DESC(?numbSents)'
)

# Agent mentions
# Counts, per agent, the sentences that mention it, most-mentioned first.
# Agent types are subclasses of :Agent that are not also subclasses of :Location.
# NOTE(review): ?repo and "?narratives" are not bound inside the query text;
# presumably they are placeholders substituted before execution - confirm.
names_query = (
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> '
    'select ?agent (COUNT(?sent) as ?cnt) where { '
    '    BIND(IRI(CONCAT(CONCAT("urn:ontoinsights:dna:", ?repo), "_default")) as ?defGraph) '
    '    VALUES ?narrative {"?narratives"} '
    '    BIND(IRI(CONCAT(CONCAT(CONCAT("urn:ontoinsights:dna:", ?repo), "_"), ?narrative)) as ?narrGraph) '
    '    ?type rdfs:subClassOf* :Agent . FILTER NOT EXISTS {?type rdfs:subClassOf* :Location} '
    '    {graph ?defGraph {?agent a ?type}} '
    '    {graph ?narrGraph {?sent a :Sentence ; :mentions ?agent}} '
    '} GROUP BY ?agent ORDER BY DESC(?cnt)'
)

# Sentiment
# For each :Narrative, selects its source, sentiment and sentiment explanation
# together with its conservative / neutral / liberal rankings.
# (The final '}' fragment previously lacked its opening quote, which made the
# whole cell a syntax error.)
sentiment_query = (
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> '
    'select ?source ?sent ?conserv ?neutral ?liberal ?expl where { '
    '    graph ?defGraph {?narr a :Narrative ; :source ?source; :sentiment ?sent ; :sentiment_explanation ?expl ; '
    '                           :ranking_conservative ?conserv ; :ranking_neutral ?neutral ; :ranking_liberal ?liberal} '
    '}'
)

# Average grade level
# Over the sentences of a narrative graph: how many there are, plus the
# minimum, maximum and average of their :grade_level values.
grade_query = (
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> '
    'select (COUNT(?sent) as ?cnt) (MIN(?grade) as ?min) (MAX(?grade) as ?max) (AVG(?grade) as ?avg) where { '
    '    graph ?narrGraph {?sent a :Sentence ; :grade_level ?grade} '
    '}'
)

# Rhetorical devices (note that only a subset of the devices are reported by this query)
# Counts, per listed device, the sentences tagged with that :rhetorical_device.
rhetorical_query = (
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> '
    'select ?device (COUNT(?sent) as ?cnt) where { '
    '    VALUES ?device {"ad baculum" "ad hominem" "ad populum" "exceptionalism" "expletive" "hyperbole" '
    '                    "invective" "loaded language" "paralipsis" "pathos"} '
    '    {graph ?narrGraph {?sent a :Sentence ; :rhetorical_device ?device}} '
    '} GROUP BY ?device'
)

# Number of overall quotations and their attribution
# Total count of :Quote instances in a narrative graph.
quote_query = (
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> '
    'select (COUNT(?quote) as ?numbQuotes) where { '
    '    graph ?narrGraph { ?quote a :Quote} '
    '}'
)
# Counts the quotations attributed to each agent in a narrative graph.
# The label is fetched (optionally) in case the quote is attributed to
# non-specific entities such as a "court's justices".
# The braces were previously unbalanced (the GRAPH block was closed before the
# OPTIONAL and then closed again after it), which is invalid SPARQL. The
# OPTIONAL now sits inside the GRAPH block, as the trailing '}}' indicates.
# NOTE(review): this assumes agent labels are stored in the narrative graph - confirm.
attribution_query = (
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> '
    'select ?agent ?label (COUNT(?quote) as ?cnt) where { '
    '    graph ?narrGraph {?quote a :Quote ; :attributed_to ?agent . '
    '        OPTIONAL {?agent rdfs:label ?label}} '
    '} GROUP BY ?agent ?label'
)

# Quotation sentiment, avg grade level and rhetorical devices
# Queries from above with ?sent and :Sentence replaced by ?quote and :Quote

# Number of quotes per sentiment value
quote_sentiment_query = (
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> '
    'select ?sentiment (COUNT(?quote) as ?cnt) where { '
    '    graph ?narrGraph {?quote a :Quote ; :sentiment ?sentiment} '
    '} GROUP BY ?sentiment'
)

# Number of quotes per listed rhetorical device
quote_rhetorical_query = (
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> '
    'select ?device (COUNT(?quote) as ?cnt) where { '
    'VALUES ?device {"ad baculum" "ad hominem" "ad populum" "exceptionalism" "expletive" "hyperbole" '
    '                "invective" "loaded language" "paralipsis" "pathos"} '
    '    {graph ?narrGraph {?quote a :Quote ; :rhetorical_device ?device}} '
    '} GROUP BY ?device'
)

# Appeal
# For each :Narrative, its conservative / liberal / neutral rankings and the
# matching interpretations.
# NOTE(review): the neutral interpretation is bound to ?conserv_neutral, which
# looks like a slip for something like ?neutral_int. It is kept as-is because
# 'select *' exposes the variable name to whatever reads the results - confirm
# before renaming.
appeal_query = (
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> '
    'select * where { '
    '    graph ?defGraph {?narrative a :Narrative ; '
    '        :ranking_conservative ?conserv ; :ranking_liberal ?lib ; :ranking_neutral ?neutral ; '
    '        :interpretation_conservative ?conserv_int ; :interpretation_liberal ?lib_int ; '
    '        :interpretation_neutral ?conserv_neutral} '
    '}'
)

# Events
# For each sentence of a narrative graph, lists (ordered by sentence offset)
# the event types found through the sentence's :has_semantic event, where the
# type is a subclass of :EventAndState. The type and text are taken from one of:
#   - the event itself, unless it is marked :negated true
#   - the event's :has_topic value
#   - the event's :has_quantification value
# The quantification branch previously bound :text to ?eventType (already bound
# to the rdf:type), so it could not yield the text; it now binds ?eventText
# like the other two branches.
event_query = (
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> '
    'select distinct ?offset ?text ?eventType ?eventText where { '
    '    ?eventType rdfs:subClassOf* :EventAndState . '
    '    {graph ?narrGraph {?sent a :Sentence ; :offset ?offset ; :text ?text ; :has_semantic ?event . '
    '                {{?event a ?eventType ; :text ?eventText . FILTER NOT EXISTS{?event :negated true}} '
    '                     UNION {?event :has_topic ?topic . ?topic rdf:type ?eventType ; :text ?eventText} '
    '                     UNION {?event :has_quantification ?quant . ?quant rdf:type ?eventType; '
    '                            :text ?eventText}}}} '
    '} ORDER BY ?offset ?eventType '
)

## Query processing