# Querying news articles via Stardog

This notebook queries the news articles that were ingested by the `Demo_Article_Ingest` notebook. The cells below execute the queries against Stardog and display the results.


In [1]:
import os
import requests
import stardog

# Name of the Stardog database to connect to
sd_dna = 'dna'
# Connection settings are read from the environment so no credentials live in the notebook
sd_conn_details = dict(
    endpoint=os.getenv('STARDOG_ENDPOINT'),
    username=os.getenv('STARDOG_USER'),
    password=os.getenv('STARDOG_PASSWORD'),
)
# Same URN (minus the trailing colon) that the queries bind to the ':' and 'dna:' prefixes
dna = 'urn:ontoinsights:dna'

# Single connection reused by every query cell below
conn = stardog.Connection(sd_dna, **sd_conn_details)

In [2]:
# Topics to report on; each is used as a repository name and as a graph-name prefix
article_collection = [
    'fl_abortion',
    'landslide',
    'trump_trial',
]

# News outlets; a source's position in this list is its slot in the per-source count arrays
sources = [
    'Al Jazeera',
    'Breitbart',
    'CNN',
    'Fox News',
    'Huffington Post',
    'New York Times',
    'Wall Street Journal',
    'Washington Times',
]

In [3]:
# Queries

# Article length: one row per narrative with its source and sentence count, longest first.
# '?defGraph' is a placeholder that callers replace with a concrete graph name.
length_query = (
    'prefix : <urn:ontoinsights:dna:> '
    'prefix dna: <urn:ontoinsights:dna:> '
    'prefix dc: <http://purl.org/dc/terms/> '
    'select * where { '
    '    graph ?defGraph { ?narrative a :Narrative ; :source ?source ; :number_sentences ?numbSents} '
    '} ORDER BY DESC(?numbSents)'
)

# Mentions of persons or organizations: counts the sentences that mention each agent.
# Callers substitute '?baseType', '?defGraph' and '?narrGraph' before running the query;
# agents typed as :GeopoliticalEntity are excluded.
names_query = (
    'prefix : <urn:ontoinsights:dna:> '
    'prefix dna: <urn:ontoinsights:dna:> '
    'prefix dc: <http://purl.org/dc/terms/> '
    'select ?agent (COUNT(?sent) as ?cnt) where { '
    '    ?type rdfs:subClassOf* ?baseType '
    '    {graph ?defGraph {?agent a ?type . FILTER NOT EXISTS{?agent a :GeopoliticalEntity}}} '
    '    {graph ?narrGraph {?sent a :Sentence ; :mentions ?agent}} '
    '} GROUP BY ?agent'
)

# Sentiment: number of sentences per sentiment value within one narrative graph
sentiment_query = (
    'prefix : <urn:ontoinsights:dna:> '
    'prefix dna: <urn:ontoinsights:dna:> '
    'prefix dc: <http://purl.org/dc/terms/> '
    'select ?sentiment (COUNT(?sent) as ?cnt) where { '
    '    graph ?narrGraph {?sent a :Sentence ; :sentiment ?sentiment} '
    '} GROUP BY ?sentiment'
)

# Grade level: count, minimum, maximum and average over a narrative's sentences
grade_query = (
    'prefix : <urn:ontoinsights:dna:> '
    'prefix dna: <urn:ontoinsights:dna:> '
    'prefix dc: <http://purl.org/dc/terms/> '
    'select (COUNT(?sent) as ?cnt) (MIN(?grade) as ?min) (MAX(?grade) as ?max) (AVG(?grade) as ?avg) where { '
    '    graph ?narrGraph {?sent a :Sentence ; :grade_level ?grade} '
    '}'
)

# Rhetorical devices: sentences per device in one narrative graph.
# Only the devices listed in the VALUES clause are reported.
rhetorical_query = (
    'prefix : <urn:ontoinsights:dna:> '
    'prefix dna: <urn:ontoinsights:dna:> '
    'prefix dc: <http://purl.org/dc/terms/> '
    'select ?device (COUNT(?sent) as ?cnt) where { '
    '    VALUES ?device {"ad baculum" "ad hominem" "ad populum" "exceptionalism" "expletive" "hyperbole" '
    '                    "invective" "loaded language" "paralipsis" "pathos"} '
    '    {graph ?narrGraph {?sent a :Sentence ; :rhetorical_device ?device}} '
    '} GROUP BY ?device'
)

# Quotations: total number of :Quote instances in one narrative graph
quote_query = (
    'prefix : <urn:ontoinsights:dna:> '
    'prefix dna: <urn:ontoinsights:dna:> '
    'prefix dc: <http://purl.org/dc/terms/> '
    'select (COUNT(?quote) as ?numbQuotes) where { '
    '    graph ?narrGraph { ?quote a :Quote} '
    '}'
)

# Quote attribution: quotes per agent. The optional label is fetched in case the quote is
# attributed to a non-specific entity such as a "court's justices".
attribution_query = (
    'prefix : <urn:ontoinsights:dna:> '
    'prefix dna: <urn:ontoinsights:dna:> '
    'prefix dc: <http://purl.org/dc/terms/> '
    'select ?agent ?label (COUNT(?quote) as ?cnt) where { '
    '    graph ?narrGraph {?quote a :Quote ; :attributed_to ?agent . '
    '        OPTIONAL {?agent rdfs:label ?label}} '
    '} GROUP BY ?agent ?label'
)

# Quote-level variants of the sentence queries: same shape, with ?sent / :Sentence
# swapped for ?quote / :Quote.

# Quotes per sentiment value in one narrative graph
quote_sentiment_query = (
    'prefix : <urn:ontoinsights:dna:> '
    'prefix dna: <urn:ontoinsights:dna:> '
    'prefix dc: <http://purl.org/dc/terms/> '
    'select ?sentiment (COUNT(?quote) as ?cnt) where { '
    '    graph ?narrGraph {?quote a :Quote ; :sentiment ?sentiment} '
    '} GROUP BY ?sentiment'
)

# Quotes per rhetorical device (restricted to the devices in the VALUES clause)
quote_rhetorical_query = (
    'prefix : <urn:ontoinsights:dna:> '
    'prefix dna: <urn:ontoinsights:dna:> '
    'prefix dc: <http://purl.org/dc/terms/> '
    'select ?device (COUNT(?quote) as ?cnt) where { '
    'VALUES ?device {"ad baculum" "ad hominem" "ad populum" "exceptionalism" "expletive" "hyperbole" '
    '                "invective" "loaded language" "paralipsis" "pathos"} '
    '    {graph ?narrGraph {?quote a :Quote ; :rhetorical_device ?device}} '
    '} GROUP BY ?device'
)

# Appeal: per-narrative conservative / liberal / neutral rankings and their interpretations.
# NOTE(review): the neutral interpretation is bound to ?conserv_neutral, which looks like a
# naming slip; it is left unchanged because 'select *' exposes the variable name in the results.
appeal_query = (
    'prefix : <urn:ontoinsights:dna:> '
    'prefix dna: <urn:ontoinsights:dna:> '
    'prefix dc: <http://purl.org/dc/terms/> '
    'select * where { '
    '    graph ?defGraph {?narrative a :Narrative ; '
    '        :ranking_conservative ?conserv ; :ranking_liberal ?lib ; :ranking_neutral ?neutral ; '
    '        :interpretation_conservative ?conserv_int ; :interpretation_liberal ?lib_int ; '
    '        :interpretation_neutral ?conserv_neutral} '
    '}'
)

# Events: for each sentence of a narrative graph, the event types found in its semantics
# together with the text that triggered them, ordered by sentence offset.
# ?eventType ranges over :EventAndState and its subclasses. Three alternatives feed the result:
#   1. the semantic itself is typed as an event (negated events are excluded),
#   2. the semantic's topic is typed as an event,
#   3. the semantic's quantification is typed as an event.
# Fix: the third alternative previously bound the text to ?eventType (the same variable that
# holds the type), so it could never match; it now binds ?eventText like the other two.
event_query = \
    'prefix : <urn:ontoinsights:dna:> prefix dna: <urn:ontoinsights:dna:> prefix dc: <http://purl.org/dc/terms/> ' \
    'select distinct ?offset ?text ?eventType ?eventText where { ' \
    '    ?eventType rdfs:subClassOf* :EventAndState . ' \
    '    {graph ?narrGraph {?sent a :Sentence ; :offset ?offset ; :text ?text ; :has_semantic ?event . ' \
    '                {{?event a ?eventType ; :text ?eventText . FILTER NOT EXISTS{?event :negated true}} ' \
    '                     UNION {?event :has_topic ?topic . ?topic rdf:type ?eventType ; :text ?eventText} ' \
    '                     UNION {?event :has_quantification ?quant . ?quant rdf:type ?eventType; ' \
    '                            :text ?eventText}}}} ' \
    '} ORDER BY ?offset ?eventType '

## Query processing

In [4]:
# Sentence totals and quote share for every article, reported topic by topic
for topic in article_collection:
    counts_dict = {}  # Narrative source -> [number of sentences, quotes as a rounded % of sentences]
    defGraph = f'dna:{topic}_default'
    query_results = conn.select(length_query.replace('?defGraph', defGraph),
                                content_type='application/sparql-results+json')
    found = 'results' in query_results and 'bindings' in query_results['results']
    bindings = query_results['results']['bindings'] if found else []
    for binding in bindings:
        # The text after ':dna:Narrative_' in the narrative IRI is reused to name the narrative's graph
        narr = binding['narrative']['value'].split(':dna:Narrative_')[1]
        source = binding['source']['value']
        numbSents = int(binding['numbSents']['value'])
        narrGraph = f'dna:{topic}_{narr}'
        query_results = conn.select(quote_query.replace('?narrGraph', narrGraph),
                                    content_type='application/sparql-results+json')
        if 'results' not in query_results or 'bindings' not in query_results['results']:
            continue  # No usable result for this narrative; leave its source out of the report
        quote_binding = query_results['results']['bindings'][0]
        numbQuotes = int(quote_binding['numbQuotes']['value'])
        counts_dict[source] = [numbSents, round(numbQuotes * 100/numbSents)]
    # One report per topic, followed by a blank line
    print(topic, 'source -> # sentences, % quotes', counts_dict, '', sep='\n')

fl_abortion
source -> # sentences, % quotes
{'CNN': [47, 26], 'Huffington Post': [37, 35], 'New York Times': [36, 25], 'Wall Street Journal': [36, 14], 'Al Jazeera': [35, 20], 'Breitbart': [32, 28], 'Washington Times': [17, 0], 'Fox News': [13, 54]}

landslide
source -> # sentences, % quotes
{'Huffington Post': [32, 19], 'Washington Times': [28, 21], 'Al Jazeera': [19, 21], 'Breitbart': [16, 19], 'Fox News': [15, 33], 'New York Times': [15, 0], 'CNN': [13, 0]}

trump_trial
source -> # sentences, % quotes
{'New York Times': [122, 16], 'Fox News': [64, 56], 'Wall Street Journal': [60, 18], 'Al Jazeera': [51, 47], 'Washington Times': [50, 16], 'Huffington Post': [44, 14], 'Breitbart': [28, 18], 'CNN': [23, 39]}



In [5]:
# Share of each article's sentences carrying positive, negative or neutral sentiment
for topic in article_collection:
    counts_dict = {}  # Narrative source -> [% positive, % negative, % neutral]
    defGraph = f'dna:{topic}_default'
    query_results = conn.select(length_query.replace('?defGraph', defGraph),
                                content_type='application/sparql-results+json')
    found = 'results' in query_results and 'bindings' in query_results['results']
    bindings = query_results['results']['bindings'] if found else []
    for binding in bindings:
        narr = binding['narrative']['value'].split(':dna:Narrative_')[1]
        source = binding['source']['value']
        numbSents = int(binding['numbSents']['value'])
        narrGraph = f'dna:{topic}_{narr}'
        query_results = conn.select(sentiment_query.replace('?narrGraph', narrGraph),
                                    content_type='application/sparql-results+json')
        found = 'results' in query_results and 'bindings' in query_results['results']
        sent_bindings = query_results['results']['bindings'] if found else []
        tally = {'positive': 0, 'negative': 0, 'neutral': 0}
        for sent_binding in sent_bindings:
            sentiment = sent_binding['sentiment']['value']
            # Any value other than 'positive' / 'negative' lands in the neutral slot
            bucket = sentiment if sentiment in ('positive', 'negative') else 'neutral'
            tally[bucket] = int(sent_binding['cnt']['value'])
        # Percentages are relative to the sentence count recorded on the narrative
        counts_dict[source] = [round(tally[label] * 100/numbSents)
                               for label in ('positive', 'negative', 'neutral')]
    print(topic, 'source -> %pos, %neg, %neutral of all sentences', counts_dict, '', sep='\n')

fl_abortion
source -> %pos, %neg, %neutral of all sentences
{'CNN': [89, 4, 6], 'Huffington Post': [0, 92, 8], 'New York Times': [3, 6, 92], 'Wall Street Journal': [92, 3, 6], 'Al Jazeera': [0, 94, 6], 'Breitbart': [81, 0, 19], 'Washington Times': [0, 12, 88], 'Fox News': [62, 15, 23]}

landslide
source -> %pos, %neg, %neutral of all sentences
{'Huffington Post': [0, 94, 6], 'Washington Times': [0, 93, 7], 'Al Jazeera': [0, 84, 16], 'Breitbart': [0, 94, 6], 'Fox News': [0, 100, 0], 'New York Times': [7, 27, 67], 'CNN': [0, 92, 8]}

trump_trial
source -> %pos, %neg, %neutral of all sentences
{'New York Times': [0, 5, 95], 'Fox News': [0, 97, 3], 'Wall Street Journal': [0, 75, 25], 'Al Jazeera': [0, 10, 90], 'Washington Times': [0, 6, 94], 'Huffington Post': [2, 2, 95], 'Breitbart': [79, 18, 4], 'CNN': [0, 35, 65]}



In [6]:
# Overall % of quotes with pos, neg, neutral sentiments per article
for topic in article_collection:
    quote_counts_dict = dict()  # Key = Narrative source; Value = array of percentage pos, neg, neutral
    # Narrative ids and their sources are listed by the REST service on localhost.
    # The timeout (seconds) keeps the cell from hanging if the service is unreachable, and
    # raise_for_status() reports HTTP errors directly instead of a later KeyError.
    response = requests.get(f'http://127.0.0.1:5000/dna/v1/repositories/narratives?repository={topic}',
                            timeout=30)
    response.raise_for_status()
    result = response.json()
    narr_dict = {narrative['narrativeId']: narrative['narrativeMetadata']['source']
                 for narrative in result['narratives']}
    for narr_id, source in narr_dict.items():
        narrGraph = f'dna:{topic}_{narr_id}'
        query_results = conn.select(quote_query.replace('?narrGraph', narrGraph),
                                    content_type='application/sparql-results+json')
        quote_bindings = query_results.get('results', {}).get('bindings', [])
        # An empty binding list is treated like "no quotes" rather than raising IndexError
        numbQuotes = int(quote_bindings[0]['numbQuotes']['value']) if quote_bindings else 0
        if numbQuotes == 0:
            continue    # Nothing to report (and nothing to divide by) for this article
        query_results = conn.select(quote_sentiment_query.replace('?narrGraph', narrGraph),
                                    content_type='application/sparql-results+json')
        sent_bindings = query_results.get('results', {}).get('bindings', [])
        numbPos = numbNeg = numbNeutral = 0
        for sent_binding in sent_bindings:
            sentiment = sent_binding['sentiment']['value']
            cnt = int(sent_binding['cnt']['value'])
            if sentiment == 'positive':
                numbPos = cnt
            elif sentiment == 'negative':
                numbNeg = cnt
            else:
                numbNeutral = cnt
        quote_counts_dict[source] = [round(numb * 100/numbQuotes)
                                     for numb in (numbPos, numbNeg, numbNeutral)]
    print(topic)
    print('source -> %pos, %neg, %neutral of all full sentence quotes')
    print(quote_counts_dict)
    print()

fl_abortion
source -> %pos, %neg, %neutral of all full sentence quotes
{'Al Jazeera': [43, 43, 14], 'Wall Street Journal': [60, 0, 40], 'Breitbart': [22, 33, 44], 'Fox News': [14, 86, 0], 'CNN': [58, 17, 25], 'Huffington Post': [23, 38, 38], 'New York Times': [33, 67, 0]}

landslide
source -> %pos, %neg, %neutral of all full sentence quotes
{'Al Jazeera': [0, 100, 0], 'Breitbart': [0, 100, 0], 'Fox News': [0, 40, 60], 'Washington Times': [0, 67, 33], 'Huffington Post': [0, 67, 33]}

trump_trial
source -> %pos, %neg, %neutral of all full sentence quotes
{'Al Jazeera': [21, 54, 25], 'Wall Street Journal': [36, 45, 18], 'Breitbart': [0, 40, 60], 'Fox News': [22, 47, 31], 'Washington Times': [12, 62, 25], 'CNN': [0, 56, 44], 'Huffington Post': [17, 33, 50], 'New York Times': [21, 42, 37]}



In [7]:
# Mentions of persons or orgs across 7-8 articles
for baseType in ('dna:OrganizationalEntity', 'dna:Person'):
    for topic in article_collection:
        agent_mentions_dict = dict()  # Key = person or org instance; Value = array of counts by source
        defGraph = f'dna:{topic}_default'
        # Narrative ids and their sources are listed by the REST service on localhost.
        # The timeout (seconds) avoids an indefinite hang; raise_for_status() reports HTTP errors.
        response = requests.get(f'http://127.0.0.1:5000/dna/v1/repositories/narratives?repository={topic}',
                                timeout=30)
        response.raise_for_status()
        result = response.json()
        narr_dict = {narrative['narrativeId']: narrative['narrativeMetadata']['source']
                     for narrative in result['narratives']}
        for narr_id, source in narr_dict.items():
            narrGraph = f'dna:{topic}_{narr_id}'
            query_results = conn.select(
                names_query.replace('?baseType', baseType).replace('?defGraph', defGraph).replace('?narrGraph', narrGraph),
                content_type='application/sparql-results+json')
            bindings = query_results.get('results', {}).get('bindings', [])
            for binding in bindings:
                if 'agent' not in binding:     # No OrgEntities or no Persons mentioned in that article
                    continue
                agent = binding['agent']['value'].split(':dna:')[1]
                count = int(binding['cnt']['value'])
                # One slot per entry in `sources`, sized from the list instead of a hard-coded 8
                count_array = agent_mentions_dict.setdefault(agent, [0] * len(sources))
                # Should only be 1 count per agent per source; an unknown source raises ValueError
                count_array[sources.index(source)] = count
        print(topic, baseType)
        print("entity -> array of counts for ")
        print("'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'")
        print(agent_mentions_dict)
        print()

fl_abortion dna:OrganizationalEntity
entity -> array of counts for 
'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'
{'Pew_Research_Center': [1, 0, 0, 0, 0, 0, 0, 0], 'Electoral_College': [2, 0, 0, 0, 0, 0, 0, 0], 'Center_for_Reproductive_Rights': [1, 0, 0, 0, 0, 0, 0, 0], 'University_of_North_Florida_Public_Opinion_Research_Lab': [1, 0, 0, 0, 0, 0, 0, 0], 'ACLU_of_Florida': [1, 0, 0, 0, 0, 0, 0, 0], 'Florida_Supreme_Court': [3, 5, 3, 1, 1, 2, 1, 1], 'US_Supreme_Court': [2, 1, 2, 1, 2, 3, 2, 1], 'American_Civil_Liberties_Union': [1, 1, 0, 0, 2, 1, 0, 1], 'GOP': [0, 0, 0, 0, 0, 0, 2, 0], 'Alabama_Supreme_Court': [0, 0, 0, 0, 0, 0, 1, 0], 'White_House': [0, 0, 1, 0, 0, 0, 1, 0], 'Senate': [0, 0, 0, 0, 0, 0, 5, 0], 'House': [0, 0, 0, 0, 0, 0, 2, 0], 'Congress': [0, 0, 0, 0, 0, 0, 1, 0], 'Wall_Street_Journal': [0, 0, 0, 0, 0, 0, 1, 0], 'EMILYs_List': [0, 1, 0, 0, 0, 0, 0, 0], 'NBC_News': [0, 1, 0, 0, 0, 0, 0, 0], 'Florida_Voice_for_the_Unborn': [

In [8]:
# Quote attribution across 7-8 articles
for topic in article_collection:
    attrib_dict = dict()  # Key = person instance (or label); Value = array of attributed-quote counts by source
    # Narrative ids and their sources are listed by the REST service on localhost.
    # The timeout (seconds) avoids an indefinite hang; raise_for_status() reports HTTP errors.
    response = requests.get(f'http://127.0.0.1:5000/dna/v1/repositories/narratives?repository={topic}',
                            timeout=30)
    response.raise_for_status()
    result = response.json()
    narr_dict = {narrative['narrativeId']: narrative['narrativeMetadata']['source']
                 for narrative in result['narratives']}
    for narr_id, source in narr_dict.items():
        narrGraph = f'dna:{topic}_{narr_id}'
        query_results = conn.select(
            attribution_query.replace('?narrGraph', narrGraph), content_type='application/sparql-results+json')
        bindings = query_results.get('results', {}).get('bindings', [])
        for binding in bindings:
            if 'agent' not in binding:     # No quotations or no quotation attributions
                continue
            agent = binding['agent']['value'].split(':dna:')[1]
            label = binding['label']['value'] if 'label' in binding else ''
            count = int(binding['cnt']['value'])
            # Agents whose id starts with 'Noun_' are reported by their label when one exists
            agent_name = label if agent.startswith('Noun_') and label else agent
            # One slot per entry in `sources`, sized from the list instead of a hard-coded 8
            count_array = attrib_dict.setdefault(agent_name, [0] * len(sources))
            # Should only be 1 count per agent per source; an unknown source raises ValueError
            count_array[sources.index(source)] = count
    print(topic)
    print("entity -> array of attribution counts for ")
    print("'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'")
    print(attrib_dict)
    print()

fl_abortion
entity -> array of attribution counts for 
'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'
{'Joe_Biden': [2, 0, 0, 0, 0, 0, 1, 0], 'Amendment_4': [1, 0, 0, 0, 0, 0, 0, 0], 'ACLU_of_Florida': [2, 0, 0, 0, 0, 0, 0, 0], 'Matt_Grodsky': [0, 0, 0, 0, 0, 0, 1, 0], 'Julie_Chavez_Rodriguez': [0, 0, 3, 0, 0, 0, 1, 0], 'Brian_Hughes': [0, 0, 0, 0, 0, 0, 1, 0], 'Alice_Stewart': [0, 0, 0, 0, 0, 0, 1, 0], 'Florida_Supreme_Court': [0, 1, 0, 0, 3, 0, 0, 0], 'Katie_Daniel': [0, 1, 0, 2, 0, 0, 0, 0], 'Ashley_Moody': [0, 0, 0, 2, 0, 0, 0, 0], 'Julia_Friedland': [0, 0, 0, 2, 0, 0, 0, 0], 'Donald_Trump': [0, 0, 1, 0, 0, 0, 0, 0], 'Lauren_Brenzel': [0, 0, 3, 0, 3, 2, 0, 0], 'Alexandra_Mandado': [0, 0, 2, 0, 0, 0, 0, 0], 'Cecilia_Grande': [0, 0, 0, 0, 2, 0, 0, 0], 'Whitney_White': [0, 0, 0, 0, 1, 0, 0, 0], 'Robyn_Schickler': [0, 0, 0, 0, 3, 0, 0, 0], 'Carlos_Muniz': [0, 0, 0, 0, 1, 0, 0, 0], 'Jamie_R_Grosshans': [0, 0, 0, 0, 0, 1, 0, 0], 'Jorge_Labarg

In [9]:
# Average grade level
for topic in article_collection:
    grade_dict = dict()  # Key = source; Value = (min, max, avg) grade level for sentences from source
    # Narrative ids and their sources are listed by the REST service on localhost.
    # The timeout (seconds) avoids an indefinite hang; raise_for_status() reports HTTP errors.
    response = requests.get(f'http://127.0.0.1:5000/dna/v1/repositories/narratives?repository={topic}',
                            timeout=30)
    response.raise_for_status()
    result = response.json()
    narr_dict = {narrative['narrativeId']: narrative['narrativeMetadata']['source']
                 for narrative in result['narratives']}
    for narr_id, source in narr_dict.items():
        narrGraph = f'dna:{topic}_{narr_id}'
        query_results = conn.select(
            grade_query.replace('?narrGraph', narrGraph), content_type='application/sparql-results+json')
        bindings = query_results.get('results', {}).get('bindings', [])
        for binding in bindings:
            # The aggregate row may omit min/max/avg when no sentence has a grade level;
            # skip it instead of failing with KeyError
            if not all(key in binding for key in ('min', 'max', 'avg')):
                continue
            minimum = float(binding['min']['value'])
            maximum = float(binding['max']['value'])
            avg = float(binding['avg']['value'])
            grade_dict[source] = round(minimum), round(maximum), round(avg)
    print(topic)
    # The dict is keyed by source, so the header describes the tuple
    # (the earlier header was copied from the attribution cell)
    print("source -> (min, max, avg) grade level of the article's sentences")
    print(grade_dict)
    print()

fl_abortion
source -> array of attribution counts for
'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'
{'Al Jazeera': (5, 12, 10), 'Wall Street Journal': (8, 12, 8), 'Breitbart': (8, 12, 11), 'Fox News': (8, 12, 9), 'Washington Times': (8, 12, 9), 'CNN': (5, 12, 9), 'Huffington Post': (8, 12, 8), 'New York Times': (8, 12, 12)}

landslide
source -> array of attribution counts for
'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'
{'Al Jazeera': (5, 10, 6), 'Breitbart': (5, 8, 7), 'Fox News': (3, 8, 4), 'Washington Times': (5, 10, 8), 'CNN': (5, 8, 6), 'Huffington Post': (5, 9, 8), 'New York Times': (5, 8, 6)}

trump_trial
source -> array of attribution counts for
'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'
{'Al Jazeera': (8, 9, 8), 'Wall Street Journal': (5, 9, 5), 'Breitbart': (6, 12, 7), 'Fox News': (5, 12, 8), 'Washington Times': (5, 8, 5), 'CNN': (5,

In [10]:
# Appeal
for topic in article_collection:
    appeal_dict = dict()  # Key = source; Value = (conservative, liberal, neutral) appeal rankings
    defGraph = f'dna:{topic}_default'
    # Narrative ids and their sources are listed by the REST service on localhost.
    # The timeout (seconds) avoids an indefinite hang; raise_for_status() reports HTTP errors.
    response = requests.get(f'http://127.0.0.1:5000/dna/v1/repositories/narratives?repository={topic}',
                            timeout=30)
    response.raise_for_status()
    result = response.json()
    narr_dict = {narrative['narrativeId']: narrative['narrativeMetadata']['source']
                 for narrative in result['narratives']}
    # The appeal query reads only the topic's default graph and already returns one row per
    # narrative, so it is run once per topic rather than once per narrative. It is skipped
    # when the service lists no narratives, as before.
    if narr_dict:
        query_results = conn.select(
            appeal_query.replace('?defGraph', defGraph), content_type='application/sparql-results+json')
        bindings = query_results.get('results', {}).get('bindings', [])
    else:
        bindings = []
    for binding in bindings:
        narr_id = binding['narrative']['value'].split(':dna:Narrative_')[1]
        conservative = int(binding['conserv']['value'])
        liberal = int(binding['lib']['value'])
        neutral = int(binding['neutral']['value'])
        # A narrative that the REST service did not list raises KeyError here
        appeal_dict[narr_dict[narr_id]] = conservative, liberal, neutral
    print(topic)
    print("source -> conservative, liberal, neutral appeal on scale 1-5 (5=most appeal)")
    print(appeal_dict)
    print()

    # List every audience whose ranking is above 3; a source can appear more than once
    for key, value in appeal_dict.items():
        conserv, lib, neutral = value
        if conserv > 3:
            print(key, 'conservative')
        if lib > 3:
            print(key, 'liberal')
        if neutral > 3:
            print(key, 'neutral')
    print()

fl_abortion
source -> conservative, liberal, neutral appeal on scale 1-5 (5=most appeal)
{'Al Jazeera': (2, 4, 3), 'Wall Street Journal': (2, 4, 3), 'Breitbart': (3, 3, 4), 'Fox News': (3, 2, 4), 'Washington Times': (5, 1, 3), 'CNN': (3, 3, 4), 'Huffington Post': (2, 4, 3), 'New York Times': (3, 3, 4)}

Al Jazeera liberal
Wall Street Journal liberal
Breitbart neutral
Fox News neutral
Washington Times conservative
CNN neutral
Huffington Post liberal
New York Times neutral

landslide
source -> conservative, liberal, neutral appeal on scale 1-5 (5=most appeal)
{'Al Jazeera': (3, 4, 4), 'Breitbart': (3, 4, 4), 'Fox News': (3, 4, 4), 'Washington Times': (3, 4, 5), 'CNN': (3, 4, 5), 'Huffington Post': (3, 4, 4), 'New York Times': (3, 4, 5)}

Al Jazeera liberal
Al Jazeera neutral
Breitbart liberal
Breitbart neutral
Fox News liberal
Fox News neutral
Washington Times liberal
Washington Times neutral
CNN liberal
CNN neutral
Huffington Post liberal
Huffington Post neutral
New York Times liberal
N

In [11]:
# Rhetorical devices in article
# Note that only a subset of devices are queried
for topic in article_collection:
    devices_dict = dict()  # Key = rhetorical device; Value = array of counts across sentences by source
    # Narrative ids and their sources are listed by the REST service on localhost.
    # The timeout (seconds) avoids an indefinite hang; raise_for_status() reports HTTP errors.
    response = requests.get(f'http://127.0.0.1:5000/dna/v1/repositories/narratives?repository={topic}',
                            timeout=30)
    response.raise_for_status()
    result = response.json()
    narr_dict = {narrative['narrativeId']: narrative['narrativeMetadata']['source']
                 for narrative in result['narratives']}
    for narr_id, source in narr_dict.items():
        narrGraph = f'dna:{topic}_{narr_id}'
        query_results = conn.select(rhetorical_query.replace('?narrGraph', narrGraph),
                                    content_type='application/sparql-results+json')
        bindings = query_results.get('results', {}).get('bindings', [])
        for binding in bindings:
            if 'device' not in binding:     # No devices used in that article
                continue
            device = binding['device']['value']
            count = int(binding['cnt']['value'])
            # One slot per entry in `sources`, sized from the list instead of a hard-coded 8
            count_array = devices_dict.setdefault(device, [0] * len(sources))
            # Should only be 1 count per device per source; an unknown source raises ValueError
            count_array[sources.index(source)] = count
    print(topic)
    print("rhetorical device -> array of counts for ")
    print("'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'")
    print(devices_dict)
    print()

fl_abortion
rhetorical device -> array of counts for 
'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'
{'ad populum': [1, 1, 1, 0, 1, 2, 4, 1], 'exceptionalism': [3, 2, 3, 1, 2, 1, 2, 1], 'hyperbole': [2, 6, 6, 1, 4, 6, 2, 1], 'invective': [1, 0, 0, 0, 0, 0, 0, 0], 'pathos': [4, 10, 9, 1, 8, 6, 5, 2], 'expletive': [0, 0, 1, 0, 0, 0, 1, 0], 'paralipsis': [0, 2, 1, 0, 1, 1, 2, 0], 'ad hominem': [0, 1, 1, 0, 0, 0, 0, 0], 'loaded language': [0, 2, 0, 0, 1, 0, 0, 0]}

landslide
rhetorical device -> array of counts for 
'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'
{'exceptionalism': [1, 0, 1, 0, 2, 0, 0, 0], 'hyperbole': [3, 3, 3, 1, 2, 1, 0, 4], 'pathos': [2, 4, 1, 4, 5, 2, 0, 7], 'paralipsis': [0, 0, 0, 0, 0, 1, 0, 0]}

trump_trial
rhetorical device -> array of counts for 
'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'
{'ad hominem': [1, 0, 0, 0, 0, 8, 1

In [13]:
# Rhetorical devices in articles' quotes
# Note that only a subset of devices are queried
for topic in article_collection:
    quote_devices_dict = dict()  # Key = rhetorical device; Value = array of counts across quotes by source
    # Narrative ids and their sources are listed by the REST service on localhost.
    # The timeout (seconds) avoids an indefinite hang; raise_for_status() reports HTTP errors.
    response = requests.get(f'http://127.0.0.1:5000/dna/v1/repositories/narratives?repository={topic}',
                            timeout=30)
    response.raise_for_status()
    result = response.json()
    narr_dict = {narrative['narrativeId']: narrative['narrativeMetadata']['source']
                 for narrative in result['narratives']}
    for narr_id, source in narr_dict.items():
        narrGraph = f'dna:{topic}_{narr_id}'
        query_results = conn.select(quote_rhetorical_query.replace('?narrGraph', narrGraph),
                                    content_type='application/sparql-results+json')
        bindings = query_results.get('results', {}).get('bindings', [])
        for binding in bindings:
            if 'device' not in binding:     # No devices used in that article's quotes
                continue
            device = binding['device']['value']
            count = int(binding['cnt']['value'])
            # One slot per entry in `sources`, sized from the list instead of a hard-coded 8
            count_array = quote_devices_dict.setdefault(device, [0] * len(sources))
            # Should only be 1 count per device per source; an unknown source raises ValueError
            count_array[sources.index(source)] = count
    print(topic)
    print("rhetorical device -> array of counts in full quotes for ")
    print("'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'")
    print(quote_devices_dict)
    print()

fl_abortion
rhetorical device -> array of counts in full quotes for 
'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'
{'ad populum': [2, 1, 3, 0, 0, 0, 1, 0], 'exceptionalism': [1, 0, 1, 0, 1, 1, 0, 0], 'hyperbole': [1, 1, 4, 6, 3, 2, 0, 0], 'pathos': [5, 1, 8, 4, 7, 5, 2, 0], 'expletive': [0, 1, 1, 0, 0, 0, 1, 0], 'loaded language': [0, 0, 1, 1, 0, 0, 0, 0], 'paralipsis': [0, 0, 1, 1, 0, 1, 0, 0], 'ad hominem': [0, 0, 0, 0, 0, 1, 0, 0]}

landslide
rhetorical device -> array of counts in full quotes for 
'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'
{'hyperbole': [2, 1, 0, 1, 2, 0, 0, 2], 'pathos': [1, 1, 0, 0, 4, 0, 0, 3], 'paralipsis': [0, 0, 0, 1, 0, 0, 0, 0]}

trump_trial
rhetorical device -> array of counts in full quotes for 
'Al Jazeera', 'Breitbart', 'CNN', 'Fox', 'Huffington Post', 'NYT', 'WSJ', 'Washington Times'
{'ad populum': [2, 0, 1, 2, 1, 0, 0, 0], 'exceptionalism': [2, 1, 1, 5, 0,

In [14]:
# Events noted in the first 7 sentences of each article
# NOTE(review): nothing in this cell or in event_query restricts the sentence offset to 7;
# presumably only those sentences carry event semantics in the stored graphs - confirm.
for topic in article_collection:
    events_dict = dict()  # Key = source; Value = array of tuples of the sentence offset, event type and its triggering text
    # Narrative ids and their sources are listed by the REST service on localhost.
    # The timeout (seconds) avoids an indefinite hang; raise_for_status() reports HTTP errors.
    response = requests.get(f'http://127.0.0.1:5000/dna/v1/repositories/narratives?repository={topic}',
                            timeout=30)
    response.raise_for_status()
    result = response.json()
    narr_dict = {narrative['narrativeId']: narrative['narrativeMetadata']['source']
                 for narrative in result['narratives']}
    for narr_id, source in narr_dict.items():
        narrGraph = f'dna:{topic}_{narr_id}'
        query_results = conn.select(event_query.replace('?narrGraph', narrGraph),
                                    content_type='application/sparql-results+json')
        bindings = query_results.get('results', {}).get('bindings', [])
        for binding in bindings:
            offset = int(binding['offset']['value'])
            # Keep only the class name that follows ':dna:' in the event type IRI
            event_type = binding['eventType']['value'].split(':dna:')[1]
            event_text = binding['eventText']['value']
            events_dict.setdefault(source, []).append((offset, event_type, event_text))
    print(topic)
    print("source -> array of tuples of sentence offset, event, event trigger text")
    print()
    for key, value in events_dict.items():
        print(key)
        print()
        print(value)
        print()
    print()

fl_abortion
source -> array of tuples of sentence offset, event, event trigger text

Al Jazeera

[(1, 'CommunicationAndSpeechAct', 'criticizes'), (1, 'DisagreementAndDispute', 'criticizes'), (1, 'LawAndPolicy', 'Florida Supreme Court decision'), (2, 'CommunicationAndSpeechAct', 'criticizes'), (2, 'DisagreementAndDispute', 'criticizes'), (3, 'IssuingAndPublishing', 'wrote'), (4, 'LawAndPolicy', '15-week abortion ban'), (4, 'LegalEvent', 'upholds'), (5, 'LawAndPolicy', '15-week abortion ban'), (5, 'RemovalAndRestriction', 'likely enables'), (6, 'CommunicationAndSpeechAct', 'signed'), (6, 'CommunicationAndSpeechAct', 'passed'), (6, 'LawAndPolicy', '15-week abortion ban'), (6, 'LegalEvent', 'signed'), (6, 'LegalEvent', 'passed'), (7, 'LawAndPolicy', '15-week abortion ban'), (7, 'Marriage', 'faced')]

Wall Street Journal

[(1, 'AchievementAndAccomplishment', 'may benefit'), (2, 'CommunicationAndSpeechAct', 'allows'), (2, 'LawAndPolicy', 'early pregnancy abortion ban'), (2, 'LegalEvent', 'al

trump_trial
source -> array of tuples of sentence offset, event, event trigger text

Al Jazeera

[(1, 'LegalEvent', 'found guilty'), (2, 'DeceptionAndDishonesty', 'of a crime'), (2, 'LegalEvent', 'was convicted'), (3, 'LegalEvent', 'convicts'), (4, 'Cognition', 'deliberated for two days'), (4, 'CommunicationAndSpeechAct', 'summoned'), (4, 'LegalEvent', 'deliberated for two days'), (4, 'MeetingAndEncounter', 'summoned'), (5, 'LawAndPolicy', '34 felony charges'), (5, 'LegalEvent', 'faced'), (6, 'CommunicationAndSpeechAct', 'claim'), (6, 'DeceptionAndDishonesty', 'Donald Trump hid payments to influence election'), (6, 'DeceptionAndDishonesty', 'hid'), (6, 'PoliticalEvent', 'to influence election'), (7, 'MeetingAndEncounter', 'faces')]

Wall Street Journal

[(1, 'AggressiveCriminalOrHostileAct', '34 felonies'), (1, 'ArrestAndImprisonment', 'convicts'), (1, 'LegalEvent', 'convicts'), (2, 'AggressiveCriminalOrHostileAct', 'may jeopardize'), (2, 'PoliticalEvent', "Donald Trump's 2024 presiden