## TargetSentiment Analysis

In [139]:
from NewsSentiment import TargetSentimentClassifier
import warnings
warnings.filterwarnings('ignore')

tsc = TargetSentimentClassifier()
text = [
"the obama administration largely had a hands-off approach to marijuana directing federal prosecutors to allow cannabis businesses to operate as long they followed state laws. u.s. attorney general jeff sessions has been an aggressive opponent of marijuana likening it to heroin and blaming it for spikes in violence. however a task force of law enforcement officials that sessions convened made no new policy recommendations. rep. dana rohrabacher (r. calif.) in an inquirer and daily news opinion piece voiced his disappointment in sessions his \"longtime friend,\" for urging congress to drop the amendment. \"this despite president trump's belief made clear in his campaign and as president that states alone should decide", "medical marijuana","policies.\""
]
sentiment = tsc.infer_from_text(text[0], text[1], text[2])
print(sentiment[0])

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


{'class_id': 0, 'class_label': 'negative', 'class_prob': 0.6389052867889404}


In [1]:
import pandas as pd
import json

path = "../Data/"
file_names = ["1Jan2013-31Dec2017.json", "1Jan2018-4Feb2021.json", "5Feb2021-9Mar2023.json"]

articles_df = pd.DataFrame()
for file_name in file_names:
    with open(path + file_name, "r") as f:
        data = json.load(f)
    df = pd.DataFrame(data)
    # concat df
    articles_df = pd.concat([articles_df, df], axis=0)

articles_df.reset_index(drop=True, inplace=True)
drug_names = pd.read_csv("./data/drug_names.csv")
import re
# concatenate name and commercial name
drugs = drug_names["Name"] + ", " + drug_names["Commercial Names"]
drugs = drugs.apply(lambda x: re.sub(r"[\([{'})\]]", "", x))

# remove No commercial uses
not_drugs = ["No commercial uses", "No commercial names", "Various brand names", "is being researched as therapy for Post Traumatic Stress Disorder PTSD under strict medical supervision.",
             'names include "DM"', "Alcohol", "Sleep Medications", "Over-the-Counter Medicines",
             "an OTC medication for diarrhea", "Imodium", "being researched as therapy for treatment-resistant depression under strict medical supervision.",
             'No commercial uses for ingested "bath salts." No relation to “Epsom salt,” sold as a bath product.', "Tobacco", 
             "Nicotine", "Vaping", "Multiple brand names", "Crack", "Speed", "Spice", "Pot", "Molly", "Crystal", "Sonata", "Coke", "Anabolic"]
drugs = drugs.apply(lambda x: x.split(", "))
# drugs = [drug for drug in drugs if drug not in not_drugs]
real_drugs = []
for row in drugs:
    for thing in row:
        if thing not in not_drugs:
            real_drugs.append(thing)

# to lower
missing_drugs = ["suboxone", "naltrexone", "buprenorphine", "oxy", "narcan", "naloxone"]
real_drugs = [drug.lower() for drug in real_drugs]
# add missing drugs
real_drugs.extend(missing_drugs)
real_drugs = list(set(real_drugs))

real_drugs.remove("acid")
real_drugs.remove("bath salts")

# drop 2023
articles_df["Date"] = pd.to_datetime(articles_df["Date"])
articles_df = articles_df[articles_df["Date"].dt.year != 2023]

import spacy
from spacy.lang.en.stop_words import STOP_WORDS
import contractions

nlp = spacy.load('en_core_web_md', exclude=['tagger', 'parser', 'ner', 'attribute_ruler', 'lemmatizer'])

def preprocess(text):
    # lower case
    text = text.lower()
    # remove contractions
    text = contractions.fix(text)
    # remove urls
    text = re.sub(r"http\S+", "", text)
    # tokenize
    text = text.split()
    # remove stop words
    text = [token for token in text if token not in STOP_WORDS]

    return text

articles_df["clean_text"] = articles_df["Text"].apply(lambda x: preprocess(x))
sample = "this sentence contains the word narcotic and the word methamphetamine and oxy"
[x for x in sample.split() if x in real_drugs]

drug_articles = pd.DataFrame()

for i, row in articles_df.iterrows():
    if any([x in row["clean_text"] for x in real_drugs]):
        # code is working correctly for the word "weed". the condition "weeds" in "weed" will return false, so nothing to worry about
        matches = [x for x in row["clean_text"] if x in real_drugs]
        if "weed" in matches:
            if "weed out" in row["Text"]:
                matches.remove("weed")
            elif "weed killer" in row["Text"]:
                matches.remove("weed")
        if len(matches) == 0:
            continue
        row["matches"] = matches
        drug_articles = drug_articles.append(row)

drug_articles["drug_mentions"] = drug_articles["matches"].apply(lambda x: len(x))

# make drug classes: DEA
stimulants = ["cocaine", "methamphetamine", "amphetamine", "speed", "meth", "crystal meth", "adderall", "vyvanse", "ritalin", "dexedrine", "desoxyn", "coke"]
hallucinogens = ["hallucinogens","mdma", "lsd", "ecstasy", "peyote", "mescaline", "mushrooms", "dmt", "ayahuasca", "ketamine", "nitrous", "psilocybin", "pcp", "angel dust", "ketalar", "spravato", "esketamine", "ketaset", "shrooms"]
depressants = ["barbiturates", "benzodiazepines", "xanax", "valium", "benzos","pentobarbital","nembutal","alprazolam","chlorodiazepoxide","librium","diazepam","valium","lorazepam","ativan","triazolam","halicon","sleep medications","eszopiclone","lunesta","zaleplon","sonata","zolpidem","ambien",
"ghb", "gamma-hydroxybutyric acid", "rohypnol", "flunitrazepam", "sonata", "sodium oxybate", "xyrem"]
narcotics = ["opioids", "heroin", "opiate", "fentanyl", "hydromorphone", "hydrocodone", "oxycodone", "codeine", "morphine", "dilaudid", "percocet", "vicodin", "oxycontin", "actiq", "duragesic", "sublimaze","dihydrocodeinone", "norco", "zohydro","meperidine", "demerol", "morphine duramorph", "MS Contin","percodan", "oxymorphone", "opana", "percs"]
treatment = ["naloxone", "naltrexone", "methadone", "buprenorphine", "suboxone", "subutex", "antabuse", "acamprosate", "naltrexone", "disulfiram", "methadose", "dolophine", "narcan"]
designer_drugs = ["bath salts", "flakka", "k2", "spice", "u-47700", "synthetic cannabinoids", "synthetic cathinones"]
cannabis = ["cannabis", "marijuana", "weed", "thc"]
drugs_of_concern = ["kratom", "salvia", "dxm", "dextromethorphan", "salvia divinorum"]

# create drug class column
drug_articles["class_drug"] = ""
drug_articles["drug_class"] = ""
for i,row in enumerate( drug_articles["matches"]):
    class_drug = []
    for drug in row:
        if drug in stimulants:
            # add key and value to dictionary
            # drug_classes["stimulants"] = drug
            a = ("stimulants", drug)
            class_drug.append(a)
        elif drug in hallucinogens:
            a = ("hallucinogens", drug)
            class_drug.append(a)
        elif drug in depressants:
            a = ("depressants", drug)
            class_drug.append(a)
        elif drug in narcotics:
            a = ("narcotics", drug)
            class_drug.append(a)
        elif drug in treatment:
            a = ("treatment", drug)
            class_drug.append(a)
        elif drug in designer_drugs:
            a = ("designer_drugs", drug)
            class_drug.append(a)
        elif drug in cannabis:
            a = ("cannabis", drug)
            class_drug.append(a)
        elif drug in drugs_of_concern:
            a = ("drugs_of_concern", drug)
            class_drug.append(a)
        else:
            a = ("miscellaneous", drug)
            class_drug.append(a)
    class_drug = list(set(class_drug))
    drug_articles["class_drug"].iloc[i] = class_drug
    drug_articles["drug_class"].iloc[i] = [x[0] for x in class_drug]
print(len(drug_articles))
drug_articles_stimulants = drug_articles[pd.DataFrame(drug_articles.drug_class.tolist()).isin(['stimulants']).any(1).values]
drug_articles_cannabis = drug_articles[pd.DataFrame(drug_articles.drug_class.tolist()).isin(['cannabis']).any(1).values]
drug_articles_depressants = drug_articles[pd.DataFrame(drug_articles.drug_class.tolist()).isin(['depressants']).any(1).values]
drug_articles_treatment = drug_articles[pd.DataFrame(drug_articles.drug_class.tolist()).isin(['treatment']).any(1).values]
drug_articles_narcotics = drug_articles[pd.DataFrame(drug_articles.drug_class.tolist()).isin(['narcotics']).any(1).values]
drug_articles_hallucinogens = drug_articles[pd.DataFrame(drug_articles.drug_class.tolist()).isin(['hallucinogens']).any(1).values]
cannabis_articles = drug_articles_cannabis['Text'].tolist()
cannabis_articles_dates = drug_articles_cannabis['Date'].tolist()

stimulants_articles = drug_articles_stimulants['Text'].tolist()
stimulants_articles_dates = drug_articles_stimulants['Date'].tolist()

depressants_articles = drug_articles_depressants['Text'].tolist()
depressants_articles_dates = drug_articles_depressants['Date'].tolist()

treatment_articles = drug_articles_treatment['Text'].tolist()
treatment_articles_dates = drug_articles_treatment['Date'].tolist()

narcotics_articles = drug_articles_narcotics['Text'].tolist()
narcotics_articles_dates = drug_articles_narcotics['Date'].tolist()

hallucinogens_articles = drug_articles_hallucinogens['Text'].tolist()
hallucinogens_articles_dates = drug_articles_hallucinogens['Date'].tolist()


  from .autonotebook import tqdm as notebook_tqdm
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_articles.append(row)
  drug_articles = drug_art

3661


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  drug_articles["class_drug"].iloc[i] = class_drug
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  drug_articles["drug_class"].iloc[i] = [x[0] for x in class_drug]
  drug_articles_stimulants = drug_articles[pd.DataFrame(drug_articles.drug_class.tolist()).isin(['stimulants']).any(1).values]
  drug_articles_cannabis = drug_articles[pd.DataFrame(drug_articles.drug_class.tolist()).isin(['cannabis']).any(1).values]
  drug_articles_depressants = drug_articles[pd.DataFrame(drug_articles.drug_class.tolist()).isin(['depressants']).any(1).values]
  drug_articles_treatment = drug_articles[pd.DataFrame(drug_articles.drug_class.tolist()).i

In [31]:
num = 0
paragraphs = []
texts = []
date = ''
for i in range(0, len(cannabis_articles)):
    text = ''
    art = cannabis_articles[i]
    art = art.lower()
    art = art.split('\n')
    art = [txt for txt in art if txt != '']
    if date != str(cannabis_articles_dates[i]).split("-")[0] and i!=0:
        # print(date)
        paragraphs.append(texts)
        texts = []
    date = str(cannabis_articles_dates[i]).split("-")[0]

    for j in range(len(art)):
        sent = art[j]
        for drug in cannabis:
            if (drug in sent) and (sent not in text):
                # if (j != 0 and (art[j-1] not in text)):
                #     text += art[j-1] + ' '
                text += sent + ' '
                # if (j != len(art)-1 and (art[j+1] not in text)):
                #     text += art[j+1] + ' '
                break
    texts.append(text)
paragraphs.append(texts)

In [17]:
def partition_keyword(keyword, mystring):
    newlist = []
    if keyword in mystring:
        string = ''
        for i in mystring.split():
            if i == keyword:
                newlist.append(string)
                newlist.append(i)
                string = ''
            else:
                string += i + ' '
        newlist.append(string)
        newlist = [i.strip() for i in newlist if i]
    return newlist

## Cannabis

In [68]:
first = ['marijuana dispensaries', 
         'medical marijuana legislation', 
         'marijuana decriminalization', 
         'medicinal marijuana', 
         'recreational marijuana']
second = ['marijuana legalization',
 'marijuana policy project',
 'medical marijuana',
 'marijuana advocates',
 'medical marijuana new jersey']
third = ['marijuana legalization efforts',
 'marijuana reform coalition',
 'marijuana decriminalization bill',
 'marijuana policy issues']
four = ['marijuana offenses']
five = ['licensed marijuana',
 'medical marijuana patient',
 'medical cannabis laws',
 'medical marijuana laws',
 'medical marijuana clients']
six = ['medical marijuana states',
 'unauthorized medical marijuana use',
 'medical marijuana law',
 'pennsylvania medical marijuana growers',
 'pennsylvania marijuana dispensaries']
seven = ['marijuana reform',
 'cannabis investments',
 'nascent marijuana industry',
 'cannabis industry news',
 'marijuana cannabis legislation']
eight = ['philadelphia worker relief fund',
 'dramatic june refinery blast']
nine = ['recreational marijuana',
 'new marijuana laws',
 'legal marijuana program',
 'marijuana convictions']
ten = ['cannabis impairment',
 'cannabis compound cbd',
 'tetrahydrocannabinol',
 'endocannabinoids']
aspects = [first, second, third, four, five, six, seven, eight, nine, ten]

In [70]:
import nltk

absa_list = []
for i in range(0, 10):
    para = paragraphs[i]
    aspect = aspects[i]

    for word in aspect:
        scores = []
        # print(word)
        for art in para:
            if word in art:
                # print(word)
                partition = art.partition(word)
                # print(partition)
                if (len(partition) == 3):
                    try:
                        sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                    except:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        if len(sentiment[0]) == 0:
                            print(sents)
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])

                        
                elif (len(partition) == 2 and word in partition[0]):
                    try:
                        sentiment = tsc.infer_from_text(partition[0], partition[1],'')
                    except:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        if len(sentiment[0]) == 0:
                            print(sents)
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                else:
                    try:
                        sentiment = tsc.infer_from_text('', partition[0], partition[1])
                    except:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        if len(sentiment[0]) == 0:
                            print(sents)
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                scores.append(sentiment[0])
        absa_list.append(scores)

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

In [80]:
from collections import defaultdict

def decide_class_with_averaging(probabilities_list):
    predicted_classes = []
    
    for prob_list in probabilities_list:
        if len(prob_list) == 0:
            predicted_classes.append('empty')
            continue
        class_probs = defaultdict(list)
        
        # Group probabilities by class label
        for entry in prob_list:
            class_probs[entry['class_label']].append(entry['class_prob'])
        
        # Calculate average probabilities for each class
        avg_probs = {class_label: sum(probs) / len(probs) for class_label, probs in class_probs.items()}
        
        # Choose the class with the highest average probability
        predicted_class = max(avg_probs, key=avg_probs.get)
        predicted_classes.append(predicted_class)
    
    return predicted_classes

In [78]:
absa_list

[[{'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.48278534412384033},
  {'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.7132776379585266},
  {'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.4264422655105591},
  {'class_id': 1,
   'class_label': 'neutral',
   'class_prob': 0.43391478061676025}],
 [{'class_id': 2, 'class_label': 'positive', 'class_prob': 0.608654260635376}],
 [{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.7906837463378906}],
 [{'class_id': 2, 'class_label': 'positive', 'class_prob': 0.5035661458969116},
  {'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.6001613736152649},
  {'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.4669572114944458},
  {'class_id': 2, 'class_label': 'positive', 'class_prob': 0.6103493571281433},
  {'class_id': 2, 'class_label': 'positive', 'class_prob': 0.6103493571281433},
  {'class_id': 2, 'class_label': 'positive', 'class_prob': 0.5249195694923401},
  {'class_id': 1, 'class_label

In [83]:
print(decide_class_with_averaging(absa_list))

['neutral', 'positive', 'negative', 'positive', 'negative', 'positive', 'negative', 'negative', 'positive', 'positive', 'neutral', 'positive', 'positive', 'positive', 'negative', 'positive', 'negative', 'empty', 'positive', 'negative', 'neutral', 'positive', 'neutral', 'positive', 'neutral', 'neutral', 'negative', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'neutral', 'negative', 'negative', 'negative', 'negative', 'positive', 'negative', 'negative', 'negative', 'positive', 'positive', 'positive', 'negative', 'negative']


---

## Hallucinogens

In [101]:
first = ['south camden', 'drug charges', 'drug arrests', 'camden county prosecutor', 'camden drug ring']
second = ['drug trade', 'crushed ecstasy', 'drugs', 'unfiltered ecstasy', 'crack cocaine']
third = ['lsd', 'drug possession', 'network episode', 'unutterable ecstasy', 'roger']
fourth = ['smuggled drugs', 'illegal psychedelic drugs', 'lsd', 'drug situation', 'villanova university students']
fifth = ['parole policies', 'probation', 'drug violations', 'allen', 'byron allen']
sixth = ['cannabis', 'gun control act', 'marijuana', 'medical marijuana program', 'federal gun control act']
seventh = ['favorite drug conversation', 'marijuana cannabis legislation', 'prevalent drug use', 'marijuana users', 'party drug']
eighth = ['eagles owner', 'lombardi trophy', 'jeffrey lurie', 'super bowl ring', 'super bowl run']
ninth = ['new psychiatric medicines', 'behavioral neuropharmacology', 'psychoactive compounds', 'new psychedelic therapies', 'psychedelic therapeutics company']
tenth = ['conviction', 'gauzy', 'ga', 'rodgers', 'amy eldridge']
aspects = [first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth]

In [155]:
num = 0
paragraphs = []
texts = []
date = ''
for i in range(0, len(hallucinogens_articles)):
    text = ''
    art = hallucinogens_articles[i]
    art = art.lower()
    art = art.split('\n')
    art = [txt for txt in art if txt != '']
    if date != str(hallucinogens_articles_dates[i]).split("-")[0] and i!=0:
        # print(date)
        paragraphs.append(texts)
        texts = []
    date = str(hallucinogens_articles_dates[i]).split("-")[0]

    for j in range(len(art)):
        sent = art[j]
        for drug in hallucinogens:
            if (drug in sent) and (sent not in text):
                if (j != 0 and (art[j-1] not in text)):
                    text += art[j-1] + ' '
                text += sent + ' '
                if (j != len(art)-1 and (art[j+1] not in text)):
                    text += art[j+1] + ' '
                break
    texts.append(text)
paragraphs.append(texts)

In [162]:
for i in range(0, 10):
    para = paragraphs[i]
    for art in para:
        if 'rodgers' in art:
            print(art)

progressives pounced on former prog-darling turned anti-vaxxer aaron rodgers this weekend after he and his packers lost their nfl playoff game. the woke-mob internet turned its full wrath toward a-rod with a heat usually reserved for florida politicians. all weekend, rodgers got dunked on like he was charles shackleford. this online outcry was unprecedented, if not predictable; an entire bloc of generally sensible, empathetic folks found ecstasy at witnessing the greatest passer in history, who will probably win a fourth mvp award, lose perhaps the last game he’ll ever play. “it’s going to be a tough decision,” he said after the game. it’s fair to ask if all that rejoicing will contribute to rodgers’ retirement decision. he is, after all, the biggest snowflake in wisconsin. 


In [96]:
import nltk
from NewsSentiment.customexceptions import TooLongTextException

absa_list = []
for i in range(0, 10):
    para = paragraphs[i]
    aspect = aspects[i]

    for word in aspect:
        scores = []
        # print(word)
        for art in para:
            if word in art:
                # print(word)
                partition = art.partition(word)
                # print(partition)
                if (len(partition) == 3):
                    try:
                        sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        
                elif (len(partition) == 2 and word in partition[0]):
                    try:
                        sentiment = tsc.infer_from_text(partition[0], partition[1],'')
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                else:
                    try:
                        sentiment = tsc.infer_from_text('', partition[0], partition[1])
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                scores.append(sentiment[0])
        absa_list.append(scores)

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

In [97]:
absa_list

[[{'class_id': 0, 'class_label': 'negative', 'class_prob': 0.985813558101654},
  {'class_id': 0, 'class_label': 'negative', 'class_prob': 0.8435596227645874},
  {'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.9796075820922852}],
 [{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.7995744347572327}],
 [{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.8494923710823059}],
 [{'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.7277555465698242},
  {'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.6384358406066895}],
 [{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.9929799437522888}],
 [{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.9116336703300476}],
 [{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.6264641284942627}],
 [{'class_id': 0, 'class_label': 'negative', 'class_prob': 0.7977312207221985},
  {'class_id': 0, 'class_label': 'negative', 'class_prob': 0.9175374507904053},


In [98]:
print(decide_class_with_averaging(absa_list))

['negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'positive', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'neutral', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'neutral', 'negative', 'positive', 'negative', 'negative', 'negative', 'neutral', 'positive', 'positive', 'positive', 'negative', 'positive', 'neutral', 'neutral', 'positive', 'positive', 'neutral', 'positive', 'negative', 'negative', 'positive']


---

## Depressants

In [103]:
first = ['ambien', 'common insomnia drug', 'insomnia drugs', 'illustration insomnia drug ambien', 'sleep drug zolpidem']
second = ['prosecution', 'rape case', 'montgomery county republican committee', 'county district attorney', 'montgomery county prosecutors']
third = ['patients drugs', 'prescribed oxycodone', 'prescriptions', 'pill zaleplon', 'zaleplon prescription pill']
fourth = ['patient blue xanax pills', 'insomnia drugs', 'opioid painkillers', 'benzodiazepines', 'lorazepam']
fifth = ['overdose statistics', 'overdoses', 'overdose deaths', 'overdose cases', 'overdose substance']
sixth = ['morphine', 'xanax overdose', 'opioid overdoses', 'blaming ambien', 'sleep drug ambien']
seventh = ['criminal charges', 'recent drug arrests', 'federal court', 'homicide case', 'brooks']
eighth = ['medical marijuana', 'medical marijuana patients', 'medical cannabis', 'medical marijuana program law', 'medical marijuana laws']
ninth = ['xanax pills', 'jesus malverde', 'trexler truck stop', 'heroin', 'narcos']
tenth = ['addictive medications', 'prescription opioids', 'opioid users compassion', 'opioid use', 'opioid users']

aspects = [first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth]

In [171]:
num = 0
paragraphs = []
texts = []
date = ''
for i in range(0, len(depressants_articles)):
    text = ''
    art = depressants_articles[i]
    art = art.lower()
    art = art.split('\n')
    art = [txt for txt in art if txt != '']
    if date != str(depressants_articles_dates[i]).split("-")[0] and i!=0:
        # print(date)
        paragraphs.append(texts)
        texts = []
    date = str(depressants_articles_dates[i]).split("-")[0]

    for j in range(len(art)):
        sent = art[j]
        for drug in depressants:
            if (drug in sent) and (sent not in text):
                if (j != 0 and (art[j-1] not in text)):
                    text += art[j-1] + ' '
                text += sent + ' '
                if (j != len(art)-1 and (art[j+1] not in text)):
                    text += art[j+1] + ' '
                break
    texts.append(text)
paragraphs.append(texts)

In [174]:
for i in range(0, 10):
    para = paragraphs[i]
    for art in para:
        if 'brooks' in art:
            print(art)

"we'd have scores - dozens, potentially - you know, i mean, mchugh's opinion is a recipe for relief in every one of these cases," the d.a.'s federal litigation supervisor, max cooper kaufman, said during arguments in federal court in may 2018, according to transcripts. it began with the case of basil brooks, who was convicted of the 2005 slaying of derrick jones, shot dead on the street in west philadelphia. the evidence against brooks was, by all accounts, thin: primarily, the testimony of a single eyewitness who could not pick brooks out of a photo array, who was high on xanax at the time the crime occurred, and who faced pending criminal charges that were dismissed for lack of prosecution shortly after he implicated brooks. as the trial concluded, hughes charged the jury with assessing whether brooks was guilty beyond a reasonable doubt. for now, gant and brooks are back in common pleas court and could stand trial again unless they can reach agreements with the d.a negotiations are 

In [105]:
import nltk
from NewsSentiment.customexceptions import TooLongTextException

absa_list = []
for i in range(0, 10):
    para = paragraphs[i]
    aspect = aspects[i]

    for word in aspect:
        scores = []
        # print(word)
        for art in para:
            if word in art:
                # print(word)
                partition = art.partition(word)
                # print(partition)
                if (len(partition) == 3):
                    try:
                        sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        
                elif (len(partition) == 2 and word in partition[0]):
                    try:
                        sentiment = tsc.infer_from_text(partition[0], partition[1],'')
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                else:
                    try:
                        sentiment = tsc.infer_from_text('', partition[0], partition[1])
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                scores.append(sentiment[0])
        absa_list.append(scores)

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

In [106]:
absa_list

[[{'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.455217570066452},
  {'class_id': 0, 'class_label': 'negative', 'class_prob': 0.6756682395935059},
  {'class_id': 0, 'class_label': 'negative', 'class_prob': 0.9640350937843323},
  {'class_id': 0, 'class_label': 'negative', 'class_prob': 0.9805535078048706},
  {'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.9700745344161987}],
 [{'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.4221510887145996}],
 [{'class_id': 1,
   'class_label': 'neutral',
   'class_prob': 0.44339877367019653}],
 [{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.5829100608825684}],
 [{'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.3937489092350006}],
 [{'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.6054429411888123},
  {'class_id': 0, 'class_label': 'negative', 'class_prob': 0.9514244198799133},
  {'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.9514244198799133}],
 [{'class_id': 

In [107]:
print(decide_class_with_averaging(absa_list))

['negative', 'neutral', 'neutral', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'negative', 'positive', 'neutral', 'negative', 'positive', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'positive', 'negative', 'negative']


---

## Narcotics

In [108]:
first = ['opioid overdose', 'accidental heroin overdose', 'drug overdoses', 'reid', 'drug deaths']
second = ['bail', 'new jersey court', 'chester county prison', 'camden county prosecutor', 'handcuffed noonan']
third = ['other illegal drugs', 'deadly drug crisis', 'drug convictions', 'painkiller percocet', 'drug possession']
fourth = ['camden county spokesman dan keashen', 'lou gehrig', 'cherry hill', 'heroin addict chet baker', 'kacey prim']
fifth = ['state drug agent', 'medical marijuana', 'opiate investigation', 'new jersey drug policy alliance', 'federal drug laws']
sixth = ['opioid drugmakers', 'drug laws ease', 'opioid litigation', 'public drug use', 'statewide heroin']
seventh = ['aids crisis', 'overall hiv rates', 'neonatal abstinence syndrome', 'prevalent opioid', 'fewer opioids']
eighth = ['heroin overdoses', 'fatal overdose victims', 'heroin overdose', 'overdose crisis', 'fatima musa']
ninth = ['accidental overdose death', 'cocaine overdose', 'overdose victims', 'heroin overdose death', 'fentanyl overdoses']
tenth = ['fentanyl crisis', 'pardons biden issues', 'border protection', 'worst fiscal policy mistake', 'russian aggression']

aspects = [first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth]

In [169]:
num = 0
paragraphs = []
texts = []
date = ''
for i in range(0, len(narcotics_articles)):
    text = ''
    art = narcotics_articles[i]
    art = art.lower()
    art = art.split('\n')
    art = [txt for txt in art if txt != '']
    if date != str(narcotics_articles_dates[i]).split("-")[0] and i!=0:
        # print(date)
        paragraphs.append(texts)
        texts = []
    date = str(narcotics_articles_dates[i]).split("-")[0]

    for j in range(len(art)):
        sent = art[j]
        for drug in narcotics:
            if (drug in sent) and (sent not in text):
                if (j != 0 and (art[j-1] not in text)):
                    text += art[j-1] + ' '
                text += sent + ' '
                if (j != len(art)-1 and (art[j+1] not in text)):
                    text += art[j+1] + ' '
                break
    texts.append(text)
paragraphs.append(texts)

In [170]:
for i in range(0, 10):
    para = paragraphs[i]
    for art in para:
        if 'reid' in art:
            print(art)

"you notice it with any organization that has had a lot of success, that you will start to reach, thinking, 'that's the thing that's going to [get us over the top], that's the player, that's the method, that's the mechanism, that's the coach, that's the thing that is going to put us over the top,' " lurie said. reid's decisions fell short in 2011, but he brought back almost the same group for a do-over, claiming a full offseason with no lockout would benefit the team. and then in early august during training camp his son garrett died of an accidental heroin overdose. reid missed only two days of practice. 
on the sidelines was the walrus-mustached reid, a commanding presence, and highly respected in the league. he ranks fifth among active coaches in wins behind the patriots' bill belichick, the washington redskins' mike shanahan, the giants' tom coughlin, and the st. louis rams' jeff fisher. philadelphians will also remember reid for the emotional roller coaster his family endured as t

In [140]:
import nltk
from NewsSentiment.customexceptions import TooLongTextException

absa_list = []
for i in range(0, 10):
    para = paragraphs[i]
    aspect = aspects[i]

    for word in aspect:
        scores = []
        # print(word)
        for art in para:
            if word in art:
                art = art.replace(',', '')
                # print(word)
                partition = art.partition(word)
                if (len(partition) == 3):
                    try:
                        # print(partition[0], partition[1], partition[2])
                        sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        
                elif (len(partition) == 2 and word in partition[0]):
                    try:
                        sentiment = tsc.infer_from_text(partition[0], partition[1],'')
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                else:
                    try:
                        sentiment = tsc.infer_from_text('', partition[0], partition[1])
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                scores.append(sentiment[0])
        absa_list.append(scores)

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Token indices sequence length is longer than the specified maximum sequence length for this model (623 > 512). Running this sequence through the model will result in indexing errors
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strat

In [141]:
print(decide_class_with_averaging(absa_list))

['negative', 'negative', 'negative', 'positive', 'negative', 'negative', 'neutral', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'positive', 'negative', 'negative', 'positive', 'neutral', 'positive', 'neutral', 'positive', 'negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'negative', 'negative', 'negative', 'positive', 'negative', 'negative', 'negative', 'negative']


---

## Stimulants

In [142]:
first = ['drug deaths', 'serial arsons', 'camden county murder', 'crime spree', 'philadelphia police']
second = ['grant dekernion', 'comedians', 'rapper pal lil za', 'chozen', 'ribald hbo comedy eastbound']
third = ['accidental drug overdose', 'keisha williams', 'more deaths', 'lowest city murder tally', 'laura araujo']
fourth = ['young smoking crack cocaine', 'odom', 'felony drug', 'lamar odom', 'fictional band ex']
fifth = ['probation violation', 'cocaine use', 'byron allen', 'bennett homes cocaine', 'earlier drug case']
sixth = ['customs officials', 'cocaine possession', 'deadly poison shipment', 'largest cocaine seizure', 'transnational drug trafficking organizations']
seventh = ['illicit fentanyl', 'drug violations', 'large cocaine distribution ring', 'crack cocaine buyer', 'lehtera cocaine']
eighth = ['homeless sleeping', 'neighbors', 'tent city', 'richard', 'luz cordero']
ninth = ['soar corp methadone clinic', 'recent methamphetamine use', 'antidepressant', 'few medication options', 'addiction treatment program']
tenth = ['voters', 'frank discussion', 'bbwaa members', 'home run king', 'slugger mark mcgwire']

aspects = [first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth]

In [163]:
num = 0
paragraphs = []
texts = []
date = ''
for i in range(0, len(stimulants_articles)):
    text = ''
    art = stimulants_articles[i]
    art = art.lower()
    art = art.split('\n')
    art = [txt for txt in art if txt != '']
    if date != str(stimulants_articles_dates[i]).split("-")[0] and i!=0:
        # print(date)
        paragraphs.append(texts)
        texts = []
    date = str(stimulants_articles_dates[i]).split("-")[0]

    for j in range(len(art)):
        sent = art[j]
        for drug in stimulants:
            if (drug in sent) and (sent not in text):
                if (j != 0 and (art[j-1] not in text)):
                    text += art[j-1] + ' '
                text += sent + ' '
                if (j != len(art)-1 and (art[j+1] not in text)):
                    text += art[j+1] + ' '
                break
    texts.append(text)
paragraphs.append(texts)

In [168]:
for i in range(0, 10):
    para = paragraphs[i]
    for art in para:
        if 'richard' in art:
            print(art)

"i certainly believe that mike richards must be held accountable for his actions, but when a player who at one time symbolized everything that was special about the sport can become caught in such a destructive spiral, then i believe the institution of sport must begin to examine its level of culpability," lombardi said. last week, in an interview with tsn, nhl deputy commissioner bill daly said cocaine use in the league had risen. daly said he didn't think it was a crisis involving more than 20 players. "that's disturbing," hextall said, adding it was "something the organization would monitor." 
quinones and others describe how the marketing to doctors of painkillers, followed by a crackdown on overprescribing, helped create a new class of opiate addicts: mostly white, living in towns and suburbs, not inner cities, and targeted by drug cartels that set up shop in areas where prescription-drug users live. their customers may be people like cari creasia of kent, wash., who was prescribe

In [144]:
import nltk
from NewsSentiment.customexceptions import TooLongTextException

absa_list = []
for i in range(0, 10):
    para = paragraphs[i]
    aspect = aspects[i]

    for word in aspect:
        scores = []
        # print(word)
        for art in para:
            if word in art:
                art = art.replace(',', '')
                # print(word)
                partition = art.partition(word)
                if (len(partition) == 3):
                    try:
                        # print(partition[0], partition[1], partition[2])
                        sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        
                elif (len(partition) == 2 and word in partition[0]):
                    try:
                        sentiment = tsc.infer_from_text(partition[0], partition[1],'')
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                else:
                    try:
                        sentiment = tsc.infer_from_text('', partition[0], partition[1])
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                scores.append(sentiment[0])
        absa_list.append(scores)

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

In [145]:
print(decide_class_with_averaging(absa_list))

['negative', 'negative', 'negative', 'negative', 'neutral', 'positive', 'negative', 'negative', 'positive', 'positive', 'negative', 'neutral', 'positive', 'neutral', 'neutral', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'negative', 'negative', 'negative', 'neutral', 'negative', 'positive', 'neutral', 'positive', 'negative', 'negative', 'neutral', 'negative', 'neutral', 'neutral', 'positive', 'neutral', 'negative', 'negative']


---

## Treatment

In [146]:
first = ['prescription drugs', 'overdose death', 'overdose fatalities', 'blocks opioids', 'naloxone']
second = ['excess narcotic pain relievers', 'narcan firsthand', 'lifesaving drug', 'past drug abuse', 'drug response']
third = ['nasal narcan', 'heroin overdose cases', 'reversal medication narcan', 'overdose drug naloxone', 'additional narcan doses']
fourth = ['drug costs', 'narcan spray', 'narcan nasal spray', 'pennsylvania drug official', 'many methadone programs']
fifth = ['opioid dependence', 'fewer opioids', 'addicted babies', 'agitated young addicts', 'current opioid epidemic']
sixth = ['opioid addiction care', 'opioid users', 'opioid prescriptions', 'fewer opioid medications', 'opioid abuse crisis']
seventh = ['opioid buprenorphine', 'buprenorphine', 'buprenorphine prescription', 'pilot buprenorphine program', 'buprenorphine program']
eighth = ['drug overdose deaths', 'opioid crisis', 'drug death toll', 'more overdose deaths', 'worst urban opioid crisis']
ninth = ['buprenorphine providers', 'opioid use disorder medication buprenorphine', 'opioid addiction treatment drug buprenorphine', 'prescribing buprenorphine', 'buprenorphine misuse']
tenth = ['opioid litigation', 'opioid use disorder', 'other opioid painkillers', 'same endocannabinoid boosts', 'different endocannabinoids']

aspects = [first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth]

In [147]:
num = 0
paragraphs = []
texts = []
date = ''
for i in range(0, len(treatment_articles)):
    text = ''
    art = treatment_articles[i]
    art = art.lower()
    art = art.split('\n')
    art = [txt for txt in art if txt != '']
    if date != str(treatment_articles_dates[i]).split("-")[0] and i!=0:
        # print(date)
        paragraphs.append(texts)
        texts = []
    date = str(treatment_articles_dates[i]).split("-")[0]

    for j in range(len(art)):
        sent = art[j]
        for drug in treatment:
            if (drug in sent) and (sent not in text):
                if (j != 0 and (art[j-1] not in text)):
                    text += art[j-1] + ' '
                text += sent + ' '
                if (j != len(art)-1 and (art[j+1] not in text)):
                    text += art[j+1] + ' '
                break
    texts.append(text)
paragraphs.append(texts)

In [148]:
import nltk
from NewsSentiment.customexceptions import TooLongTextException

absa_list = []
for i in range(0, 10):
    para = paragraphs[i]
    aspect = aspects[i]

    for word in aspect:
        scores = []
        # print(word)
        for art in para:
            if word in art:
                art = art.replace(',', '')
                # print(word)
                partition = art.partition(word)
                if (len(partition) == 3):
                    try:
                        # print(partition[0], partition[1], partition[2])
                        sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        
                elif (len(partition) == 2 and word in partition[0]):
                    try:
                        sentiment = tsc.infer_from_text(partition[0], partition[1],'')
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                else:
                    try:
                        sentiment = tsc.infer_from_text('', partition[0], partition[1])
                    except TooLongTextException as e:
                        sentList = nltk.sent_tokenize(art)
                        sents = ''
                        for i in range(0, len(sentList)):
                            if word in sentList[i]:
                                if i != 0:
                                    sents += sentList[i-1]
                                sents += sentList[i]
                                if i != len(sentList)-1:
                                    sents += sentList[i+1]
                                break
                        partition = sents.partition(word)
                        try:
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                        except TooLongTextException as e:
                            partition = sentList[i].partition(word)
                            sentiment = tsc.infer_from_text(partition[0], partition[1], partition[2])
                scores.append(sentiment[0])
        absa_list.append(scores)

Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pai

In [175]:
absa_list

[[{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.5546302795410156}],
 [{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.9088168740272522}],
 [{'class_id': 2,
   'class_label': 'positive',
   'class_prob': 0.6136319041252136}],
 [{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.5520140528678894}],
 [{'class_id': 2, 'class_label': 'positive', 'class_prob': 0.7100523114204407},
  {'class_id': 2,
   'class_label': 'positive',
   'class_prob': 0.6835057139396667}],
 [{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.7419960498809814}],
 [{'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.4406115710735321}],
 [{'class_id': 2, 'class_label': 'positive', 'class_prob': 0.866007924079895}],
 [{'class_id': 0,
   'class_label': 'negative',
   'class_prob': 0.9560024738311768}],
 [{'class_id': 1, 'class_label': 'neutral', 'class_prob': 0.5590668320655823}],
 [{'class_id': 2, 'class_label': 'positive', 'class_prob': 0.5421641469

In [149]:
print(decide_class_with_averaging(absa_list))

['negative', 'negative', 'positive', 'negative', 'positive', 'negative', 'neutral', 'positive', 'negative', 'neutral', 'positive', 'neutral', 'positive', 'positive', 'neutral', 'negative', 'neutral', 'positive', 'neutral', 'negative', 'neutral', 'negative', 'negative', 'negative', 'negative', 'positive', 'negative', 'neutral', 'negative', 'negative', 'neutral', 'positive', 'positive', 'neutral', 'neutral', 'positive', 'negative', 'negative', 'negative', 'negative', 'neutral', 'positive', 'positive', 'positive', 'negative', 'positive', 'negative', 'neutral', 'neutral', 'positive']
