In [None]:
from datasets import load_dataset, Dataset
from nltk.corpus import wordnet as wn
from spacy.tokens import Doc, Token
from nltk import data
import pandas as pd
import numpy as np
from supporter.utils import utils
from supporter.utils.utils import POSConverter
from lemminflect import getInflection
from nltk import pos_tag, word_tokenize
from nltk.chunk import RegexpParser
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from nltk.translate.meteor_score import meteor_score
from sklearn.cluster import KMeans
from sentence_transformers import SentenceTransformer
from gensim import corpora, models
import nltk
from nltk.corpus import stopwords
import re
from sklearn.metrics.pairwise import cosine_similarity
from nltk import sent_tokenize
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from bert_score import score


In [45]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [46]:
dataset_dir = r"D:\my_files\gitRepo\cs5246-project\data"
dataset = load_dataset("multi_news", trust_remote_code=True, cache_dir=dataset_dir)

train_dataset: Dataset = dataset["train"]
val_dataset: Dataset = dataset["validation"]
test_dataset: Dataset = dataset["test"]

In [47]:
print(train_dataset)
print(train_dataset.description)

Dataset({
    features: ['document', 'summary'],
    num_rows: 44972
})

Multi-News, consists of news articles and human-written summaries
of these articles from the site newser.com.
Each summary is professionally written by editors and
includes links to the original articles cited.

There are two features:
  - document: text of news articles seperated by special token "|||||".
  - summary: news summary.



In [48]:
def split_documents(ds):
    documents = ds["document"].split("|||||")
    return {"articles": "\n\n".join(documents)}


_train_dataset = train_dataset.map(split_documents, batched=False)
_val_dataset = val_dataset.map(split_documents, batched=False)
_test_dataset = test_dataset.map(split_documents, batched=False)

In [49]:
# print(type(_train_dataset["document"]))
# print(len(_train_dataset["document"]))
# print(_train_dataset["document"][0])
#
# print("\n\n====================================================")
# print(type(_train_dataset["summary"]))
# print(len(_train_dataset["summary"]))
# print(_train_dataset["summary"][0])
#
# print("\n\n====================================================")
# print(type(_train_dataset["articles"]))
# print(len(_train_dataset["articles"]))
# print(_train_dataset["articles"][0])


def get_data(idx):
    return {"articles": _train_dataset["articles"][idx], "summary": _train_dataset["summary"][idx]}

In [50]:
NLTK_DIR = r"D:\my_files\gitRepo\cs5246-project\data\nltk"
nltk.download('wordnet', download_dir=NLTK_DIR)
nltk.download('stopwords', download_dir=NLTK_DIR)
nltk.download('brown', download_dir=NLTK_DIR)
nltk.download('webtext', download_dir=NLTK_DIR)
nltk.download('reuters', download_dir=NLTK_DIR)
nltk.download('punkt_tab', download_dir=NLTK_DIR)
nltk.download('punkt', download_dir=NLTK_DIR)
nltk.download('averaged_perceptron_tagger_eng', download_dir=NLTK_DIR)

data.path.append(NLTK_DIR)

[nltk_data] Downloading package wordnet to
[nltk_data]     D:\my_files\gitRepo\cs5246-project\data\nltk...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     D:\my_files\gitRepo\cs5246-project\data\nltk...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package brown to
[nltk_data]     D:\my_files\gitRepo\cs5246-project\data\nltk...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package webtext to
[nltk_data]     D:\my_files\gitRepo\cs5246-project\data\nltk...
[nltk_data]   Package webtext is already up-to-date!
[nltk_data] Downloading package reuters to
[nltk_data]     D:\my_files\gitRepo\cs5246-project\data\nltk...
[nltk_data]   Package reuters is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     D:\my_files\gitRepo\cs5246-project\data\nltk...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package punk

In [51]:
# TASK 1: Simplify the language or vocabulary

FREQ_FILE = r"D:\my_files\gitRepo\cs5246-project\data\word frequency list 60000 English.xlsx"


class VocabularySimplifier():
    def __init__(self):
        self.nlp = spacy.load("en_core_web_sm")
        self.freq_dict = self.__load_freq_file(FREQ_FILE)

    def __load_freq_file(self, file_path: str) -> dict[(str, str), int]:
        freq_dict = {}
        df = pd.read_excel(file_path)
        for index, row in df.iterrows():
            freq_dict[(row[" word"].strip(), row["PoS"].strip())] = int(row["TOTAL"])
        return freq_dict

    def __parse_pos(self, doc: str) -> Doc:
        tokens = self.nlp(doc)
        return tokens

    def __is_complex(self, token: Token) -> bool:
        replace_pos = ("ADJ", "ADV", "NOUN", "VERB")
        if token.pos_ not in replace_pos:
            return False

        lemma = token.lemma_
        pos = utils.POSConverter().decode(token.pos_, POSConverter.SPACY_FORMAT).encode(POSConverter.COCA_FORMAT)
        freq_word = self.freq_dict.get((lemma, pos), 0)
        print(token.text, freq_word)
        if freq_word > 3000 or freq_word == 0:
            return False
        return True

    def __replace_with_synonym(self, token: Token) -> str:
        pos = POSConverter().decode(token.pos_, POSConverter.SPACY_FORMAT).encode(POSConverter.WORDNET_FORMAT)
        synonyms = wn.synsets(token.lemma_, pos=pos)
        if len(synonyms) == 0:
            return token.text

        pos = POSConverter().decode(token.pos_, POSConverter.SPACY_FORMAT).encode(POSConverter.COCA_FORMAT)
        syns = [lemma.name() for syn in synonyms for lemma in syn.lemmas()]
        print(token.text, syns)
        new_word = max(syns, key=lambda x: self.freq_dict.get((x, pos), 0))
        new_word = getInflection(new_word, tag=token.tag_)
        return new_word[0]

    def simplify(self, text) -> str:
        tokens: Doc = self.__parse_pos(text)
        simplyfied_tokens = []
        for token in tokens:
            print(token.text, token.pos_)
            if self.__is_complex(token):
                simplyfied_tokens.append(self.__replace_with_synonym(token))
            else:
                simplyfied_tokens.append(token.text)
        return " ".join(simplyfied_tokens)

    def test(self, text):
        # ret = self.__is_complex((text, pos))
        # print(ret)
        tokens = self.__parse_pos(text)
        simplyfied_tokens = []
        for token in tokens:
            if self.__is_complex(token):
                simplyfied_tokens.append(self.__replace_with_synonym(token))
            else:
                simplyfied_tokens.append(token.text)
        return " ".join(simplyfied_tokens)


simplifier = VocabularySimplifier()
original_text = """
Trump Escalates Trade Rift: Quadruple Tariff Impositions Spark Global Economic Turbulence
The Trump administration has unilaterally  imposed a 25% tariff on all Mexican and Canadian imports, coupled with an additional 10% levy on Chinese goods, effective March 4. This protectionist maneuver risks exacerbating inflationary pressures on U.S. consumers already grappling with skyrocketing living costs.
The announcement triggered immediate volatility across global indices, with the Dow Jones plunging 3.2% intraday. Analysts warn of potential retaliatory measures from affected nations, which could destabilize supply chains in sectors like ceramics, steel, and synthetic narcotics.
Canada responded by launching "Operation Blizzard", a cross-border initiative targeting contraband such as fentanyl—a potent opioid linked to the U.S. overdose crisis. However, Trump dismissed these efforts as "insufficient", hinting at reciprocal tariffs** to offset perceived trade disparities.
A contentious 15% reciprocal tariff regime, slated for April 2, aims to counterbalance nations imposing exorbitant duties on U.S. exports. Economists caution this could precipitate a full-blown trade war, jeopardizing industries reliant on trans-Pacific partnerships.
"""
simplified_text = simplifier.simplify(original_text)
print(f"original text: \n{original_text}\n\n\nsimplify text: \n{simplified_text}")



 SPACE
Trump PROPN
Escalates PROPN
Trade PROPN
Rift PROPN
: PUNCT
Quadruple PROPN
Tariff PROPN
Impositions PROPN
Spark PROPN
Global PROPN
Economic PROPN
Turbulence PROPN

 SPACE
The DET
Trump PROPN
administration NOUN
administration 68066
has AUX
unilaterally ADV
unilaterally 1035
unilaterally ['unilaterally', 'one-sidedly']
  SPACE
imposed VERB
imposed 15945
a DET
25 NUM
% NOUN
% 0
tariff NOUN
tariff 2762
tariff ['duty', 'tariff']
on ADP
all DET
Mexican ADJ
Mexican 16700
and CCONJ
Canadian ADJ
Canadian 13595
imports NOUN
imports 7286
, PUNCT
coupled VERB
coupled 4738
with ADP
an DET
additional ADJ
additional 32280
10 NUM
% NOUN
% 0
levy NOUN
levy 0
on ADP
Chinese ADJ
Chinese 35222
goods NOUN
goods 18073
, PUNCT
effective ADJ
effective 38704
March PROPN
4 NUM
. PUNCT
This DET
protectionist ADJ
protectionist 532
maneuver NOUN
maneuver 2605
maneuver ['maneuver', 'manoeuvre', 'simulated_military_operation', 'tactic', 'tactics', 'maneuver', 'manoeuvre', 'maneuver', 'manoeuvre', 'play', 'm

In [52]:
# TASK 2: Simplify and/or shorten sentences
class SentenceSimplifier():
    def __init__(self):
        pass

    def dependency_tree_simplify(self, text: str) -> str:
        nlp = spacy.load("en_core_web_sm")
        doc = nlp(text)

        core_tokens = []
        for token in doc:
            print(token.text, token.dep_)
            if token.dep_ in {"ROOT", "nsubj", "nsubjpass", "dobj", "dative", "attr", "cop", "oprd", "ccomp", "xcomp"}:
                core_tokens.append(token.text)
        return " ".join(core_tokens)

    def pos_tagging_simplify(self, text: str) -> str:
        nlp = spacy.load("en_core_web_sm")
        doc = nlp(text)

        allowed_pos = {"NOUN", "NUM", "VERB", "PART", "CONJ", "CCONJ"}
        simplified = [token.text for token in doc if token.pos_ in allowed_pos]
        return " ".join(simplified)

    def nltk_syntax_simplify(self, text: str) -> str:
        # 分词和词性标注
        tokens = word_tokenize(text)
        tagged = pos_tag(tokens)

        grammar = r"""
            NP: {<DT>?<JJ>*<NN.*>+}   # none phrase ("the quick brown fox")
            VP: {<VB.*><NP|PP>*}      # verb phrase ("jumps over the dog")
        """
        parser = RegexpParser(grammar)
        tree = parser.parse(tagged)

        simplified = []
        for subtree in tree.subtrees():
            if subtree.label() in ["NP", "VP"]:
                simplified.append(" ".join(word for word, pos in subtree.leaves()))
        return " ".join(simplified)

    def evaluate(self, original_text: str, simplified_text: str):
        original_tokens = word_tokenize(original_text)
        simplified_tokens = word_tokenize(simplified_text)
        bleu = sentence_bleu(original_tokens, simplified_tokens, weights=(0.5, 0.5))

        scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'])
        scores = scorer.score(original_text, simplified_text)
        rouge_1 = scores['rouge1'].fmeasure
        rouge_2 = scores['rouge2'].fmeasure
        rouge_l = scores['rougeL'].fmeasure

        meteor = meteor_score([original_tokens], simplified_tokens)

        print("EVALUATION:")
        print(f"bleu:    {bleu}")
        print(f"rouge_1: {rouge_1}")
        print(f"rouge_2: {rouge_2}")
        print(f"rouge_l: {rouge_l}")
        print(f"meteor:  {meteor}")


text = "A small, private jet has crashed into a house in Maryland's Montgomery County on Monday, killing at least three people on board, authorities said."
sentence_simplifier = SentenceSimplifier()
simplify_text1 = sentence_simplifier.dependency_tree_simplify(text)
simplify_text2 = sentence_simplifier.pos_tagging_simplify(text)
simplify_text3 = sentence_simplifier.nltk_syntax_simplify(text)
print("original text: \n", text, "\n\n")
print("simplify text1: \n", simplify_text1, "\n")
print("simplify text2: \n", simplify_text2, "\n")
print("simplify text3: \n", simplify_text3, "\n")
print("\n\n\n")
print("simplify text1: ")
sentence_simplifier.evaluate(text, simplify_text1)
print("simplify text2: ")
sentence_simplifier.evaluate(text, simplify_text2)
print("simplify text3: ")
sentence_simplifier.evaluate(text, simplify_text3)


A det
small amod
, punct
private amod
jet nsubj
has aux
crashed ccomp
into prep
a det
house pobj
in prep
Maryland poss
's case
Montgomery compound
County pobj
on prep
Monday pobj
, punct
killing advcl
at advmod
least advmod
three nummod
people dobj
on prep
board pobj
, punct
authorities nsubj
said ROOT
. punct
original text: 
 A small, private jet has crashed into a house in Maryland's Montgomery County on Monday, killing at least three people on board, authorities said. 


simplify text1: 
 jet crashed people authorities said 

simplify text2: 
 jet crashed house 's killing three people board authorities said 

simplify text3: 
 private jet has crashed a house Maryland Montgomery County Monday killing people board authorities said 





simplify text1: 
EVALUATION:
bleu:    0
rouge_1: 0.33333333333333337
rouge_2: 0.07142857142857142
rouge_l: 0.33333333333333337
meteor:  0.13984962406015036
simplify text2: 
EVALUATION:
bleu:    0
rouge_1: 0.5714285714285715
rouge_2: 0.18181818181818182

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [53]:
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp(text)
displacy.serve(doc, style="dep")




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [54]:
# TASK 3: Summarize articles by focusing on the core message
# Extractive & Abstractive

class ArticleSummarizer():
    def __init__(self):
        pass

    def kmeans_summarize(self, text: str):
        num_sentences = 6
        CACHE_FOLDER = r"D:\my_files\gitRepo\cs5246-project\data\sentence_transformer"

        sentences = nltk.sent_tokenize(text)

        model = SentenceTransformer('all-MiniLM-L6-v2', cache_folder=CACHE_FOLDER)
        embeddings = model.encode(sentences)

        n_clusters = min(num_sentences, len(sentences))
        kmeans = KMeans(n_clusters=n_clusters).fit(embeddings)

        selected_indices = []
        for cluster_id in range(n_clusters):
            cluster_center = kmeans.cluster_centers_[cluster_id]
            distances = np.linalg.norm(embeddings - cluster_center, axis=1)
            selected_indices.append(np.argmin(distances))

        return " ".join([sentences[idx] for idx in sorted(selected_indices)])

    def text_rank_summarize(self, text):
        num_sentences = 6
        sentences = sent_tokenize(text)
        clean_sentences = [s.lower().replace('[^a-zA-Z]', '') for s in sentences]

        vectorizer = TfidfVectorizer(stop_words='english')
        X = vectorizer.fit_transform(clean_sentences)
        sim_matrix = cosine_similarity(X)

        nx_graph = nx.from_numpy_array(sim_matrix)
        scores = nx.pagerank(nx_graph)

        ranked = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)
        return " ".join([s for _,s in ranked[:num_sentences]])

    def lda_summarize(self, text):
        num_sentences = 6
        sentences = nltk.sent_tokenize(text)
        stop_words = set(stopwords.words('english'))

        texts = [[word for word in re.findall(r'\w+', sent.lower())
                 if word not in stop_words] for sent in sentences]

        dictionary = corpora.Dictionary(texts)
        corpus = [dictionary.doc2bow(text) for text in texts]

        lda = models.LdaModel(corpus, num_topics=6, id2word=dictionary)

        sentence_scores = []
        for i, sent in enumerate(sentences):
            bow = dictionary.doc2bow(re.findall(r'\w+', sent.lower()))
            topic_dist = lda.get_document_topics(bow)
            score = max([prob for _, prob in topic_dist], default=0)
            sentence_scores.append((score, i))

        top_indices = sorted(sentence_scores, reverse=True)[:num_sentences]
        return " ".join([sentences[idx] for _, idx in sorted(top_indices, key=lambda x: x[1])])

    def evaluate(self, original_text, simplified_text):
        original_tokens = word_tokenize(original_text)
        simplified_tokens = word_tokenize(simplified_text)
        bleu = sentence_bleu(original_tokens, simplified_tokens, weights=(0.5, 0.5))

        scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'])
        scores = scorer.score(original_text, simplified_text)
        rouge_1 = scores['rouge1'].fmeasure
        rouge_2 = scores['rouge2'].fmeasure
        rouge_l = scores['rougeL'].fmeasure

        meteor = meteor_score([original_tokens], simplified_tokens)

        P, R, F1 = score([original_text], [simplified_text], lang="en")

        print("EVALUATION:")
        print(f"bleu:    {bleu}")
        print(f"rouge_1: {rouge_1}")
        print(f"rouge_2: {rouge_2}")
        print(f"rouge_l: {rouge_l}")
        print(f"meteor:  {meteor}")
        print(f"BERTScore F1: {F1.mean().item():.3f}")

article_summarizer = ArticleSummarizer()
text = get_data(1)["articles"]
benchmark = get_data(1)["summary"]
simplify_text1 = article_summarizer.kmeans_summarize(text)
simplify_text2 = article_summarizer.text_rank_summarize(text)
simplify_text3 = article_summarizer.lda_summarize(text)
print("original text: \n", text, "\n\n")
print("simplified text1: \n", simplify_text1, "\n\n")
print("simplified text2: \n", simplify_text2, "\n\n")
print("simplified text3: \n", simplify_text3, "\n\n")
print("benchmark: \n", benchmark, "\n\n")
print("\n\n\n")
print("simplify text1: ")
article_summarizer.evaluate(benchmark, simplify_text1)
print("simplify text2: ")
article_summarizer.evaluate(benchmark, simplify_text2)
print("simplify text3: ")
article_summarizer.evaluate(benchmark, simplify_text3)



The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


original text: 
 LOS ANGELES (AP) — In her first interview since the NBA banned her estranged husband, Shelly Sterling says she will fight to keep her share of the Los Angeles Clippers and plans one day to divorce Donald Sterling. 
 
 (Click Prev or Next to continue viewing images.) 
 
 ADVERTISEMENT (Click Prev or Next to continue viewing images.) 
 
 Los Angeles Clippers co-owner Shelly Sterling, below, watches the Clippers play the Oklahoma City Thunder along with her attorney, Pierce O'Donnell, in the first half of Game 3 of the Western Conference... (Associated Press) 
 
 Shelly Sterling spoke to Barbara Walters, and ABC News posted a short story with excerpts from the conversation Sunday. 
 
 NBA Commissioner Adam Silver has banned Donald Sterling for making racist comments and urged owners to force Sterling to sell the team. Silver added that no decisions had been made about the rest of Sterling's family. 
 
 According to ABC's story, Shelly Sterling told Walters: "I will fight 

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


EVALUATION:
bleu:    2.741740581068161e-155
rouge_1: 0.5358851674641149
rouge_2: 0.22222222222222224
rouge_l: 0.29665071770334933
meteor:  0.42428712393330403
BERTScore F1: 0.878
simplify text2: 


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


EVALUATION:
bleu:    1.937885166182978e-155
rouge_1: 0.436241610738255
rouge_2: 0.19594594594594592
rouge_l: 0.24832214765100669
meteor:  0.422850520674946
BERTScore F1: 0.876
simplify text3: 


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


EVALUATION:
bleu:    2.1993446941558915e-155
rouge_1: 0.3508771929824561
rouge_2: 0.12941176470588234
rouge_l: 0.17543859649122806
meteor:  0.33168731865506057
BERTScore F1: 0.853


In [55]:
# TASK 4: Provide support (e.g., provide definitions, synonyms, translations for words)

FREQ_FILE = r"D:\my_files\gitRepo\cs5246-project\data\word frequency list 60000 English.xlsx"

class ArticleSupporter():
    def __init__(self):
        self.nlp = spacy.load("en_core_web_sm")
        self.freq_dict = self.__load_freq_file(FREQ_FILE)

    def __load_freq_file(self, file_path: str) -> dict[(str, str), int]:
        freq_dict = {}
        df = pd.read_excel(file_path)
        for index, row in df.iterrows():
            freq_dict[(row[" word"].strip(), row["PoS"].strip())] = int(row["TOTAL"])
        return freq_dict

    def __parse_pos(self, doc: str) -> Doc:
        tokens = self.nlp(doc)
        return tokens

    def __is_complex(self, token: Token) -> bool:
        replace_pos = ("ADJ", "ADV", "NOUN", "VERB")
        if token.pos_ not in replace_pos:
            return False

        lemma = token.lemma_
        pos = utils.POSConverter().decode(token.pos_, POSConverter.SPACY_FORMAT).encode(POSConverter.COCA_FORMAT)
        freq_word = self.freq_dict.get((lemma, pos), 0)
        if freq_word > 3000 or freq_word == 0:
            return False
        return True

    def __support_single(self, token: Token):
        pos = utils.POSConverter().decode(token.pos_, POSConverter.SPACY_FORMAT).encode(POSConverter.WORDNET_FORMAT)
        synsets = wn.synsets(token.lemma_, pos=pos)
        if len(synsets) == 0:
            return
        print(token.text, ":", token.pos_)
        for idx, synset in enumerate(synsets):
            pos = utils.POSConverter().decode(synset.pos(), POSConverter.WORDNET_FORMAT).encode(POSConverter.SPACY_FORMAT)
            if pos == token.pos_:
                print(f"Def {idx}: {synset.definition()}")
                print(f"synonyms: {[syn.name() for syn in synset.lemmas()]}")
        print()


    def support(self, text: str):
        doc = self.__parse_pos(text)
        for token in doc:
            if self.__is_complex(token):
                self.__support_single(token)

article_supporter = ArticleSupporter()
text = """
Suicide hotlines can provide free and confidential support 24/7. Here's what to expect when you make the call.
The National Suicide Prevention Lifeline (1-800-273-8255) is a toll-free hotline in the US for people in distress who feel like they are at risk of harming themselves. But what actually happens when you call? D3sign / Getty Images / Via gettyimages.com Suicide is complicated and sometimes hard to predict, but health experts say it can be preventable. That's why there are services like the National Suicide Prevention Lifeline, and the hope is that people will use them if they, or someone they know, are having a crisis. But for many people, there is still some mystery about what actually happens during these calls, and some misconceptions can keep people from picking up the phone. So we put together a step-by-step guide about what to expect when you call a suicide hotline. Keep in mind, however, that everyone who calls into a suicide hotline may have a slightly different conversation and experience. And there are also hundreds of different suicide and crisis-prevention hotlines and chat services. For the purposes of this post, we will focus on the National Suicide Prevention Lifeline and another line that you can text, called the Crisis Text Line.
First, the basics: Lifeline provides free, confidential support 24 hours a day, 7 days a week, for anyone of any age — including non-English speakers. Anyone can call the Lifeline, whether they are thinking about suicide or not, and get emotional support. There is no minimum age, and you can receive support at any time, even on holidays. As long as you have a phone, you can call the number and talk to someone. Lifeline is also available for non-English speakers and people who are deaf or hard of hearing. If you are a Spanish speaker, call the Spanish-language Lifeline at 1-888-628-9454. Si hablas español, llama a 1-888-628-9454. Lifeline ofrece 24/7, gratuito servicios en español. If you speak another language, call the main line and wait to be connected to a person at a local crisis center who can connect with a translator. According to Lifeline's website, the crisis centers work with a service that can translate calls in over 150 languages. People who are deaf or hard of hearing can reach Lifeline via TTY by dialing 1-800-799-4889 or use the Lifeline Live Chat service online.
"""
article_supporter.support(text)


confidential : ADJ
Def 0: entrusted with private information and the confidence of another
synonyms: ['confidential']
Def 1: (of information) given in confidence or in secret
synonyms: ['confidential', 'secret']
Def 2: denoting confidence or intimacy
synonyms: ['confidential']
Def 3: the level of official classification for documents next above restricted and below secret; available only to persons authorized to see documents so classified
synonyms: ['confidential']

preventable : ADJ
Def 0: capable of being prevented; - A.L.Guerard
synonyms: ['preventable']

misconceptions : NOUN
Def 0: an incorrect conception
synonyms: ['misconception']

confidential : ADJ
Def 0: entrusted with private information and the confidence of another
synonyms: ['confidential']
Def 1: (of information) given in confidence or in secret
synonyms: ['confidential', 'secret']
Def 2: denoting confidence or intimacy
synonyms: ['confidential']
Def 3: the level of official classification for documents next above restr

In [56]:
# TASK 5: News4Kids: visualize news (e.g., convert quotes to images with speech bubbles)
