In [18]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# Standard libraries
import re
import numpy as np
import pandas as pd
import networkx as nx

# Datasets and Models
from datasets import load_dataset, Dataset
from sentence_transformers import SentenceTransformer
from gensim import corpora, models
from bert_score import score

# Spacy
import spacy
from spacy.tokens import Doc, Token

# NLTK: core functionality, corpora, and evaluation metrics
import nltk
from nltk import data, pos_tag, word_tokenize, sent_tokenize
from nltk.corpus import wordnet as wn, stopwords
from nltk.chunk import RegexpParser
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score

# Other NLP tools and utilities
from lemminflect import getInflection
from rouge_score import rouge_scorer
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Custom utilities
from supporter.utils import POSConverter
from supporter.identifier import *
from supporter.embedder import *
from supporter.explainer import *


In [14]:
# Sample news article used as the input passed to `identifier.identify(text)`.
# NOTE: the identifier results report character offsets into this exact string
# (the leading newline counts as offset 0), so do not re-wrap, strip or
# otherwise edit the literal without re-running the cells that consume it.
text = """
US President Donald Trump has defended sweeping tariffs on imports that sent shockwaves through global stock markets, saying "sometimes you have to take medicine to fix something".

Speaking to reporters aboard Air Force One late on Sunday, he said jobs and investment would return to the US to make it "wealthy like never before".

Trump's top officials stressed that the tariffs - announced last week - would be implemented as planned, playing down recession fears.

Just hours after Trump's comments, stock markets plunged in Asia early on Monday, with Japan's Nikkei 225 dropping by 6.3%, and Hong Kong's Hang Seng losing 9.8%.

On Friday, all three major stock indexes in the US fell more than 5%, while the S&P 500 dropped almost 6% in the worst week for the US stock market since 2020.

Saudi Arabia's stock exchange - which trades on Sundays - ended nearly 7% lower, its biggest daily loss since the pandemic, state-owned media said.

US banking giant JP Morgan has predicted a 60% chance of a US and global recession following Trump's tariffs announcement.

Speaking aboard the presidential plane on a flight back to Washington DC, Trump said European and Asian countries were "dying to make a deal".

He also pushed back against a reporter's inquiry about American consumers' "pain threshold" as fears of steep price increases and a market recession grow.

"I think your question is so stupid," he told the reporter. "I don't want anything to go down. But sometimes you have to take medicine to fix something."

In a series of TV interviews earlier on Sunday, Trump's top officials also played down recent stock market falls.

Treasury Secretary Scott Bessent told NBC's Meet the Press programme that there was "no reason" to expect a recession as a result. "This is an adjustment process," he added.

Bessent also argued that Trump had "created maximum leverage for himself, and more than 50 countries have approached the administration about lowering their non-tariff trade barriers, lowering their tariffs, stopping currency manipulation".

Meanwhile, Commerce Secretary Howard Lutnick told CBS News that the 10% "baseline" tariff on all imports, which came into effect a day earlier, will definitely "stay in place for days and weeks".

Lutnick went on say the steeper reciprocal tariffs were still on track.

Higher custom tariffs on roughly 60 countries, dubbed the "worst offenders", are due to come into effect on Wednesday 9 April.

When asked about these tariffs, Lutnick said they were coming. "[Trump] announced it and he wasn't kidding," he said.

Lutnick also defended tariffs imposed on two tiny Antarctic islands populated only by penguins, saying it was to close "loopholes" for countries such as China to "ship through".

Elsewhere, Indonesia and Taiwan have said over the weekend that they will not impose retaliatory tariffs after the US announced a 32% levy on imports from both countries.

Vietnam's leader, To Lam, has asked Trump to delay a 46% duty on Vietnamese exports to the US by "at least 45 days", according to a letter seen by news agency AFP and the New York Times.

However, China announced on Friday that it will impose a 34% tariff on all US imports, beginning on Thursday 10 April.

UK Prime Minister Sir Keir Starmer warned on Saturday that "the world as we knew it has gone".

Starmer said the UK government would keep pushing for an economic deal with the US that avoided some of the tariffs.

A Downing Street spokesman added Starmer and new Canadian Prime Minister Mark Carney agreed in a phone call that "an all-out trade war is in no-one's interest".

On Monday, Israeli Prime Minister Benjamin Netanyahu is expected to meet Trump for trade talks in Washington DC.

Netanyahu, speaking to reporters as he boarded a plane-bound for the US, said he would be "the first international leader that is meeting with Trump" since the new tariffs were introduced.

He said this showed their "personal connection and the connection between our countries that is so essential in this time".

Anti-Trump protests were held in cities across the US over the weekend, in the largest nationwide show of opposition since the president took office in January.

Hundreds of thousands of people turned out in Boston, Chicago, Los Angeles, New York and Washington DC, among other cities, with protesters citing grievances with Trump's agenda ranging from social to economic issues.
"""

In [15]:
# Run the identifier over `text` and report only the results classified as
# HARD: first the full result object, then the lemma of its token.
identifier = PersonalizeIdentifier()
results = identifier.identify(text)
for res in results:
    # Skip everything that is not flagged HARD.
    if res.clazz != IdentifyResultClazz.HARD:
        continue
    print(res)
    print(res.token.lemma_)


IdentifyResult(start_inclusive=40, end_exclusive=48, text='sweeping', clazz=<IdentifyResultClazz.HARD: 'hard'>, hard_level=1.0, token=sweeping, label=None)
sweeping
IdentifyResult(start_inclusive=78, end_exclusive=88, text='shockwaves', clazz=<IdentifyResultClazz.HARD: 'hard'>, hard_level=1.0, token=shockwaves, label=None)
shockwave
IdentifyResult(start_inclusive=576, end_exclusive=584, text='dropping', clazz=<IdentifyResultClazz.HARD: 'hard'>, hard_level=1.0, token=dropping, label=None)
dropping
IdentifyResult(start_inclusive=909, end_exclusive=917, text='pandemic', clazz=<IdentifyResultClazz.HARD: 'hard'>, hard_level=1.0, token=pandemic, label=None)
pandemic
IdentifyResult(start_inclusive=947, end_exclusive=954, text='banking', clazz=<IdentifyResultClazz.HARD: 'hard'>, hard_level=1.0, token=banking, label=None)
banking
IdentifyResult(start_inclusive=1088, end_exclusive=1100, text='presidential', clazz=<IdentifyResultClazz.HARD: 'hard'>, hard_level=1.0, token=presidential, label=None)

In [28]:
# Register the project-local NLTK data directory (presumably where the WordNet
# corpus lives - confirm). The membership check keeps the cell idempotent:
# re-running it no longer appends the same entry to NLTK's search path again.
if './resources/data' not in data.path:
    data.path.append('./resources/data')

# Print every WordNet synset for "sweeping" as "<synset name> : <definition>".
synsets = wn.synsets('sweeping')
for synset in synsets:
    print(f"{synset.name()} : {synset.definition()}")

sweeping.n.01 : the act of cleaning with a broom
brush.v.04 : sweep across or over
sweep.v.02 : move with sweeping, effortless, gliding motions
sweep.v.03 : sweep with a broom or as if with a broom
embroil.v.01 : force into some kind of situation, condition, or course of action
cross.v.05 : to cover or extend over an area or time period; ,
sweep.v.06 : clean by sweeping
sweep.v.07 : win an overwhelming victory in or on
sweep.v.08 : cover the entire range of
swing.v.05 : make a big sweeping gesture or movement
sweeping.s.01 : taking in or moving over (or as if over) a wide area; often used in combination
sweeping.s.02 : ignoring distinctions


In [2]:
import spacy
import nltk
from nltk import data

# Single definition of the project-local NLTK data directory, used both as a
# search-path entry and as the download target below.
NLTK_DATA_DIR = './resources/data'

# Only register the directory once so re-running the cell does not keep
# appending duplicate entries to NLTK's search path.
if NLTK_DATA_DIR not in data.path:
    data.path.append(NLTK_DATA_DIR)

# Resources required by word_tokenize (punkt_tab) and pos_tag (perceptron tagger).
nltk.download("punkt_tab", download_dir=NLTK_DATA_DIR)
nltk.download('averaged_perceptron_tagger_eng', download_dir=NLTK_DATA_DIR)

test_text = "Saudi Arabia's stock exchange - which trades on Sundays - ended nearly 7% lower, its biggest daily loss since the pandemic, state-owned media said."
# Alternative spaCy-based tagging, currently disabled:
# nlp = spacy.load("en_core_web_sm")
# doc = nlp(test_text)
# for token in doc:
#     print(token.text, token.pos_, token.dep_)

# Tokenise the sentence, tag each token with NLTK's POS tagger, and print the
# resulting list of (token, tag) pairs.
token = nltk.word_tokenize(test_text)
pos = nltk.pos_tag(token)
print(pos)


[nltk_data] Downloading package punkt_tab to ./resources/data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     ./resources/data...


[('Saudi', 'NNP'), ('Arabia', 'NNP'), ("'s", 'POS'), ('stock', 'NN'), ('exchange', 'NN'), ('-', ':'), ('which', 'WDT'), ('trades', 'VBZ'), ('on', 'IN'), ('Sundays', 'NNP'), ('-', ':'), ('ended', 'VBD'), ('nearly', 'RB'), ('7', 'CD'), ('%', 'NN'), ('lower', 'JJR'), (',', ','), ('its', 'PRP$'), ('biggest', 'JJS'), ('daily', 'JJ'), ('loss', 'NN'), ('since', 'IN'), ('the', 'DT'), ('pandemic', 'JJ'), (',', ','), ('state-owned', 'JJ'), ('media', 'NNS'), ('said', 'VBD'), ('.', '.')]


[nltk_data]   Unzipping taggers\averaged_perceptron_tagger_eng.zip.
