# An introduction to `relatio`
**Runtime $\sim$ 20min**

Original paper: ["Text Semantics Capture Political and Economic Narratives"](https://arxiv.org/abs/2108.01720)

----------------------------

This is a short demo of the package `relatio`. It takes as input a text corpus and outputs a list of narrative statements. The pipeline is unsupervised: the user does not need to specify narratives beforehand. Narrative statements are defined as tuples of semantic roles with an (agent, verb, patient) structure.

Here, we present the main functions to quickly obtain narrative statements from a corpus.

----------------------------

In this tutorial, we work with the Trump Tweet Archive corpus.

----------------------------

In [1]:
# Send messages at WARNING level to relatio's file logger for an easy ride.
# NOTE(review): the recorded output of this cell is a PermissionError on
# 'C:\\relatio.log' -- presumably the log file is created in the current
# working directory, so start the notebook from a writable folder (TODO confirm).
from relatio import FileLogger

logger = FileLogger(level="WARNING")

  from .autonotebook import tqdm as notebook_tqdm


PermissionError: [Errno 13] Permission denied: 'C:\\relatio.log'

In [None]:
# Fetch the raw version of the Trump Tweet Archive dataset through relatio's loader.
from relatio import load_data

df = load_data(dataset="trump_tweet_archive", content="raw")

In [None]:
from relatio import Preprocessor

# Text-cleaning configuration; the same `p` object is reused below for
# sentence splitting, SVO extraction, role post-processing and entity mining.
p = Preprocessor(
    spacy_model="en_core_web_sm",
    remove_punctuation=True,
    remove_digits=True,
    lowercase=True,
    lemmatize=True,
    # Single characters passed as `remove_chars`; the last entry is the
    # typographic apostrophe, distinct from the ASCII one earlier in the list.
    remove_chars=[
        "\"", "-", "^", ".", "?", "!", ";", "(", ")", ",", ":", "'", "+", "&", "|", "/",
        "{", "}", "~", "_", "`", "[", "]", ">", "<", "=", "*", "%", "$", "@", "#", "’",
    ],
    stop_words=[],
    n_process=-1,
    batch_size=100,
)

In [None]:
# Split the corpus into sentences; `df` is rebound to the result, and the
# later cells read its 'sentence' column.
df = p.split_into_sentences(df, output_path=None, progress_bar=True)

In [None]:
from relatio import SRL, extract_roles

# Keep the labeller instance under its own lowercase name so the imported
# `SRL` class is not overwritten by an instance of itself.
srl = SRL(
    path="https://storage.googleapis.com/allennlp-public-models/openie-model.2020.03.26.tar.gz",
    batch_size=10,
    cuda_device=-1,
)

# Only the first 1000 sentences are annotated here.
srl_res = srl(df["sentence"][0:1000], progress_bar=True)

# Turn the raw SRL output into role dictionaries, keeping only the listed
# role labels; `sentence_index` is returned alongside the roles.
roles, sentence_index = extract_roles(
    srl_res,
    used_roles=["ARG0", "B-V", "B-ARGM-NEG", "B-ARGM-MOD", "ARG1", "ARG2"],
    only_triplets=True,
    progress_bar=True,
)

# Preview the first 20 extracted roles.
for d in roles[0:20]:
    print(d)

In [None]:
# Extract SVOs with the preprocessor over every row of df['sentence'].
# This rebinds `sentence_index` and `roles`, so the cells below operate on
# this output rather than on the SRL-based roles computed earlier.
sentence_index, roles = p.extract_svos(
    df["sentence"],
    expand_nouns=True,
    only_triplets=False,
    progress_bar=True,
)

# Preview the first 20 entries.
for svo in roles[0:20]:
    print(svo)

In [None]:
# Post-process the extracted roles; `output_path` names the JSON file that a
# later cell reads back with `load_roles`.
postproc_roles = p.process_roles(
    roles,
    max_length=50,
    progress_bar=True,
    output_path="./output/postproc_roles.json",
)

In [None]:
# Preview the first 20 post-processed roles.
for d in postproc_roles[0:20]:
    print(d)

In [None]:
# Read the roles back from the JSON path that was given to `process_roles`.
from relatio.utils import load_roles

postproc_roles = load_roles("./output/postproc_roles.json")

In [None]:
# Mine entities from all sentences; `output_path` names the pickle file that
# a later cell reads back with `load_entities`.
known_entities = p.mine_entities(
    df["sentence"],
    clean_entities=True,
    progress_bar=True,
    output_path="./output/entities.pkl",
)

# Show the ten most common entries.
for n in known_entities.most_common(10):
    print(n)

In [None]:
from relatio.utils import load_entities

known_entities = load_entities("./output/entities.pkl")

# Take the first element of each of the 100 most common entries, skipping
# the empty string.
top_known_entities = [
    entry[0] for entry in known_entities.most_common(100) if entry[0] != ""
]

In [None]:
from relatio.narrative_models import NarrativeModel

# Configure the narrative model: k-means clustering with the PCA and UMAP
# options switched on, and the top known entities passed in for the
# ARG0 / ARG1 roles.
m = NarrativeModel(
    clustering="kmeans",
    PCA=True,
    UMAP=True,
    roles_considered=["ARG0", "B-V", "B-ARGM-NEG", "ARG1"],
    roles_with_known_entities=["ARG0", "ARG1"],
    known_entities=top_known_entities,
    assignment_to_known_entities="embeddings",
    roles_with_unknown_entities=["ARG0", "ARG1"],
    threshold=0.1,
)

# Fit the model on the post-processed roles.
m.fit(postproc_roles, progress_bar=True)

In [None]:
# Plot the 'inertia' selection metric of the fitted model.
m.plot_selection_metric(metric="inertia")

In [None]:
# Plot the clusters, passing a PDF path under ./output.
m.plot_clusters(path="./output/clusters.pdf")

In [None]:
# Export the clusters, passing a text-file path under ./output.
m.clusters_to_txt(path="./output/clusters.txt")

In [None]:
# Run the fitted model on the same post-processed roles it was fitted on.
narratives = m.predict(postproc_roles, progress_bar=True)

In [None]:
from relatio.utils import prettify

# Apply `prettify` to every predicted narrative.
pretty_narratives = [prettify(n) for n in narratives]

# Print the raw role, the post-processed role and the prettified narrative
# for the first ten statements. Slicing plus zip stops at the shortest
# sequence, so a corpus that yields fewer than ten statements does not raise
# an IndexError.
for raw_role, clean_role, pretty in zip(
    roles[:10], postproc_roles[:10], pretty_narratives[:10]
):
    print(raw_role)
    print(clean_role)
    print(pretty)

In [None]:
from relatio import build_graph, draw_graph

# Build a graph from the predicted narratives (top_n=100, pruning enabled).
G = build_graph(
    narratives,
    top_n=100,
    prune_network=True,
)

# Draw the graph in notebook mode, passing an HTML output filename under ./output.
draw_graph(
    G,
    notebook=True,
    show_buttons=False,
    width="1600px",
    height="1000px",
    output_filename="./output/network_of_narratives.html",
)

In [None]:
import pickle as pk

# Serialise the fitted narrative model to disk; the `pk` alias is also used
# by the loading cell that follows.
with open("./output/narrative_model.pkl", "wb") as f:
    pk.dump(m, f)

In [None]:
# Restore the narrative model from the pickle file (uses the `pk` alias
# imported in the saving cell).
# NOTE: unpickling can execute arbitrary code -- only load pickle files that
# you created yourself or otherwise trust.
with open("./output/narrative_model.pkl", "rb") as f:
    m = pk.load(f)