In [1]:
import pandas as pd
from allennlp.predictors.predictor import Predictor
from datetime import datetime as dt
from dateutil.relativedelta import *
from tqdm import tqdm
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import os
import allennlp_models.tagging
import spacy
import re


In [2]:
# Load in the coreference resolution tool/object
predictor = Predictor.from_path('https://storage.googleapis.com/allennlp-public-models/coref-spanbert-large-2021.03.10.tar.gz')
nlp = spacy.load('en_core_web_sm')

error loading _jsonnet (this is expected on Windows), treating C:\Users\Joshua\AppData\Local\Temp\tmphj8yn63h\config.json as plain json
Some weights of BertModel were not initialized from the model checkpoint at SpanBERT/spanbert-large-cased and are newly initialized: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
## SOME DOCUMENTATION
# https://chairnerd.seatgeek.com/fuzzywuzzy-fuzzy-string-matching-in-python/
# https://github.com/seatgeek/fuzzywuzzy
def leaderAliasGenerator(leader_observation):
    row = leader_observation
    full_name_tuple = (row.econ_style_first, row.econ_style_last)
    full_name = str.title(' '.join(full_name_tuple))

    title_last_pref_tuple = (row.hos_title, row.econ_style_last)
    title_last_pref = str.title(' '.join(title_last_pref_tuple))


    
    # Adding gendered honorifics
    if row.gender == 1:
        hon_list = []
        hon = "Mr"
        hon_last = str.title(' '.join([hon, row.econ_style_last]))
        hon_list.append(hon_last)
    else:
        hon_1 = "Ms"
        hon_last_1 = str.title(' '.join([hon_1, row.econ_style_last]))
        hon_2 = "Miss"
        hon_last_2 = str.title(' '.join([hon_2, row.econ_style_last]))
        hon_3 = "Mrs"
        hon_last_3 = str.title(' '.join([hon_3, row.econ_style_last]))


    choices = [full_name, title_last_pref] + hon_list


    #Adding other/type of honorific if necessary/available
    if type(row.hos_title_other) != float:
        title_last_alt_tuple = (row.hos_title_other, row.econ_style_last)
        title_last_alt = str.title(' '.join(title_last_alt_tuple))
        
        choices.insert(2, title_last_alt)

    # Adding Economist-style alias if necessary/available
    if type(row.econ_style_alias) != float:
        alias = row.econ_style_alias
        econ_alias = str.title(alias)

        choices.insert(2, econ_alias)

    return choices


In [4]:
def fuzzy_leader_search(plain_text, choices): 
    honorifics = ["Mr", "Ms", "Miss", "Mrs"]
    topSetMatch = process.extract(plain_text, choices, scorer=fuzz.token_set_ratio)

    for alias in topSetMatch:
        # IF WE ARE CONSIDERING Mr/Ms _____ AS THE CANDIDATE HONORIFIC RAISE THE THRESHOLD FOR MATCH TO 100 (BASICALLY PERFECT MATCH)
        if alias[0].split()[0] in honorifics:
            if alias[1] == 100:
                return True
            else:
                continue
        # WE ARE CONSIDERING A FULL NAME OR A TILE NAME Like President ____
        else:
            if alias[1] >= 90:
                return True

    return False 


In [5]:
def replaceCoref(name, clusters, cluster_interest_index, document, tokenized_df):
    name_pos = name + "\'s"
    name_pos = name_pos.split()
    name = name.split()

    base_cluster = clusters[cluster_interest_index]
    print(base_cluster)
    
    #Coreferences are replaced in reverse order so that we as we replace references that occur towards the end of the article we don't have to worry about how new string/list length is affected. That is, the indices identified in base_cluster are still valid.
    rev_cluster  = sorted(base_cluster, key = lambda x: x[0], reverse = True)
    new_doc_list = document

    df = tokenized_df
    
    # print("REPLACING COREFERENCES")
    # print(name_pos)
    # print(name)
    for index_pair in rev_cluster:
        start_index = index_pair[0]
        stop_index = index_pair[1]+1

        # Replacing one element (i.e. pronouns/possessives)
        if start_index == stop_index:
            if containsPossessive(df, start_index, stop_index) == True:
                new_doc_list[start_index:stop_index] = name_pos
            else:
                new_doc_list[start_index:stop_index] = name
            new_doc_list.pop(start_index+2)
        # Replacing two or more elements
        else:
            if containsPossessive(df, start_index, stop_index) == True:
                new_doc_list[start_index:stop_index] = name_pos
            else:
                new_doc_list[start_index:stop_index] = name

    print(new_doc_list)
    return new_doc_list

In [6]:
def containsPossessive(df, start_index, stop_index):
    possessive_tags = ['POS', 'PRP$']
    cluster_tags = df['tag'].iloc[start_index:stop_index]
    # .isdisjoint returns true if the two sets are disjoin (i.e. do not have any elements interesecting (i.e. does not contain a POS or PRP$)). So negate to return True if "NOT DISJOINT" (CONTAINS POS/PRP$)
    if not set(possessive_tags).isdisjoint(set(cluster_tags)):
        return True
    else:
        return False

In [7]:
def identifyMainCluster(clusters, name, document):
    clusters = clusters
    aliases = name


    for cluster in clusters:
        for index_pair in cluster:
            start_index = index_pair[0]
            stop_index = index_pair[1]+1
            cluster_as_string = ' '.join(document[start_index:stop_index])

            top_set_match = process.extract(cluster_as_string, aliases)
            fuzz_match_results = zip(*top_set_match)
            fuzz_match_res_unzipped = list(fuzz_match_results)
            scores = fuzz_match_res_unzipped[1]

            if max(scores) >= 90:
                cluster_index = clusters.index(cluster)
                # print(cluster_index)
                # print(top_set_match)
                return cluster_index

    # There may be instances in which the leader is mentioned but not in a fashion that is sufficiently important to be picked up as an entity by the model. These are usually in instances where the leader is only an object in the sentence and never becomes a subject. In those instances no replacement takes place.


In [8]:
def termLimitChecker(date, start, end):

    if start <= date <= end: 
        return "IN TERM"
    elif date < start:
        return "PRE TERM"
    else:
        return "POST TERM"

In [9]:
date_index_reference = 'date_index_reference.csv'
chunksize = 1000
date_format = '%d-%b-%y'


custom_date_parser = lambda x: dt.strptime(x, date_format)

## DETERMINING IF WE NEED TO CRAETE A DATE-INDEX

if not os.path.isfile(date_index_reference):
    print('dates need to be instantiated, proceeding to do that')
    chunks = pd.read_csv('formatted_compiled_articles.csv', chunksize=chunksize, parse_dates =['date'], date_parser=custom_date_parser)
    df = pd.concat(chunks)
    last_row = len(df)
    df = df['date']
    df = df.drop_duplicates(keep='first')

    date_index_df = pd.DataFrame([], columns = ['date_issue', 'issue_starting_row', 'issue_ending_row'])

    for index in tqdm(df.index.tolist()):
        date_index_df.loc[index, 'date_issue'] = df.loc[index]
        date_index_df.loc[index, 'issue_starting_row'] = index
    
    date_index_df.reset_index(drop=True, inplace=True)

    for index in date_index_df.index:
        try:
           date_index_df.loc[index, 'issue_ending_row'] = date_index_df.loc[index+1, 'issue_starting_row']
        except: 
            date_index_df.loc[index, 'issue_ending_row'] = last_row
    
    date_index_df['date_issue'] = pd.to_datetime(date_index_df['date_issue'])
    date_index_df.to_csv('date_index_reference.csv', index=False)
else:
    print('dates already indexed, carry on')
    date_index_df = pd.read_csv('date_index_reference.csv', parse_dates=['date_issue'])


dates already indexed, carry on


In [10]:
### READING IN LEADER TERM DATA
leader_term_name_df = pd.read_csv('all_leaders_econ_styling.csv', encoding='latin1')
leader_term_name_df = leader_term_name_df.drop_duplicates(subset='leadid')


leader_term_name_df['term_start'] = dt.now
leader_term_name_df['term_end'] =dt.now

# leader_term_name_df
for index in tqdm(leader_term_name_df.index.tolist()):
    leader_term_name_df.loc[index, 'term_start'] = dt(leader_term_name_df.loc[index, 'start_year'], leader_term_name_df.loc[index, 'start_month'], leader_term_name_df.loc[index, 'start_date'])
    try:
        leader_term_name_df.loc[index, 'term_end'] = dt(leader_term_name_df.loc[index, 'end_year'], leader_term_name_df.loc[index, 'end_month'], leader_term_name_df[index+1, 'start_date'])
    except:
        leader_term_name_df.loc[index, 'term_end'] = dt(leader_term_name_df.loc[index, 'end_year'], leader_term_name_df.loc[index, 'end_month'], 28)


100%|██████████| 560/560 [00:00<00:00, 823.67it/s] 


In [11]:
# MATCHING TERM-WINDOWS (term length +/- 6 months) WITH ECONOMIST ISSUE DATES
# We then pair this with the dat_index_reference.csv to figure out which chunks of formatted_compiled_articles.csv we should read
six_month_margin = relativedelta(months = 6)

leaders_windows_indices_df = leader_term_name_df

# Constructing the 6 month window around the term start
leaders_windows_indices_df = leaders_windows_indices_df.assign(leader_aprox_starts = lambda df: df.term_start - six_month_margin)
leaders_windows_indices_df = leaders_windows_indices_df.assign(leader_aprox_ends = lambda df: df.term_end + six_month_margin)


# Merge window-start and window-end dates with the "nearest" (FORWARD OR BACKWARD) Economist issue date.
# Read in that issue's starting row/index
leaders_windows_indices_df = leaders_windows_indices_df.sort_values(by=['leader_aprox_starts'])
leaders_windows_indices_df = pd.merge_asof(left=leaders_windows_indices_df, right=date_index_df, left_on='leader_aprox_starts', right_on='date_issue', direction='nearest')
leaders_windows_indices_df = leaders_windows_indices_df.drop(columns=['issue_ending_row'])
leaders_windows_indices_df = leaders_windows_indices_df.rename(columns={'date_issue' : 'date_start_issue'})


# Merge window-start and window-end dates with the "nearest" (FORWARD OR BACKWARD) Economist issue date.
# Read in that issue's ending row/index
leaders_windows_indices_df = leaders_windows_indices_df.sort_values(by=['leader_aprox_ends'])
leaders_windows_indices_df = pd.merge_asof(left=leaders_windows_indices_df, right=date_index_df, left_on='leader_aprox_ends', right_on='date_issue', direction='nearest')
leaders_windows_indices_df = leaders_windows_indices_df.drop(columns=['issue_starting_row_y'])
leaders_windows_indices_df = leaders_windows_indices_df.rename(columns={'date_issue': 'date_end_issue', 'issue_starting_row_x': 'issue_starting_row'})


# Generating number of rows that need to be read-in from the formatted_compiled_articles.csv file
leaders_windows_indices_df = leaders_windows_indices_df.assign(nrows = lambda df: df.issue_ending_row - df.issue_starting_row - 1)

# Add in titles and adjectives
adjectives_df = pd.read_csv('national_titles_adjectives.csv').drop(columns=['country'])
leaders_windows_indices_df = pd.merge(leaders_windows_indices_df, adjectives_df, how='left', on='ccode')


# leaders_windows_indices_df.head(1)

In [13]:
LEADER_BATCH_SIZE  = 2

# Check leaders we have already done
resolved_leaders_df = pd.read_csv('resolved_leader_tracker_temp.csv')

non_resolved_leaders_df = pd.merge(leaders_windows_indices_df, resolved_leaders_df, how='outer', on='leadid',indicator=True).query('_merge == "left_only"').drop(columns=['_merge'])


# leader_batch_sample = non_resolved_leaders_df.sample(n=LEADER_BATCH_SIZE)
# leader_batch_sample = non_resolved_leaders_df.loc[non_resolved_leaders_df.leadid in ["A2.9-8200", "A2.9-4234"]]
leader_batch_sample = non_resolved_leaders_df.loc[non_resolved_leaders_df.leadid == "A2.9-4231"]

# Run this loop for every leader in this batch
for index in leader_batch_sample.index.tolist():
    
    term_info = leader_batch_sample.loc[index, :]
    name = term_info.econ_style_last.replace('.','')
    name = name.replace(' ', '')
    leaderid = term_info.leadid.replace('.','')
    leader_gender_hon = term_info.gender
    start_at_0 = True if term_info.issue_starting_row==0 else False

    # These are the columns that we are going to get by default (make sure to title the new df with these incase we don't read row 0 of the .csv)
    col_names = ['date', 'link', 'text']

    # Read in only the subset of the massive formatted_compiled_articles that pertain to a 6-month window around this leader's term
    if start_at_0:
        chunks = pd.read_csv('formatted_compiled_articles.csv', chunksize=1000, parse_dates=['date'], date_parser=custom_date_parser, nrows=term_info.nrows)
        df = pd.concat(chunks)
    else:
        chunks = pd.read_csv('formatted_compiled_articles.csv', names = col_names, chunksize=1000, parse_dates=['date'], date_parser=custom_date_parser, skiprows=term_info.issue_starting_row +1, nrows=term_info.nrows)
        df = pd.concat(chunks)

    # Add some extra meta-data to this article-level observation
    df['ccode'] = term_info.ccode
    df['country'] = term_info.country
    df['resolved_text'] = ''
    df['pre_in_post_term'] = ''
    df['coreference_resolved_ind'] = False
    df[name] = False

    # Coerce some of the df[name] observations to True(using a fuzzy match) to identify the subset of all articles in this term +/-6 month window that actually mention the leader
    leader_alias_choices = leaderAliasGenerator(term_info)
    print(leader_alias_choices)
    for leader_dummy_idx in tqdm(df.index.tolist(), desc='FUZZY MATCH/SEARCH'):
        string_to_search = df.loc[leader_dummy_idx, 'text']
        df.loc[leader_dummy_idx, name] = fuzzy_leader_search(string_to_search, leader_alias_choices)

    df = df[df[name] == True]
    df = df.head(8)


    print("number of articles identified for " + name + ": " + str(len(df)))


    startTime = dt.now()
    # Do coreference resolution on that subset of articles that mention the leader
    for row_index in tqdm(df.index.tolist()):
        try:
            print('trying')
            pre_resolve_text = df.loc[row_index, 'text']
            pred_obj = predictor.predict(document = pre_resolve_text)

            spacy_df = pd.DataFrame([], columns = ['text', 'lemma', 'pos', 'tag', 'dep'])

            document_clusters = pred_obj['clusters']    
            document_list = pred_obj['document']
            spacy_doc = nlp(pre_resolve_text) 

            leader_name = leader_alias_choices[0]

            for token in spacy_doc:
                tok_observation = [token.text, token.lemma_, token.pos_, token.tag_, token.dep_]
                spacy_df.loc[len(spacy_df)] = tok_observation

            cluster_of_interest = identifyMainCluster(document_clusters, leader_alias_choices, document_list)
            print(leader_name)

            replaced = replaceCoref(leader_name, document_clusters, cluster_of_interest, document_list, spacy_df)

            df.loc[row_index, 'resolved_text'] = ' '.join(replaced)
            df.loc[row_index, 'coreference_resolved_ind'] = True
            print('successfully replaced coreferences')

        except Exception as e:
            print('excepting')
            print(e)
            df.loc[row_index, 'resolved_text'] = df.loc[row_index, 'text']
            continue

        df.loc[row_index, 'pre_in_post_term'] = termLimitChecker(df.loc[row_index, 'date'], term_info.term_start, term_info.term_end)
    
    
    df.to_csv('leader_resolved/' + name + '_resolved' + leaderid + '.csv', index=False)
    print('Time to coresolve 8 observations: {}'.format(dt.now() - startTime))
    resolved_leaders_df.loc[len(resolved_leaders_df)]=[term_info.leader_x, term_info.leadid, 'RESOLVED']

    resolved_leaders_df.to_csv('resolved_leader_tracker_temp.csv', index=False)

['Jose Maria Aznar', 'Prime Minister Aznar', 'Mr Aznar']


FUZZY MATCH/SEARCH: 100%|██████████| 38260/38260 [01:25<00:00, 447.97it/s]


number of articles identified for aznar: 8


  0%|          | 0/8 [00:00<?, ?it/s]

trying


 12%|█▎        | 1/8 [04:57<34:45, 297.90s/it]

Jose Maria Aznar
[[33, 34], [52, 54], [65, 65], [69, 76], [91, 91], [139, 140], [183, 185], [332, 333], [376, 378], [381, 381], [383, 383], [431, 433], [437, 437], [482, 483], [483, 483], [491, 491], [497, 498], [506, 506], [512, 512], [534, 534], [537, 537], [567, 567], [590, 590], [603, 604], [634, 645], [642, 642], [650, 650], [747, 749], [765, 765], [805, 806], [834, 834], [840, 841], [849, 849], [858, 858], [999, 1001], [1009, 1009], [1062, 1063], [1071, 1071], [1888, 1897], [1899, 1899], [2142, 2143], [2815, 2816], [2921, 2922], [2932, 2932], [3115, 3117], [3149, 3150], [3236, 3238], [3290, 3316], [3301, 3301], [3328, 3329], [3336, 3342], [3345, 3346], [3355, 3355], [3362, 3362], [3373, 3373], [3384, 3384], [3387, 3387], [3391, 3391], [3395, 3395], [3408, 3408], [3413, 3413], [3434, 3434], [3457, 3458], [3470, 3470], [3489, 3489], [3495, 3495], [3536, 3537], [3573, 3573], [3588, 3588], [3596, 3596], [3602, 3602], [3612, 3612], [3622, 3622], [3644, 3644], [3650, 3650], [3656, 3657

 25%|██▌       | 2/8 [05:47<15:10, 151.72s/it]

Jose Maria Aznar
[[0, 2], [26, 26], [40, 40], [47, 47], [78, 78], [110, 111], [115, 115], [118, 118], [142, 143], [149, 149], [164, 164], [177, 177], [187, 187], [220, 220], [227, 227], [241, 241], [272, 272], [281, 281], [285, 291], [293, 293], [307, 307], [314, 314], [358, 360], [396, 396], [405, 405], [416, 417], [480, 481], [494, 494], [550, 551], [659, 660], [666, 666], [670, 670], [673, 673], [693, 693], [707, 707], [738, 739], [743, 743], [764, 764], [769, 769]]
['Jose', 'Maria', 'Aznar', 'has', 'been', 'trying', 'hard', 'not', 'to', 'look', 'smug', ',', 'but', 'it', 'must', 'have', 'taken', 'an', 'effort', '.', 'At', 'last', 'month', "'s", 'congress', 'of', 'Jose', 'Maria', "Aznar's", 'centre', '-', 'right', 'People', "'s", 'Party', ',', '99.5', '%', 'of', 'those', 'attending', 'endorsed', 'Jose', 'Maria', 'Aznar', 'as', 'leader', '.', 'Opinion', 'polls', 'give', 'Jose', 'Maria', "Aznar's", 'PP', 'a', 'fairly', 'comfortable', 'six', '-', 'to', '-', 'ten', 'percentage', '-', 'po

 38%|███▊      | 3/8 [06:43<09:00, 108.11s/it]

Jose Maria Aznar
[[71, 73], [355, 356], [371, 371], [389, 408], [389, 409], [407, 407], [425, 425], [438, 438], [607, 608], [706, 708]]
['FELIPE', 'GONZALEZ', ',', 'Spain', "'s", 'struggling', 'prime', 'minister', ',', 'is', 'miffed', '.', 'With', 'less', 'than', 'a', 'month', 'before', 'a', 'general', 'election', ',', 'friends', 'who', 'have', 'sailed', 'along', 'with', 'the', 'Socialist', 'helmsman', 'for', 'the', 'past', '13', 'years', 'seem', 'to', 'be', 'abandoning', 'him', 'one', 'by', 'one', '.', '\n', 'First', 'to', 'dive', 'overboard', 'was', 'Emilio', 'Botin', ',', 'head', 'of', 'the', 'Bank', 'of', 'Santander', '(', 'Spain', "'s", 'biggest', ')', ',', 'who', 'says', 'he', 'is', 'backing', 'Jose', 'Maria', 'Aznar', 'and', 'the', 'conservative', 'opposition', 'People', "'s", 'Party', '(', 'PP', ')', '.', 'Next', ',', 'Jose', 'Maria', 'Cuevas', ',', 'head', 'of', 'the', 'employers', "'", 'association', 'and', 'another', 'erstwhile', 'ally', 'of', 'Mr', 'Gonzalez', ',', 'followe

 50%|█████     | 4/8 [07:32<05:38, 84.70s/it] 

Jose Maria Aznar
[[34, 45], [61, 63], [74, 74], [121, 122], [138, 138], [209, 210], [229, 230], [260, 261], [267, 267], [288, 288], [533, 534], [556, 556], [567, 568], [584, 585], [599, 599], [611, 612], [667, 668]]
['SINCE', 'Felipe', 'Gonzalez', "'s", 'Socialists', 'lost', 'their', 'outright', 'majority', 'in', 'parliament', 'three', 'years', 'ago', ',', 'the', 'Basques', 'and', 'Catalans', 'have', 'got', 'used', 'to', 'pulling', 'a', 'lot', 'of', 'strings', 'in', 'Madrid', '.', 'Now', ',', 'with', 'Jose', 'Maria', "Aznar's", 'looking', 'set', 'to', 'win', 'the', 'general', 'election', 'on', 'March', '3rd', ',', 'that', 'may', 'change', '.', 'Jose', 'Maria', "Aznar's", 'campaign', 'slogan', ',', '"', 'the', 'New', 'Majority', '"', ',', 'underlines', 'Jose', 'Maria', "Aznar's", 'eagerness', 'to', 'govern', 'alone', '.', 'That', 'would', 'mean', 'a', 'weaker', 'voice', 'for', 'Spain', "'s", 'regions', '.', '\n ', 'In', 'any', 'event', ',', 'Xabier', 'Arzallus', ',', 'the', 'brash', 'an

 62%|██████▎   | 5/8 [08:13<03:27, 69.14s/it]

Jose Maria Aznar
[[536, 552], [568, 568], [576, 576], [609, 611], [627, 627], [636, 637], [693, 694], [698, 698], [703, 703], [708, 708], [723, 723], [760, 760]]
['A', 'strong', 'democracy', ',', 'Spain', 'now', 'needs', 'a', 'reformed', 'economy', '\n', 'FOR', 'two', 'decades', 'Spain', 'has', 'been', 'one', 'of', 'Europe', "'s", 'stirring', 'success', 'stories', '.', 'The', 'transition', 'to', 'democratic', 'government', ',', 'after', 'the', 'death', 'in', '1975', 'of', 'dour', 'Generalissimo', 'Franco', ',', 'went', 'smoothly', 'enough', '.', 'Half', 'a', 'century', 'after', 'a', 'bloody', 'civil', 'war', 'and', '15', 'years', 'after', 'an', 'attempted', 'military', 'coup', ',', 'Spain', 'has', 'a', 'solid', 'democracy', 'and', 'an', 'economy', 'that', ',', 'with', 'vigorous', 'pruning', ',', 'could', 'be', 'sturdy', 'enough', 'to', 'meet', 'the', 'challenges', 'ahead', '.', 'For', 'this', ',', 'Felipe', 'Gonzalez', ',', 'reigning', 'Socialist', 'prime', 'minister', 'for', '13', 'ye

 75%|███████▌  | 6/8 [09:12<02:11, 65.54s/it]

Jose Maria Aznar
[[0, 10], [28, 28], [54, 54], [68, 68], [87, 88], [91, 91], [94, 94], [102, 102], [114, 114], [132, 132], [224, 225], [235, 235], [242, 242], [257, 257], [274, 274], [525, 526], [553, 553], [760, 761], [803, 804]]
['Jose', 'Maria', 'Aznar', 'may', 'be', 'hit', 'by', 'a', 'double', 'blow', 'on', 'March', '3rd', '.', 'Not', 'only', 'do', 'opinion', 'polls', 'suggest', 'Jose', 'Maria', 'Aznar', 'will', 'lose', 'a', 'general', 'election', 'on', 'that', 'day', 'to', 'the', 'conservative', 'People', "'s", 'Party', '(', 'rr', ')', ',', 'led', 'by', 'Jose', 'Maria', 'Aznar', ',', 'but', 'Jose', 'Maria', "Aznar's", 'Socialists', 'may', 'even', 'get', 'shunted', 'into', 'second', 'place', 'in', 'the', 'regional', 'parliament', 'of', 'Jose', 'Maria', "Aznar's", 'backyard', ',', 'the', 'once', 'impregnable', 'Socialist', 'fief', 'of', 'Andalusia', '.', 'After', 'more', 'than', '13', 'years', 'in', 'power', ',', 'Jose', 'Maria', 'Aznar', 'has', 'lost', 'Jose', 'Maria', "Aznar's", '

 88%|████████▊ | 7/8 [10:05<01:01, 61.41s/it]

Jose Maria Aznar
excepting
list indices must be integers or slices, not NoneType
trying


100%|██████████| 8/8 [11:01<00:00, 82.66s/it]

Jose Maria Aznar
[[185, 187], [189, 189], [285, 311], [315, 315], [324, 326], [332, 332], [377, 378], [384, 384], [403, 404], [408, 408], [418, 418], [504, 505], [506, 506], [540, 541], [548, 550], [552, 552], [640, 641], [655, 655], [658, 659], [664, 664], [678, 678], [691, 691], [701, 701], [728, 729], [738, 739], [748, 748], [758, 759], [769, 769], [817, 818], [828, 828], [883, 884], [1012, 1013], [1025, 1025]]
['An', 'indecisive', 'result', 'in', 'Spain', "'s", 'general', 'election', 'last', 'week', 'carries', 'the', 'risk', 'of', 'weak', 'government', 'and', 'the', 'postponement', 'of', 'badly', 'needed', 'reform', '\n', 'AN', 'ERA', 'may', 'have', 'ended', '.', 'Spain', "'s", 'Socialists', ',', 'for', 'the', 'time', 'being', ',', 'are', 'out', '.', 'But', 'the', 'heralded', 'new', 'dawn', 'of', 'conservatism', ',', 'let', 'alone', 'radical', 'conservatism', ',', 'is', 'not', 'at', 'hand', '.', 'Worse', 'for', 'Spain', ',', 'the', 'country', 'may', 'now', 'enter', 'a', 'period', '


