# Data Structures: Vocab, Lexemes and StringStore

## Shared vocab and string store

In [1]:
import spacy

nlp = spacy.load("en_core_web_md")
nlp.meta["lang"], nlp.meta["name"]

('en', 'core_web_md')

### nlp object (spacy.lang.en.English)

In [2]:
type(nlp)

spacy.lang.en.English

In [3]:
coffee_hash = nlp.vocab.strings["coffee"]
coffee_hash

3197928453018144401

In [4]:
coffee_string = nlp.vocab.strings[coffee_hash]
coffee_string

'coffee'

### doc object (spacy.tokens.doc.Doc)

In [5]:
doc = nlp("I love coffee")
type(doc)

spacy.tokens.doc.Doc

In [6]:
coffee_hash = doc.vocab.strings["coffee"]
print("hash value:", coffee_hash)

hash value: 3197928453018144401


In [7]:
print("hash string:", doc.vocab.strings[coffee_hash])

hash string: coffee


## Lexemes: entries in vocabulary

In [8]:
doc = nlp("I love coffee")
lexeme = nlp.vocab["coffee"]
# print the lexical attributes: word text, the hash, lexical attributes
print(lexeme.text, lexeme.orth, lexeme.is_alpha)

coffee 3197928453018144401 True


No context-dependent part-of-speech tags, dependencies or entity labels

# Data Structures: Doc, Span and Token

## The Doc object

In [9]:
# Create an nlp object
from spacy.lang.en import English

nlp = English()
# Import the Doc class
from spacy.tokens import Doc

# The words and spaces to create the doc from
words = ["Hello", "world", "!"]
spaces = [True, False, False]
# Create a doc manually
doc = Doc(nlp.vocab, words=words, spaces=spaces)

In [10]:
doc.text

'Hello world!'

## The Span object

In [11]:
# Import the Doc and Span classes
from spacy.tokens import Doc, Span

In [12]:
# The words and spaces to create the doc from
words = ["Hello", "world", "!"]
spaces = [True, False, False]
# Create a doc manually
doc = Doc(nlp.vocab, words=words, spaces=spaces)
# Create a span manually
span = Span(doc, 0, 2)
# Create a span with an entity label
span_with_label = Span(doc, 0, 2, label="GREETING")
# Add span to the doc.ents
doc.ents = [span_with_label]

In [13]:
span.text, span.label_

('Hello world', '')

In [14]:
span_with_label.text, span_with_label.label_

('Hello world', 'GREETING')

In [15]:
{e.text: e.label_ for e in doc.ents}

{'Hello world': 'GREETING'}

## Data structures best practices

In [16]:
nlp = spacy.load("en_core_web_md")
doc = nlp("Berlin is a nice city")

### Bad code

In [17]:
# Get all tokens and part-of-speech tags
pos_tags = [token.pos_ for token in doc]
for index, pos in enumerate(pos_tags):
    # Check if the current token is a proper noun
    if pos == "PROPN":
        # Check if the next token is a verb
        if pos_tags[index + 1] in ("VERB", "AUX"):
            print("Found a verb after a proper noun!")

Found a verb after a proper noun!


### Good code

In [18]:
for token in doc:
    # Check if the current token is a proper noun
    if token.pos_ == "PROPN":
        # Check if the next token is a verb
        if doc[token.i + 1].pos_ in ("VERB", "AUX"):
            print("Found a verb after a proper noun!")

Found a verb after a proper noun!


## Word vectors and semantic similarity

### Compare two documents

In [19]:
doc1 = nlp("I like fast food")
doc2 = nlp("I like pizza")
doc1.similarity(doc2)

0.869833325851152

### Compare two tokens

In [20]:
doc = nlp("I like pizza and pasta")
token1 = doc[2]
token2 = doc[4]
token1.similarity(token2)

0.6850197911262512

### Compare a document with a token

In [21]:
doc = nlp("I like pizza")
token = nlp("soap")[0]
doc.similarity(token)

0.18213694934365615

### Compare a span with a document

In [22]:
span = nlp("I like pizza and pasta")[2:5]
doc = nlp("McDonalds sells burgers")
span.similarity(doc)

0.4719003666806404

### Word vectors in spaCy

In [23]:
# Load a larger model with vectors
nlp = spacy.load("en_core_web_md")
doc = nlp("I have a banana")
# Access the vector via the token.vector attribute
print(doc[3].vector)

[ 0.20778  -2.4151    0.36605   2.0139   -0.23752  -3.1952   -0.2952
  1.2272   -3.4129   -0.54969   0.32634  -1.0813    0.55626   1.5195
  0.97797  -3.1816   -0.37207  -0.86093   2.1509   -4.0845    0.035405
  3.5702   -0.79413  -1.7025   -1.6371   -3.198    -1.9387    0.91166
  0.85409   1.8039   -1.103    -2.5274    1.6365   -0.82082   1.0278
 -1.705     1.5511   -0.95633  -1.4702   -1.865    -0.19324  -0.49123
  2.2361    2.2119    3.6654    1.7943   -0.20601   1.5483   -1.3964
 -0.50819   2.1288   -2.332     1.3539   -2.1917    1.8923    0.28472
  0.54285   1.2309    0.26027   1.9542    1.1739   -0.40348   3.2028
  0.75381  -2.7179   -1.3587   -1.1965   -2.0923    2.2855   -0.3058
 -0.63174   0.70083   0.16899   1.2325    0.97006  -0.23356  -2.094
 -1.737     3.6075   -1.511    -0.9135    0.53878   0.49268   0.44751
  0.6315    1.4963    4.1725    2.1961   -1.2409    0.4214    2.9678
  1.841     3.0133   -4.4652    0.96521  -0.29787   4.3386   -1.2527
 -1.7734   -3.5637   -0.20035

### Similarity depends on the application context

In [24]:
doc1 = nlp("I like cats")
doc2 = nlp("I hate cats")
doc1.similarity(doc2)

0.9530093158841214

### Inspecting word vectors

In [25]:
len(nlp.vocab.vectors)

20000

## Combining models and rules

### Recap: Rule-based Matching

In [26]:
# Initialize with the shared vocab
from spacy.matcher import Matcher

matcher = Matcher(nlp.vocab)

# Patterns are lists of dictionaries describing the tokens
pattern = [{"LEMMA": "love", "POS": "VERB"}, {"LOWER": "cats"}]
matcher.add("LOVE_CATS", [pattern])

# Operators can specify how often a token should be matched
pattern = [{"TEXT": "very", "OP": "+"}, {"TEXT": "happy"}]
matcher.add("VERY_HAPPY", [pattern])

# Calling matcher on doc returns list of (match_id, start, end) tuples
doc = nlp("I love cats and I'm very very happy")
matches = matcher(doc)

In [27]:
# Iterate over the matches and print the span text
for match_id, start, end in matches:
    print("Match found:", doc[start:end].text)

Match found: love cats
Match found: very happy
Match found: very very happy


### Adding statistical predictions

In [28]:
matcher = Matcher(nlp.vocab)
matcher.add("DOG", [[{"LOWER": "golden"}, {"LOWER": "retriever"}]])
doc = nlp("I have a Golden Retriever")
print(doc.text)
for match_id, start, end in matcher(doc):
    span = doc[start:end]
    print("Matched id:", match_id)
    print("Matched span:", span.text)
    # Get the span's root token
    print(
        "Root token:", span.root.text
    )  # if the span consists of more than on token, this will be the token that decides the category of the phrase
    # Get root head token
    print(
        "Root head token:", span.root.head.text
    )  # This is the syntatic "parent" that governs the phrase
    # Get the previous token and its POS tag
    print(
        "Previous token:",
        doc[start - 1].text,
        f"[{doc[start - 1].pos_}]",
        spacy.explain(doc[start - 1].pos_),
    )

I have a Golden Retriever
Matched id: 2951553348639939143
Matched span: Golden Retriever
Root token: Retriever
Root head token: have
Previous token: a [DET] determiner


### Efficient phrase matching

In [29]:
from spacy.matcher import PhraseMatcher

matcher = PhraseMatcher(nlp.vocab)
pattern = nlp("Golden Retriever")
matcher.add("DOG", [pattern])
doc = nlp("I have a Golden Retriever")
# iterate over the matches
for match_id, start, end in matcher(doc):
    # get the matched span
    span = doc[start:end]
    print("Matched span:", span.text)

Matched span: Golden Retriever


### Debugging patterns

In [30]:
# Create the match patterns
pattern1 = [{"LOWER": "amazon"}, {"IS_TITLE": True, "POS": "PROPN"}]
pattern2 = [{"LOWER": "ad"}, {"TEXT": "-"}, {"LOWER": "free"}, {"POS": "NOUN"}]

# Initialize the Matcher and add the patterns
matcher = Matcher(nlp.vocab)
matcher.add("PATTERN1", [pattern1])
matcher.add("PATTERN2", [pattern2])

doc = nlp(
    "Twitch Prime, the perks program for Amazon Prime members offering free loot, games and other benefits, is ditching one of its best features: ad-free viewing. According to an email sent out to Amazon Prime members today, ad-free viewing will no longer be included as a part of Twitch Prime for new members, beginning on September 14. However, members with existing annual subscriptions will be able to continue to enjoy ad-free viewing until their subscription comes up for renewal. Those with monthly subscriptions will have access to ad-free viewing until October 15."
)
# Iterate over the matches
for match_id, start, end in matcher(doc):
    # Print pattern string name and text of matched span
    print(doc.vocab.strings[match_id], doc[start:end].text)

PATTERN1 Amazon Prime
PATTERN2 ad-free viewing
PATTERN1 Amazon Prime
PATTERN2 ad-free viewing
PATTERN2 ad-free viewing
PATTERN2 ad-free viewing


### Efficient phrase matching

In [31]:
COUNTRIES = [
    "Afghanistan",
    "Åland Islands",
    "Albania",
    "Algeria",
    "American Samoa",
    "Andorra",
    "Angola",
    "Anguilla",
    "Antarctica",
    "Antigua and Barbuda",
    "Argentina",
    "Armenia",
    "Aruba",
    "Australia",
    "Austria",
    "Azerbaijan",
    "Bahamas",
    "Bahrain",
    "Bangladesh",
    "Barbados",
    "Belarus",
    "Belgium",
    "Belize",
    "Benin",
    "Bermuda",
    "Bhutan",
    "Bolivia (Plurinational State of)",
    "Bonaire, Sint Eustatius and Saba",
    "Bosnia and Herzegovina",
    "Botswana",
    "Bouvet Island",
    "Brasil",
    "British Indian Ocean Territory",
    "United States Minor Outlying Islands",
    "Virgin Islands (British)",
    "Virgin Islands (U.S.)",
    "Brunei Darussalam",
    "Bulgaria",
    "Burkina Faso",
    "Burundi",
    "Cambodia",
    "Cameroon",
    "Canada",
    "Cabo Verde",
    "Cayman Islands",
    "Central African Republic",
    "Chad",
    "Chile",
    "China",
    "Christmas Island",
    "Cocos (Keeling) Islands",
    "Colombia",
    "Comoros",
    "Congo",
    "Congo (Democratic Republic of the)",
    "Cook Islands",
    "Costa Rica",
    "Croatia",
    "Cuba",
    "Curaçao",
    "Cyprus",
    "Czech Republic",
    "Denmark",
    "Djibouti",
    "Dominica",
    "Dominican Republic",
    "Ecuador",
    "Egypt",
    "El Salvador",
    "Equatorial Guinea",
    "Eritrea",
    "Estonia",
    "Ethiopia",
    "Falkland Islands (Malvinas)",
    "Faroe Islands",
    "Fiji",
    "Finland",
    "France",
    "French Guiana",
    "French Polynesia",
    "French Southern Territories",
    "Gabon",
    "Gambia",
    "Georgia",
    "Germany",
    "Ghana",
    "Gibraltar",
    "Greece",
    "Greenland",
    "Grenada",
    "Guadeloupe",
    "Guam",
    "Guatemala",
    "Guernsey",
    "Guinea",
    "Guinea-Bissau",
    "Guyana",
    "Haiti",
    "Heard Island and McDonald Islands",
    "Holy See",
    "Honduras",
    "Hong Kong",
    "Hungary",
    "Iceland",
    "India",
    "Indonesia",
    "Côte d'Ivoire",
    "Iran (Islamic Republic of)",
    "Iraq",
    "Ireland",
    "Isle of Man",
    "Israel",
    "Italy",
    "Jamaica",
    "Japan",
    "Jersey",
    "Jordan",
    "Kazakhstan",
    "Kenya",
    "Kiribati",
    "Kuwait",
    "Kyrgyzstan",
    "Lao People's Democratic Republic",
    "Latvia",
    "Lebanon",
    "Lesotho",
    "Liberia",
    "Libya",
    "Liechtenstein",
    "Lithuania",
    "Luxembourg",
    "Macao",
    "Macedonia (the former Yugoslav Republic of)",
    "Madagascar",
    "Malawi",
    "Malaysia",
    "Maldives",
    "Mali",
    "Malta",
    "Marshall Islands",
    "Martinique",
    "Mauritania",
    "Mauritius",
    "Mayotte",
    "Mexico",
    "Micronesia (Federated States of)",
    "Moldova (Republic of)",
    "Monaco",
    "Mongolia",
    "Montenegro",
    "Montserrat",
    "Morocco",
    "Mozambique",
    "Myanmar",
    "Namibia",
    "Nauru",
    "Nepal",
    "Netherlands",
    "New Caledonia",
    "New Zealand",
    "Nicaragua",
    "Niger",
    "Nigeria",
    "Niue",
    "Norfolk Island",
    "Korea (Democratic People's Republic of)",
    "Northern Mariana Islands",
    "Norway",
    "Oman",
    "Pakistan",
    "Palau",
    "Palestine, State of",
    "Panama",
    "Papua New Guinea",
    "Paraguay",
    "Peru",
    "Philippines",
    "Pitcairn",
    "Poland",
    "Portugal",
    "Puerto Rico",
    "Qatar",
    "Republic of Kosovo",
    "Réunion",
    "Romania",
    "Russian Federation",
    "Rwanda",
    "Saint Barthélemy",
    "Saint Helena, Ascension and Tristan da Cunha",
    "Saint Kitts and Nevis",
    "Saint Lucia",
    "Saint Martin (French part)",
    "Saint Pierre and Miquelon",
    "Saint Vincent and the Grenadines",
    "Samoa",
    "San Marino",
    "Sao Tome and Principe",
    "Saudi Arabia",
    "Senegal",
    "Serbia",
    "Seychelles",
    "Sierra Leone",
    "Singapore",
    "Sint Maarten (Dutch part)",
    "Slovakia",
    "Slovenia",
    "Solomon Islands",
    "Somalia",
    "South Africa",
    "South Georgia and the South Sandwich Islands",
    "Korea (Republic of)",
    "South Sudan",
    "Spain",
    "Sri Lanka",
    "Sudan",
    "Suriname",
    "Svalbard and Jan Mayen",
    "Swaziland",
    "Sweden",
    "Switzerland",
    "Syrian Arab Republic",
    "Taiwan",
    "Tajikistan",
    "Tanzania, United Republic of",
    "Thailand",
    "Timor-Leste",
    "Togo",
    "Tokelau",
    "Tonga",
    "Trinidad and Tobago",
    "Tunisia",
    "Turkey",
    "Turkmenistan",
    "Turks and Caicos Islands",
    "Tuvalu",
    "Uganda",
    "Ukraine",
    "United Arab Emirates",
    "United Kingdom of Great Britain and Northern Ireland",
    "United States of America",
    "Uruguay",
    "Uzbekistan",
    "Vanuatu",
    "Venezuela (Bolivarian Republic of)",
    "Viet Nam",
    "Wallis and Futuna",
    "Western Sahara",
    "Yemen",
    "Zambia",
    "Zimbabwe",
]

In [32]:
doc = nlp('Czech Republic may help Slovakia protect its airspace')

In [33]:
# Import the PhraseMatcher and initialize it
matcher = PhraseMatcher(nlp.vocab)

# Create pattern Doc objects and add them to the matcher
# This is the faster version of: [nlp(country) for country in COUNTRIES]
patterns = nlp.pipe(COUNTRIES)
matcher.add('COUNTRY', patterns)

# Call the matcher on the test document and print the result
matches = matcher(doc)
print([doc[start:end].text for match_id, start, end in matches])

['Czech Republic', 'Slovakia']


### Extracting countries and relationships

In [468]:
text = 'After the Cold War, the UN saw a radical expansion in its peacekeeping duties, taking on more missions in ten years than it had in the previous four decades.Between 1988 and 2000, the number of adopted Security Council resolutions more than doubled, and the peacekeeping budget increased more than tenfold. The UN negotiated an end to the Salvadoran Civil War, launched a successful peacekeeping mission in Namibia, and oversaw democratic elections in post-apartheid South Africa and post-Khmer Rouge Cambodia. In 1991, the UN authorized a US-led coalition that repulsed the Iraqi invasion of Kuwait. Brian Urquhart, Under-Secretary-General from 1971 to 1985, later described the hopes raised by these successes as a "false renaissance" for the organization, given the more troubled missions that followed. Though the UN Charter had been written primarily to prevent aggression by one nation against another, in the early 1990s the UN faced a number of simultaneous, serious crises within nations such as Somalia, Haiti, Mozambique, and the former Yugoslavia. The UN mission in Somalia was widely viewed as a failure after the US withdrawal following casualties in the Battle of Mogadishu, and the UN mission to Bosnia faced "worldwide ridicule" for its indecisive and confused mission in the face of ethnic cleansing. In 1994, the UN Assistance Mission for Rwanda failed to intervene in the Rwandan genocide amid indecision in the Security Council. Beginning in the last decades of the Cold War, American and European critics of the UN condemned the organization for perceived mismanagement and corruption. In 1984, the US President, Ronald Reagan, withdrew his nation\'s funding from UNESCO (the United Nations Educational, Scientific and Cultural Organization, founded 1946) over allegations of mismanagement, followed by Britain and Singapore. Boutros Boutros-Ghali, Secretary-General from 1992 to 1996, initiated a reform of the Secretariat, reducing the size of the organization somewhat. His successor, Kofi Annan (1997–2006), initiated further management reforms in the face of threats from the United States to withhold its UN dues. In the late 1990s and 2000s, international interventions authorized by the UN took a wider variety of forms. The UN mission in the Sierra Leone Civil War of 1991–2002 was supplemented by British Royal Marines, and the invasion of Afghanistan in 2001 was overseen by NATO. In 2003, the United States invaded Iraq despite failing to pass a UN Security Council resolution for authorization, prompting a new round of questioning of the organization\'s effectiveness. Under the eighth Secretary-General, Ban Ki-moon, the UN has intervened with peacekeepers in crises including the War in Darfur in Sudan and the Kivu conflict in the Democratic Republic of Congo and sent observers and chemical weapons inspectors to the Syrian Civil War. In 2013, an internal review of UN actions in the final battles of the Sri Lankan Civil War in 2009 concluded that the organization had suffered "systemic failure". One hundred and one UN personnel died in the 2010 Haiti earthquake, the worst loss of life in the organization\'s history. The Millennium Summit was held in 2000 to discuss the UN\'s role in the 21st century. The three day meeting was the largest gathering of world leaders in history, and culminated in the adoption by all member states of the Millennium Development Goals (MDGs), a commitment to achieve international development in areas such as poverty reduction, gender equality, and public health. Progress towards these goals, which were to be met by 2015, was ultimately uneven. The 2005 World Summit reaffirmed the UN\'s focus on promoting development, peacekeeping, human rights, and global security. The Sustainable Development Goals were launched in 2015 to succeed the Millennium Development Goals. In addition to addressing global challenges, the UN has sought to improve its accountability and democratic legitimacy by engaging more with civil society and fostering a global constituency. In an effort to enhance transparency, in 2016 the organization held its first public debate between candidates for Secretary-General. On 1 January 2017, Portuguese diplomat António Guterres, who previously served as UN High Commissioner for Refugees, became the ninth Secretary-General. Guterres has highlighted several key goals for his administration, including an emphasis on diplomacy for preventing conflicts, more effective peacekeeping efforts, and streamlining the organization to be more responsive and versatile to global needs.'
print(text)
doc = nlp(text)

After the Cold War, the UN saw a radical expansion in its peacekeeping duties, taking on more missions in ten years than it had in the previous four decades.Between 1988 and 2000, the number of adopted Security Council resolutions more than doubled, and the peacekeeping budget increased more than tenfold. The UN negotiated an end to the Salvadoran Civil War, launched a successful peacekeeping mission in Namibia, and oversaw democratic elections in post-apartheid South Africa and post-Khmer Rouge Cambodia. In 1991, the UN authorized a US-led coalition that repulsed the Iraqi invasion of Kuwait. Brian Urquhart, Under-Secretary-General from 1971 to 1985, later described the hopes raised by these successes as a "false renaissance" for the organization, given the more troubled missions that followed. Though the UN Charter had been written primarily to prevent aggression by one nation against another, in the early 1990s the UN faced a number of simultaneous, serious crises within nations suc

In [469]:
# original spacy ents
print([(ent.text, ent.label_, ent.start, ent.end) for ent in doc.ents if ent.label_ == 'GPE'])

[('Namibia', 'GPE', 74, 75), ('South Africa', 'GPE', 84, 86), ('US', 'GPE', 100, 101), ('Kuwait', 'GPE', 110, 111), ('Somalia', 'GPE', 186, 187), ('Haiti', 'GPE', 188, 189), ('Mozambique', 'GPE', 190, 191), ('Yugoslavia', 'GPE', 195, 196), ('Somalia', 'GPE', 201, 202), ('US', 'GPE', 210, 211), ('Bosnia', 'GPE', 225, 226), ('Rwanda', 'GPE', 252, 253), ('US', 'GPE', 297, 298), ('Britain', 'GPE', 331, 332), ('Singapore', 'GPE', 333, 334), ('the United States', 'GPE', 382, 385), ('Afghanistan', 'GPE', 433, 434), ('the United States', 'GPE', 444, 447), ('Iraq', 'GPE', 448, 449), ('Darfur', 'GPE', 497, 498), ('Sudan', 'GPE', 499, 500), ('Kivu', 'GPE', 502, 503), ('the Democratic Republic of Congo', 'GPE', 505, 510), ('Haiti', 'GPE', 565, 566)]


In [474]:
from spacy.util import filter_spans
# Iterate over the matches
for match_id, start, end in matcher(doc):
    # Create a Span with the label for "GPE"
    span = Span(doc, start, end, label='GPE')
    # Overwrite the doc.ents and add the span
    doc.ents = filter_spans(list(doc.ents) + [span])
    # if span not in doc.ents:
    #     doc.ents = filter_spans(list(doc.ents) + [span])

In [476]:
# replace spacy entities
# doc.ents = filter_spans(original_ents)
# Print the entities in the document
print([(ent.text, ent.label_, ent.start, ent.end) for ent in doc.ents if ent.label_ == 'GPE'])

[('Namibia', 'GPE', 74, 75), ('South Africa', 'GPE', 84, 86), ('US', 'GPE', 100, 101), ('Kuwait', 'GPE', 110, 111), ('Somalia', 'GPE', 186, 187), ('Haiti', 'GPE', 188, 189), ('Mozambique', 'GPE', 190, 191), ('Yugoslavia', 'GPE', 195, 196), ('Somalia', 'GPE', 201, 202), ('US', 'GPE', 210, 211), ('Bosnia', 'GPE', 225, 226), ('Rwanda', 'GPE', 252, 253), ('US', 'GPE', 297, 298), ('Britain', 'GPE', 331, 332), ('Singapore', 'GPE', 333, 334), ('the United States', 'GPE', 382, 385), ('Afghanistan', 'GPE', 433, 434), ('the United States', 'GPE', 444, 447), ('Iraq', 'GPE', 448, 449), ('Darfur', 'GPE', 497, 498), ('Sudan', 'GPE', 499, 500), ('Kivu', 'GPE', 502, 503), ('the Democratic Republic of Congo', 'GPE', 505, 510), ('Haiti', 'GPE', 565, 566)]


In [477]:
# Expected results
print([('Namibia', 'GPE'), ('South Africa', 'GPE'), ('Cambodia', 'GPE'), ('Kuwait', 'GPE'), ('Somalia', 'GPE'), ('Haiti', 'GPE'), ('Mozambique', 'GPE'), ('Somalia', 'GPE'), ('Rwanda', 'GPE'), ('Singapore', 'GPE'), ('Sierra Leone', 'GPE'), ('Afghanistan', 'GPE'), ('Iraq', 'GPE'), ('Sudan', 'GPE'), ('Congo', 'GPE'), ('Haiti', 'GPE')])

[('Namibia', 'GPE'), ('South Africa', 'GPE'), ('Cambodia', 'GPE'), ('Kuwait', 'GPE'), ('Somalia', 'GPE'), ('Haiti', 'GPE'), ('Mozambique', 'GPE'), ('Somalia', 'GPE'), ('Rwanda', 'GPE'), ('Singapore', 'GPE'), ('Sierra Leone', 'GPE'), ('Afghanistan', 'GPE'), ('Iraq', 'GPE'), ('Sudan', 'GPE'), ('Congo', 'GPE'), ('Haiti', 'GPE')]
