In [None]:
from spacy.tokens import Doc, Span
from scipy.spatial.distance import cosine
import numpy as np

In [None]:
# Maps each raw goal label (a numeric string) to a contiguous 0-based class
# index, in the order listed below.
# NOTE(review): the labels look like UN SDG numbers with 8, 9 and 17 left
# out -- confirm against the dataset before relying on that reading.
clean_labels_mapping = {
    raw_label: class_index
    for class_index, raw_label in enumerate([
        '1', '2', '3', '4', '5', '6', '7',
        '10', '11', '12', '13', '14', '15', '16',
    ])
}


# Human-readable goal name for each 0-based class index; the key of every
# entry is simply its position in the list below.
# NOTE(review): the indices presumably correspond to the values of
# clean_labels_mapping -- confirm against the callers.
goal_name_mapping = dict(enumerate([
    'No Poverty',
    'Zero Hunger',
    'Good Health and Well Being',
    'Quality Education',
    'Gender Equality',
    'Clean Water and Sanitation',
    'Affordable and Clean Energy',
    'Reduced Inequalities',
    'Sustainable Cities and Communities',
    'Responsible Consumption and Production',
    'Climate Action',
    'Life Below Water',
    'Life on Land',
    'Peace, Justice and Strong Institutions',
]))

# Colour name assigned to each 0-based class index; the key of every entry
# is its position in the list below.
# NOTE(review): these look like CSS/X11 colour names intended for plotting
# -- confirm with the code that consumes them.
goal_color_mapping = dict(enumerate([
    'Red',
    'Gold',
    'MediumAquamarine',
    'Orange',
    'Pink',
    'Cyan',
    'Lime',
    'DeepPink',
    'CadetBlue',
    'DarkKhaki',
    'Green',
    'Blue',
    'Peru',
    'Indigo',
]))

In [None]:
"""
Set the extended attributes and methods that are used throughout the code.
"""
def set_spacy_extensions():
    """Register the custom spaCy extensions used by this code.

    On ``Doc`` this registers the attributes ``project_id``,
    ``project_title``, ``goal_labels``, ``custom_vector``,
    ``word_relevances`` and ``predicted_goal_scores``. On ``Span`` it
    registers ``custom_vector`` and ``word_relevances``. Both classes also
    get the method ``custom_similarity``.

    Every registration passes ``force=True``. ``custom_similarity`` must be
    defined at module level before this function is called.
    """
    doc_and_span = (Doc, Span)

    # Project metadata lives on Doc only.
    for attribute in ('project_id', 'project_title'):
        Doc.set_extension(attribute, default=None, force=True)
    Doc.set_extension('goal_labels', default='', force=True)

    for owner in doc_and_span:
        owner.set_extension('custom_vector', default=None, force=True)

    for owner in doc_and_span:
        # The list literal is evaluated per iteration, so each class is
        # handed its own default list object.
        owner.set_extension('word_relevances', default=[], force=True)

    Doc.set_extension('predicted_goal_scores', default=None, force=True)

    for owner in doc_and_span:
        owner.set_extension('custom_similarity', method=custom_similarity, force=True)

In [None]:
"""
Creates an extended method for the classes Doc and Span. Returns the cosine similarity (1 - cosine distance) between the custom vectors of two spaCy objects.
"""
def custom_similarity(doc, other_doc):
    """Return the cosine similarity of two objects' custom vectors.

    Args:
        doc: Object exposing ``_.custom_vector``.
        other_doc: Object exposing ``_.custom_vector``.

    Returns:
        ``1 - cosine(doc._.custom_vector, other_doc._.custom_vector)``,
        where ``cosine`` is SciPy's cosine distance.
    """
    own_vector = doc._.custom_vector
    compared_vector = other_doc._.custom_vector
    distance = cosine(own_vector, compared_vector)
    return 1 - distance

In [None]:
"""
Utility function that, depending on the values of the parameters "description", "facts" and "targets", selects the textual content of the goals contained in a pandas DataFrame and returns the concatenated texts in a list.
"""
def get_goal_texts(goals_df, description=True, facts=True, targets=True):
    """Build one concatenated text per row of ``goals_df``.

    Args:
        goals_df: pandas DataFrame; the columns ``'description'``,
            ``'facts'`` and ``'targets'`` are read when the matching flag
            is set.
        description: Include the ``'description'`` column.
        facts: Include the ``'facts'`` column.
        targets: Include the ``'targets'`` column.

    Returns:
        A list with one string per row. The description and facts parts are
        each followed by a single space; the targets part is appended
        without a trailing separator.

    Raises:
        AssertionError: If all three flags are false.
    """
    assert description or facts or targets
    collected = []
    for _, goal in goals_df.iterrows():
        pieces = []
        if description:
            pieces.append(goal['description'] + ' ')
        if facts:
            pieces.append(goal['facts'] + ' ')
        if targets:
            pieces.append(goal['targets'])
        collected.append(''.join(pieces))
    return collected

In [None]:
def unit_vector(vector):
    """Scale ``vector`` by the reciprocal of its norm.

    Args:
        vector: Array-like accepted by ``np.linalg.norm``.

    Returns:
        ``vector`` divided by ``np.linalg.norm(vector)``.
    """
    magnitude = np.linalg.norm(vector)
    return vector / magnitude

In [None]:
def angle_between(v1, v2):
    """Return the angle in radians between vectors ``v1`` and ``v2``.

    Both inputs are normalised with ``unit_vector`` before their dot
    product is taken, so only their directions matter::

            >>> angle_between((1, 0, 0), (0, 1, 0))
            1.5707963267948966
            >>> angle_between((1, 0, 0), (1, 0, 0))
            0.0
            >>> angle_between((1, 0, 0), (-1, 0, 0))
            3.141592653589793
    """
    first_direction = unit_vector(v1)
    second_direction = unit_vector(v2)
    cosine_of_angle = np.dot(first_direction, second_direction)
    # Keep the value inside arccos' domain [-1, 1] before inverting it.
    bounded_cosine = np.clip(cosine_of_angle, -1.0, 1.0)
    return np.arccos(bounded_cosine)