In [1]:
import spacy #for tokenization and lemmatization
nlp = spacy.load('en_core_web_sm')
lemmatizer = nlp.get_pipe("lemmatizer")

import nltk
from nltk.corpus import wordnet as wn #to use WordNet

import re #regular expression for removing punctuations
import requests #read the British to American dictionary
import pandas as pd
import csv

In [2]:
#British -> American spelling dictionary, downloaded as JSON
url ="https://raw.githubusercontent.com/hyperreality/American-British-English-Translator/master/data/british_spellings.json"
response = requests.get(url, timeout=30) #Without a timeout a stalled connection would hang the notebook forever
response.raise_for_status() #Fail here on an HTTP error instead of trying to parse an error page as JSON
british_to_american = response.json()

def preprocess(row):
    """Turn one explanation into a list of American-spelling lemmas.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the explanation text under the key 'exp_rev'.

    Returns
    -------
    list of str
        One lemma per spaCy token, in order, with every lemma that is a key
        of ``british_to_american`` replaced by its mapped spelling.
        Whitespace-only tokens are dropped. An empty list is returned when
        the text is missing (not a ``str``).
    """
    text = row['exp_rev']
    if not isinstance(text, str): #Missing text (None / NaN / pd.NA) would make re.sub raise TypeError
        return []
    cleaned = re.sub(r'[^\w\s]', '', text) #Remove punctuations
    doc = nlp(cleaned) #Apply Spacy
    #Deleting punctuation can leave runs of spaces, which spaCy returns as
    #whitespace-only tokens; they are not words, so skip them
    lemma_list = [token.lemma_ for token in doc if not token.is_space] #Lemmatization
    #Replace with American spelling if there is a difference, otherwise keep the lemma
    return [british_to_american.get(word, word) for word in lemma_list]

In [3]:
#Read the file with explanation text and store the text columns with pandas' 'string' dtype
exp = pd.read_csv('exp_text_SE.csv').astype(
    {column: 'string' for column in ('exp_rev', 'image', 'label', 'category')}
)
exp.head()

Unnamed: 0,subject,exp_org,exp_rev,image,label,category
0,7,it is red and black in colour and is small wit...,it is red and black in colour and is small wit...,ant1.jpg,Ant,Natural
1,7,"it is small and red in colour with six legs, b...","it is small and red in colour with six legs, b...",ant2.jpg,Ant,Natural
2,7,it is small and red in colour with black eyes ...,it is small and red in colour with black eyes ...,ant3.jpg,Ant,Natural
3,7,small and red in colour with six legs,small and red in colour with six legs,ant4.jpg,Ant,Natural
4,7,it is small and red in colour with six legs an...,it is small and red in colour with six legs an...,ant5.jpg,Ant,Natural


In [4]:
#Process explanations: preprocess() is called once per row and its result stored in 'exp_pro'
exp['exp_pro'] = exp.apply(preprocess, axis=1)
exp.head()

Unnamed: 0,subject,exp_org,exp_rev,image,label,category,exp_pro
0,7,it is red and black in colour and is small wit...,it is red and black in colour and is small wit...,ant1.jpg,Ant,Natural,"[it, be, red, and, black, in, color, and, be, ..."
1,7,"it is small and red in colour with six legs, b...","it is small and red in colour with six legs, b...",ant2.jpg,Ant,Natural,"[it, be, small, and, red, in, color, with, six..."
2,7,it is small and red in colour with black eyes ...,it is small and red in colour with black eyes ...,ant3.jpg,Ant,Natural,"[it, be, small, and, red, in, color, with, bla..."
3,7,small and red in colour with six legs,small and red in colour with six legs,ant4.jpg,Ant,Natural,"[small, and, red, in, color, with, six, leg]"
4,7,it is small and red in colour with six legs an...,it is small and red in colour with six legs an...,ant5.jpg,Ant,Natural,"[it, be, small, and, red, in, color, with, six..."


In [5]:
def get_rating(item, path='sensorimotor.csv'):
    """Load one rating column of the sensorimotor strength file into a dict.

    Parameters
    ----------
    item : str
        Name of the CSV column to read (e.g. 'visual').
    path : str or path-like, optional
        Location of the CSV file. Defaults to 'sensorimotor.csv' in the
        working directory. The file needs a header row containing a 'word'
        column and the ``item`` column.

    Returns
    -------
    dict
        Maps each row's 'word' value to ``float(row[item])``. If a word
        appears more than once, the last row wins.

    Raises
    ------
    KeyError
        If the 'word' or ``item`` column is missing.
    ValueError
        If a value in the ``item`` column cannot be parsed as a float.
    """
    #newline='' leaves line-ending handling to the csv module, as its documentation asks
    with open(path, newline='') as file: #Read the file with sensorimotor strength ratings
        reader = csv.DictReader(file)
        return {row['word']: float(row[item]) for row in reader} #Convert ratings into dictionaries

#Word -> rating lookup built from the 'visual' column of sensorimotor.csv
visual_dict = get_rating('visual')

In [6]:
def avg_rating(row, dict_name):
    """Average rating per rated word for one explanation.

    Each lemma in ``row['exp_pro']`` is upper-cased and looked up in
    ``dict_name``; lemmas without an entry are ignored. Returns the mean of
    the ratings found, or 0 when none of the lemmas has a rating.
    """
    upper_lemmas = (lemma.upper() for lemma in row['exp_pro'])
    found = [dict_name[key] for key in upper_lemmas if key in dict_name]
    if not found: #No rated word in this explanation
        return 0
    return sum(found) / len(found)

In [7]:
#Get visual strength for each explanation; visual_dict is handed to avg_rating as its second argument
exp['visual'] = exp.apply(avg_rating, axis=1, args=(visual_dict,))

exp.head()

Unnamed: 0,subject,exp_org,exp_rev,image,label,category,exp_pro,visual
0,7,it is red and black in colour and is small wit...,it is red and black in colour and is small wit...,ant1.jpg,Ant,Natural,"[it, be, red, and, black, in, color, and, be, ...",3.077109
1,7,"it is small and red in colour with six legs, b...","it is small and red in colour with six legs, b...",ant2.jpg,Ant,Natural,"[it, be, small, and, red, in, color, with, six...",3.381121
2,7,it is small and red in colour with black eyes ...,it is small and red in colour with black eyes ...,ant3.jpg,Ant,Natural,"[it, be, small, and, red, in, color, with, bla...",3.122847
3,7,small and red in colour with six legs,small and red in colour with six legs,ant4.jpg,Ant,Natural,"[small, and, red, in, color, with, six, leg]",3.438943
4,7,it is small and red in colour with six legs an...,it is small and red in colour with six legs an...,ant5.jpg,Ant,Natural,"[it, be, small, and, red, in, color, with, six...",2.997345


In [8]:
wn_lemmas = set(wn.all_lemma_names()) #Get all lemmas in WordNet

def avg_similarity(row):
    """Average WordNet path similarity between an explanation and its label.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'label' (the object name) and 'exp_pro' (list of lemmas).

    Returns
    -------
    float or int
        Mean path similarity between the first synset of the label and the
        first synset of every lemma found in ``wn_lemmas``, counting only
        pairs for which WordNet returns a similarity. 0 when the label has
        no synset or no pair yields a similarity.
    """
    #WordNet writes multi-word entries with underscores (e.g. 'Tennis Ball' -> tennis_ball);
    #wn.synsets lower-cases its argument, so only the spaces need replacing
    label = row['label'].replace(' ', '_')
    label_synsets = wn.synsets(label)
    if not label_synsets: #Label unknown to WordNet: nothing to compare against
        return 0
    label_synset = label_synsets[0] #Looked up once per explanation instead of once per lemma

    sim_list = []
    for lemma in row['exp_pro']:
        if lemma not in wn_lemmas: #Check if the word is in WordNet
            continue
        #With simulate_root disabled, path_similarity returns None when no path connects
        #the two synsets (e.g. a different part of speech), so those pairs are skipped
        sim = label_synset.path_similarity(wn.synsets(lemma)[0], simulate_root = False)
        if sim is not None:
            sim_list.append(sim)
    if not sim_list:
        return 0
    return sum(sim_list)/len(sim_list)

In [9]:
#Get WordNet similarity for each explanation (avg_similarity is called once per row)
exp['wordnet_similarity'] = exp.apply(avg_similarity, axis=1)
exp.head()

Unnamed: 0,subject,exp_org,exp_rev,image,label,category,exp_pro,visual,wordnet_similarity
0,7,it is red and black in colour and is small wit...,it is red and black in colour and is small wit...,ant1.jpg,Ant,Natural,"[it, be, red, and, black, in, color, and, be, ...",3.077109,0.058503
1,7,"it is small and red in colour with six legs, b...","it is small and red in colour with six legs, b...",ant2.jpg,Ant,Natural,"[it, be, small, and, red, in, color, with, six...",3.381121,0.058955
2,7,it is small and red in colour with black eyes ...,it is small and red in colour with black eyes ...,ant3.jpg,Ant,Natural,"[it, be, small, and, red, in, color, with, bla...",3.122847,0.059573
3,7,small and red in colour with six legs,small and red in colour with six legs,ant4.jpg,Ant,Natural,"[small, and, red, in, color, with, six, leg]",3.438943,0.056977
4,7,it is small and red in colour with six legs an...,it is small and red in colour with six legs an...,ant5.jpg,Ant,Natural,"[it, be, small, and, red, in, color, with, six...",2.997345,0.058829
