In [1]:
import pandas as pd
import os.path
# The data folder is looked up under the parent of the current working
# directory.
root_path = os.path.dirname(os.getcwd())

# Import video data (thanks Cowen & Keltner!)
video_data_path = os.path.join(root_path, "DATA/video_data.csv")
video_data = pd.read_csv(video_data_path)

In [47]:
# Separate out 34 emotion category scores (column names are kept as-is)
# Fraction of respondents who chose from multiple-select
video_category_data = video_data.iloc[:, 1:35]

# Separate out 14 affective dimension scores
# 1-9 ratings
video_dimension_data = video_data.iloc[:, 35:49]

# Separate 600 free response term scores
# Fraction of respondents who chose from multiple-select dropdown
video_term_data = video_data.iloc[:, 49:]

# The distinct emotion category scores are a subset of the category
# columns; they are selected explicitly by column name from video_data
# rather than derived by position here.

In [49]:
# Select distinct category scores
# Subset of category scores
# Column names of the distinct emotion categories, in the order they
# appear in the resulting frame.
# NOTE(review): this list holds 26 names; if 27 distinct categories are
# expected, one column may be missing here - TODO confirm against the data.
distinct_category_names = [
    "Admiration",
    "Adoration",
    "Aesthetic Appreciation",
    "Amusement",
    "Anger",
    "Anxiety",
    "Awe",
    "Awkwardness",
    "Boredom",
    "Calmness",
    "Confusion",
    "Craving",
    "Disgust",
    "Empathic Pain",
    "Entrancement",
    "Excitement",
    "Fear",
    "Horror",
    "Interest",
    "Joy",
    "Nostalgia",
    "Relief",
    "Romance",
    "Sadness",
    "Satisfaction",
    "Surprise",
]
video_distinct_category_data = video_data[distinct_category_names]

In [3]:
# Install and import spaCy, the natural language utility used below for
# lemmatization and phrase similarity.
# NOTE(review): the install is unpinned, so a re-run may pull a different
# spaCy version than the one the recorded outputs were produced with.
!pip install spacy
import spacy

[31mtensorflow 1.11.0 has requirement setuptools<=39.1.0, but you'll have setuptools 40.4.3 which is incompatible.[0m
[33mYou are using pip version 10.0.1, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [4]:
# Load large english language model
!python -m spacy download en_core_web_md
nlp = spacy.load('en_core_web_lg')

[33mYou are using pip version 10.0.1, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m

[93m    Linking successful[0m
    /home/russell/anaconda3/lib/python3.6/site-packages/en_core_web_md -->
    /home/russell/anaconda3/lib/python3.6/site-packages/spacy/data/en_core_web_md

    You can now load the model via spacy.load('en_core_web_md')



In [5]:
# Install and import spelling corrector; `spell` is applied to each word
# of a phrase by normalize() below.
# NOTE(review): the install is unpinned, and newer autocorrect releases
# appear to expose a `Speller` class instead of `spell` - confirm and pin
# the version this notebook was written against.
!pip install autocorrect
from autocorrect import spell

# Define function to normalize phrase
def normalize(text):
    """Spell-correct a phrase word by word.

    The phrase is split on whitespace, every word is passed through
    ``spell``, and the corrected words are joined with single spaces.
    """
    corrected_words = map(spell, text.split())
    return ' '.join(corrected_words)

# Define function to lemmatize phrase
def lemmatize(text):
    """Return the phrase with every token replaced by its lemma.

    ``text`` is parsed with the notebook-level ``nlp`` pipeline and the
    ``lemma_`` string of each token is joined with single spaces.
    """
    lemmas = []
    for token in nlp(text):
        lemmas.append(token.lemma_)
    return ' '.join(lemmas)

[31mtensorflow 1.11.0 has requirement setuptools<=39.1.0, but you'll have setuptools 40.4.3 which is incompatible.[0m
[33mYou are using pip version 10.0.1, however version 18.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [27]:
# Build a lookup table with one row per free-response term:
#   term       - column name taken from video_term_data
#   term_lemma - lemmatized form of the term
#   term_doc   - result of running nlp on the lemmatized term
term_data = pd.DataFrame({'term': video_term_data.columns})
term_lemmas = term_data['term'].apply(lemmatize)
term_data['term_lemma'] = term_lemmas
term_data['term_doc'] = term_lemmas.apply(nlp)
term_data.head()

Unnamed: 0,term,term_lemma,term_doc
0,a surge of pride,a surge of pride,"(a, surge, of, pride)"
1,abhorrence,abhorrence,(abhorrence)
2,admiration,admiration,(admiration)
3,adoration,adoration,(adoration)
4,adrenaline rush,adrenaline rush,"(adrenaline, rush)"


In [79]:
# NOT USED
import math

def normalize_vector(vector):
    """Scale a vector to unit Euclidean length.

    Parameters
    ----------
    vector : array-like
        Must support elementwise ``*`` and ``/`` and have ``.sum()``
        (for example a numpy array or a pandas Series).

    Returns
    -------
    ``vector`` divided by its Euclidean magnitude. A vector whose
    magnitude is zero is returned unchanged, because dividing it by zero
    would fill it with NaN.
    """
    magnitude = math.sqrt((vector * vector).sum())
    if magnitude == 0:
        # Nothing to scale; avoid 0 / 0 -> NaN.
        return vector
    return vector / magnitude

In [150]:
# Define function to get term vector from phrase
def get_term_from_phrase(phrase, n):
    """Map a free-text phrase onto the free-response terms.

    The phrase is spell-corrected, lemmatized and parsed, then compared
    with every ``term_doc`` in ``term_data`` through ``similarity``. Only
    the ``n`` highest-scoring terms keep their score; every other term is
    set to 0, and all entries are then divided by ``n``.

    Returns a Series of weights indexed by term.
    """
    query_doc = nlp(lemmatize(normalize(phrase)))

    similarities = term_data['term_doc'].apply(query_doc.similarity)
    similarities.index = term_data['term']

    # Labels of the n best matches; everything outside that set is zeroed.
    ranked = similarities.sort_values(ascending=False)
    top_terms = ranked.head(n).index
    outside_top = ~similarities.index.isin(top_terms)
    similarities[outside_top] = 0

    return similarities / n

# Get term vector from phrase
# term_vector = get_term_from_phrase("I feel good", 3)

# Display top terms
# term_vector.sort_values(ascending=False).head()

In [151]:
def get_video_from_term(term_vector):
    """Project term weights onto videos.

    Computes ``video_term_data.dot(term_vector)``: for each row of
    ``video_term_data`` the term scores are multiplied by the matching
    term weights and summed. The result is a weighted sum, not an
    average.
    """
    return video_term_data.dot(term_vector)

In [152]:
def get_dimension_from_video(video_vector):
    """Project video weights onto the affective dimensions.

    Computes ``video_vector.dot(video_dimension_data)``: each dimension
    column is multiplied by the per-video weights and summed. The result
    is a weighted sum and is not divided by the total weight.
    """
    return video_vector.dot(video_dimension_data)

In [153]:
def get_distinct_category_from_video(video_vector):
    """Project video weights onto the distinct emotion categories.

    Computes ``video_vector.dot(video_distinct_category_data)``: each
    category column is multiplied by the per-video weights and summed.
    """
    return video_vector.dot(video_distinct_category_data)

In [161]:
def interpret_phrase(phrase, n_terms=3):
    """Print an emotional reading of a free-text phrase.

    The phrase is mapped to its closest free-response terms, those term
    weights are projected onto videos, and the video weights are projected
    onto the affective dimensions and the distinct emotion categories.
    Each stage is printed under its own header.

    Parameters
    ----------
    phrase : str
        Text to interpret.
    n_terms : int, optional
        Number of best-matching free-response terms to keep (default 3).

    Returns
    -------
    None. The results are printed only.
    """
    term_vector = get_term_from_phrase(phrase, n_terms)
    print('FREE RESPONSE TERMS:\n', term_vector.sort_values(ascending=False).head(), '\n')

    # Both projections below share the same per-video weights.
    video_vector = get_video_from_term(term_vector)

    dimension_vector = get_dimension_from_video(video_vector)
    print('AFFECTIVE DIMENSIONS:\n', dimension_vector, '\n')

    distinct_category_vector = get_distinct_category_from_video(video_vector)
    print('DISTINCT CATEGORIES:\n', distinct_category_vector.sort_values(ascending=False), '\n')


In [164]:
# Example: print the closest terms, affective dimensions and distinct
# categories for a short phrase.
interpret_phrase('very good')

FREE RESPONSE TERMS:
 term
feeling strong         0.255394
feeling important      0.254808
feeling impressed      0.247989
feeling appreciated    0.000000
feeling appalled       0.000000
Name: term_doc, dtype: float64 

AFFECTIVE DEMENSIONS:
 approach       53.173297
arousal        58.664934
attention      62.393080
certainty      52.095370
commitment     50.763248
control        62.637727
dominance      48.036224
effort         28.497934
fairness       56.242721
identity       46.696514
obstruction    31.360040
safety         50.396859
upswing        64.373175
valence        59.619534
dtype: float64 

DISTINCT CATEGORIES:
 Amusement                 2.742263
Interest                  2.016617
Awe                       1.913169
Surprise                  1.507147
Admiration                1.016044
Excitement                0.903494
Satisfaction              0.900492
Aesthetic Appreciation    0.746237
Joy                       0.594186
Entrancement              0.588361
Anxiety           