In [1]:
import os
import pandas as pd
from matplotlib import pyplot as plt
plt.style.use('ggplot')

# Set path to root directory so we can access core, tasks, data, etc.
# The root is resolved once per kernel session and then reused, so re-running
# this cell does not climb one directory higher on every execution.
if '_PROJECT_ROOT' not in globals():
    _PROJECT_ROOT = os.path.abspath('..')
os.chdir(_PROJECT_ROOT)

In [2]:
import numpy
def smooth(x,window_len=11,window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is extended at both ends with point-reflected copies of itself
    (``2*x[0] - reversed head`` and ``2*x[-1] - reversed tail``) before the
    convolution, and the padding is cut off again afterwards, so the result
    has the same length as ``x``.

    Parameters
    ----------
    x : numpy.ndarray
        One-dimensional input signal.
    window_len : int
        Length of the smoothing window. Values below 3 disable smoothing.
    window : str
        ``'flat'`` for a moving average, or the name of a numpy window
        function: ``'hanning'``, ``'hamming'``, ``'bartlett'``, ``'blackman'``.

    Returns
    -------
    numpy.ndarray
        The smoothed signal. When ``window_len < 3`` the input array itself is
        returned unchanged (and ``window`` is not validated).

    Raises
    ------
    ValueError
        If ``x`` is not 1-D, if ``x`` has fewer than ``window_len`` elements,
        or if ``window`` is not one of the supported names.
    """
    # Call-style raise works on both Python 2 and Python 3; the original
    # ``raise ValueError, "msg"`` form is a SyntaxError on Python 3.
    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")
    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")
    if window_len < 3:
        return x
    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Pad with reflected copies so the window has data to work with at the edges.
    s = numpy.r_[2*x[0]-x[window_len-1::-1], x, 2*x[-1]-x[-1:-window_len:-1]]

    if window == 'flat':  # moving average
        w = numpy.ones(window_len, 'd')
    else:
        # ``window`` was validated above, so a plain attribute lookup replaces eval().
        w = getattr(numpy, window)(window_len)

    y = numpy.convolve(w/w.sum(), s, mode='same')
    # Drop the padding added above; what remains has len(x) samples.
    return y[window_len:-window_len+1]

In [4]:
# Original vizsent torch: smooth the per-row "Positive" score and plot it.
posneg_csv = 'data/videos/@Animated/@NewDisney/Feast/preds_old_torch/posneg.csv'
vid = pd.read_csv(posneg_csv, header=None, names=['Positive', 'Negative'])
smoothed = smooth(vid['Positive'].values, window_len=48)

smoothed_series = pd.Series(smoothed)
smoothed_series.plot()
plt.show()

In [None]:
# Sent biclass: smooth the `pos` column of the bi-class predictions and plot it.
biclass_csv = 'data/videos/@Animated/@NewDisney/Feast/preds/sent_biclass.csv'
vid = pd.read_csv(biclass_csv)

# vid = pd.read_csv('data/plutchik/preds/sent_biclass_5.csv')
smoothed = smooth(vid['pos'].values, window_len=48)
smoothed_series = pd.Series(smoothed)
smoothed_series.plot()
plt.show()

In [None]:
# Plutchik: plot the raw (unsmoothed) `pos` predictions.
plutchik_csv = 'data/plutchik/preds/sent_biclass_5.csv'
vid = pd.read_csv(plutchik_csv)
raw_pos = pd.Series(vid['pos'].values)
raw_pos.plot()
plt.show()

In [None]:
# Emo: smooth every emotion column and plot them together.
vid = pd.read_csv('data/videos/@Animated/@NewDisney/Feast/preds/emo.csv')

w = 48
anger = smooth(vid.anger.values, window_len=w)
anticipation = smooth(vid.anticipation.values, window_len=w)
# `disgust` and `fear` used to be computed from vid.anger (copy-paste slip), which
# drew three identical curves. NOTE(review): assumes emo.csv has `disgust` and
# `fear` columns like the other emotions - confirm against the file.
disgust = smooth(vid.disgust.values, window_len=w)
fear = smooth(vid.fear.values, window_len=w)
joy = smooth(vid.joy.values, window_len=w)
sadness = smooth(vid.sadness.values, window_len=w)
surprise = smooth(vid.surprise.values, window_len=w)
trust = smooth(vid.trust.values, window_len=w)

# All eight emotions on one figure.
df = pd.DataFrame({'anger': anger, 'anticipation': anticipation, 'disgust': disgust, 'fear': fear,
                   'joy': joy, 'sadness': sadness, 'surprise': surprise, 'trust': trust})
df.plot()
plt.show()

# Trust dominates so plot without it too
df = df.drop('trust', axis=1)
df.plot()
plt.show()

# ...and trust on its own.
pd.Series(trust).plot()
plt.show()

In [None]:
# Attempt to automatically find a window size given one time series.
# Calculate the difference between smooth(window_len=k) and smooth(window_len=k+1), and try to find
# inflection points.
# Unfortunately, the differences seem to just follow a power law as k increases (run this cell and see the plot).
# There must be a mathematical reason for this.
import numpy as np
def DTWDistance(s1,s2,w=20):
    '''
    Calculates dynamic time warping Euclidean distance between two
    sequences. Option to enforce locality constraint for window w.

    Parameters:
        s1, s2: indexable sequences of numbers (lists, 1-D arrays, ...).
        w: half-width of the warping band; element i of s1 may only be
           matched with elements j of s2 where |i - j| <= w. The band is
           widened to at least abs(len(s1) - len(s2)) so that the last
           elements can always be aligned. A falsy value (0 or None)
           disables the constraint.

    Returns:
        The square root of the minimal accumulated squared difference along
        a warping path. 0.0 for two empty sequences, inf when exactly one
        of them is empty.
    '''
    n, m = len(s1), len(s2)
    inf = float('inf')

    if w:
        w = max(w, abs(n - m))

    # Sparse cost table: any cell that was never written counts as
    # unreachable (inf), so only the origin needs explicit initialisation.
    DTW = {(-1, -1): 0}

    for i in range(n):
        if w:
            # Inclusive band |i - j| <= w. The previous exclusive upper bound
            # (i + w) left the final cell unreachable whenever
            # len(s2) - len(s1) >= w.
            j_lo, j_hi = max(0, i - w), min(m, i + w + 1)
        else:
            j_lo, j_hi = 0, m

        for j in range(j_lo, j_hi):
            dist = (s1[i] - s2[j]) ** 2
            DTW[(i, j)] = dist + min(DTW.get((i - 1, j), inf),
                                     DTW.get((i, j - 1), inf),
                                     DTW.get((i - 1, j - 1), inf))

    # The function previously fell off the end and returned None.
    return DTW.get((n - 1, m - 1), inf) ** 0.5

def euclidean_dist(x1, x2):
    """Return the sum of squared element-wise differences between x1 and x2.

    Note that no square root is taken, so this is the *squared* Euclidean
    distance.
    """
    delta = x1 - x2
    squared = np.square(delta)
    return np.sum(squared)
    
def infer_window_size(x, dist_f, min_w=60, max_w=1000, window='hanning'):
    """Measure how much the smoothed signal changes as the window grows by one.

    For every window length ``w`` in ``min_w + 1 .. max_w`` (inclusive) the
    signal is smoothed with ``smooth(x, window_len=w, window=window)`` and
    compared, via ``dist_f``, with the result for ``w - 1``.

    Parameters
    ----------
    x : 1-D array passed straight to ``smooth``.
    dist_f : callable taking ``(previous_smoothed, current_smoothed)`` and
        returning their distance.
    min_w, max_w : first (baseline) and last window length to evaluate.
    window : window type forwarded to ``smooth`` for *every* step. The
        baseline used to be smoothed with ``'flat'`` while all later steps
        used ``'hanning'``, so the first distance compared two different
        kernels instead of two window lengths.

    Returns
    -------
    dict mapping each ``w`` to ``dist_f(smooth at w - 1, smooth at w)``.

    Any ``ValueError`` raised by ``smooth`` (e.g. a window length larger than
    ``x``) propagates to the caller.
    """
    distances = {}
    prev_smoothed = smooth(x, window_len=min_w, window=window)
    for w in range(min_w + 1, max_w + 1):
        cur_smoothed = smooth(x, window_len=w, window=window)
        distances[w] = dist_f(prev_smoothed, cur_smoothed)
        prev_smoothed = cur_smoothed
    return distances


vid = pd.read_csv('data/videos/@Animated/@NewDisney/Feast/preds_old_torch/posneg.csv',
                  header=None, names=['Positive', 'Negative'])

ds = infer_window_size(vid.Positive.values, euclidean_dist, min_w=5, max_w=200)
# ds = infer_window_size(vid.Positive.values, DTWDistance, min_w=5, max_w=200)

# Plot against the actual window lengths; plotting the bare list put the
# x-axis at 0..N-1, offset from the window length by min_w + 1.
window_lens = sorted(ds)
distances = [ds[k] for k in window_lens]
plt.plot(window_lens, distances)
plt.xlabel('window_len')
plt.ylabel('distance to smoothing at window_len - 1')
plt.show()
