In [7]:
import os
import glob
import copy

import matplotlib.pyplot as plt
import numpy as np

from functools import reduce

In [8]:
from enum import Enum
class Quality(Enum):
     CHAOS = 1
     DISTURBING = 2
     DISGUSTING = 3
     OFFENSIVE = 4
     POSITIVE = 5

for quality in Quality:
    print(quality)

ADJECTIVES = {
    Quality.CHAOS:'chaotic',
    Quality.DISTURBING:'disturbing',
    Quality.DISGUSTING:'disgusting',
    Quality.OFFENSIVE:'offensive',
    Quality.POSITIVE:'positive'
}

NOUNS = {
    Quality.CHAOS:'chaos',
    Quality.DISTURBING:'disturbing quality',
    Quality.DISGUSTING:'disgusting quality',
    Quality.OFFENSIVE:'offensiveness',
    Quality.POSITIVE:'positivity'
}

Quality.CHAOS
Quality.DISTURBING
Quality.DISGUSTING
Quality.OFFENSIVE
Quality.POSITIVE


# Define data model

In [9]:
class Describable:
    def __init__(self, children=[]):
        self.quality = {}
        self.qualities = {}
        self.mean = {}
        self.sums = {}
        self.maxs = {}
        self.children = []
        self.rank = {}
        self.top = {}
        self.tops = 0
        self.total_rank = 0
        for quality in Quality:
            self.quality[quality] = 0
            self.qualities[quality] = []
            self.mean[quality] = 0
            self.sums[quality] = 0
            self.maxs[quality] = 0
        if len(children) > 0:
            self.set_children(children)
            
    def __getitem__(self, key):
        return self.quality[key]
    
    def __setitem__(self, key, value):
        self.quality[key] = value
    
    def report(self):
        for quality in Quality:
            print(quality, self.mean[quality], self.maxs[quality], self.sums[quality])
        print()
    
    def set_children(self, children):
        self.children = children
        for quality in Quality:
            self.qualities[quality] = [c[quality] for c in self.children]
            self.mean[quality] = np.mean(self.qualities[quality])
            self.maxs[quality] = max(self.qualities[quality])
            self.sums[quality] = sum(self.qualities[quality])
            self[quality] = self.sums[quality]
            
            
            ranks = sorted(range(len(self.children)), key=lambda i: -self.children[i][quality])
            #ranks[0] is the index of the first-ranked child
            for index, rank in enumerate(ranks):
                self.children[rank].rank[quality] = index
            for child in [c for c in self.children if c.rank[quality] <= 4]:
                child.top[quality] = True
                child.tops += 1
            for child in self.children:
                child.total_rank = sum(child.rank.values())

In [10]:
def load_data(path):
    events = []
    with open(path, 'r') as infile:
        for line in infile:
            events.append(ComEvent(line))
    return events

class ComEvent(Describable):
    idn = ""
    noun = ""
    what_happened = ""
    occurrence = None
    def __init__(self, line):
        Describable.__init__(self)
        bits = line.strip().split(";")
        self.idn = bits[0]
        self.noun = bits[1] 
        self[Quality.DISTURBING] = float(bits[2])
        self[Quality.DISGUSTING] = float(bits[3])
        self[Quality.CHAOS] = float(bits[4])
        self[Quality.OFFENSIVE] = float(bits[5])
        self[Quality.POSITIVE] = float(bits[6])
        self.what_happened = bits[7]
    def __repr__(self):
        return self.what_happened

class Occurrence(Describable):
    events = []
    nouns = set()
    idn = ""
    def __init__(self, events):
        Describable.__init__(self)
        self.events = events
        self.idn = events[0].idn
        self.nouns = set([e.noun for e in events])
        for e in self.events:
            e.occurrence = self
        
        self.set_children(events)
        
    def report(self):
        print('{}'.format("\t"+"\n\t".join([e.what_happened for e in self.events])))
        print('--------------')
        Describable.report(self)

    def what_happened(self):
        return "\n".join([e.what_happened for e in self.events])
    
    def __repr__(self):
        return """<Occurrence {idn}>
        {evts}
        disturbing: {disturbing}
        disgusting: {disgusting}
        chaos: {chaos}
        offensive: {offensive}
        positive: {positive}
        """.format(idn=self.idn,
                  evts=self.what_happened(),
                  disturbing=self[Quality.DISTURBING],
                  disgusting=self[Quality.DISGUSTING],
                  chaos=self[Quality.CHAOS],
                  offensive=self[Quality.OFFENSIVE],
                  positive=self[Quality.POSITIVE]
                  )
    
class Commercial(Describable):
    title = ""
    events = []
    def __init__(self, path):
        Describable.__init__(self)
        self.events = load_data(path)
        self.title = path.split('_')[-1].split('.')[0]
        
        # generate occurrences
        es = self.events.copy()
        self.occurrences = []
        while len(es) > 0:
            e = es.pop()
            occur = [x for x in es if x.idn == e.idn]
            occur.append(e)
            es = [x for x in es if x not in occur]
            self.occurrences.append(Occurrence(occur))
            
        self.set_children(self.occurrences)
    
    def __repr__(self):
        return "<{}>".format(self.title)

In [16]:
class Report():
    report_type = ""
    description = ""
    def __init__(self, t, d):
        self.report_type = t
        self.description = d

# Analysis functions

In [11]:
def commercial_notes(com):
    notes = []
    
    ocs = copy.deepcopy(com.children)

    labels = reduce(lambda x, y: x |y, [o.nouns for o in ocs])
    label_sets = []
    for label in labels:
        label_set = Describable()
        label_set.label = label
        label_set.set_children([c for c in ocs if label in c.nouns])
        label_sets.append(label_set)

    all_labels = Describable(label_sets)

    principal_labels = [x.label for x in sorted(all_labels.children, key=lambda x:x.total_rank)][0:3]
    notes.append('the commercial is primarily characterized by {}, {}, and {}.'.format(*principal_labels))
    
    for label in principal_labels:
        ls = [x for x in label_sets if x.label==label][0]
        ls.frac = {}
        for quality in Quality:
            try:
                ls.frac[quality] = ls[quality]/com[quality]
            except:
                ls.frac[quality] = -1
        nouns = [NOUNS[key] for key, value in ls.frac.items() if value > 0.5]
        if len(nouns) > 0:
            
            note = 'the {} is notable for its '.format(label)+str(nouns)
            notes.append(note)
        
    return notes

In [12]:
def note_events(com):
    notes = []
    frequent_qualities = sorted(Quality, key=lambda q: -com.mean[q])[0:2]
    notes.append('the events of this commercial are frequently {} and {}'.format(*list(map(ADJECTIVES.__getitem__, frequent_qualities))))

    # given the commercial's high average chaos, this event embodies the chaotic nature of the commercial
    x = sorted(com.children, key=lambda o: o.rank[frequent_qualities[0]])[0]
    notes.append('the climax of the commercial was when ' + x.what_happened())
    
    x = sorted(com.children, key=lambda o: o.rank[frequent_qualities[1]])[0]
    notes.append('the second climax of the commercial was when ' + x.what_happened())

    # notable event: stands out by several metrics
    x = sorted(com.children, key=lambda o: o.tops)[0]
    notes.append('it was memorable when '+ x.what_happened())
    return notes

# Load commercials

In [13]:
commercials = []
for path in glob.glob('/Users/RSHAW/Library/Application Support/EpicBanana/Yogurt Commercial 3/commercial_events_*'):
    commercials.append(Commercial(path))


commercials

[<8bdb8470-8b25-4484-aa09-34c2592bbb42>,
 <71685811-ade5-405e-bae5-859b1be2fa68>,
 <c7419462-1ccc-4a35-8022-c1689c5ddf27>,
 <be27029d-4b17-40b8-a397-abceb8b89485>,
 <258f36b2-2ad3-446b-a132-3034dcc4c89f>,
 <849989cd-1ca6-45c6-b047-74fac01322f9>,
 <9801811f-4c07-4914-ac18-ceceb5481b10>,
 <876e91d6-90d5-41f8-9a30-761f0dd6eb30>,
 <7d9e9681-069e-4105-a2b4-efac14000dda>,
 <dc47ebf1-98b7-463c-b72d-f1a8b27a37f5>,
 <6d731e4b-9484-493a-b291-578f9e4c58b7>,
 <57ff9ae1-6fb6-4fef-b3b3-606e53c71249>]

In [14]:
def analyze(com):
    # total qualities and quality per occurrence
    com.report()
    print('\n'.join(commercial_notes(com)))
    print('\n'.join(note_events(com)))

In [15]:
analyze(commercials[8])

Quality.CHAOS 0.5 3.0 9.0
Quality.DISTURBING 0.111111111111 2.0 2.0
Quality.DISGUSTING 0.555555555556 4.0 10.0
Quality.OFFENSIVE 0.0 0.0 0.0
Quality.POSITIVE 0.111111111111 1.0 2.0

the commercial is primarily characterized by eating, vomiting yogurt, and yogurt eating.
the eating is notable for its ['chaos', 'disgusting quality', 'positivity']
the vomiting yogurt is notable for its ['disturbing quality']
the yogurt eating is notable for its ['chaos', 'disgusting quality', 'positivity']
the events of this commercial are frequently disgusting and chaotic
the climax of the commercial was when test vomited up vomited-up puddle of yogurt
test vomited up yogurt
the second climax of the commercial was when test vomited up vomited-up puddle of yogurt
test vomited up yogurt
it was memorable when test said Yuck


In [211]:
# peaks:
# compare max to mean?

# mean different peaked qualities = hard to pay attention to, jumbled
# no peaked qualities: boring, bland, lifeless


# characterized by a strong sense of morbidity
# flirting dangerously with 

# populate a number of statements. remove any statement with a duplicate description.

# time series?

wretched


focus group:
find notable occurrence, describe liked/didnt like, and why

describe commercial as a whole
it is a yogurt commercial. it is puzzling if it veers off into territory that is very disgusting, disturbing,
find the climax of the commercial

"the star repeatedly vomited yogurt"
"flirting dangerously with madness, the commercial "
"in what is supposed to be a yogurt commercial,"

legal notice
