# Simple Template-Based Approach

In [26]:
import random
import pandas as pd
import numpy as np
import csv
import spacy
sp = spacy.load('en_core_web_sm')
import nltk
from nltk.tokenize import word_tokenize
import re
nltk.download('tagsets')
from nltk.data import load
from nltk.corpus import conll2000
import pickle

[nltk_data] Downloading package tagsets to /Users/renny/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!


## 1. Load Parameters 

In [40]:
# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only load these files if they were produced by a trusted pipeline.
with open('output/tagged_tags_dictionary_withsents_film.pkl','rb') as f:
    tagged_tags_dictionary = pickle.load(f)

with open('output/mapped_tags_to_group_film','rb') as f:
    mapped_tags_to_group = pickle.load(f)

with open('output/mapped_tags_to_group_nonperson_film','rb') as f:
    mapped_tags_to_group_nonperson = pickle.load(f)

# Materialise the keys as a list: this collection is later handed to
# random.sample(), which requires a sequence and rejects dict views on
# Python 3.11+. A list keeps the dictionary's insertion order.
cleaned_list_of_tags = list(tagged_tags_dictionary.keys())

# One row per tag; the former dictionary keys end up in the 'Tags' column.
tags_dataframe = pd.DataFrame(tagged_tags_dictionary).T.reset_index().rename(columns={'index': 'Tags'})

# Vocabulary used by the sentence templates.
like_synonyms_basic = ["like"]
dont_like_synonyms_basic = ["don't like"]
like_synonyms=["like","love","prefer","enjoy","are into", "would watch", "like to watch","like watching", "love to watch","love watching","prefer to watch","prefer watching","enjoy watching","are interested in"]
dont_like_synonyms=["dislike","don't like","hate","don't prefer","are not into", "wouldn't watch","dislike watching","don't like to watch","don't like watching","hate to watch","hate watching","don't prefer watching","don't prefer to watch","are not interested in"]
especially_synonyms = ["especially", "particularly"]
unless_synonyms = ["unless","except if"]

### Basic template adapted from the paper : 
1. You (don't) like {tag1} movies especially if they are {tag2} -- for ((tag1,+), (tag2,++)) or ((tag1,-),(tag2,--))
2. You (don't) like {tag1} movies especially if they are not {tag2} -- for ((tag1,++),(tag2,+)) or ((tag1,--),(tag2,-))
3. You (don't) like {tag1} movies unless they are {tag2} -- for ((tag1,+),(tag2,-)) or ((tag1,-),(tag2,+))
4. You (don't) like {tag1} movies if they are {tag2} -- for ((tag1,N),(tag2,+)) or ((tag1,N),(tag2,-))


### Function get_summary
This function generates a phrase from the basic templates above. It takes two tuples, tag1 and tag2, each holding the tag text and its preference sign (+, ++, -, --, or N), and returns the phrase.

It builds sentence 1 and sentence 2 by calling get_first_sentence and get_second_sentence. The generated sentences keep the meaning of the template parts they replace; only the wording pattern can differ. The tag text itself is never changed.

Example from the first basic template:

<img src="screenshot.png" width="400" height="200" >


### Function get_first_sentence
It generates the sentence 1 part by randomly picking one pattern, i.e. one of several ways of phrasing the basic form.
Within each pattern, it also randomly picks the words used to express like or don't like.
The sentence patterns (there are 3 patterns) and the different expressions of like/don't like are both manually picked.

### Function get_second_sentence
It generates the sentence 2 part in the same way as the get_first_sentence function.


In [29]:
### Basic_2 templates


def get_first_sentence(tag,sentiment = 1):
    """Build the opening clause of a preference statement for ``tag``.

    The tag's attributes are read from the module-level
    ``tagged_tags_dictionary``; they select a list of candidate phrasings,
    from which one is returned at random.

    param tag : key of ``tagged_tags_dictionary``; inserted into the
                sentence wrapped in square brackets
    param sentiment : 1 for a "like" statement; any other value produces a
                      "don't like" statement
    return : the generated clause, without trailing punctuation
    raises KeyError : if ``tag`` (or one of the attributes read below) is missing
    """
    tag_info = tagged_tags_dictionary[tag]
    group = tag_info['POS_Tags_Group']
    entity = tag_info['Entity']
    person_type = tag_info['Person_Type']
    end_with_movie = tag_info['End_with_movie']
    start_pos = tag_info['Start_POS']
    end_pos = tag_info['End_POS']
    genre = tag_info['genre']

    # Draw the verb once instead of once per candidate pattern. Only one
    # pattern is ever returned, so the output distribution is unchanged.
    verb = random.choice(like_synonyms if sentiment == 1 else dont_like_synonyms)
    # Verb-less alternative; "movies" is plural, hence "are".
    for_you = "[{}] movies are {}for you".format(tag.capitalize(), "" if sentiment == 1 else "not ")

    # The flags are compared with `== True` on purpose: they come from a
    # pickle, so plain truthiness could treat other stored values differently.
    if end_with_movie == True:
        # The tag already ends with movie/movies/film/films.
        statement_patterns = ["You {} [{}]".format(verb, tag)]

    elif entity == 'GPE':
        statement_patterns = ["You {} movies about [{}]".format(verb, tag),
                              "You {} movies from [{}]".format(verb, tag)]

    elif genre == True:
        statement_patterns = ["You {} [{}] movies".format(verb, tag),
                              "You {} movies with [{}] {}".format(verb, tag, random.choice(['genre','content'])),
                              "You {} movies full of [{}]".format(verb, tag)]

    # Tags starting with "oscar" are excluded here and fall through to the
    # final adjective-style branch.
    elif (group == 'noun' or group == 'numeral') and not tag.startswith("oscar"):
        if entity == 'PERSON':
            if person_type == 'actor':
                statement_patterns = ["You {} movies starred by [{}]".format(verb, tag),
                                      "You {} movies starring [{}]".format(verb, tag),
                                      "You {} movies played by [{}]".format(verb, tag),
                                      for_you]
            elif person_type == 'director':
                statement_patterns = ["You {} movies directed by [{}]".format(verb, tag),
                                      for_you]
            else:
                statement_patterns = ["You {} movies about [{}]".format(verb, tag)]

        elif (start_pos.startswith('JJ') or start_pos == 'DT') and end_pos == 'VBG':
            statement_patterns = ["You {} movies with [{}]".format(verb, tag)]

        else:
            statement_patterns = ["You {} movies about [{}]".format(verb, tag)]

    else:  # adjective & other
        statement_patterns = ["You {} [{}] movies".format(verb, tag),
                              for_you]

    return random.choice(statement_patterns)

def get_second_sentence(tag,sentiment = 1):
    """Build the condition clause ("they are ...") of a preference statement.

    The attributes stored for ``tag`` in the module-level
    ``tagged_tags_dictionary`` decide which candidate phrasings apply; one
    of them is returned at random.

    param tag : key of ``tagged_tags_dictionary``; inserted into the clause
                wrapped in square brackets
    param sentiment : 1 for an affirmative clause; any other value negates it
    return : the generated clause, without trailing punctuation
    """
    tag_info = tagged_tags_dictionary[tag]
    pos_group = tag_info['POS_Tags_Group']
    ner_label = tag_info['Entity']
    role = tag_info['Person_Type']
    names_a_movie = tag_info['End_with_movie']
    first_pos = tag_info['Start_POS']
    last_pos = tag_info['End_POS']
    is_genre = tag_info['genre']

    # Negation prefix shared by every "they are ..." pattern.
    negation = "not " if sentiment != 1 else ""

    # Tag already ends with a movie/film word: use it as is.
    if names_a_movie == True:
        return random.choice(["they are {}[{}]".format(negation, tag)])

    if ner_label == 'GPE':
        return random.choice(["they are {}about [{}]".format(negation, tag),
                              "they are {}from [{}]".format(negation, tag)])

    # Oscar is a famous award, so it gets its own rule.
    if tag.startswith('oscar'):
        return random.choice(["they are {}[{}] movies".format(negation, tag)])

    if is_genre == True:
        contain_verb = "contain" if sentiment == 1 else "don't contain"
        return random.choice(["they are {}full of [{}]".format(negation, tag),
                              "they {} [{}]".format(contain_verb, tag),
                              "they are {}[{}]".format(negation, tag)])

    if pos_group in ('noun', 'numeral'):
        if ner_label == 'PERSON':
            if role == 'actor':
                return random.choice(["they are {}starred by [{}]".format(negation, tag),
                                      "they are {}starring [{}]".format(negation, tag),
                                      "they are {}played by [{}]".format(negation, tag)])
            if role == 'director':
                return random.choice(["they are {}directed by [{}]".format(negation, tag),
                                      "they are {}movies by [{}]".format(negation, tag)])
            return random.choice(["they are {}about [{}]".format(negation, tag),
                                  "they are {}[{}] movies".format(negation, tag)])

        if (first_pos.startswith('JJ') or first_pos == 'DT') and last_pos == 'VBG':
            return random.choice(["they are {}with [{}]".format(negation, tag)])

        return random.choice(["they are {}about [{}]".format(negation, tag)])

    # Adjectives and everything else.
    return random.choice(["they are {}[{}]".format(negation, tag)])

def get_summary_basic_2(tag1,tag2):
    """Generate a template-based preference phrase for a pair of tags.

    param tag1 : (tag text, sign) pair for the opening clause
    param tag2 : (tag text, sign) pair for the condition clause
    return : the full sentence, ending with a period

    The template is chosen from the lengths/values of the two signs:
      1. (+,++) or (-,--) : "<first>, especially if <second>."
      2. (++,+) or (--,-) : "<first>, especially if <negated second>."
      3. (+,-) or (-,+)   : "<first>, unless <second>."
      4. (N,+) or (N,-)   : "<first> if <second>."
    """
    first_text, first_sign = tag1[0], tag1[1]
    second_text = tag2[0]

    # 2nd template: the first tag carries a double sign.
    if len(first_sign) != 1:
        opening = get_first_sentence(first_text, 1 if first_sign == '++' else 0)
        adverb = random.choice(especially_synonyms)
        condition = get_second_sentence(second_text, 0)
        return "{}, {} if {}.".format(opening, adverb, condition)

    # 1st template: single sign followed by a double sign.
    if len(tag2[1]) == 2:
        opening = get_first_sentence(first_text, 1 if first_sign == '+' else 0)
        adverb = random.choice(especially_synonyms)
        condition = get_second_sentence(second_text)
        return "{}, {} if {}.".format(opening, adverb, condition)

    # 4th template: neutral first tag, polarity comes from the second sign.
    if first_sign == 'N':
        opening = get_first_sentence(first_text, 1 if tag2[1] == '+' else 0)
        condition = get_second_sentence(second_text)
        return "{} if {}.".format(opening, condition)

    # 3rd template: the two signs are opposed.
    opening = get_first_sentence(first_text, 1 if first_sign == '+' else 0)
    connector = random.choice(unless_synonyms)
    condition = get_second_sentence(second_text)
    return "{}, {} {}.".format(opening, connector, condition)
    

## 1.3 Get N preference statements
Generate n preference statements, with random tags and random preference levels.

In [30]:
def generate_preference_statements(num_statements, tags, template_type=1):
    '''
    Generate random preference statements and print each one as it is built.

    Reads the module-level ``tags_dataframe`` (POS tag lookup) and
    ``list_of_preference`` (candidate sign pairs).

    param num_statements : number of statements to be generated
    param tags : collection of tags (list, set, dict keys, ...). From this
                 collection, the two tags for each statement are randomly chosen
    param template_type : 1 to use get_summary_basic_1, 2 to use get_summary_basic_2
    return : four parallel lists - the statements, the chosen sign pairs,
             the chosen tag pairs and the POS tags of each tag pair
    raises ValueError : if template_type is neither 1 nor 2
    '''
    # Fail fast: otherwise no branch would assign the statement below.
    if template_type not in (1, 2):
        raise ValueError("template_type must be 1 or 2, got {!r}".format(template_type))

    # random.sample() needs a sequence; dict views and sets are rejected on
    # Python 3.11+, so take a list copy of whatever was passed in.
    tag_pool = list(tags)

    list_preference_statements = []
    list_preference = []
    list_tags = []
    list_postags = []

    for _ in range(num_statements):
        pair_tags = random.sample(tag_pool, k=2)
        list_postags.append([
            tags_dataframe[tags_dataframe['Tags'] == pair_tags[0]]['POS_Tags'].values[0],
            tags_dataframe[tags_dataframe['Tags'] == pair_tags[1]]['POS_Tags'].values[0],
        ])
        pair_preferences = random.choice(list_of_preference)
        first = (pair_tags[0], pair_preferences[0])
        second = (pair_tags[1], pair_preferences[1])
        # Only the selected generator is looked up, so the other one does not
        # need to be defined.
        if template_type == 1:
            pref_statement = get_summary_basic_1(first, second)
        else:
            pref_statement = get_summary_basic_2(first, second)
        print (pref_statement)
        list_preference_statements.append(pref_statement)
        list_preference.append(pair_preferences)
        list_tags.append(pair_tags)
    return list_preference_statements, list_preference,list_tags, list_postags

## 1.4 Other function(s)

In [31]:
def save_to_file(title,list_preference_statements, list_preference,list_tags, list_postags):
    '''
    Save the generated statements into an Excel file under "output/".

    param title : file name (including extension) appended to "output/"
    param list_preference_statements : generated sentences, one per row
    param list_preference : per-row pair of preference signs (tag1, tag2)
    param list_tags : per-row pair of tag texts (tag1, tag2)
    param list_postags : per-row pair of POS tags (tag1, tag2)
    '''
    # Split each list of pairs into its two columns with plain indexing.
    # Slicing a NumPy array built from the lists raised IndexError when the
    # lists were empty; this form simply produces an empty sheet instead.
    pd.DataFrame({'Preference_statement': list_preference_statements,
                  'tag1': [pair[0] for pair in list_tags],
                  'pos-tag1' : [pair[0] for pair in list_postags],
                  'pref1': [pair[0] for pair in list_preference],
                  'tag2': [pair[1] for pair in list_tags],
                  'pos-tag2' : [pair[1] for pair in list_postags],
                  'pref2': [pair[1] for pair in list_preference]
    }).to_excel("output/"+title)

# 3. Generate Preference statements

In [32]:
# Candidate (tag1 sign, tag2 sign) pairs, one row per basic template.
list_of_preference = [
    ('+', '++'), ('-', '--'),   # template 1
    ('++', '+'), ('--', '-'),   # template 2
    ('+', '-'), ('-', '+'),     # template 3
    ('N', '+'), ('N', '-'),     # template 4
]

In [44]:
# Generate preference statements with all filtered tags.
# The tag collection is converted to a list first: random.sample() (used by
# the generator) needs a sequence and rejects dict key views on Python 3.11+.
# NOTE(review): the output file name says "basic1" although template_type=2
# is used here - confirm the intended name.
list_preference_statements, list_preference, list_tags, list_postags = generate_preference_statements(100,list(cleaned_list_of_tags),template_type=2)
save_to_file('preference_statement_all_210320_w_sent_film_basic1.xlsx',list_preference_statements, list_preference,list_tags,list_postags)

You prefer [jamie foxx] movies especially if they are not [jonathan pryce].
You like [unsympathetic characters] movies unless they are [free running].
You don't prefer [americans abroad] movies unless they are [nerd].
You hate [midlife crisis] movies especially if they are [camera].
You like [life in general] movies especially if they are not [charles laughton].
You dislike [edward furlong] movies especially if they are [unintelligent].
You dislike [yul brynner] movies especially if they are [vanity].
You prefer [sherlock holmes] movies especially if they are not [overly simplistic].
You love [oscar (best picture)] movies unless they are [kathy bates].
You enjoy [psychoanalysis] movies especially if they are [apocalyptic].
You hate [silly] movies if they are [original].
You are not into [ancient civilization] movies especially if they are not [lots of blood].
You love [soccer] movies unless they are [relaxing].
You prefer [living with disability] movies unless they are [maria bello].
Y

In [41]:
# Preference statements built only from the tags grouped under 'adjective'.
adjectives_tags = mapped_tags_to_group['adjective']
list_preference_statements, list_preference, list_tags, list_postags = generate_preference_statements(
    num_statements=100,
    tags=adjectives_tags,
    template_type=2,
)
save_to_file(
    title='preference_statement_adjective_210320_basic1.xlsx',
    list_preference_statements=list_preference_statements,
    list_preference=list_preference,
    list_tags=list_tags,
    list_postags=list_postags,
)

You love [ninja] movies if they are [self-sacrifice].
You prefer [retro-futuristic] movies unless they are [off-beat].
You don't like [anti-hero] movies especially if they are [motown].
You are not into [musician] movies especially if they are not [wonderful].
You are into [delorean] movies if they are [superficial].
You are not into [twisted] movies especially if they are [pows].
You don't like [nerd] movies especially if they are [meditative].
You don't like [raw] movies especially if they are [funny!].
You like [spaceships] movies unless they are [panic].
You are not into [quiet] movies especially if they are [schizophrenic].
You hate [pagan] movies especially if they are [subversive].
You like [self-indulgent] movies unless they are [magical].
You prefer [owls] movies especially if they are [immoral].
You enjoy [autumnal] movies especially if they are not [rated-r].
You like [slow-paced] movies if they are [weak].
You enjoy [frivolous] movies unless they are [noir].
You don't prefe

In [43]:
# Preference statements built from every non-person tag group except 'noun'.
non_noun_tags = [
    tag
    for group_name, group_tags in mapped_tags_to_group_nonperson.items()
    if group_name != 'noun'
    for tag in group_tags
]

list_preference_statements, list_preference, list_tags, list_postags = generate_preference_statements(
    num_statements=100,
    tags=non_noun_tags,
    template_type=2,
)
save_to_file(
    title='preference_statement_non_adjective_nonPerson_230320_basic1.xlsx',
    list_preference_statements=list_preference_statements,
    list_preference=list_preference,
    list_tags=list_tags,
    list_postags=list_postags,
)

You enjoy [movie to see] movies especially if they are not [photographer].
You dislike [amazon] movies especially if they are [mormon].
You hate [chaos] movies especially if they are [boring beginning].
You don't like [globalisation] movies unless they are [all-star cast].
You don't like [eco] movies unless they are [race].
You are not into [not enough action] movies especially if they are [french film].
You enjoy [medicine] movies unless they are [singing].
You prefer [misunderstanding] movies if they are [rome].
You dislike [prom] movies especially if they are not [monsters].
You don't prefer [farming] movies unless they are [awkward romance].
You dislike [geeky] movies especially if they are [writing process].
You are into [metaphysics] movies especially if they are not [h.p. lovecraft].
You hate [lack of story] movies especially if they are not [zizek].
You don't like [tear-jerker] movies especially if they are [tension].
You dislike [great dialouge] movies especially if they are n

In [526]:
# Preference statements built from the 'noun' group of the non-person tags.
list_preference_statements, list_preference, list_tags, list_postags = generate_preference_statements(
    num_statements=100,
    tags=mapped_tags_to_group_nonperson['noun'],
    template_type=2,
)
save_to_file(
    title='preference_statement_non_person_noun_190320_b2.xlsx',
    list_preference_statements=list_preference_statements,
    list_preference=list_preference,
    list_tags=list_tags,
    list_postags=list_postags,
)

You dislike movies about 'martial arts' if they are about 'fast food'.
You dislike movies about 'wealth' if they are about 'artist mind'.
You prefer movies about 'worth watching' especially if they are not about 'no chemistry'
You dislike movies about 'greed' unless they are about 'russian revolution'.
You are not into movies about 'writing' unless they are about 'russia'.
You don't like movies about 'mortality' if they are about 'good soundtrack'.
You enjoy movies about 'movie business' especially if they are about 'complex morality'.
You don't like movies about 'world war ii' if they are about 'knights'.
You dislike movies about 'hayley atwell' especially if they are about 'german perspective'.
You hate movies about 'metaphysics' especially if they are about 'dvd'.
You dislike movies about 'dialogue driven' unless they are about 'figure skating'.
You like movies about 'escapism' especially if they are about 'opposites attract'.
You dislike movies about 'soviet union' if they are abou