# Simple Template-Based Approach

In [1]:
import random
import pandas as pd
import numpy as np
import csv
import spacy
sp = spacy.load('en_core_web_sm')
import nltk
from nltk.tokenize import word_tokenize
import re
nltk.download('tagsets')
from nltk.data import load
from nltk.corpus import conll2000
import pickle

[nltk_data] Downloading package tagsets to /Users/renny/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!


## 1. Load Parameters 

In [2]:
#with open('output/tagged_tags_dictionary_withsents_film.pkl','rb') as f:
with open('output/tags_available_in_index/tagged_tags_dictionary_withsents_film(available_in_index).pkl','rb') as f:
    tagged_tags_dictionary = pickle.load(f)
    
#with open('output/mapped_tags_to_group_film','rb') as f:
with open('output/tags_available_in_index/mapped_tags_to_group_film(available_in_index).pkl','rb') as f:
    mapped_tags_to_group = pickle.load(f)
    
with open('output/tags_available_in_index/mapped_tags_to_group_nonperson_film(available_in_index).pkl','rb') as f:
    mapped_tags_to_group_nonperson = pickle.load(f)

    
cleaned_list_of_tags = tagged_tags_dictionary.keys()

tags_dataframe = pd.DataFrame(tagged_tags_dictionary).T.reset_index().rename(columns={'index': 'Tags'})   

like_synonyms_basic = ["like"]
dont_like_synonyms_basic = ["don't like"]
like_synonyms=["like","love","prefer","enjoy","are into", "would watch", "like to watch","like watching", "love to watch","love watching","prefer to watch","prefer watching","enjoy watching","are interested in"]
dont_like_synonyms=["dislike","don't like","hate","don't prefer","are not into", "wouldn't watch","dislike watching","don't like to watch","don't like watching","hate to watch","hate watching","don't prefer watching","don't prefer to watch","are not interested in"]
especially_synonyms = ["especially", "particularly"]
unless_synonyms = ["unless","except if"]

In [3]:
### Basic_1 templates
def get_summary_basic_1(tag1,tag2):
    
    if len(tag1[1]) ==1:
        
        if len(tag2[1])==2:
            #1st template (+,++) or (-,--)
            text1 = random.choice(like_synonyms_basic) if tag1[1]=='+' else random.choice(dont_like_synonyms_basic)
            return "You {} [{}] movies, especially if they are [{}].".format(text1,tag1[0],tag2[0])
            
        else:
            #4th template (N,+) or (N,-)
            if tag1[1] == 'N':
                text1 = random.choice(like_synonyms_basic) if tag2[1]=='+' else random.choice(dont_like_synonyms_basic)
                return "You {} [{}] movies if they are [{}].".format(text1,tag1[0],tag2[0])
    
            #3rd template (+,-) or (-,+)    
            else:
                    text1 = random.choice(like_synonyms_basic) if tag1[1]=='+' else random.choice(dont_like_synonyms_basic)
                    return "You {} [{}] movies, unless they are [{}].".format(text1,tag1[0],tag2[0])
    
    #2nd template (++,+) or (--,-)
    else:
        text1 = random.choice(like_synonyms_basic) if tag1[1]=='++' else random.choice(dont_like_synonyms_basic)
        return "You {} [{}] movies, especially if they are not [{}].".format(text1,tag1[0],tag2[0])
 

### Basic template adapted from the paper : 
1. You (don't) like {tag1} movies especially if they are {tag2} -- for ((tag1,+), (tag2,++)) or ((tag1,-),(tag2,--))
2. You (don't) like {tag1} movies especially if they are not {tag2} -- for ((tag1,++),(tag2,+)) or ((tag1,--),(tag2,-))
3. You (don't) like {tag1} movies unless they are {tag2} -- for ((tag1,+),(tag2,-)) or ((tag1,-),(tag2,+))
4. You (don't) like {tag1} movies if they are {tag2} -- for ((tag1,N),(tag2,+)) or ((tag1,N),(tag2,-))


### Function get_summary
This function is to generate phrase based on above basic template, it will take tuple of tag1 and tag2, where each contain a pair of text and the sign (+,++,-, etc.), and return the phrase.

In this function, it will generate sentence 1 and sentence 2 by calling function get_first_sentence, and get_second_sentence. The generated sentence 1 and 2 will have similar meaning with the sentence it replaced, it is just can have different pattern. The tag text will not be changed.

Example from the first basic template:

<img src="screenshot.png" width="400" height="200" >


### Function get_first_sentence
It is to generate sentence 1 part, by picking random pattern which is basically different ways of saying the basic form. 
In each sentence pattern, it will also pick randomly different words for expressing like or don't like.
The pattern for the sentence is manually picked (there are 3 patterns), and the different expresison of like/don't like is also manually picked.

### Function get_second_sentence
It is to generate sentence 2 part, with the same way of get_first_sentence function.


In [4]:
### Basic_2 templates

def get_first_sentence(tag,dictionary_tags,sentiment = 1):
    group = dictionary_tags[tag]['POS_Tags_Group']
    entity = dictionary_tags[tag]['Entity']  
    person_type = dictionary_tags[tag]['Person_Type']
    end_with_movie = dictionary_tags[tag]['End_with_movie']
    start_pos = dictionary_tags[tag]['Start_POS']
    end_pos = dictionary_tags[tag]['End_POS']
    genre = dictionary_tags[tag]['genre']
    
    #if end with movie/movies/film/films
    if end_with_movie == True:
         statement_patterns = ["You {} [{}]".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag)]#, #Basic pattern
            
    elif entity == 'GPE':
        statement_patterns = ["You {} movies about [{}]".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag),
                             "You {} movies from [{}]".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag)]
    elif genre==True:
        statement_patterns = ["You {} [{}] movies".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag),
                             "You {} movies with [{}] {}".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag,random.choice(['genre','content'])),
                             "You {} movies full of [{}]".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag)]
        
    
    #elif tag.startswith('oscar'): ## Since oscar is a famous award, make own rule for it
    #    statement_patterns = ["You {} '{}' movies".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag)]
    
    elif ((group == 'noun') | (group == 'numeral')) & (not tag.startswith("oscar")):
        if entity == 'PERSON':
            if person_type == 'actor':
                  statement_patterns = ["You {} movies starred by [{}]".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag), #Basic pattern
                                       "You {} movies starring [{}]".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag),   
                                        "You {} movies played by [{}]".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag),
                                        "[{}] movies is {}for you".format(tag.capitalize(), "" if sentiment==1 else "not ")]
            elif (person_type == 'director'):
                statement_patterns = ["You {} movies directed by [{}]".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag), #Basic pattern
                          "[{}] movies is {}for you".format(tag.capitalize(), "" if sentiment==1 else "not ")]
            else:
                statement_patterns = ["You {} movies about [{}]".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag)]
    
        elif ((start_pos.startswith('JJ')) | (start_pos=='DT')) & (end_pos=='VBG'):
            statement_patterns = ["You {} movies with [{}]".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag)]
    
        else:
            statement_patterns = ["You {} movies about [{}]".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag)]
    

    else : #adjective & other
        statement_patterns = ["You {} [{}] movies".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag), #Basic pattern
                              "[{}] movies is {}for you".format(tag.capitalize(), "" if sentiment==1 else "not ")]
                              #  "You {} movies filled with '{}'".format(random.choice(like_synonyms) if sentiment == 1 else random.choice(dont_like_synonyms),tag)]

    return(random.choice(statement_patterns))

def get_second_sentence(tag,dictionary_tags,sentiment = 1):
    group = dictionary_tags[tag]['POS_Tags_Group']
    entity = dictionary_tags[tag]['Entity']
    person_type = dictionary_tags[tag]['Person_Type']
    end_with_movie =dictionary_tags[tag]['End_with_movie']
    start_pos =dictionary_tags[tag]['Start_POS']
    end_pos =dictionary_tags[tag]['End_POS']
    genre = dictionary_tags[tag]['genre']
    
    
    if end_with_movie == True:
        statement_patterns = ["they are {}[{}]".format("" if sentiment==1 else "not ",tag)]#,#Basic pattern for second sentence
    
    elif entity == 'GPE':
        statement_patterns = ["they are {}about [{}]".format("" if sentiment==1 else "not ",tag),
                             "they are {}from [{}]".format("" if sentiment==1 else "not ",tag)]
    
    elif tag.startswith('oscar'): ## Since oscar is a famous award, make own rule for it
        statement_patterns = ["they are {}[{}] movies".format("" if sentiment==1 else "not ",tag)]
  
    elif genre == True:
        statement_patterns = ["they are {}full of [{}]".format("" if sentiment==1 else "not ",tag),
                             "they {} [{}]".format("contain" if sentiment==1 else "don't contain",tag),
                             "they are {}[{}]".format("" if sentiment==1 else "not ",tag)]
    
    elif (group == 'noun') | (group == 'numeral'):
        if entity == 'PERSON':
            if person_type == 'actor':
                  statement_patterns = ["they are {}starred by [{}]".format("" if sentiment==1 else "not ",tag), #Basic pattern
                          "they are {}starring [{}]".format("" if sentiment==1 else "not ",tag),
                          "they are {}played by [{}]".format("" if sentiment==1 else "not ",tag)]
            elif (person_type == 'director'):
                statement_patterns = ["they are {}directed by [{}]".format("" if sentiment==1 else "not ",tag),
                                      "they are {}movies by [{}]".format("" if sentiment==1 else "not ",tag)]
            else:
                statement_patterns = ["they are {}about [{}]".format("" if sentiment==1 else "not ",tag),
                                 "they are {}[{}] movies".format("" if sentiment==1 else "not ",tag)]
        
        elif ((start_pos.startswith('JJ')) | (start_pos=='DT')) & (end_pos=='VBG'):
            statement_patterns = ["they are {}with [{}]".format("" if sentiment==1 else "not ",tag)]
                
        else:
            statement_patterns = ["they are {}about [{}]".format("" if sentiment==1 else "not ",tag)]
    

    else : #adjectives and others, also if end_with_movie == True
    # Below are manually set pattern for first sentence with positive or negative sentiment which can be randomly chosen
        statement_patterns = ["they are {}[{}]".format("" if sentiment==1 else "not ",tag)]#,#Basic pattern for second sentence
                         # $"they are {}full of '{}'".format("" if sentiment==1 else "not ", tag),
                        #"they {} high '{}' content".format("have" if sentiment==1 else "don't have",tag)]
   
    
    
    return(random.choice(statement_patterns))

def get_summary_basic_2(tag1,tag2,dictionary_tags):
    " Function to generate template_based summary/phrase, using basic template as above explanation in markdown"
    
    if len(tag1[1]) ==1:
        
        if len(tag2[1])==2:
            #1st template (+,++) or (-,--)
            sentiment_1 = 1 if tag1[1]=='+' else 0
            return "{}, {} if {}.".format(get_first_sentence(tag1[0],dictionary_tags,sentiment_1),random.choice(especially_synonyms),get_second_sentence(tag2[0],dictionary_tags)) #especially
            
        else:
            #4th template (N,+) or (N,-)
            if tag1[1] == 'N':
                sentiment_1 = 1 if tag2[1]=='+' else 0
                return "{} if {}.".format(get_first_sentence(tag1[0],dictionary_tags,sentiment_1),get_second_sentence(tag2[0],dictionary_tags))
    
            #3rd template (+,-) or (-,+)    
            else:
                    sentiment_1 = 1 if tag1[1]=='+' else 0
                    return "{}, {} {}.".format(get_first_sentence(tag1[0],dictionary_tags,sentiment_1),random.choice(unless_synonyms),get_second_sentence(tag2[0],dictionary_tags)) #unless
    
    #2nd template (++,+) or (--,-)
    else:
        sentiment_1 = 1 if tag1[1]=='++' else 0
        return "{}, {} if {}.".format(get_first_sentence(tag1[0],dictionary_tags,sentiment_1),random.choice(especially_synonyms),get_second_sentence(tag2[0],dictionary_tags,0))#especially
    

## 1.3 Get N preference statements
To generate n number of preference statement, with random tags and random preferences level

In [5]:
def generate_preference_statements(dictionary_tags,pair_tags_list=None,list_pair_preferences=None, num_sentence = 200): 
    '''
    param num_statements : number of statements to be generated
    param tags : list of tags. From this list, the tags for each statement will be randomly choosen
    '''
    list_preference_statements_basic = []
    list_preference_statements_adjusted = []
    list_preference = []
    list_tags = []
    list_postags = []
    
    cleaned_all_tags = list(dictionary_tags.keys())
    
    if pair_tags_list == None:
        pair_tags_list =[]
        list_pair_preferences = []
        for i in range(num_sentence):
            pair_tags_list.append(random.sample(cleaned_all_tags,k=2))
            list_pair_preferences.append(random.choice(list_of_preference))
            

    for pair_tags,pair_preferences in zip(pair_tags_list,list_pair_preferences):
        #pair_tags = random.sample(tags,k=2)
        list_postags.append([dictionary_tags[pair_tags[0]]['POS_Tags'],dictionary_tags[pair_tags[1]]['POS_Tags']])
        #pair_preferences = random.choice(list_of_preference)
        pref_statement_basic = get_summary_basic_1((pair_tags[0],pair_preferences[0]),(pair_tags[1],pair_preferences[1]))
        pref_statement_adjusted = get_summary_basic_2((pair_tags[0],pair_preferences[0]),(pair_tags[1],pair_preferences[1]),dictionary_tags)
        #print (pref_statement)
        list_preference_statements_basic.append(pref_statement_basic)
        list_preference_statements_adjusted.append(pref_statement_adjusted)
        list_preference.append(pair_preferences)
        list_tags.append(pair_tags)
    return list_preference_statements_basic,list_preference_statements_adjusted, list_preference,list_tags, list_postags

## 1.4 Other function(s)

In [6]:
def save_to_file(title,list_preference_statements, list_preference,list_tags, list_postags):   
    '''
    Saving the generated statements into excel file
    '''
    # Write the generated sentences in a csv file for review
    list_tags = np.array(list_tags)
    list_preference = np.array(list_preference)
    list_postags = np.array(list_postags)
    
    pd.DataFrame({'Preference_statement': list_preference_statements,
                  'tag1': list_tags[0:,0],
                  'pos-tag1' : list_postags[0:,0],
                  'pref1': list_preference[0:,0],
                  'tag2': list_tags[0:,1],
                  'pos-tag2' : list_postags[0:,1],
                  'pref2': list_preference[0:,1]
    }).to_excel("output/"+title)

In [7]:
def save_to_txt(title,list_sentence):
    MyFile=open('output/'+title,'w')

    for element in list_sentence:
        MyFile.write(element.replace('[',"").replace(']',""))
        MyFile.write('\n')
    MyFile.close()

# 3. Generate Preference statements

In [8]:
list_of_preference = [('+','++'),('-','--'),('++','+'),('--','-'),('+','-'),('-','+'),('N','+'),('N','-')]

In [None]:
list_preference_statements_basic,list_preference_statements_adjusted, list_preference, list_tags, list_postags = generate_preference_statements(tagged_tags_dictionary)
save_to_file('tags_available_in_index/preference_statement_all_basic1_dummy.xlsx',list_preference_statements_basic, list_preference,list_tags,list_postags)
save_to_file('tags_available_in_index/preference_statement_all_adjusted1_dummy.xlsx',list_preference_statements_adjusted, list_preference,list_tags,list_postags)

In [31]:
save_to_txt("test_basic_dummy.txt",list_preference_statements_basic)
list_preference_statements_basic

['You like [lance henriksen] movies if they are [war hero].',
 'You dislike [talent] movies unless they are [sofia coppola].',
 'You dislike [hong kong] movies especially if they are [jingoistic].',
 'You love [lauren bacall] movies unless they are [amateur film making].',
 'You are into [evil corporation] movies especially if they are not [evil children].',
 'You enjoy [twists & turns] movies especially if they are not [carol reed].',
 'You are not into [poor plot] movies especially if they are not [michael mann].',
 'You are into [bahamas] movies unless they are [lon chaney].',
 'You love [identity theft] movies especially if they are not [sandra bullock].',
 'You like [distorted reality] movies especially if they are [self-referential].',
 'You are into [catchy score] movies if they are [matt stone].',
 "You don't prefer [anna kendrick] movies if they are [andrei tarkovsky].",
 'You dislike [self-referential] movies if they are [overlong].',
 'You dislike [teen] movies unless they a

In [32]:
save_to_txt("test_adjusted_dummy.txt",list_preference_statements_adjusted)
list_preference_statements_adjusted

['You prefer movies starred by [lance henriksen] if they are about [war hero].',
 "You don't like movies about [talent] unless they are movies by [sofia coppola].",
 "You don't like movies from [hong kong] especially if they are [jingoistic].",
 'You like movies starring [lauren bacall] unless they are about [amateur film making].',
 'You enjoy movies about [evil corporation] especially if they are not [evil children].',
 'You enjoy [twists & turns] movies especially if they are not directed by [carol reed].',
 'You hate movies about [poor plot] especially if they are not movies by [michael mann].',
 'You love movies about [bahamas] unless they are about [lon chaney].',
 'You love movies about [identity theft] especially if they are not starring [sandra bullock].',
 'You like movies about [distorted reality] especially if they are [self-referential].',
 'You enjoy movies about [catchy score] if they are about [matt stone].',
 'You hate movies starred by [anna kendrick] if they are dire