# 1. Generation of Locations

In [31]:
import sys
import pandas as pd
import os
import wikipediaapi
import numpy as np
import json
import random
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
import re
import requests
from scipy import spatial

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/lauraluckert/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/lauraluckert/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [2]:
PATH = "~/Desktop/"
FILENAME = "tmdb_5000_credits.csv"

full_path = os.path.expanduser(PATH)
os.chdir(full_path)

### 0. Example Structure for New Game Field

In [3]:
new_field = {
    "streets": {"1-3": [],"4-6": [],"7-9":[] , "10-12": [], \
                "13-15": [], "16-18": [], "expensive": [], "cheap": []},
    "stations": [],
    "prison": [],
    "free_parking": [],
    "special": {"1": [], "2": []}
}

### 1. Clean and read-in Movie Data
Data Source:
https://www.kaggle.com/tmdb/tmdb-movie-metadata?select=tmdb_5000_movies.csv

In [5]:
def clean_movie_dataset(movie_data):
    """
    :movie_data: Pandas DataFrame holding movie titles, character cast
    Preprocess the data, we only need the characters from the movie in a dict
    
    :returns: dictionary with key = movie title, value = 
    """
    
    cast_rows = []

    for malformed_string in movie_data.cast:
        imd_string = list(malformed_string[1:(len(malformed_string)-1)].split("}"))
    
        new_list = []

        for item in imd_string:
            try: 
                if item[0] != "{":
                    item = item[2:(len(item))]
                item += "}"
                new_item =json.loads(item)
                person = new_item["character"]
                #gender = new_item["gender"]
                new_list.append(person)
            except IndexError:
                break
        cast_rows.append(new_list)

    
    cast_dict = {}
    for movie, cast in zip(movie_data.title,cast_rows):
        cast_dict[movie] = cast
    
    return cast_dict

## preprocess dataset
movie_characters = pd.read_csv(FILENAME, sep=",")
cast_dict = clean_movie_dataset(movie_characters)

### 3. Random Selection of Topic

In [105]:
## get random movie from our dictionary
random_key = random.choice(list(cast_dict))
print(random_key)
## topic and cast is selected
#topic = random_key
#cast = cast_dict[topic]

## for testing purposes, we set the topic manually
#topic = "Furious 7"
#cast = cast_dict[topic]
#print(topic, cast)

Deep Impact


### 4. Select Characters as New Locations
#### 4.1 *To Do*:  Cleaning of character names (no brakets in names etc.)
#### 4.2 *To Do*: Useful combination of streetnames with selected characters

In [7]:
## possible street names for combination with characters
street_names = [#'Avenue', 
                'Park', 'Street', 'Boulevard', 'Road', 'Main Street', 'Drive', 'Lane', 'Alley']

## fill location entries with characters from film cast (cast_dict)
## example default combination
new_field["streets"]["expensive"] = [x + " Avenue" for x in cast[0:2]]
new_field["streets"]["cheap"] = [x + " Drive" for x in cast[8:10]]
new_field["streets"]["1-3"] = [x + " " + random.choice(street_names) for x in cast[11:14]]
new_field["streets"]["4-6"] = [x + " " + random.choice(street_names) for x in cast[15:18]]
new_field["streets"]["7-9"] = [x + " " + random.choice(street_names) for x in cast[19:22]]
new_field["streets"]["10-12"] = [x + " " + random.choice(street_names) for x in cast[23:26]]
new_field["streets"]["13-15"] = [x + " " + random.choice(street_names) for x in cast[27:30]]
new_field["streets"]["16-18"] = [x + " " + random.choice(street_names) for x in cast[31:33]]
new_field["stations"] = [x + " Station" for x in cast[3:7]]

print(new_field)

{'streets': {'1-3': ['Mr. Nobody Street', 'Deckard Shaw Drive', 'Han Drive'], '4-6': ['Sean Boswell Lane', 'Elena Boulevard', 'Hector Road'], '7-9': ['Owen Shaw Drive', 'Safar Road', 'Jack Lane'], '10-12': ['Samantha Hobbs Alley', 'Letty Fan Park', 'Female Racer Park'], '13-15': ['Race Starter Lane', 'Hot Teacher Drive', 'Doctor Park'], '16-18': ['Merc Tech Road', 'Weapons Tech Lane'], 'expensive': ['Dominic Toretto Avenue', "Brian O'Conner Avenue"], 'cheap': ['Kiet Drive', 'Kara Drive']}, 'stations': ['Letty Station', 'Roman Station', "Tej (as Chris 'Ludacris' Bridges) Station", 'Mia Station'], 'prison': [], 'free_parking': [], 'special': {'1': [], '2': []}}


### 5. Question Answering to Select Characters/Locations for Special Places
#### 5.1 Get Wikipedia Data as Q&A Basis Data
https://pypi.org/project/Wikipedia-API/0.3.5/
- *To Do* : Select only "Plot" Section from Wikipedia Data/Find a way to get relevant data only

In [8]:
## for regular text output
wiki_en_wiki = wikipediaapi.Wikipedia(
        language='en',
        extract_format=wikipediaapi.ExtractFormat.WIKI)

## check if page for topic exists
if wiki_en_wiki.page(topic).exists():
    print("Topic is ok.")
    wiki_page = wiki_en_wiki.page(topic)
    topic_text = wiki_page.text
else:
    print("Find a new topic")

Topic is ok.


In [9]:
model_name = "deepset/roberta-base-squad2"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

q_a = pipeline('question-answering', model=model_name, tokenizer=model_name)

#### 5.2 Select Questions for Q&A model to get wikipedia responses

In [10]:
## Prepare questions (examples, to discuss)
question_dict = {"special_1": "What is an important monument in the movie ",
                "special_2": "What is an expensive location in the movie ",
                "prison": "Which one is a tragic area in the movie ",
                "free_parking": "What is the loveliest place in the movie "}

In [67]:
for category, question_body in question_dict.items():
    question = question_body + topic + "?"
    print(question)
    
    QA_input = {
        'question': question,
        'context': topic_text
    }
    response = q_a(QA_input)
    print(response)
    
    if category == "special_1":
        new_field["special"]["1"] = [response["answer"]]
    elif category == "special_2":
        new_field["special"]["2"] = [response["answer"]]
    else:
        new_field[category] = [response["answer"]]

What is an important monument in the movie Furious 7?


  tensor = as_tensor(value)
  p_mask = np.asarray(


{'score': 0.9653973579406738, 'start': 30306, 'end': 30312, 'answer': 'Walker'}
What is an expensive location in the movie Furious 7?
{'score': 0.6865812540054321, 'start': 4746, 'end': 4757, 'answer': 'Los Angeles'}
Which one is a tragic area in the movie Furious 7?
{'score': 0.7995515465736389, 'start': 29403, 'end': 29409, 'answer': 'Walker'}
What is the loveliest place in the movie Furious 7?
{'score': 0.46883273124694824, 'start': 13357, 'end': 13366, 'answer': 'Abu Dhabi'}


#### 5.3 Evaluate Responses:
- To Do: Check if location/character already exists in new_field
- To Do: filter for bad scores, retrigger question generation

In [68]:
new_field

{'streets': {'1-3': ['Mr. Nobody Street', 'Deckard Shaw Drive', 'Han Drive'],
  '4-6': ['Sean Boswell Lane', 'Elena Boulevard', 'Hector Road'],
  '7-9': ['Owen Shaw Drive', 'Safar Road', 'Jack Lane'],
  '10-12': ['Samantha Hobbs Alley', 'Letty Fan Park', 'Female Racer Park'],
  '13-15': ['Race Starter Lane', 'Hot Teacher Drive', 'Doctor Park'],
  '16-18': ['Merc Tech Road', 'Weapons Tech Lane'],
  'expensive': ['Dominic Toretto Avenue', "Brian O'Conner Avenue"],
  'cheap': ['Kiet Drive', 'Kara Drive']},
 'stations': ['Letty Station',
  'Roman Station',
  "Tej (as Chris 'Ludacris' Bridges) Station",
  'Mia Station'],
 'prison': ['Walker'],
 'free_parking': ['Abu Dhabi'],
 'special': {'1': ['Walker'], '2': ['Los Angeles']}}

# 2. Generation of Action Cards 

### 1. Plagiarism: Read in Monopoly Data
- Get action verbs from monopoly data
- use real action cards for few-shot learning

In [14]:
FILENAME_MONOPOLY = "monopoly_action_cards_keywords.csv"
monopoly_data = pd.read_csv(FILENAME_MONOPOLY, sep=";")

#### 1.1 Keyword Preparation from Monopoly Data

In [15]:
## get pos tags
def preprocess(sent):
    sent = nltk.word_tokenize(sent)
    sent = nltk.pos_tag(sent)
    return sent

## Get action verbs from real monopoly action cards
text_data = ""
for text_item in monopoly_data["content"]:
    text_data += ". " + text_item
    
inspect_actions = preprocess(text_data)

## use only verbs
keyword_list_verbs = []

for pos_tag in inspect_actions:
    if re.match("VB.*", pos_tag[1]):
        if pos_tag[0] == "DM":
            keyword_list_verbs.append("Euro")
        else:
            keyword_list_verbs.append(pos_tag[0])  
        
print("The topic's keyword_list is: ", keyword_list_verbs)

The topic's keyword_list is:  ['Pay', 'take', 'come', '..', 'Go', 'get', '..', 'pays', 'are', 'pays', 'Go', 'receive', 'inherit', 'receive', '..', 'is', 'win', 'won', 'is', '..', 'receives', 'has', 'buy', 'get', 'have', 'been', 'elected', 'Have', 'renovated', 'Euro', 'be', 'called', 'do', 'Pay', 'Do', 'pass', 'collect', 'be', 'released', 'keep', 'need', 'sell', 'Do', 'pass', 'collect', 'be', 'released', 'keep', 'need', 'sell']


Data Source Further Action Words:
https://www.citationmachine.net/resources/grammar-guides/verb/list-verbs/

In [16]:
## POS Tag == "VB.*" from real monopoly action cards


## belongs to DATA INIT
action_verbs_monopoly = ["Pay","Take","Come","Go","Get","Receive","Inherit","Win","Pass",
                         "Collect","being released","Keep","Sell"]
action_verbs = ["Act","Answer","Approve","Arrange","Break","Build","Buy","Coach","Color","Cough","Create", 
                "Complete","Cry","Dance","Describe","Draw","Drink","Eat","Edit","Enter","Exit",
                "Imitate","Invent","Jump","Laugh","Lie","Listen","Paint","Plan","Play","Read","Replace",
                "Run","Scream","See","Shop","Shout","Sing","Skip","Sleep","Sneeze","Solve","Study","Teach",
                "Touch","Turn","Walk","Win","Write","Whistle","Yank","Zip"]



In [72]:
## locations into flat list
locations = ["GO"]
for _, value in new_field["streets"].items():
    for item in value:
        locations.append(item)
for _, value in new_field["special"].items():
    for item in value:
        locations.append(item)
for item in new_field["stations"]:
    locations.append(item)
locations.append(new_field["prison"][0])
locations.append(new_field["free_parking"][0])    

print(locations)

['GO', 'Mr. Nobody Street', 'Deckard Shaw Drive', 'Han Drive', 'Sean Boswell Lane', 'Elena Boulevard', 'Hector Road', 'Owen Shaw Drive', 'Safar Road', 'Jack Lane', 'Samantha Hobbs Alley', 'Letty Fan Park', 'Female Racer Park', 'Race Starter Lane', 'Hot Teacher Drive', 'Doctor Park', 'Merc Tech Road', 'Weapons Tech Lane', 'Dominic Toretto Avenue', "Brian O'Conner Avenue", 'Kiet Drive', 'Kara Drive', 'Walker', 'Los Angeles', 'Letty Station', 'Roman Station', "Tej (as Chris 'Ludacris' Bridges) Station", 'Mia Station', 'Walker', 'Abu Dhabi']


#### 1.2 Few-Shot Learning Training Data Preparation

In [18]:
## generate few shot training data for text generation
prompt_text = ""

for text, keywords in zip(monopoly_data["content"], monopoly_data["keywords"]):
    imd = "key: " + keywords + "\ntweet: " + text + "\n###"
    prompt_text += imd

### 2. Random Keyword Generation for New Action Cards

In [None]:
## global
API_TOKEN = "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"
## self prison

## belongs to DATA INIT
def prepare_generation_prompt(monopoly_data):
    
    generation_prompt_text = ""

    for text, keywords in zip(monopoly_data["content"], monopoly_data["keywords"]):
        imd = "key: " + keywords + "\ntweet: " + text + "\n###"
        generation_prompt_text += imd
    
    return generation_prompt_text

def prepare_sentiment_prompt(monopoly_data):
    
    sent_prompt_text = ""

    for text, sentiment in zip(monopoly_data["content"], monopoly_data["bias"]):
        imd = "Tweet: " + text + "\nSentiment: " + sentiment + "\n###"
        sent_prompt_text += imd
    
    return sent_prompt_text

def keyword_generation():

    first_verb = random.choice(action_verbs_monopoly).lower()
    second_verb = random.choice(action_verbs).lower()
    pronoun = random.choice(pronouns).lower()
    location = random.choice(locations)
    number = 2000
    select_number = random.choice([0,1])

    ## create keyword list for generation
    if select_number == 1:
        keyword_list = [first_verb, second_verb, pronoun, location, number]
    else:
        keyword_list = [first_verb, second_verb, pronoun, location]
    keyword_string = helper_keywords(keyword_list)
    
    return keyword_string


def helper_keywords(keyword_list):
    
    keyword_string = ""

    for item in keyword_list:
        if keyword_string == "":
            keyword_string += str(item)
        else:
            keyword_string += ", " + str(item)
            
    return keyword_string


def query(payload='',
          parameters={'max_new_tokens': 25, 'temperature': 1, 'end_sequence': "###"},
          options={'use_cache': False}):
    
    API_URL = "https://api-inference.huggingface.co/models/EleutherAI/gpt-neo-2.7B"
    headers = {"Authorization": f"Bearer {API_TOKEN}"}
    body = {"inputs":payload,'parameters':parameters,'options':options}
    response = requests.request("POST", API_URL, headers=headers, data= json.dumps(body))
    try:
      response.raise_for_status()
    except requests.exceptions.HTTPError:
        return "Error:"+" ".join(response.json()['error'])
    else:
      return response.json()[0]['generated_text']


def generate_action_cards(action_verbs_monopoly, ## self
                          action_verbs, ## self
                          pronouns=["you","your","yours"], ## self
                          locations, ## self
                          generation_prompt_text, ## self
                          sent_prompt_text, ## self
                          counter=0):
    
    if counter == 0:
        action_cards = []
        counter += 1
        generate_action_cards(counter)
        
    ## stop generation of action cards
    elif counter > 2:
        return action_cards
    
    else:
        ## keyword generation
        keyword_string = keyword_generation()
        ## action card generation
        generation_prompt = generation_prompt_text + "\nkey: " + keyword_string + "\ntweet:"
        data = query(generation_prompt)
        action_card = re.findall(r"(?<=tweet:\s).*", data)[-1] 
        
        ## sentiment classification for action card
        sent_prompt = sent_prompt_text + "\nTweet: " + action_card + "\nSentiment:"
        sentiment = query(sent_prompt)
        action_sentiment = re.findall(r"(?<=Sentiment:\s).*", sentiment)[-1]
        
        ## get reference
        reference = get_reference_for_sentence(location, action_card, action_sentiment)
        
        ## evaluate action card against reference
        score = eval_sentence(reference, action_card)
        
        ## if action card OK, append
        if score >= 0.5:
            actions_cards.append((action_card, action_sentiment))
            counter += 1
            generate_action_cards(counter)
        else:
            counter += 1
            generate_action_cards(counter)
            
        
                 
        

In [19]:
## once locations available, randomly select location


## randomly select verbs, pronouns, locations, fixed number
first_verb = random.choice(action_verbs_monopoly).lower()
second_verb = random.choice(action_verbs).lower()
pronoun = random.choice(["you","your","yours"]).lower()
LOCATION = random.choice(locations)
LOCATION = "Dominic Toretto Avenue"
number = 2000

## special case for prison
if LOCATION == new_field["prison"]:
    keyword_list = [LOCATION, "not pass", "not collect"]

else:
    ## randomly select if second verb, pronoun and location should be considered
    select_second_verb = 1
    select_pronoun = 1
    select_location = 1
    #select_second_verb = random.choice([0,1])
    #select_pronoun = random.choice([0,1])
    #select_location = random.choice([0,1])

    print("\nfirst_verb:", first_verb, "\nsecond_verb:", second_verb, "\npronoun:",  \
          pronoun, "\nLOCATION:",LOCATION, "\nnumber:",number )

    print("\nselect_second_verb:", select_second_verb, "\nselect_pronoun:", select_pronoun, \
          "\nselect_location:",  select_location,)

    if select_second_verb and select_pronoun and select_location:
        keyword_list = [first_verb, second_verb, pronoun, LOCATION]
    elif select_second_verb == 0 and select_pronoun and select_location:
        keyword_list = [first_verb, pronoun, LOCATION]
    elif select_second_verb == 0 and select_pronoun == 0 and select_location:
        keyword_list = [first_verb, LOCATION]
    elif select_second_verb == 0 and select_pronoun == 1 and select_location == 0:
        keyword_list = [first_verb, pronoun, number]
    elif select_second_verb == 0 and select_pronoun == 0 and select_location == 1:
        keyword_list = [first_verb, LOCATION]
    elif select_second_verb == 1 and select_pronoun == 0 and select_location == 0:
        keyword_list = [first_verb, second_verb]
    elif select_second_verb == 1 and select_pronoun == 0 and select_location == 1:
        keyword_list = [first_verb, second_verb, LOCATION]
    elif select_second_verb == 1 and select_pronoun == 1 and select_location == 0:
        keyword_list = [first_verb, second_verb, pronoun]
    elif select_second_verb == 0 and select_pronoun == 0 and select_location == 0:
        keyword_list = [first_verb, number]


keyword_string = ""

for item in keyword_list:
    if keyword_string == "":
        keyword_string += str(item)
    else:
        keyword_string += ", " + str(item)
        
print(keyword_string)


first_verb: go 
second_verb: yank 
pronoun: you 
LOCATION: Dominic Toretto Avenue 
number: 2000

select_second_verb: 1 
select_pronoun: 1 
select_location: 1
go, yank, you, Dominic Toretto Avenue


### 2. Few-Shot Learning Key-to-Text Generation for Action Cards
Source Inference API: https://huggingface.co/blog/few-shot-learning-gpt-neo-and-inference-api

In [20]:
## api token can be generated via free huggingface account
API_TOKEN = "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"

def query(payload='',
          parameters={'max_new_tokens': 25, 'temperature': 1, 'end_sequence': "###"},
          options={'use_cache': False}):
    
    API_URL = "https://api-inference.huggingface.co/models/EleutherAI/gpt-neo-2.7B"
    headers = {"Authorization": f"Bearer {API_TOKEN}"}
    body = {"inputs":payload,'parameters':parameters,'options':options}
    response = requests.request("POST", API_URL, headers=headers, data= json.dumps(body))
    try:
      response.raise_for_status()
    except requests.exceptions.HTTPError:
        return "Error:"+" ".join(response.json()['error'])
    else:
      return response.json()[0]['generated_text']

In [21]:
parameters = {
    'max_new_tokens':25,  # number of generated tokens
    'temperature': 1,   # controlling the randomness of generations
    'end_sequence': "###" # stopping sequence for generation
}

prompt = prompt_text + "\nkey: " + keyword_string + "\ntweet:"


data = query(prompt,parameters)

action_card = re.findall(r"(?<=tweet:\s).*", data)[-1] 
#print(data)
print(keyword_string)
print(action_card)

go, yank, you, Dominic Toretto Avenue
Go into Dominic Toretto from the east. 


### 3. Evaluation of Generated Action Card

- To Do: Which action cards should be used as reference for which input tokens?
- To Do: Regularization List Filter

In [32]:
def pos_distribution(pos_tuples_of_sentence):
    """
    :pos_tuple_of_sentences: tuple (token, pos_tag) as returned from preprocess function
    
    crop pos tags into relevant groups (first two letters)
    count occurences of pos tags in input sentence
    
    :returns: pandas DataFrame with pos_tag and its frequency
    
    """
    pos_df = pd.DataFrame(pos_tuples_of_sentence,columns=["token","long_pos_tag"])
    pos_df["pos_tag"] = [x[0:2] for x in pos_df["long_pos_tag"]]
    freq_df = pos_df["pos_tag"].value_counts()
    
    return freq_df

def eval_compare_structure(reference, new_sentence):

    ## preprocess both
    reference = preprocess(reference)
    new_sentence = preprocess(new_sentence)
    
    ## pos distribution
    reference = pos_distribution(reference)
    new_sentence = pos_distribution(new_sentence)
    
    ## merge vectors
    merged_df = pd.merge(reference,new_sentence,how="outer", left_index=True,right_index=True).fillna(0)
    merged_df.columns=["reference","target"]
    
    ## calc cosine similarity 
    similarity_score = 1 - spatial.distance.cosine(merged_df["reference"], merged_df["target"])
    
    return similarity_score

In [34]:
def eval_compare_lengths(reference, new_sentence):
    
    reference = reference.split(" ")
    new_sentence = new_sentence.split(" ")
    
    len_ref = len(reference)
    len_new = len(new_sentence)
    
    if len_ref >= len_new:
        len_score = len_new/len_ref
    else:
        len_score = len_ref/len_new
    
    return len_score

def score_weighting(similarity_score, len_score, alpha = 0.3):
    
    score = alpha * len_score + (1-alpha) * similarity_score
    
    return score

def eval_sentence(reference, new_sentence):
    
    similarity_score = eval_compare_structure(reference, new_sentence)
    len_score = eval_compare_lengths(reference, new_sentence)
    
    score = score_weighting(similarity_score, len_score)
    
    return score
    

def get_reference_for_sentence(location, new_sentence, sentiment): ## monopoly_data):
    
    if location == PRISON:
        if sentiment == "positiv":
            reference = "You will be released from prison! You must keep this card until you need it or sell it."
        else:
            reference = "Go to the prison! Go directly there. Do not pass Go. Do not collect DM 4000,-."
    else:
        
        if sentiment == "positiv":
            pos_data = monopoly_data['content'][monopoly_data['bias'] == "positiv"]
            pos_data = list(pos_data)
            reference = random.choice(pos_data)
        elif sentiment == "negativ":
            neg_data = monopoly_data['content'][monopoly_data['bias'] == "negativ"]
            neg_data = list(neg_data)
            reference = random.choice(neg_data)
        elif sentiment == "neutral":
            neu_data = monopoly_data['content'][monopoly_data['bias'] == "neutral"]
            neu_data = list(neu_data)
            reference = random.choice(neu_data)
    
    return reference
    

In [103]:
some_data = monopoly_data['content'][monopoly_data['bias'] == "positiv"]
#type(some_data)
some_data = list(some_data)
type(some_data)
print(random.choice(some_data))
#test = some_data.sample
#print(some_data)

You inherit: DM 2000,-.


In [99]:
test

<bound method NDFrame.sample of 7               The bank pays you a dividend DM 1000,-.
8     Rent and bond interest are due. The bank pays ...
10    You receive a 7% dividend on preferred shares....
11                              You inherit: DM 2000,-.
12              From stock sales you receive: DM 500,-.
13           The annual annuity is due. Draw DM 2000,-.
14    You win a crossword puzzle contest. Draw DM 20...
15            Bank error in your favor. Draw DM 4000,-.
16                    Income tax refund. Draw DM 400,-.
17    You won the 2nd prize in a beauty contest. Dra...
18    It is your birthday. Collect DM 1000,- from ea...
29    You will be released from prison! You must kee...
31    You will be released from prison! You must kee...
Name: content, dtype: object>

In [54]:
reference = "Go back to Badstraße."

In [24]:
action_card = "Go into Dominic Toretto Avenue from the east. "

'Go into Dominic Toretto from the east. '

In [35]:
eval_sentence(reference, action_card)

(0.4837119062359573, 0.4767312946227962, 0.5)

In [36]:
reference = reference.split(" ")
action_card = action_card.split(" ")


In [37]:
reference

['Go', 'back', 'to', 'Badstraße.']

In [38]:
len(reference)

4

In [44]:
action_card = "Go into Dominic Toretto from the east. "

In [42]:
## generate few shot training data for sentiment classification
sent_prompt_text = ""

for text, sentiment in zip(monopoly_data["content"], monopoly_data["bias"]):
    imd = "Tweet: " + text + "\nSentiment: " + sentiment + "\n###"
    sent_prompt_text += imd

In [59]:
parameters = {
    'max_new_tokens':25,  # number of generated tokens
    'temperature': 1,   # controlling the randomness of generations
    'end_sequence': "###" # stopping sequence for generation
}

sent_prompt = sent_prompt_text + "\nTweet: " + action_card + "\nSentiment:"


sentiment = query(sent_prompt,parameters)

#action_card = re.findall(r"(?<=tweet:\s).*", data)[-1] 
#print(data)
print(action_card)
#print(action_card)

Go to this street. Go direct to the bank. Pass GO and collect DM 1000,-.


In [60]:
print(sentiment)

Tweet: Pay a fine of DM 200,- or take a community ticket.
Sentiment: negativ
###Tweet: Move up to Seestrasse. 
If you come over Go, collect DM 4000,-.
Sentiment: neutral
###Tweet: Go back 3 fields.
Sentiment: neutral
###Tweet: Go back to Badstraße.
Sentiment: neutral
###Tweet: Move forward to Schlossallee.
Sentiment: neutral
###Tweet: Move forward to Go.
Sentiment: neutral
###Tweet: Make a trip to the south station. When you get over Go, draw DM 4000,-.
Sentiment: neutral
###Tweet: The bank pays you a dividend DM 1000,-.
Sentiment: positiv
###Tweet: Rent and bond interest are due. The bank pays you DM 3000,-.
Sentiment: positiv
###Tweet: Move forward to Go.
Sentiment: neutral
###Tweet: You receive a 7% dividend on preferred shares. DM 900,-.
Sentiment: positiv
###Tweet: You inherit: DM 2000,-.
Sentiment: positiv
###Tweet: From stock sales you receive: DM 500,-.
Sentiment: positiv
###Tweet: The annual annuity is due. Draw DM 2000,-.
Sentiment: positiv
###Tweet: You win a crossword puzzl

In [61]:
action_sentiment = re.findall(r"(?<=Sentiment:\s).*", sentiment)[-1]

In [74]:
print(action_sentiment)
print(action_card)

negativ
Go to this street. Go direct to the bank. Pass GO and collect DM 1000,-.


In [77]:
## go to location
for location in locations:
    if location in action_card:
        go_to_location = location



GO


In [79]:
m = re.search('[0-9]+', action_card)
print(m.group(0))

1000


In [86]:
action_card_2 = "There is 1000 number, what is the return value"
m = re.search('[0-9]+', action_card_2)

try: m.group(0)
except AttributeError:
    number = None
else:
    number = m.group(0)
    
print(number)

1000


In [87]:
def action_from_action_card(action_card):
    
    ## search action card for locations
    for location in locations:
        if location in action_card:
            go_to_location = location
    
    ## extract number, if existing
    m = re.search('[0-9]+', action_card)
    try: m.group(0)    
    except AttributeError:
        number = None
    else:
        number = m.group(0)
        
        
    return go_to_location, number
    
    

In [None]:
## action card structure
##(action_card, sentiment)

In [88]:
action_from_action_card(action_card)

('GO', '1000')

In [71]:
for location in locations:
    print(location)

Mr. Nobody Street
Deckard Shaw Drive
Han Drive
Sean Boswell Lane
Elena Boulevard
Hector Road
Owen Shaw Drive
Safar Road
Jack Lane
Samantha Hobbs Alley
Letty Fan Park
Female Racer Park
Race Starter Lane
Hot Teacher Drive
Doctor Park
Merc Tech Road
Weapons Tech Lane
Dominic Toretto Avenue
Brian O'Conner Avenue
Kiet Drive
Kara Drive
Walker
Los Angeles
Letty Station
Roman Station
Tej (as Chris 'Ludacris' Bridges) Station
Mia Station
Walker
Abu Dhabi


In [55]:
action_card = "Go to this street. Go direct to the bank. Pass GO and collect DM 1000,-."

In [56]:
eval_sentence(reference, action_card)
#print(action_card)

(0.623829017247149, 0.7768985960673559, 0.26666666666666666)

In [57]:
print(action_card)

Go to this street. Go direct to the bank. Pass GO and collect DM 1000,-.


In [58]:
print(reference)

Go back to Badstraße.


In [106]:
locations

['GO',
 'Mr. Nobody Street',
 'Deckard Shaw Drive',
 'Han Drive',
 'Sean Boswell Lane',
 'Elena Boulevard',
 'Hector Road',
 'Owen Shaw Drive',
 'Safar Road',
 'Jack Lane',
 'Samantha Hobbs Alley',
 'Letty Fan Park',
 'Female Racer Park',
 'Race Starter Lane',
 'Hot Teacher Drive',
 'Doctor Park',
 'Merc Tech Road',
 'Weapons Tech Lane',
 'Dominic Toretto Avenue',
 "Brian O'Conner Avenue",
 'Kiet Drive',
 'Kara Drive',
 'Walker',
 'Los Angeles',
 'Letty Station',
 'Roman Station',
 "Tej (as Chris 'Ludacris' Bridges) Station",
 'Mia Station',
 'Walker',
 'Abu Dhabi']