In [9]:
import pandas as pd #might want to use it
import numpy as np #working with arrays/matrices after all! 
import random #random choice for bot's first word
from sklearn.metrics.pairwise import cosine_similarity
import convergence_resources as cr

# Using GloVe

I used word vectors from a pre-trained set of data consisting of <a href="https://catalog.ldc.upenn.edu/LDC2011T07">Gigawords</a> and Wikimedia dumps.



In [2]:
def load_glove_model(file):
    print("Loading...")
    f = open(file,'r')
    model = {}
    for line in f:
        whole_line = line.split()
        words = whole_line[0]
        embedding = np.array([float(val) for val in whole_line[1:]])
        model[words] = embedding
    print("Done.",len(model)," words loaded!")
    return model

In [3]:
model = load_glove_model('glove.6B/glove.6B.300d.txt')

Loading...
Done. 400000  words loaded!


# Common English Words

Create a list of 5,000 common english words from http://www.wordfrequency.info. 

In [None]:
#old file of 3000 words:
# with open("common_words.txt") as f:
#     cwordlist = []
#     for line in f:
#         cwordlist.append(line.strip().lower().replace('-',''))

In [10]:
pd.read_csv('5000_words.csv')

Unnamed: 0,Rank,Word,Part of speech,Frequency,Dispersion
0,,,,,
1,1.0,the,a,22038615.0,0.98
2,2.0,be,v,12545825.0,0.97
3,3.0,and,c,10741073.0,0.99
4,4.0,of,i,10343885.0,0.97
5,5.0,a,a,10144200.0,0.98
6,6.0,in,i,6996437.0,0.98
7,7.0,to,t,6332195.0,0.98
8,8.0,have,v,4303955.0,0.97
9,9.0,to,i,3856916.0,0.99


In [4]:
common_word_vectors = {}
unvectorized_common_words = []
for word in cwordlist:
    if word in model.keys():
        common_word_vectors[word] = model[word]
    else:
        unvectorized_common_words.append(word)

In [6]:
#reshape the dictionary arrays 
common_word_vectors['truth'].shape

for key, value in common_word_vectors.items():
    common_word_vectors[key] = value.reshape(1,-1)
    
for key, value in model.items():
    model[key] = value.reshape(1,-1)

common_word_vectors['truth'].shape

(1, 300)

# Create Functions

In [7]:
def converge(user_input=str, bot_input=str, exclude=None):
    """Return the "average word" of the input words."""
    if exclude is None:
        exclude = set()
    exclude.add(user_input)
    exclude.add(bot_input)  
    mean_vector = ((model[user_input] + model[bot_input])/2)
    cos_sim_dict = {}
    response_options_dict = {key: common_word_vectors[key] for key in common_word_vectors 
                             if key not in exclude}
    for word, vector in response_options_dict.items():
        cos_sim_dict[float(cosine_similarity(mean_vector, vector))] = word
    max_cos_sim = max(cos_sim_dict.keys())
    bot_response = cos_sim_dict[max_cos_sim]
    return bot_response

In [8]:
def play_round(user_input, user_history=None, bot_history=None):
    if bot_history is None:
        user_history = []
        bot_history = []
        bot_response = random.choice(cwordlist)
    else:
        bot_response = converge(user_history[-1], bot_history[-1], 
                                exclude=set(user_history + bot_history))
    user_history.append(user_input)
    bot_history.append(bot_response)
    return {
        'user_history': user_history,
        'bot_history': bot_history,
        'bot_response': bot_response,
    }

In [38]:
play_round('babysit', user_history=['evergreen', 'go', 'lucky', 'get', 'to', 'want', 
                                    'present', 'party', 'festive', 'inauguration', 'trump', 'anger', 
                                    'divorce', 'depression', 'counseling', 'doctor', 'nurse', 'care', 'daycare'], 
           bot_history=['ok', 'happy', 'come', 'know', "n't", 'you', 'birthday', 'celebrate', 'cheating' 
                        'celebration', 'occasion', 'opposition', 'protest', 'ceremony', 'wedding', 'frustration',
                        'anxiety', 'marriage', 'birth', 'pregnancy', 'pregnant', 'delivery', 'baby', 'child'])

{'user_history': ['evergreen',
  'go',
  'lucky',
  'get',
  'to',
  'want',
  'present',
  'party',
  'festive',
  'inauguration',
  'trump',
  'anger',
  'divorce',
  'depression',
  'counseling',
  'doctor',
  'nurse',
  'care',
  'daycare',
  'babysit'],
 'bot_history': ['ok',
  'happy',
  'come',
  'know',
  "n't",
  'you',
  'birthday',
  'celebrate',
  'cheatingcelebration',
  'occasion',
  'opposition',
  'protest',
  'ceremony',
  'wedding',
  'frustration',
  'anxiety',
  'marriage',
  'birth',
  'pregnancy',
  'pregnant',
  'delivery',
  'baby',
  'child',
  'infant'],
 'bot_response': 'infant'}