# Text Generator

We are going to develop a text generator based on Markov chains and try different inputs and hyperparameters in order to achieve concrete outcomes that help us understand the possibilities of this technique.

First we are going to model the problem with some classes in order to make the development easier.

In [None]:
from numpy.random import choice

class Word:
    """A vocabulary entry that records which words were seen right after it.

    Instances compare equal and hash alike when they wrap the same string, so
    any two of them are interchangeable as dictionary keys.
    """

    def __init__(self, word):
        """Wrap ``word`` (a string) with an empty successor table."""
        self.word = word
        # Total number of recorded successors (sum of following_words values).
        self.count_following_words = 0
        # Maps each successor Word to how many times it followed this word.
        self.following_words = {}

    def __repr__(self):
        # Deliberately the bare word: it keeps printed successor tables compact.
        return self.word

    def __str__(self):
        return self.word

    def __hash__(self):
        return hash(self.word)

    def __eq__(self, other):
        # isinstance (rather than an exact type check) keeps equality
        # consistent with __hash__ for subclasses as well.
        return isinstance(other, Word) and self.word == other.word

    def __ne__(self, other):
        # Delegate so the two comparisons can never disagree.
        return not self == other

    def add_following_word(self, other_word):
        """Record one more occurrence of ``other_word`` right after this word."""
        self.following_words[other_word] = self.following_words.get(other_word, 0) + 1
        self.count_following_words += 1

    def generate_next_word(self):
        """Sample a successor with probability proportional to its count.

        Returns:
            One of the keys of ``following_words``, or ``None`` when no
            successor has been recorded.
        """
        if not self.following_words:
            return None
        candidates = list(self.following_words)
        total = self.count_following_words
        weights = [count / total for count in self.following_words.values()]
        return choice(candidates, 1, p=weights)[0]
    
    
class TextGenerator:
    """Word-level Markov-chain text generator.

    The source text is split on whitespace, each token is lower-cased and
    stripped of punctuation, and for every word the generator counts which
    words follow it. New text is produced by repeatedly sampling a successor
    of the previously emitted word.
    """

    def __init__(self, text):
        """Store ``text``; call ``process_text`` before generating."""
        # Maps each curated word string to its unique Word node.
        self.words_dict = {}
        # The curated words of the text, in order of appearance.
        self.words_list = []
        # Characters removed from every token (the backslash is one of them).
        self.punctuation = "'!()-[]{};:\"\\,<>./?@#$%^&*_~"
        self.text = text

    def process_text(self):
        """(Re)build the vocabulary and the successor counts from ``self.text``."""
        # Start from a clean slate so that calling this twice does not
        # double-count every transition.
        self.words_dict = {}
        self.words_list = []
        self.split_words()
        self.calculate_probabilities()

    def split_words(self):
        """Tokenise ``self.text`` and append one Word per token to ``words_list``.

        Words are keyed by their curated form, so tokens that differ only in
        case or punctuation ("Hello," / "hello") share a single Word node.
        Tokens that consist solely of punctuation are skipped.
        """
        for token in self.text.split():
            curated = self.curate_word(token)
            if not curated:
                continue
            word = self.words_dict.get(curated)
            if word is None:
                word = self.words_dict[curated] = Word(curated)
            self.words_list.append(word)

    def calculate_probabilities(self):
        """Register every adjacent pair in ``words_list`` as a transition."""
        for current, following in zip(self.words_list, self.words_list[1:]):
            current.add_following_word(following)

    def curate_word(self, word):
        """Return ``word`` lower-cased with all punctuation characters removed."""
        return "".join(c for c in word.lower() if c not in self.punctuation)

    def generate_text(self, length):
        """Generate ``length`` space-separated words by walking the chain.

        The successor table is printed first (see ``debug``). The walk starts
        at a random word; whenever the current word has no recorded successor
        a random word is used instead and reported on stdout.

        Returns:
            The generated words joined by single spaces, or an empty string
            when ``length`` is not positive or the vocabulary is empty.
        """
        self.debug()
        if length <= 0 or not self.words_dict:
            return ""
        current = self.pick_random_word()
        generated = [current]
        for _ in range(length - 1):
            next_word = current.generate_next_word()
            if next_word is None:
                # Dead end (e.g. the last word of the text): restart anywhere.
                next_word = self.pick_random_word()
                print("RANDOM: " + str(next_word))
            generated.append(next_word)
            current = next_word
        return " ".join(word.word for word in generated)

    def pick_random_word(self):
        """Return a uniformly chosen word of the vocabulary.

        Raises ``ValueError`` (from ``numpy.random.choice``) when the
        vocabulary is empty.
        """
        return choice(list(self.words_dict.values()))

    def debug(self):
        """Print every known word followed by its successor-count table."""
        for word in self.words_dict.values():
            print(word.word + " " + str(word.following_words))

In [None]:
# Smoke-test the generator on a tiny corpus of numeric tokens.
textGenerator = TextGenerator('1 22 1 3 1')
# Build the vocabulary and the successor counts from the text.
textGenerator.process_text()
# Ask for a 10-word sequence. generate_text also prints the successor table,
# and the returned string is the value of this final expression.
textGenerator.generate_text(10)