## Importing tools

In [4]:
import numpy as np
import pandas as pd
import os
import re
import string
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import random

## Reading every Sherlock Holmes adventure!

In [5]:
story_path = "./sherlock/"

def read_all_stories(story_path):
    """Read every file under ``story_path`` and collect its non-empty lines.

    The directory tree is walked recursively. Each file's name is printed as
    it is opened. Lines are stripped of surrounding whitespace; blank lines
    are skipped, and reading of a file stops at the first line that is exactly
    ``----------`` (everything after that marker in the file is ignored).

    Parameters
    ----------
    story_path : str
        Directory containing the story files. A trailing path separator is
        optional.

    Returns
    -------
    list of str
        All retained lines from all files, in the order they were read.
    """
    txt = []
    for root, _, files in os.walk(story_path):
        for file in files:
            # Join against the directory os.walk is currently visiting, not the
            # top-level argument: this works without a trailing separator on
            # ``story_path`` and for files that live in sub-directories.
            # NOTE(review): open() uses the platform default encoding here —
            # confirm the story files decode correctly on every target OS.
            with open(os.path.join(root, file)) as f:
                print(file)
                for line in f:
                    line = line.strip()
                    if line == '----------':
                        break
                    if line != '':
                        txt.append(line)
    return txt
        
# Load the whole corpus as one flat list of stripped, non-empty lines.
stories = read_all_stories(story_path)
print("number of lines = ", len(stories))

glor.txt
lstb.txt
empt.txt
mems.txt
retn.txt
norw.txt
blac.txt
3gar.txt
spec.txt
danc.txt
blue.txt
bosc.txt
reti.txt
gree.txt
cano.txt
sign.txt
3gab.txt
shos.txt
bery.txt
soli.txt
cree.txt
resi.txt
reig.txt
prio.txt
card.txt
bruc.txt
nobl.txt
redc.txt
seco.txt
vall.txt
stoc.txt
iden.txt
lion.txt
copp.txt
nava.txt
sixn.txt
case.txt
3stu.txt
chas.txt
redh.txt
miss.txt
advs.txt
abbe.txt
twis.txt
wist.txt
cnus.txt
engr.txt
suss.txt
stud.txt
silv.txt
gold.txt
scan.txt
maza.txt
dyin.txt
lady.txt
devi.txt
croo.txt
houn.txt
musg.txt
illu.txt
five.txt
yell.txt
blan.txt
fina.txt
last.txt
thor.txt
veil.txt
number of lines =  215021


## Cleaning the text

In [6]:
def clean_txt(txt):
    cleaned_txt = []
    for line in txt:
        line = line.lower()
        line = re.sub(r"[,.\"\'!@#$%^&*(){}?/;`~:<>+=-\\]", "", line)
        tokens = word_tokenize(line)
        words = [word for word in tokens if word.isalpha()]
        cleaned_txt+=words
    return cleaned_txt

# Flatten the corpus into a single list of lowercase, alphabetic word tokens.
cleaned_stories = clean_txt(stories)
print("number of words = ", len(cleaned_stories))

number of words =  2332247


## Creating the Markov Model

In [7]:
def make_markov_model(cleaned_stories, n_gram=2):
    """Build an n-gram Markov chain from a sequence of words.

    A state is ``n_gram`` consecutive words joined by single spaces. For every
    position in the text, the state starting there transitions to the state
    made of the ``n_gram`` words that immediately follow it.

    Parameters
    ----------
    cleaned_stories : list of str
        The corpus as a flat list of words.
    n_gram : int, default 2
        Number of words per state. Must be at least 1.

    Returns
    -------
    dict
        ``{state: {next_state: probability}}``; the probabilities of each
        state's outgoing transitions sum to 1. Empty when the corpus holds
        fewer than ``2 * n_gram`` words.

    Raises
    ------
    ValueError
        If ``n_gram`` is smaller than 1.
    """
    if n_gram < 1:
        raise ValueError("n_gram must be a positive integer")

    # A transition starting at i reads words i .. i + 2*n_gram - 1, so the last
    # valid start is len - 2*n_gram. The former bound (len - n_gram - 1) only
    # matched this for n_gram == 2: it lost the final transition for n_gram == 1
    # and ran past the end of the list for n_gram >= 3.
    n_transitions = len(cleaned_stories) - 2 * n_gram + 1

    transition_counts = {}
    for i in range(n_transitions):
        curr_state = " ".join(cleaned_stories[i:i + n_gram])
        next_state = " ".join(cleaned_stories[i + n_gram:i + 2 * n_gram])
        followers = transition_counts.setdefault(curr_state, {})
        followers[next_state] = followers.get(next_state, 0) + 1

    # Normalize raw counts into transition probabilities.
    markov_model = {}
    for curr_state, followers in transition_counts.items():
        total = sum(followers.values())
        markov_model[curr_state] = {
            state: count / total for state, count in followers.items()
        }

    return markov_model

In [8]:
# Build the chain with the default n_gram=2, i.e. every state is a two-word phrase.
markov_model = make_markov_model(cleaned_stories)

In [9]:
# Every key of the model is one distinct state, so the dict's length is the state count.
print("number of states = ", len(markov_model))

number of states =  208717


In [10]:
# Inspect a single state: the inner dict maps each following state to its probability.
print("All possible transitions from 'the game' state: \n")
print(markov_model['the game'])

All possible transitions from 'the game' state: 

{'is up': 0.06306306306306306, 'is and': 0.036036036036036036, 'was afoot': 0.036036036036036036, 'for the': 0.036036036036036036, 'was whist': 0.036036036036036036, 'would have': 0.036036036036036036, 'in their': 0.036036036036036036, 'was up': 0.09009009009009009, 'in that': 0.036036036036036036, 'the lack': 0.036036036036036036, 'for all': 0.06306306306306306, 'is afoot': 0.036036036036036036, 'was in': 0.02702702702702703, 'is hardly': 0.02702702702702703, 'may wander': 0.02702702702702703, 'now a': 0.02702702702702703, 'my own': 0.02702702702702703, 'at any': 0.02702702702702703, 'mr holmes': 0.02702702702702703, 'ay whats': 0.02702702702702703, 'my friend': 0.02702702702702703, 'fairly by': 0.02702702702702703, 'is not': 0.02702702702702703, 'was not': 0.02702702702702703, 'worth it': 0.02702702702702703, 'you are': 0.02702702702702703, 'i am': 0.02702702702702703, 'now count': 0.02702702702702703, 'your letter': 0.027027027027027

## Generating Sherlock Holmes stories!

In [11]:
def generate_story(markov_model, limit=100, start='my god'):
    """Generate text by taking a random walk through the Markov model.

    Beginning at ``start``, the next state is repeatedly drawn at random from
    the current state's transitions, weighted by their probabilities.

    Parameters
    ----------
    markov_model : dict
        ``{state: {next_state: probability}}`` as returned by
        ``make_markov_model``.
    limit : int, default 100
        Maximum number of transitions to take.
    start : str, default 'my god'
        Initial state; must be a key of ``markov_model`` whenever at least
        one transition is requested.

    Returns
    -------
    str
        The visited states separated by single spaces, followed by one
        trailing space. The text is shorter than ``limit`` transitions if the
        walk reaches a state that has no outgoing transitions.

    Raises
    ------
    KeyError
        If ``limit`` is positive and ``start`` is not a state of the model.
    """
    if limit > 0 and start not in markov_model:
        raise KeyError(f"start state {start!r} is not in the Markov model")

    curr_state = start
    visited = [curr_state]
    n = 0
    while n < limit:
        transitions = markov_model.get(curr_state)
        if not transitions:
            # Dead end (e.g. the closing words of the corpus never appear as a
            # source state): stop instead of failing with a bare KeyError.
            break
        curr_state = random.choices(list(transitions.keys()),
                                    weights=list(transitions.values()))[0]
        visited.append(curr_state)
        n += 1

    # Joining once avoids repeated string concatenation; the trailing space
    # matches the format of the original output.
    return " ".join(visited) + " "

In [12]:
# Draw 20 short samples (8 transitions each), all starting from "dear holmes".
for story_no in range(20):
    sample = generate_story(markov_model, start="dear holmes", limit=8)
    print(str(story_no) + ". ", sample)

0.  dear holmes you are uttering libel in the dead mans what do you think that they know so 
1.  dear holmes if i remember right who was to tell you that if you come down and spend 
2.  dear holmes i ejaculated no for my steve you are talking about him what will he knew well 
3.  dear holmes what do you make of it standing in a considerable share in clearing the matter up 
4.  dear holmes i have no one to be dazzled by the sudden swirl round of the reminiscences of 
5.  dear holmes said i ivy lane brixton he answered but women were taking down shutters and that i 
6.  dear holmes i ejaculated precisely so head attendant at the last said he and his companion with those 
7.  dear holmes you are then catch a fine one said holmes the train has been broken unwelcome truths 
8.  dear holmes said i when should i take it in his collection speaking of your papers for a 
9.  dear holmes am i accused of asked mcmurdo of being in this way we had gone up to 
10.  dear holmes i thought at first it w

In [13]:
# Draw 20 short samples (8 transitions each), all starting from "my dear".
for story_no in range(20):
    sample = generate_story(markov_model, start="my dear", limit=8)
    print(str(story_no) + ". ", sample)

0.  my dear sir knowing the vindictive character of its own indeed apart from the one who wishes to 
1.  my dear fellow it would be a colossal task she may have bolted during the struggle and was 
2.  my dear watson which he handed it back you say yes sir my own mind that all we 
3.  my dear sir if a document of immense value he asked not at all save the police some 
4.  my dear fellow i cried approaching him stand back stand right back said he laughing and pointing to 
5.  my dear fellow i congrat to milvertons housemaid good heavens holmes do you dismiss my case well mr 
6.  my dear watson i could thats what worries him so only i sent in their stead and we 
7.  my dear sir such a thing is the sequel of our investigation by a very tall handsome man 
8.  my dear sir said he there are a few inferences which are entirely its own even now we 
9.  my dear holmes i exclaimed how on earth did you think of anything else yes i admit it 
10.  my dear watson i was just one of murder and no trace

In [14]:
# Draw 20 short samples (8 transitions each), all starting from "i would".
for story_no in range(20):
    sample = generate_story(markov_model, start="i would", limit=8)
    print(str(story_no) + ". ", sample)

0.  i would be after standing back from the lower portion reserved for their dependents at one end and 
1.  i would have been trying some experiments after you left i sent a duplicate key been found in 
2.  i would willingly do so but apparently found no trace could be the object of his marriage about 
3.  i would refer you to mr browner a few days and which might lead to other methods what 
4.  i would have gone away leaving all his words first said the colonel in of the questions and 
5.  i would have thought it criminal had he deduced what i would find that mr neil gibson has 
6.  i would wish to know whether you observed whether it was not holmess nature to take that half 
7.  i would take my word for it and burst out of the wall under which its splintered fragments 
8.  i would not miss harrison here for example there is nothing new under the shelter of a boulder 
9.  i would have had the slightest attraction to me for a moment later he was in actual professional 
10.  i would mak

In [15]:
# One longer sample: up to 100 transitions starting from "the case".
long_story = generate_story(markov_model, start="the case", limit=100)
print(long_story)

the case is clear enough certainly but how came the beast to get loose is it watson there is something more solid i have no doubt jackson would take my own steps to attain to some other place where it had snapped the top with her when she did it differ from any other time eh the woman at margate whom i should judge it was admirably done that it was carried out by the same train which connects the long line of fiery and masterful progress of the night when the calls three times it is well and if you will be in him at all until i have come prepared to do so in a free present of interest to me the impression of deformity but the face on getting into the room and so was a corporal who came into the affair not so fast as that but of course i remember it just as we had been standing listening at the half opened door and now it is essential essential i say that your disturbance dates from the house surely your wife feared such an attack she saw who it was it before you can get no news 
