In [124]:
import re
import random

from collections import Counter

In [125]:
path = "./corpora/bible.txt"
text: str

with open(path, "r", encoding="utf-8") as file:
    text = file.read().lower()

text[:10_000]

'1:1 in the beginning god created the heaven and the earth.\n\n1:2 and the earth was without form, and void; and darkness was upon\nthe face of the deep. and the spirit of god moved upon the face of the\nwaters.\n\n1:3 and god said, let there be light: and there was light.\n\n1:4 and god saw the light, that it was good: and god divided the light\nfrom the darkness.\n\n1:5 and god called the light day, and the darkness he called night.\nand the evening and the morning were the first day.\n\n1:6 and god said, let there be a firmament in the midst of the waters,\nand let it divide the waters from the waters.\n\n1:7 and god made the firmament, and divided the waters which were\nunder the firmament from the waters which were above the firmament:\nand it was so.\n\n1:8 and god called the firmament heaven. and the evening and the\nmorning were the second day.\n\n1:9 and god said, let the waters under the heaven be gathered together\nunto one place, and let the dry land appear: and it was so.\

In [126]:
def extract_words(text: str) -> list[str]:
    word_re_pat = re.compile(r"\w+")
    return re.findall(word_re_pat, text)

words = extract_words(text)
words[:10]  # Display the first 10 words for verification

['1', '1', 'in', 'the', 'beginning', 'god', 'created', 'the', 'heaven', 'and']

In [127]:
def find_successors(word: str, corpus: list[str]) -> dict[str, float]:
    counter = Counter()
    
    for i, w in enumerate(corpus[:-1]):
        if w == word:
            counter[corpus[i+1]] += 1

    total = counter.total()
    probabilities = {k:v/total for k, v in counter.items()}
            
    return probabilities

d = find_successors(words[123], words)
d

{'1': 0.010452961672473868,
 'and': 0.10104529616724739,
 'from': 0.006968641114982578,
 'which': 0.020905923344947737,
 'under': 0.003484320557491289,
 'called': 0.003484320557491289,
 'bring': 0.003484320557491289,
 'brought': 0.003484320557491289,
 'in': 0.04529616724738676,
 'upon': 0.006968641114982578,
 'was': 0.003484320557491289,
 'of': 0.16376306620209058,
 'increased': 0.003484320557491289,
 'prevailed': 0.010452961672473868,
 '7': 0.003484320557491289,
 'prevail': 0.003484320557491289,
 'asswaged': 0.003484320557491289,
 'returned': 0.006968641114982578,
 'were': 0.0627177700348432,
 'decreased': 0.003484320557491289,
 'shall': 0.0313588850174216,
 'that': 0.041811846689895474,
 'may': 0.006968641114982578,
 '15': 0.003484320557491289,
 'the': 0.020905923344947737,
 '16': 0.003484320557491289,
 'whatsoever': 0.003484320557491289,
 'they': 0.017421602787456445,
 '24': 0.003484320557491289,
 'beneath': 0.006968641114982578,
 '10': 0.003484320557491289,
 'all': 0.00348432055749

In [128]:
def train_model(corpus: list[str]) -> dict[str, dict[str, float]]:
    mem = {}
    for word in corpus:
        if not word in mem:
            mem[word] = find_successors(word, corpus)
    return mem

In [129]:
model = train_model(words)

In [130]:

def predict_next(word: str, model: dict[str, dict[str, float]], temperature: int = 10) -> str:

    assert 0 < temperature <= 10, f"Temeprature must be between 1 and 10"

    possibilities = model.get(word)

    if possibilities is None:
        return "EOF"

    top_words = sorted(possibilities.items(), key=lambda x: x[1], reverse=True)[:temperature]
    
    words, probs = zip(*top_words)
    total = sum(probs)
    normalized_probs = [p/total for p in probs]
    
    return random.choices(words, weights=normalized_probs, k=1)[0]

In [133]:
token = input("Start Chat: ").lower()
while token != "EOF":
    print(token)
    token = predict_next(token, model, temperature=10)


adam
to
pass
in
the
lord
said
unto
me
and
he
shall
be
as
he
that
which
the
city
and
the
lord
of
the
lord
thy
name
there
be
the
lord
hath
given
it
and
i
pray
thee
with
thee
and
they
have
no
more
for
i
am
he
shall
be
in
the
lord
shall
be
for
ever
136
5
for
thou
wilt
hear
ye
shall
not
of
them
with
him
and
he
that
they
shall
be
of
the
sons
with
her
and
i
will
not
the
earth
and
to
the
land
of
the
land
of
the
lord
and
he
said
it
shall
be
the
lord
god
shall
not
be
put
the
lord
said
unto
the
children
and
they
are
not
the
lord
and
he
will
make
an
end
that
thou
hast
given
you
all
things
shall
have
heard
the
house
of
the
lord
thy
god
is
the
king
and
to
me
in
his
hand
and
the
lord
and
to
be
of
the
people
and
the
city
and
he
that
the
people
of
the
children
of
my
name
was
not
to
the
lord
your
fathers
of
the
lord
shall
be
put
them
which
the
king
of
my
name
of
judah
was
in
this
day
of
the
city
and
he
shall
the
people
israel
for
they
shall
go
and
to
the
lord
hath
spoken
unto
the
lord
hath
not
the
lord


KeyboardInterrupt: 