In [None]:
from keras.models import Sequential
from keras.layers import Dense

In [None]:
import numpy as np
import sys, os, string, random

In [None]:
# Vocabulary: every character of string.printable, in that order.
characters = string.printable
# Forward lookup (character -> vocabulary index) and its inverse (index -> character).
char_indices = {ch: pos for pos, ch in enumerate(characters)}
indices_char = dict(enumerate(characters))

In [None]:
# Width of a one-hot vector: one slot per character in `characters`.
INPUT_VOCAB_SIZE = len(characters)
# Number of consecutive characters the model sees for each predicted character.
# normalization_layer_set_weights addresses the window slots with fixed offsets
# (0, INPUT_VOCAB_SIZE, INPUT_VOCAB_SIZE*2), so it has to be revisited if this
# value changes.
WINDOW_SIZE = 3

In [None]:
def encode_one_hot(line):
  """One-hot encode `line` after padding it with one space on each side.

  Returns an array of shape (len(line) + 2, INPUT_VOCAB_SIZE). Row k holds a
  single 1 at the vocabulary index of the k-th padded character; characters
  that are not in `characters` are encoded as a space.
  """
  padded = " " + line + " "
  fallback = char_indices[' ']
  # Vocabulary slot for every padded character, unknown ones mapped to space.
  slots = [char_indices.get(ch, fallback) for ch in padded]
  encoded = np.zeros((len(padded), INPUT_VOCAB_SIZE))
  # Set one cell per row in a single indexed assignment.
  encoded[np.arange(len(padded)), slots] = 1
  return encoded

In [None]:
def decode_one_hot(x):
  """Convert rows of per-character scores back into a string.

  Each row of `x` contributes the character whose vocabulary index holds the
  row's largest value (np.argmax picks the first one on ties).
  """
  return ''.join(indices_char[np.argmax(scores)] for scores in x)

In [None]:
def prepare_for_window(x, window_size=None):
  """Flatten every run of consecutive rows of `x` into one feature row.

  Args:
    x: 2-D array-like of shape (n_rows, n_features), e.g. the result of
      encode_one_hot.
    window_size: number of rows per window. Defaults to the module-level
      WINDOW_SIZE when omitted.

  Returns:
    Array of shape (n_windows, window_size * n_features), where
    n_windows = max(n_rows - window_size + 1, 0). Row k is rows
    k .. k + window_size - 1 of `x` concatenated in order. An input with
    fewer rows than the window yields an empty (0, window_size * n_features)
    array instead of raising.
  """
  if window_size is None:
    window_size = WINDOW_SIZE
  x = np.asarray(x)
  n_windows = max(x.shape[0] - window_size + 1, 0)
  # Window start offsets (column vector) plus in-window offsets (row vector)
  # broadcast to an (n_windows, window_size) index grid; the grid keeps its
  # 2-D shape even when there are no windows, so the reshape below stays valid.
  ind = np.arange(n_windows)[:, None] + np.arange(window_size)
  return x[ind].reshape(n_windows, window_size * x.shape[1])

In [None]:
def normalization_layer_set_weights(n_layer):
  """Install hand-built weights on `n_layer` and return the same layer.

  The kernel has WINDOW_SIZE*INPUT_VOCAB_SIZE rows, laid out as three
  consecutive one-hot slots (left neighbour, centre, right neighbour), and
  INPUT_VOCAB_SIZE columns; the bias is all zeros. For the centre slot:
    * a lowercase letter contributes 1 to its own column,
    * an uppercase letter contributes 1 to its lowercase column,
    * any other character contributes 1 to the space column.
  For the two neighbour slots, every non-letter contributes 0.75 to the space
  column. Two non-letter neighbours therefore add 1.5 to space, which exceeds
  the 1.0 a centre letter gives its own column; a single non-letter neighbour
  (0.75) does not.
  """
  kernel = np.zeros((WINDOW_SIZE*INPUT_VOCAB_SIZE, INPUT_VOCAB_SIZE))
  bias = np.zeros((INPUT_VOCAB_SIZE))

  # Row offsets of the three window slots inside the flattened input.
  left, center, right = 0, INPUT_VOCAB_SIZE, INPUT_VOCAB_SIZE*2
  space_col = char_indices[' ']

  for ch in characters:
    row = char_indices[ch]
    if ch in string.ascii_letters:
      # Letters map to their lowercase form (a no-op for lowercase input).
      kernel[center + row, char_indices[ch.lower()]] = 1
    else:
      # Non-letters become a space, and also push their neighbours toward space.
      kernel[center + row, space_col] = 1
      kernel[left + row, space_col] = 0.75
      kernel[right + row, space_col] = 0.75

  n_layer.set_weights([kernel, bias])
  return n_layer

In [None]:
def build_model():
  """Build the one-layer model used for character normalization.

  The model is a single softmax Dense layer that takes a flattened window of
  WINDOW_SIZE one-hot characters and outputs INPUT_VOCAB_SIZE scores.
  """
  window_features = WINDOW_SIZE*INPUT_VOCAB_SIZE
  char_scores = Dense(INPUT_VOCAB_SIZE,
                      input_shape=(window_features,),
                      activation='softmax')
  model = Sequential()
  model.add(char_scores)
  return model

In [None]:
# Instantiate the single-Dense-layer model defined by build_model.
model = build_model()

In [None]:
# Print the layer table and parameter counts as a sanity check on the shapes.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
Total params: 30100 (117.58 KB)
Trainable params: 30100 (117.58 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Replace the Dense layer's weights with the hand-built normalization weights.
# The function returns the layer, so this cell echoes the layer's repr.
normalization_layer_set_weights(model.layers[0])

<keras.src.layers.core.dense.Dense at 0x7a26dd01f9d0>

In [None]:
!wget https://www.gutenberg.org/cache/epub/46144/pg46144.txt

--2024-03-31 17:42:32--  https://www.gutenberg.org/cache/epub/46144/pg46144.txt
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 125798 (123K) [text/plain]
Saving to: ‘pg46144.txt.2’


2024-03-31 17:42:32 (1.73 MB/s) - ‘pg46144.txt.2’ saved [125798/125798]



In [None]:
# Keep only the beginning of the book (head's default line count) as a small
# sample file for the demo below.
!head pg46144.txt > pg46144_head.txt

In [None]:
# Run the normalizer over the sample file and print each result next to its input.
# utf-8-sig drops the byte-order mark at the start of the file; read as plain
# UTF-8 it shows up as a stray character in front of the first line and gets
# encoded as an extra leading space.
with open('pg46144_head.txt', encoding='utf-8-sig') as f:
  for line in f:
    if line.isspace(): continue
    batch = prepare_for_window(encode_one_hot(line))
    preds = model.predict(batch)
    normal = decode_one_hot(preds)
    # `line` still ends with its newline; strip it for display so print does not
    # emit a blank line after every result. The model input is left unchanged.
    print(normal, " /// ", line.rstrip('\n'))

 the project gutenberg ebook of six cups of coffee   ///  ﻿The Project Gutenberg eBook of Six Cups of Coffee

this ebook is for the use of anyone anywhere in the united states and   ///  This ebook is for the use of anyone anywhere in the United States and

most other parts of the world at no cost and with almost no restrictions   ///  most other parts of the world at no cost and with almost no restrictions

whatsoever  you may copy it  give it away or re use it under the terms   ///  whatsoever. You may copy it, give it away or re-use it under the terms

of the project gutenberg license included with this ebook or online   ///  of the Project Gutenberg License included with this ebook or online

at www gutenberg org  if you are not located in the united states    ///  at www.gutenberg.org. If you are not located in the United States,

you will have to check the laws of the country where you are located   ///  you will have to check the laws of the country where you are located

before