In [None]:
### Michelle Kouba
### DSC 550
### Week 10 (Assignments 11 and 12)

In [None]:
# Mounting my drive
# Colab-only setup: `google.colab` is importable only inside a Colab runtime.
import os
from google.colab import drive
# Mount Google Drive at /content/drive (force_remount is passed so the cell can be re-run;
# NOTE(review): confirm the exact remount semantics against the Colab docs).
drive.mount('/content/drive', force_remount = True)
# Make the assignment folder the working directory, so relative paths used later in the
# notebook (e.g. the "generatedtext.txt" output file) resolve inside it.
os.chdir('/content/drive/My Drive/GitHub/dsc650/dsc650/assignments/assignment10')
# IPython shell escape: print the working directory to confirm the chdir took effect.
!pwd

Mounted at /content/drive
/content/drive/My Drive/GitHub/dsc650/dsc650/assignments/assignment10


In [None]:
# Importing needed libraries
import pandas as pd
import numpy as np
import keras
from keras import layers
import tensorflow as tf
import sys
import random
import matplotlib.pyplot as plt
import tensorflow.compat.v1.keras.backend as K
tf.compat.v1.disable_eager_execution()
from keras.datasets import mnist
from tensorflow.keras.models import Model
from pathlib import Path
from scipy.stats import norm

# LSTM Text Generator

In [None]:
# Train the model on desired text.
# Importing data (8.3).
# Download (or reuse the cached copy of) the Nietzsche corpus and get its local path.
path = keras.utils.get_file('nietzsche.txt', origin = 'https://s3.amazonaws.com/text-datasets/nietzsche.txt')
# Read inside a context manager so the file handle is closed deterministically, and pin
# the encoding so the character set does not depend on the platform default.
with open(path, encoding='utf-8') as corpus_file:
    # Lower-casing shrinks the character vocabulary the model has to learn.
    text = corpus_file.read().lower()
print('Corpus length of Nietzsche text is:', len(text))


Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length of Nietzsche text is: 600893


In [None]:
# Vectorizing sequences of characters (8.3)
# Extracting sequences of 60 characters.
maxlen = 60
# Sampling a new sequence every three characters.
steps = 3
# Placeholder for extracted sequences
sentences = []
# Holds the targets (the follow-up characters)
next_chars = []
# Iterating over the Nietzsche text, creating sequences of length `maxlen` with a step size of `steps`.
# The range stops at len(text) - maxlen so that text[i + maxlen] (the target) always exists.
for i in range(0, len(text) - maxlen, steps):
  # Appending the sequence to sentences
  sentences.append(text[i: i + maxlen])
  # Appending the next character to the next_chars
  next_chars.append(text[i + maxlen])
print('Number of sequences:', len(sentences))
# Sorted list of the distinct characters in the corpus; the position of a character
# in this list is its one-hot index.
chars = sorted(set(text))
print('Unique characters in Nietzsche text:', len(chars))
# Character -> index lookup, built in a single pass with enumerate
# (equivalent to chars.index(char) for every char, without the repeated linear search).
char_indices = {char: index for index, char in enumerate(chars)}
# One-hot encodes the characters into binary arrays
print('Vectorization')
# Initializing the input and output arrays:
#   x has shape (number of sequences, maxlen, number of unique characters)
#   y has shape (number of sequences, number of unique characters)
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
  for t, char in enumerate(sentence):
    # One-hot encoding each character in the input array
    x[i, t, char_indices[char]] = 1
  # One-hot encoding the next character in the output array
  y[i, char_indices[next_chars[i]]] = 1


Number of sequences: 200278
Unique characters in Nietzsche text: 57
Vectorization


In [None]:
# Building the network
# Single-layer LSTM model for next-character prediction (8.4).
# The layer stack is handed to Sequential in one go: an LSTM with 128 units reading
# (maxlen, vocabulary-size) one-hot windows, followed by a softmax over the vocabulary.
model = keras.models.Sequential([
    layers.LSTM(128, input_shape=(maxlen, len(chars))),
    layers.Dense(len(chars), activation='softmax'),
])
# Model compilation configuration (8.5): RMSprop optimizer with categorical
# cross-entropy, matching the one-hot encoded targets.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

# Function to sample the next character given the model's prediction (8.6)
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature reweighting.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Reweighting factor. Values below 1 sharpen the distribution,
            values above 1 flatten it, and 0 selects the most likely index
            deterministically (greedy decoding).

    Returns:
        The sampled index into ``preds`` (as returned by ``np.argmax``).
    """
    preds = np.asarray(preds).astype('float64')
    # A temperature of exactly 0 would divide by zero below; treat it as the
    # limiting case of the reweighting, which is a plain argmax.
    if temperature == 0:
        return np.argmax(preds)
    # log(0) = -inf is the intended result for zero-probability entries (they
    # stay at probability 0 after exp), so silence the divide warning it triggers.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature
    # Shift by the maximum before exponentiating: mathematically a no-op after
    # normalisation, but it keeps at least one term equal to exp(0) = 1 so very
    # small temperatures cannot underflow every term to 0 (which produced NaN).
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    # One multinomial trial yields a one-hot row; its argmax is the drawn index.
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

# Text generation loop (8.7)
# Trains for 59 single-epoch passes; after each pass, generates 400 characters at
# several temperatures so the evolution of the model's output can be inspected.
for epoch in range(1, 60):
    print('\nepoch', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    # Pick a random maxlen-character window of the corpus as the seed.
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + seed_text + '"')
    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('------ temperature:', temperature)
        # Restart from the same seed for every temperature so the passages are
        # directly comparable (otherwise each one continues the previous output).
        generated_text = seed_text
        sys.stdout.write(generated_text)
        for i in range(400):
            # One-hot encode the current maxlen-character window.
            sampled = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(generated_text):
                sampled[0, t, char_indices[char]] = 1.
            # Predict the next-character distribution and sample from it.
            preds = model.predict(sampled, verbose=0)[0]
            next_index = sample(preds, temperature)
            next_char = chars[next_index]
            # Slide the window forward by one character.
            generated_text += next_char
            generated_text = generated_text[1:]
            sys.stdout.write(next_char)
        # Terminate the passage so the next header starts on its own line.
        sys.stdout.write('\n')


epoch 1
--- Generating with seed: "s wandering of the reason and
the imagination, one ceases to"
------ temperature: 0.2
s wandering of the reason and
the imagination, one ceases to the sense of mankind the sense which the was a strict the great of the free of the sense of the sense of the sense of the sense of the ear has the sense of the sense of the comple the self the sense of the sense of the was the same the free the free of the each a pares the sense of the sense of the same the sich the sense of the self and the man be the sense of the sense of the sense of the sense------ temperature: 0.5
 the man be the sense of the sense of the sense of the sense of the compless and self the free the wast the great conciments of the was the spropent to has belome of a manting there is the manding the like the formed which is a manding
and whre such and the experience of the
exprations of the stith the aris of necessing as the sense and any the complance of the master the same the gate in th

In [None]:
# Using the trained model to generate 20 examples from random starting points.
for i in range(0, 20):
  print(f'\nExample of Generated Text #{i+1}:\n')

  # Selecting a random maxlen-character window of the corpus as the seed
  start_index = random.randint(0, len(text) - maxlen - 1)
  seed_text = text[start_index: start_index + maxlen]

  # Iterating with different temperatures
  for temperature in [0.2, 0.5, 1.0, 1.2]:
    print('\nTemperature = ', temperature, '\n')
    # Restart from the same seed for every temperature so the passages are comparable
    generated_text = seed_text
    # Collects the seed plus every generated character; `generated_text` itself is
    # only a sliding maxlen-character window and cannot be used for saving.
    passage = [seed_text]
    sys.stdout.write(generated_text)
    # Generating 400 characters (loop variable is `_` so the example counter `i` is not shadowed)
    for _ in range(400):
      # One-hot encode the current window of characters
      sampled = np.zeros((1, maxlen, len(chars)))
      for t, char in enumerate(generated_text):
        sampled[0, t, char_indices[char]] = 1.

      # Predicting the next character using the trained model
      preds = model.predict(sampled, verbose=0)[0]
      next_index = sample(preds, temperature)
      next_char = chars[next_index]

      # Sliding the window forward and recording the new character
      generated_text += next_char
      generated_text = generated_text[1:]
      passage.append(next_char)
      sys.stdout.write(next_char)
    print()
    # Appending the full passage (seed + generated text) to generatedtext.txt in the
    # current working directory; the context manager closes the file even on error.
    with open("generatedtext.txt", "a", encoding="utf-8") as f:
      f.write(f'Example {i+1}, temperature {temperature}:\n')
      f.write(''.join(passage))
      f.write('\n\n')



Example of Generated Text #1:


Temperature =  0.2 

god!--and my good tailoress!

young, a flower-decked cavern and instincts, the spirit is not to be such a charm of the spirit and such a man who have not all the demand of the strength of the same things and such a man who have been so man is the death of the most proved to his soul and such a philosopher and moral and moral and such an act of the spirit of the spirit and social and the same taste, and something of the strength of the same things and refin

Temperature =  0.5 

, and something of the strength of the same things and refined style of the same time with the last moral and reason of his end and controlus shamely regard to renders has been be moral responsible to the essence, and believes and contradict for one possible in the scientific interests that the soul and ruled by the same things of his taste. in the sardably almost things acts and experience all the basis of some distinguished that is to be the world and du

T

In [None]:
# Retraining the model with simpler fit (8.7)
# Keeps fitting the existing `model` on (x, y) one epoch at a time for 59 passes,
# without generating any sample text between passes.
for epoch in range(1, 60):
    print()
    print('epoch', epoch)
    model.fit(x=x, y=y, epochs=1, batch_size=128)


epoch 1

epoch 2

epoch 3

epoch 4

epoch 5

epoch 6

epoch 7

epoch 8

epoch 9

epoch 10

epoch 11

epoch 12

epoch 13

epoch 14

epoch 15

epoch 16

epoch 17

epoch 18

epoch 19

epoch 20

epoch 21

epoch 22

epoch 23

epoch 24

epoch 25

epoch 26

epoch 27

epoch 28

epoch 29

epoch 30

epoch 31

epoch 32

epoch 33

epoch 34

epoch 35

epoch 36

epoch 37

epoch 38

epoch 39

epoch 40

epoch 41

epoch 42

epoch 43

epoch 44

epoch 45

epoch 46

epoch 47

epoch 48

epoch 49

epoch 50

epoch 51

epoch 52

epoch 53

epoch 54

epoch 55

epoch 56

epoch 57

epoch 58

epoch 59
