In [None]:
####################
# import libraries #
####################

# tensorflow for keras and neural network
import tensorflow as tf
print ("TensorFlow version:", tf.__version__)

# keras neural network libraries
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense

# libraries for display widgets
from google.colab import widgets
import ipywidgets
from ipywidgets import Button
from IPython.display import display, clear_output

# numpy for dealing with arrays
import numpy as np

# FUNCTIONS:
# function that returns outputs from model
# potentially will have more functions from neural network

####################
#   Prepare Data   #
####################

# open lyric file with data
# the context manager closes the file handle as soon as the lines are read,
# instead of leaving it open for the rest of the session
with open("ts-all-songs.txt", "r", encoding = "utf8") as file:
  lines = file.readlines()

# CREATE DICTIONARY and prepare dataset
# glue all lines into a single string; every line keeps its trailing '\n'
data = ' '.join(lines)

# strip double quotes and commas, then lowercase so "Love" and "love" match
# ('\n' characters are still present in the string at this point)
data = data.replace('"','').replace(',','')
data = data.lower()

# remove any repeats - need to create index so only one of each word is needed
# dict.fromkeys drops duplicates in a single pass while keeping first-seen
# order (the old "w not in z" list scan was quadratic in the lyric length)
# note: str.split() splits on every kind of whitespace, so '\n' never ends up
# as a word of its own here
z = list(dict.fromkeys(data.split()))

dictionary = ' '.join(z)

# use keras tokenizer to create a dictionary of all words
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts([dictionary])

# show a short preview only - printing the whole vocabulary / token list is
# large enough to trip the notebook's IOPub data rate limit
print ("vocabulary size:", len(tokenizer.word_index))
print (dictionary[:500])
# save the tokenizer for later - use pickle.dump()

# convert the lyrics to numbers based on dictionary index
sequence_data = tokenizer.texts_to_sequences([data])[0]

print ("number of tokens:", len(sequence_data))
print (sequence_data[:50])
# store sequences of words (previous three words + word) in a list
# a full window needs four tokens, so start at index 3; starting earlier
# produced malformed windows that then had to be sliced away again
if len(sequence_data) < 4:
  raise ValueError("need at least 4 tokens to build a training sequence")

sequences = [sequence_data[i-3:i+1] for i in range(3, len(sequence_data))]

# preview only - the full list is too large to print in a notebook
print (len(sequences), "sequences, first few:", sequences[:5])

#scale inputs so that they are in between 0 and 1
# every window is valid now, so the scaler sees all of them (previously the
# first rows were skipped once for scaling and a second time when building
# x / y, which silently threw away two good samples)
scaler = MinMaxScaler()
sequences = scaler.fit_transform(sequences)

# numpy abbreviates large arrays, so this print stays small
print (sequences)

# create 3 input variables and 1 and output variable
# columns 0-2 are the three previous words (inputs), column 3 is the word
# that follows them (training target)
x = sequences[:, :3]
y = sequences[:, 3]




# convert y to categorical - used for loss calculations
#y = tf.keras.utils.to_categorical(y, num_classes=len(tokenizer.word_index)+1)


################################################
#                MODEL TIME                    #
################################################

# outputs function

# split data figure out what data is training data and what is testing
# split the prepared samples (not the raw lyric string) so that inputs and
# targets stay paired; random_state keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
  np.asarray(x, dtype="float32"),
  np.asarray(y, dtype="float32"),
  test_size=0.2,
  random_state=1)

# CREATE FUNCTIONAL API MODEL - NEURAL NETWORK using keras
# create and define an input layer: one row = three scaled word indices
inputLayer = Input (shape=(3,), name='inputLayer')
# connect the layers
# create hidden layers using dense
layer1 = Dense (1000, activation = "relu", name = 'layer1')(inputLayer)
layer2 = Dense (1000, activation = "relu", name = 'layer2')(layer1)
# create output layer
w1output = Dense(1, activation = "linear", name = 'w1output')(layer2)
# create branched layer for multiple outputs
branchedLayer1 = Dense (1000, activation = "relu", name = 'branchedLayer1')(layer2)
w2output = Dense (1, activation = "linear", name = 'w2output')(branchedLayer1)
branchedLayer2 = Dense (1000, activation = "relu", name = 'branchedLayer2')(layer2)
w3output = Dense (1, activation = "linear", name = 'w3output')(branchedLayer2)
# create and define a model using model class
model = Model(inputs=inputLayer, outputs = [w1output,w2output,w3output])
# display model summary for debugging
# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that only added a stray "None" line)
model.summary()

# import callbacks
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import TensorBoard
# modelCheckpoint used to store best weights
checkpoint = ModelCheckpoint('models.h5', monitor='loss',verbose=1, save_best_only=True,mode='auto' )
# Reduce learning rate when the loss hasn't improved for 5 epochs in a row.
# min_lr has to be below the optimizer's starting rate (Adam defaults to
# 0.001); with min_lr equal to that rate the callback could never reduce.
reduceLR = ReduceLROnPlateau(monitor='loss', factor=0.2, patience=5, min_lr=0.00001, verbose=1)
#visualize loss
TBvisual=TensorBoard(log_dir='logs')

#compile the model
model.compile(optimizer='adam',loss={'w1output':'mse','w2output':'mse','w3output':'mse'})

# train the model - fit it to the training portion only
# NOTE(review): y holds a single target per sample while the model has three
# output heads - confirm that training all heads on the same word is intended
model.fit(x_train, y_train, epochs=10, batch_size=64, callbacks=[checkpoint, reduceLR, TBvisual])

# evaluate the model for accuracy - only in debug/model preparation
# run predictions with test portion of data
predictions = model.predict(x_test)
word1=predictions[0]
word2=predictions[1]
word3=predictions[2]

# show up to five examples; min() guards against a test set smaller than 5
for i in range (min(5, len(x_test))):
  print (x_test[i],word1[i],word2[i],word3[i])
# in test program, will need to display accuracy


TensorFlow version: 2.7.0
[109, 373, 110, 42, 111, 43, 374, 375, 112, 376, 377, 378, 14, 379, 44, 45, 4, 373, 113, 15, 380, 114, 15, 381, 46, 15, 382, 383, 44, 47, 15, 384, 115, 385, 386, 387, 388, 116, 45, 48, 4, 117, 118, 49, 389, 390, 50, 391, 392, 48, 119, 110, 51, 52, 393, 16, 14, 394, 44, 391, 395, 120, 121, 7, 122, 396, 397, 4, 398, 7, 122, 111, 399, 400, 110, 17, 52, 401, 14, 50, 45, 392, 110, 402, 26, 15, 403, 387, 110, 404, 121, 7, 122, 405, 4, 398, 7, 122, 44, 406, 123, 407, 122, 115, 111, 27, 387, 53, 408, 48, 111, 409, 410, 43, 411, 121, 7, 122, 396, 397, 4, 398, 7, 122, 115, 8, 412, 413, 15, 414, 115, 415, 48, 416, 124, 44, 7, 417, 54, 14, 55, 8, 26, 44, 120, 46, 15, 418, 419, 111, 125, 126, 15, 420, 44, 7, 28, 421, 422, 127, 423, 56, 128, 424, 425, 14, 394, 18, 50, 15, 406, 426, 48, 427, 56, 387, 50, 115, 44, 128, 129, 14, 130, 121, 7, 122, 396, 397, 4, 398, 7, 122, 111, 399, 400, 110, 17, 52, 401, 14, 50, 45, 392, 110, 402, 26, 15, 403, 387, 110, 404, 121, 7, 122, 405, 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[[0.03035413 0.10455312 0.03063519 0.01152009]
 [0.10455312 0.03063519 0.01152333 0.03090756]
 [0.03063519 0.01152333 0.03091625 0.01180107]
 ...
 [0.01405284 0.01405284 0.01405284 0.00168587]
 [0.01405284 0.01405284 0.00168634 0.03399831]
 [0.01405284 0.00168634 0.03400787 1.        ]]
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 inputLayer (InputLayer)        [(None, 3)]          0           []                               
                                                                                                  
 layer1 (Dense)                 (None, 1000)         4000        ['inputLayer[0][0]']             
                                                                                                  
 layer2 (Dense)                 (None, 1000)         1001000     ['layer1[0][0]']                 
    

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# load the saved network from "models.h5" in the working directory (the same
# file name the training cell's ModelCheckpoint writes to)
model = keras. models.load_model("models.h5")

# numpy for dealing with arrays
import numpy as np

# math for rounding
import math

# FUNCTIONS:
# function that returns outputs from model
# potentially will have more functions from neural network
# rounding function
def round (num):
  """Round num to the nearest integer, with halves always rounding up.

  Works by adding 0.5 and taking the floor, so 2.5 -> 3 and -2.5 -> -2.
  The argument is never modified: the sum is computed into a new value
  rather than with an in-place "+=", which would change array-like
  arguments for the caller.

  NOTE: this intentionally keeps the name `round`, which shadows Python's
  builtin round() for every statement that runs after this definition.

  num: a real number (or anything math.floor accepts after adding 0.5)
  returns: the rounded value as produced by math.floor
  """
  return math.floor(num + 0.5)

####################
#   Prepare Data   #
####################

# open lyric file with data
# the context manager closes the file handle as soon as the lines are read;
# a missing file still raises FileNotFoundError here
with open("folklore.txt", "r", encoding = "utf8") as file:
  lines = file.readlines()

# CREATE DICTIONARY and prepare dataset
# glue all lines into a single string; every line keeps its trailing '\n'
data = ' '.join(lines)

# strip double quotes and commas, then lowercase so "Love" and "love" match
# ('\n' characters are still present in the string at this point)
data = data.replace('"','').replace(',','')
data = data.lower()

# remove any repeats - need to create index so only one of each word is needed
# dict.fromkeys drops duplicates in a single pass while keeping first-seen
# order (the old "w not in z" list scan was quadratic in the lyric length)
z = list(dict.fromkeys(data.split()))

dictionary = ' '.join(z)

# use keras tokenizer to create a dictionary of all words
# NOTE(review): this fits a brand-new tokenizer on this file's words, so the
# word -> index mapping need not match the one used when the model was
# trained; persisting the training tokenizer (see the pickle note below) and
# loading it here would keep the two aligned
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts([dictionary])

# show a short preview only - printing the whole vocabulary / token list can
# exceed the notebook's output rate limit
print ("vocabulary size:", len(tokenizer.word_index))
print (dictionary[:500])
# save the tokenizer for later - use pickle.dump()

# convert the lyrics to numbers based on dictionary index
sequence_data = tokenizer.texts_to_sequences([data])[0]

print ("number of tokens:", len(sequence_data))
print (sequence_data[:50])
# store sequences of words (three consecutive words per row) in a list
# MinMaxScaler is not among this cell's imports, so bring it in here
from sklearn.preprocessing import MinMaxScaler

# a full window needs three tokens, so start at index 3; starting earlier
# produced malformed windows that then had to be sliced away again
sequences = [sequence_data[i-3:i] for i in range(3, len(sequence_data))]
if not sequences:
  raise ValueError("need at least 4 tokens to build an input sequence")

#scale inputs so that they are in between 0 and 1
# NOTE(review): the scaler is fitted on this file's data, not on the data the
# model was trained with - confirm the two scalings are meant to differ
scaler = MinMaxScaler()
sequences = scaler.fit_transform(sequences)

# numpy abbreviates large arrays, so this print stays small
print (sequences)

# create 3 input variables
# each scaled row already is one model input (the three previous words), and
# every row is valid, so nothing is skipped any more (the old loop dropped
# two more good rows and then printed an undefined name, x1)
x = sequences

print (x.shape)


# run predictions with test portion of data
predictions = model.predict(np.asarray(x, dtype="float32"))

print (predictions)
# a multi-output model returns one (samples, 1) array per output; put them
# side by side so that row i holds all outputs for sample i. Reshaping the
# raw list to (-1, 3) instead mixed values of different samples into one row.
if isinstance(predictions, (list, tuple)):
  predictions = np.concatenate(predictions, axis=1)
predictions=np.reshape(predictions,(-1,3))
# undo the 0..1 scaling so the values are word indices again
predictions=scaler.inverse_transform(predictions)
print (predictions)

# round every value to the nearest whole word index
roundedP = [[round(value) for value in row] for row in predictions]
# preview only - the full lists are too long to print in a notebook
print (roundedP[:5])
newWord=tokenizer.sequences_to_texts(roundedP)
print (newWord[:5])

# print one original item next to the first decoded prediction
# NOTE(review): data is a str here, so data[2] is a single character rather
# than a word - presumably a word was intended; confirm and adjust
print (data[2],newWord[0])

# The triple-quoted text below is a bare string expression, i.e. an earlier
# draft of the comparison code that is kept but never executed.
# NOTE(review): inside it, word2 and word3 are both computed from word1, and
# ogword is never filled before it is indexed - fix before re-enabling.
"""
word1=predictions[0][0]
word2=predictions[0][1]
word3=predictions[0][2]

word1 = tokenizer.sequences_to_texts(word1)
word2 = tokenizer.sequences_to_texts(word1)
word3 = tokenizer.sequences_to_texts(word1)

ogword = []
# loop through each sequence
#for i in sequences[2:len(sequences)]:
  # store previous 3 word in their own input variables, current word in output
  # this will be used for training
  #ogword.append(tokenizer.sequences_to_texts[i])

for i in range (0,5):
  print (ogword[i],word1[i],word2[i],word3[i])
"""



FileNotFoundError: ignored