In [30]:
#!pip install pronouncing
#!pip install markovify

In [31]:
import pronouncing
import markovify
import re
import random
import numpy as np
import os
import keras
from keras.models import Sequential
from keras.layers import LSTM 
from keras.layers.core import Dense

#os.chdir('/Users/ptm/desktop/cwd20171207')


depth = 4 # depth of the network. changing will require a retrain
maxsyllables = 8 # maximum syllables per line. Change this freely without retraining the network
train_mode = True
#artist = "kanye_west" # used when saving the trained model
#artist = "lil_wayne" # used when saving the trained model
artist = "dr_seuss" # used when saving the trained model


rap_file = "neural_rap.txt" # where the rap is written to

def create_network(depth):
	model = Sequential()
	model.add(LSTM(4, input_shape=(2, 2), return_sequences=True))
	for i in range(depth):
		model.add(LSTM(8, return_sequences=True))
	model.add(LSTM(2, return_sequences=True))
	model.summary()
	model.compile(optimizer='rmsprop',
              loss='mse')

	if artist + ".rap" in os.listdir(".") and train_mode == False:
		model.load_weights(str(artist + ".rap"))
		print("loading saved network: " + str(artist) + ".rap") 
	return model

def markov(text_file):
    ######
	read = open(text_file, "r", encoding='utf-8').read()
	text_model = markovify.NewlineText(read)
	return text_model

def syllables(line):
	count = 0
	for word in line.split(" "):
		vowels = 'aeiouy'
		word = word.lower().strip(".:;?!")
		if word[0] in vowels:
			count +=1
		for index in range(1,len(word)):
			if word[index] in vowels and word[index-1] not in vowels:
				count +=1
		if word.endswith('e'):
			count -= 1
		if word.endswith('le'):
			count+=1
		if count == 0:
			count +=1
	return count / maxsyllables

def rhymeindex(lyrics):
	if str(artist) + ".rhymes" in os.listdir(".") and train_mode == False:
		print ("loading saved rhymes from " + str(artist) + ".rhymes")
		return open(str(artist) + ".rhymes", "r",encoding='utf-8').read().split("\n")
	else:
		rhyme_master_list = []
		print ("Alright, building the list of all the rhymes")
		for i in lyrics:
			word = re.sub(r"\W+", '', i.split(" ")[-1]).lower()
			rhymeslist = pronouncing.rhymes(word)
			rhymeslist = [x.encode('UTF8') for x in rhymeslist]
			rhymeslistends = []
			for i in rhymeslist:
				rhymeslistends.append(i[-2:])
			try:
				rhymescheme = max(set(rhymeslistends), key=rhymeslistends.count)
			except Exception:
				rhymescheme = word[-2:]
			rhyme_master_list.append(rhymescheme)
		rhyme_master_list = list(set(rhyme_master_list))

		reverselist = [x[::-1] for x in rhyme_master_list]
        #
        #
        #
		reverselist = sorted(str(reverselist))
		#
        #
        #
		rhymelist = [x[::-1] for x in reverselist]

		f = open(str(artist) + ".rhymes", "w", encoding='utf-8')
		f.write("\n".join(rhymelist))
		f.close()
		print(rhymelist)
		return rhymelist

def rhyme(line, rhyme_list):
	word = re.sub(r"\W+", '', line.split(" ")[-1]).lower()
	rhymeslist = pronouncing.rhymes(word)
	rhymeslist = [x.encode('UTF8') for x in rhymeslist]
	rhymeslistends = []
	for i in rhymeslist:
		rhymeslistends.append(i[-2:])
	try:
		rhymescheme = max(set(rhymeslistends), key=rhymeslistends.count)
	except Exception:
		rhymescheme = word[-2:]
	try:
		float_rhyme = rhyme_list.index(rhymescheme)
		float_rhyme = float_rhyme / float(len(rhyme_list))
		return float_rhyme
	except Exception:
		#return None
		return 0
##################
def split_lyrics_file(text_file):
	text = open(text_file, encoding='utf-8').read()
	text = text.split("\n")
	while "" in text:
		text.remove("")
	return text


def generate_lyrics(text_model, text_file):
	bars = []
	last_words = []
	lyriclength = len(open(text_file,encoding='utf-8').read().split("\n"))
	count = 0
	markov_model = markov(text_file)
	
	while len(bars) < lyriclength / 9 and count < lyriclength * 2:
		bar = markov_model.make_sentence()

		if type(bar) != type(None) and syllables(bar) < 1:
			
			def get_last_word(bar):
				last_word = bar.split(" ")[-1]
				if last_word[-1] in "!.?,":
					last_word = last_word[:-1]
				return last_word
				
			last_word = get_last_word(bar)
			if bar not in bars and last_words.count(last_word) < 3:
				bars.append(bar)
				last_words.append(last_word)
				count += 1
	return bars

def build_dataset(lines, rhyme_list):
	dataset = []
	line_list = []
	for line in lines:
		line_list = [line, syllables(line), rhyme(line, rhyme_list)]
		dataset.append(line_list)
	
	x_data = []
	y_data = []
	
	for i in range(len(dataset) - 3):
		line1 = dataset[i    ][1:]
		line2 = dataset[i + 1][1:]
		line3 = dataset[i + 2][1:]
		line4 = dataset[i + 3][1:]

		x = [line1[0], line1[1], line2[0], line2[1]]
		x = np.array(x)
		x = x.reshape(2,2)
		x_data.append(x)

		y = [line3[0], line3[1], line4[0], line4[1]]
		y = np.array(y)
		y = y.reshape(2,2)
		y_data.append(y)
		
	x_data = np.array(x_data)
	y_data = np.array(y_data)
	
	#print "x shape " + str(x_data.shape)
	#print "y shape " + str(y_data.shape)
	return x_data, y_data
	
def compose_rap(lines, rhyme_list, lyrics_file, model):
	rap_vectors = []
	human_lyrics = split_lyrics_file(lyrics_file)
	
	initial_index = random.choice(range(len(human_lyrics) - 1))
	initial_lines = human_lyrics[initial_index:initial_index + 2]
	
	starting_input = []
	for line in initial_lines:
		starting_input.append([syllables(line), rhyme(line, rhyme_list)])

	starting_vectors = model.predict(np.array([starting_input]).flatten().reshape(1, 2, 2))
	rap_vectors.append(starting_vectors)
	
	for i in range(100):
		rap_vectors.append(model.predict(np.array([rap_vectors[-1]]).flatten().reshape(1, 2, 2)))
	
	return rap_vectors
	
def vectors_into_song(vectors, generated_lyrics, rhyme_list):
	print ("\n\n")	
	print ("About to write rap (this could take a moment)...")
	print ("\n\n")
	def last_word_compare(rap, line2):
		penalty = 0 
		for line1 in rap:
			word1 = line1.split(" ")[-1]
			word2 = line2.split(" ")[-1]
			 
			while word1[-1] in "?!,. ":
				word1 = word1[:-1]
			
			while word2[-1] in "?!,. ":
				word2 = word2[:-1]
			
			if word1 == word2:
				penalty += 0.2
				
		return penalty

	def calculate_score(vector_half, syllables, rhyme, penalty):
		desired_syllables = vector_half[0]
		desired_rhyme = vector_half[1]
		desired_syllables = desired_syllables * maxsyllables
		desired_rhyme = desired_rhyme * len(rhyme_list)
#		print(vectors)
#		print(rhyme_list)             
#		print(vector_half)  
#		print(vector_half[0])     
#		print(vector_half[0])         
#		print(desired_rhyme)	
#		print(desired_syllables)		
		#score = 1.0 - abs(((float(desired_syllables) - float(syllables))) + abs((float(desired_rhyme) - float(rhyme)))) - penalty
#		score = 1.0 - abs(desired_syllables - syllables) + abs(desired_rhyme - rhyme) - penalty
		score = 1.0 - abs(float(desired_syllables) - float(syllables)) + abs(float(desired_rhyme) - float(rhyme)) - penalty
        		
		return score
		
	dataset = []
	for line in generated_lyrics:
		line_list = [line, syllables(line), rhyme(line, rhyme_list)]
		dataset.append(line_list)
	
	rap = []
	
	vector_halves = []
	
	for vector in vectors:
		vector_halves.append(list(vector[0][0])) 
		vector_halves.append(list(vector[0][1]))
	
		
	for vector in vector_halves:
		scorelist = []
		for item in dataset:
			line = item[0]
			
			if len(rap) != 0:
				penalty = last_word_compare(rap, line)
			else:
				penalty = 0
			total_score = calculate_score(vector, item[1], item[2], penalty)
			score_entry = [line, total_score]
			scorelist.append(score_entry)
		
		fixed_score_list = []
		for score in scorelist:
			fixed_score_list.append(float(score[1]))
		max_score = max(fixed_score_list)
		for item in scorelist:
			if item[1] == max_score:
				rap.append(item[0])
				print (str(item[0]))
				
				for i in dataset:
					if item[0] == i[0]:
						dataset.remove(i)
						break
				break     
	return rap

def train(x_data, y_data, model):
	model.fit(np.array(x_data), np.array(y_data),
			  batch_size=2,
			  epochs=30,
			  verbose=1)
	model.save_weights(artist + ".rap")
			  


def main(depth, train_mode):
	model = create_network(depth)
#	text_file = "lyrics.txt"
#	text_file = "lilwayne_20171208_E.txt" 
	text_file = "drseuss_20171208_D.txt" 
#	text_file = str(text_file, errors='ignore')
#	text_file = text_file.encode('utf-8').strip()


	text_model = markov(text_file)
	if train_mode == True:
		bars = split_lyrics_file(text_file)
	
	if train_mode == False:
		bars = generate_lyrics(text_model, text_file)
	
	rhyme_list = rhymeindex(bars)
	if train_mode == True:
		x_data, y_data = build_dataset(bars, rhyme_list)
		train(x_data, y_data, model)

	if train_mode == False:
		vectors = compose_rap(bars, rhyme_list, text_file, model)
        #
        #
		#print(vectors) 
       
		rap = vectors_into_song(vectors, bars, rhyme_list)
		f = open(rap_file, "w", encoding='utf-8')
		for bar in rap:
			f.write(bar)
			f.write("\n")	
		
main(depth, train_mode)

######### 

train_mode = False

		
main(depth, train_mode)

# improve the main function by creating a new file named neural_rap.txt instead of starting with an empty file named neural_rap.txt


##################

lyrics = open("neural_rap.txt", encoding='utf-8').read().split("\n") #this reads lines from a file called 'neural_rap.txt'
print(lyrics)





_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_97 (LSTM)               (None, 2, 4)              112       
_________________________________________________________________
lstm_98 (LSTM)               (None, 2, 8)              416       
_________________________________________________________________
lstm_99 (LSTM)               (None, 2, 8)              544       
_________________________________________________________________
lstm_100 (LSTM)              (None, 2, 8)              544       
_________________________________________________________________
lstm_101 (LSTM)              (None, 2, 8)              544       
_________________________________________________________________
lstm_102 (LSTM)              (None, 2, 2)              88        
Total params: 2,248
Trainable params: 2,248
Non-trainable params: 0
_________________________________________________________________
Alright,

I bet with my Ish wish dish.
And I said How do you do?
HILL WILL Will went up the chimney!
Would you like them here or there.
So he paused. And the fan and the fish.
I have a little bit more!
we see them come we see them come we see them go.
With this coat and this is why:
Should have a bird I like this?
And I will not with a fox?
It could be his head he had a book.
Should have a little toy man!
If you have to get rid of
We saw him step in on the ball!
Did you have to be so dumb now.
He picked up the chimney with glee.
I do not like them in the pot.
Then out of bed for a mouse.
Oh no! I do not like them
So we sat in the park in the Hat!
When our mother came in
Come down! We have to know how.
Bricks and blocks and clocks sir.
I will not with a box.
Said the fish in the hot hot sun.
How The Grinch looked around.
Her gown with the fan and the small
Hop hop! I am in bed.
I always pick up the cup
And he stuffed them in the dark?
Too wet to go Bump! Bump!
But that is not sunny.
We saw him pi

ValueError: max() arg is an empty sequence

In [None]:
# file = open("lilwayne_20171208_E.txt", 'r')
# book = file.read()


# def tokenize():
#     if book is not None:
#         words = book.lower().split()
#         return words
#     else:
#         return None
        

# def map_book(tokens):
#     hash_map = {}

#     if tokens is not None:
#         for element in tokens:
#             # Remove Punctuation
#             word = element.replace(",","")
#             word = word.replace(".","")

#             # Word Exist?
#             if word in hash_map:
#                 hash_map[word] = hash_map[word] + 1
#             else:
#                 hash_map[word] = 1

#         return hash_map
#     else:
#         return None


# # Tokenize the Book
# words = tokenize()
# word_list = ['cat','hat','sat']

# # Create a Hash Map (Dictionary)
# map = map_book(words)

# # Show Word Information
# for word in word_list:
#     print('Word: [' + word + '] Frequency: ' + str(map[word]))


