# This notebook contains the code to reproduce the analysis presented in the blog post about generating band names with recurrent neural networks:

# https://methodmatters.blogspot.com/2018/11/using-recurrent-neural-nets-to-generate.html

## Note that, because model training and text generation are random, your results will not exactly match those displayed here or on the blog. The outputs shown below were re-generated with the original code, so they also differ from the results described in the blog post.

## Get set up

In [1]:
# record which Python interpreter executed this notebook, so readers
# can compare their own environment with the one used here
import sys
python_version = sys.version
print(python_version)

3.6.7 |Anaconda, Inc.| (default, Oct 28 2018, 19:44:12) [MSC v.1915 64 bit (AMD64)]


In [2]:
# load basic packages
import numpy as np
import pandas as pd
# load the textgenrnn library
from textgenrnn import textgenrnn

Using TensorFlow backend.


In [3]:
# load the pre-trained network
# from the textgenrnn library
# (the resulting `textgen` object is the one trained and sampled from
# in the cells below)
textgen = textgenrnn()

In [4]:
# folder holding the band name data file; the generated CSVs are written here too.
# later cells build paths as in_dir + '<file name>', so keep the trailing separator
in_dir = "C:\\Directory\\"

## Update weights from pre-trained network with the band names directly from the data file

In [5]:
%time textgen.train_from_file(in_dir + 'pitchfork_artists.txt', is_csv = True, num_epochs=5)

8,651 texts collected.
Training on 112,107 character sequences.
Epoch 1/5
####################
Temperature: 0.2
####################
the steve carde

the steve brothers

the shappened steve sterson

####################
Temperature: 0.5
####################
chur

down charges

sear stander

####################
Temperature: 1.0
####################
pald & bury mandel of tones

goddiela

the losistworken

Epoch 2/5
####################
Temperature: 0.2
####################
the brothers

jack brother

the brothers

####################
Temperature: 0.5
####################
party white

no war

ferrean bulle

####################
Temperature: 1.0
####################
bob andle

smugh buddy

the rby hundey sunces

Epoch 3/5
####################
Temperature: 0.2
####################
carl singer

the courts

the future candits

####################
Temperature: 0.5
####################
marcal bath

lee and the high

dead band

####################
Temperature: 1.0
####################
nister

In [6]:
# save out the model
# (the call below is disabled; uncomment it to call textgen.save
# with a file path under in_dir)
# textgen.save(in_dir + 'rnn_model_band_names.hdf5')

## Generate band names at temperature of .20

In [7]:
# generate 5000 names at a temperature of .20
# the names are returned as a list
epoch_5_temp_20 = textgen.generate(n=5000, temperature=0.20,
                                   return_as_list=True)
# load the original artist names (first column of the data file).
# dtype=str and keep_default_na=False keep every name as the literal text
# found in the file, so names such as "NA" or "null" are not converted to
# NaN (which could never match a generated name)
orig_artist = pd.read_csv(in_dir + 'pitchfork_artists.txt',
                          header=None, dtype=str,
                          keep_default_na=False)[0].tolist()
# a set gives constant-time membership tests; scanning the full list once
# per generated name is needlessly slow
orig_artist_lookup = set(orig_artist)
# remove rnn-generated names that repeat an original name, then remove
# duplicates among the generated names. dict.fromkeys keeps first-seen
# order (insertion-ordered dicts, CPython 3.6+), so the result is stable
# from run to run for the same generated list, unlike list(set(...))
dl_artists_5_20 = list(dict.fromkeys(
    x for x in epoch_5_temp_20 if x not in orig_artist_lookup))

In [8]:
# preview: show the 20 names at the front of the list
dl_artists_5_20[:20]

['mark steven',
 'the beautiful moore',
 'the stars and the sound',
 'steve rebel',
 'the legendary',
 'steve marrine',
 'the stars constice',
 'robert beach',
 'maria boys',
 'the waller brothers',
 'the stars sound',
 'dead collective',
 'the daniel space',
 'ben karal',
 'black mark man',
 'the stars constine',
 'the sam sand',
 'daniel suns',
 'the love of the states',
 'the body money']

In [9]:
# wrap the generated names in a one-column data frame
dl_artists_5_20_df = pd.DataFrame({"band_names": dl_artists_5_20})
# write the frame to a csv file in in_dir, leaving out the row index
dl_artists_5_20_df.to_csv(path_or_buf=in_dir + 'dl_artists_5_20_df.csv',
                          index=False)

In [10]:
# split every generated name on whitespace, tally how often each word
# occurs, and show the 20 most frequent words
(
    dl_artists_5_20_df["band_names"]
    .str.split(expand=True)
    .stack()
    .value_counts()
    .iloc[:20]
)

the          2170
of            416
stars         371
beautiful     188
band          185
and           166
black         162
states        156
dead          128
sound         119
mark          116
steve         109
party          91
charles        91
steven         87
bears          72
sunset         71
sun            67
star           65
martin         65
dtype: int64

## Generate band names at temperature of .50

In [11]:
# generate 5000 names at a temperature of .50 (returned as a list)
epoch_5_temp_50 = textgen.generate(n=5000, temperature=0.50, return_as_list=True)
# a set gives constant-time membership tests; scanning the full list of
# original names once per generated name is needlessly slow
orig_artist_lookup = set(orig_artist)
# keep only names that are not in the original data, then drop duplicates.
# dict.fromkeys keeps first-seen order (insertion-ordered dicts,
# CPython 3.6+), so the result is stable, unlike list(set(...))
dl_artists_5_50 = list(dict.fromkeys(
    x for x in epoch_5_temp_50 if x not in orig_artist_lookup))

In [12]:
# preview: show the 20 names at the front of the list
dl_artists_5_50[:20]

['richard harmonies',
 'mentolical',
 'dream social',
 'red the portal and the orchestra',
 'sung plan',
 'the chapped program',
 'beat eminst',
 'brother richers',
 'the legendary',
 'chris christ',
 'bones of bear',
 'infinite jewen',
 'man grimes',
 'leave sounds',
 'the league planet',
 'mark wax',
 'mings',
 'alex and delt',
 'fight and magic gods',
 'the waller boys']

In [13]:
# wrap the generated names in a one-column data frame
dl_artists_5_50_df = pd.DataFrame({"band_names": dl_artists_5_50})
# write the frame to a csv file in in_dir, leaving out the row index
dl_artists_5_50_df.to_csv(path_or_buf=in_dir + 'dl_artists_5_50_df.csv',
                          index=False)

In [14]:
# split every generated name on whitespace, tally how often each word
# occurs, and show the 20 most frequent words
(
    dl_artists_5_50_df["band_names"]
    .str.split(expand=True)
    .stack()
    .value_counts()
    .iloc[:20]
)

the         1802
of           271
and          238
band         181
black        156
mark         118
dead         111
steve         86
boys          83
brothers      68
david         67
sound         66
martin        64
stars         63
party         61
death         59
daniel        59
charles       54
man           50
sam           48
dtype: int64

## Generate band names at temperature of 1.0

In [15]:
# generate 5000 names at a temperature of 1.0 (returned as a list)
epoch_5_temp_100 = textgen.generate(n=5000, temperature=1.0, return_as_list=True)
# a set gives constant-time membership tests; scanning the full list of
# original names once per generated name is needlessly slow
orig_artist_lookup = set(orig_artist)
# keep only names that are not in the original data, then drop duplicates.
# dict.fromkeys keeps first-seen order (insertion-ordered dicts,
# CPython 3.6+), so the result is stable, unlike list(set(...))
dl_artists_5_100 = list(dict.fromkeys(
    x for x in epoch_5_temp_100 if x not in orig_artist_lookup))

In [16]:
# preview: show the 20 names at the front of the list
dl_artists_5_100[:20]

['jfead on escare',
 'emey gorillar',
 'the feiled villagies',
 'iron angels',
 'shinoche',
 'the quartet',
 'mails',
 'stadia j mehman',
 'sleet chris',
 'hose and the david haowers',
 'jullas people',
 'the matt music',
 'black haowes',
 'black vega',
 'temple orchestra',
 'animalsky',
 'sickty prinds',
 'tuckfriendlax & jongo durk',
 'deehdargwest!',
 'the dereir gromman']

In [17]:
# wrap the generated names in a one-column data frame
dl_artists_5_100_df = pd.DataFrame({"band_names": dl_artists_5_100})
# write the frame to a csv file in in_dir, leaving out the row index
dl_artists_5_100_df.to_csv(path_or_buf=in_dir + 'dl_artists_5_100_df.csv',
                           index=False)

In [18]:
# split every generated name on whitespace, tally how often each word
# occurs, and show the 20 most frequent words
(
    dl_artists_5_100_df["band_names"]
    .str.split(expand=True)
    .stack()
    .value_counts()
    .iloc[:20]
)

the      510
and      104
of        87
&         78
black     36
dj        32
young     26
david     26
peter     23
a         22
on        21
band      20
john      20
big       20
white     19
in        19
for       17
tom       15
to        15
boys      15
dtype: int64

## Generate band names with prefix "the"

In [20]:
# generate 100 names that start with the prefix "the", at a temperature of .50
name_prefix_the_raw_50 = textgen.generate(n=100, temperature=0.50,
                                          return_as_list=True, prefix='the')
# a set gives constant-time membership tests instead of scanning the full
# list of original names once per generated name
orig_artist_lookup = set(orig_artist)
# remove those in the original data (generation order and duplicates are kept)
name_prefix_the_50 = [x for x in name_prefix_the_raw_50
                      if x not in orig_artist_lookup]

In [21]:
# preview: show the 20 names at the front of the list
name_prefix_the_50[:20]

['the holly night',
 'the black singers',
 'the bells',
 'the wall brothers',
 'the forever',
 'the world traved orchestra',
 'the hall bears',
 'the magic cold',
 'the weeks',
 'the amp bells',
 'the dreams',
 'the death priests',
 'the pearls',
 'the wilders of the rebott',
 'the money harrels',
 'the space',
 'the sun band',
 'the silver experiences',
 'the sea motion',
 'the brothers and the machine trios']

In [22]:
# wrap the generated names in a one-column data frame
name_prefix_the_50_df = pd.DataFrame({"prefix_the": name_prefix_the_50})
# write the frame to a csv file in in_dir, leaving out the row index
name_prefix_the_50_df.to_csv(path_or_buf=in_dir + 'name_prefix_the_50_df.csv',
                             index=False)

## Generate band names with prefix "dj"

In [25]:
# generate 100 names that start with the prefix "dj"
# NOTE(review): the variable names end in _50, but the temperature passed
# here is 1.0 (the "the"-prefix cell uses .50). Confirm which was intended
# before renaming the variables or changing the temperature.
name_prefix_dj_raw_50 = textgen.generate(n=100, temperature=1.0,
                                         return_as_list=True, prefix='dj')
# a set gives constant-time membership tests instead of scanning the full
# list of original names once per generated name
orig_artist_lookup = set(orig_artist)
# remove those in the original data (generation order and duplicates are kept)
name_prefix_dj_50 = [x for x in name_prefix_dj_raw_50
                     if x not in orig_artist_lookup]

In [26]:
# preview: show the 20 names at the front of the list
name_prefix_dj_50[:20]

['dj state',
 'dj boredl',
 'dj japant',
 'dj paniful cold & wheels',
 'dj jeff lion',
 'dj lightniphen',
 'dj and eternale',
 'dj stilliums',
 'dj knali',
 'dj six',
 'dj amido',
 'dj pipee',
 'dj/2',
 'dj triz',
 'dj pond',
 'dj corp',
 'dj mine comet bounce',
 'dj band',
 'dj attack',
 'dj grang']

In [27]:
# wrap the generated names in a one-column data frame
name_prefix_dj_50_df = pd.DataFrame({"prefix_dj": name_prefix_dj_50})
# write the frame to a csv file in in_dir, leaving out the row index
name_prefix_dj_50_df.to_csv(path_or_buf=in_dir + 'name_prefix_dj_50_df.csv',
                            index=False)