# Case Study-3

## <font color='red'> 1. Building a Markov Model </font>

In [36]:
import re # used to handle regular expression in extracted file for data clean up
import random # used in building markov model
from collections import defaultdict, deque # used to build collections in markovs model
import requests # for extracting data through API

In [37]:
# Markov Chain generator
# This is a text generator that uses Markov Chains to generate text using a uniform distribution.
# num_key_words is the number of words that compose a key (suggested: 2 or 3)

class MarkovChain:

  def __init__(self, num_key_words=3):
    self.num_key_words = num_key_words
    self.lookup_dict = defaultdict(list)
    self._punctuation_regex = re.compile('[,.!;\?\:\-\[\]\n]+')
    self._seeded = False
    self.__seed_me()

  def __seed_me(self, rand_seed=None):
    if self._seeded is not True:
      try:
        if rand_seed is not None:
          random.seed(rand_seed)
        else:
          random.seed()
        self._seeded = True
      except NotImplementedError:
        self._seeded = False

In [38]:
# Build Markov Chain from data source.
# Use add_file() or add_string() to add the appropriate format source

  def add_file(self, file_path):
    content = ''
    with open(file_path, 'r') as fh:
      self.__add_source_data(fh.read())

  def add_string(self, str):
    self.__add_source_data(str)

  def __add_source_data(self, str):
    clean_str = self._punctuation_regex.sub(' ', str).lower()
    tuples = self.__generate_tuple_keys(clean_str.split())
    for t in tuples:
      self.lookup_dict[t[0]].append(t[1])

  def __generate_tuple_keys(self, data):
    if len(data) < self.num_key_words:
      return

    for i in range(len(data) - self.num_key_words):
      yield [ tuple(data[i:i+self.num_key_words]), data[i+self.num_key_words] ]

In [39]:
# Generates text based on the data the Markov Chain contains
# max_length is the maximum number of words to generate

  def generate_text(self, max_length=20):
    context = deque()
    output = []
    if len(self.lookup_dict) > 0:
      self.__seed_me(rand_seed=len(self.lookup_dict))
      idx = random.randint(0, len(self.lookup_dict)-1)
      chain_head = list(self.lookup_dict.keys())
      context.extend(chain_head)

      while len(output) < (max_length - self.num_key_words):
        next_choices = self.lookup_dict[tuple(context)]
        if len(next_choices) > 0:
          next_word = random.choice(next_choices)
          context.append(next_word)
          output.append(context.popleft())
        else:
          break
      output.extend(list(context))
    return output

## <font color='red'> 2. Data Extraction from MusicMatch API </font>

In [41]:
apikey = "b35f687575517500787c7703ed7e6ac7"

In [42]:
# Getting the artist_id from band name search
url_artist = "http://api.musixmatch.com/ws/1.1/artist.search"
payload_artist = {'q_artist': 'Metallica', 'apikey': apikey, 'format':'json'}
response_artist = requests.get(url_artist, params=payload_artist)
print(response_artist)
response_artist = response_artist.json()
artist_id = response_artist['message']['body']['artist_list'][0]['artist']['artist_id']
print(artist_id)

<Response [200]>
64


In [43]:
# Getting all track_ids available for the artist
tracks = []
url_tracks = "http://api.musixmatch.com/ws/1.1/track.search?"
payload_tracks = {'q_track':'Battery', 'f_artist_id': artist_id,'page':1,'page_size': 10, 'page': 3, 'apikey': apikey}
response_tracks = requests.get(url_tracks, params=payload_tracks)
print(response_tracks)
response_tracks = response_tracks.json()
for item in response_tracks['message']['body']['track_list']:
    for i in item:
        track_id = item[i]["track_id"]
        print(track_id)
        tracks.append(track_id)
print(len(tracks))

<Response [200]>
138286528
134555313
136356206
134555312
134555269
134555290
6


In [44]:
# Getting lyrics from track_ids
data = []
url_lyrics = "http://api.musixmatch.com/ws/1.1/track.lyrics.get?"
track_count=1
for track_id in tracks:
    print(track_id)
    payload_lyrics = {'track_id' : int(track_id), 'apikey': apikey}
    response_lyric = requests.get(url_lyrics, params=payload_lyrics)
    response_lyric = response_lyric.json()
    print(response_lyric['message']['header']['status_code'])
    if (response_lyric['message']['header']['status_code'] == 200):
        response_lyric = response_lyric['message']['body']['lyrics']
        lyric_text = response_lyric['lyrics_body']
        # Cleaning the Data
        bad_string = '''******* This Lyrics is NOT for Commercial use *******'''
        lyric_text = lyric_text.replace(bad_string, '')
        another_bad_string = '''...'''
        lyric_text = lyric_text.replace(another_bad_string, '')
        data.append(lyric_text)
        track_count+1
        print(track_count)
        
print('Saved a total of %s track' %(track_count))

138286528
404
134555313
404
136356206
404
134555312
404
134555269
200
1
134555290
200
1
Saved a total of 1 track


<img src='music_Match_hits.JPG'>


In [45]:
# Save data in a file

# Open a file
f = open("prady_lyrics.txt", "wb")
for lyric in data:
    lyric = lyric.encode('ascii', 'ignore')
    f.write(lyric)

# Close opend file
f.close()


## <font color='red'> 3. Generating words from Markovs Model Trained </font>

In [50]:
if __name__ == '__main__':
    mc = MarkovChain()
    mc.add_file('prady_lyrics.txt')
    lyrics = mc.generate_text()
    str1 = ' '
    for lyric in lyrics:
        lyric = str1.join(lyric)
        print(lyric)

through the boundaries
me battery battery
found me cannot
to stay smashing
cower battery is
battery is here
the action returning
lunacy has found
weak are ripped
the battery cannot
a reaction weak
battery is found
lashing out the
that cower battery
kill the battery
me cannot stop
stay smashing through
all that cower
into obsession cannot
family battery is
hypnotizing power crushing
aggression turns into
here to stay
are ripped and
the boundaries lunacy
is found in
the battery pounding
out the action
and torn away
smashing through the
battery lashing out
battery cannot kill
boundaries lunacy has
cannot stop the
the family battery
obsession cannot kill
ripped and torn
battery pounding out
stop the battery
found in me
crushing all that
away hypnotizing power
pounding out aggression
turns into obsession
in me battery
reaction weak are
power crushing all
torn away hypnotizing
returning a reaction
cannot kill the
action returning a
is here to
has found me
battery battery lashing
out aggressi