# Clone impala mk2

## Initialization

In [54]:
import numpy as np
import matplotlib.pyplot as plt
import json
import re
import pandas as pd
import pickle
import tensorflow as tf

In [2]:
# Let TensorFlow grow its GPU memory allocation on demand instead of
# reserving all of it up front. Guard against machines without a GPU,
# where indexing the (empty) device list would raise IndexError.
gpu_devices = tf.config.list_physical_devices('GPU')
for _gpu in gpu_devices:
    # Applied to every visible GPU so the setting is the same on all devices.
    tf.config.experimental.set_memory_growth(_gpu, True)

# First GPU (or None when TensorFlow sees no GPU and will fall back to the CPU).
gpu_device = gpu_devices[0] if gpu_devices else None
if gpu_device is None:
    print("No GPU detected; TensorFlow will run on the CPU.")

To use the GPU while training, use this code snippet:
```
with tf.device('/GPU:0'):
    # Create your TensorFlow model here
    model = tf.keras.Sequential([...])
```

In [44]:
# Load the saved lyrics CSV and discard any row that has a missing value
df = pd.read_csv("corpuses\\tame impala lyrics.csv").dropna()

In [None]:
# Restore the tokenizer that was pickled to disk earlier.
# NOTE: pickle.load can execute arbitrary code, so only load a file you created yourself.
with open('models\\tame_impala_tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Vocabulary cap the tokenizer was configured with
total_words = tokenizer.num_words

## Data gathering

In [12]:
# Genius API auth keys, read from a local JSON file.
# The context manager closes the file even if json.load raises.
with open("genius_auth.json") as file:
    genius_auth = json.load(file)

In [13]:
from lyricsgenius import Genius

# Settings for the Genius API client
genius_options = {
    "verbose": False,                          # no progress messages
    "skip_non_songs": True,                    # ignore results that are not songs
    "excluded_terms": ["(Remix)", "(Live)"],   # skip titles containing these terms
    "remove_section_headers": True,            # strip section headers from the lyrics
}

# API Client, authenticated with the token read from the auth file
genius = Genius(genius_auth['client_access_token'], **genius_options)

In [14]:
# Getting all the songs (max_songs=None means no limit on how many are fetched)
artist = genius.search_artist(artist_name="Tame Impala", max_songs=None)

# search_artist gives back None when no matching artist is found; fail with a
# clear message instead of an AttributeError on `.songs`.
if artist is None:
    raise RuntimeError(
        "Genius returned no artist for 'Tame Impala'; "
        "check the access token and the network connection."
    )

songs = artist.songs

In [15]:
# Display the list of Song objects returned by the artist search
songs

[Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist, ...),
 Song(id, artist

### Turning the obtained information into a pandas dataframe

In [16]:
# Show the raw lyrics string of the first song. The output below starts with a
# contributors/translations/title header and contains a "You might also like"
# line in the middle of the lyrics.
songs[0].lyrics

'159 ContributorsTranslationsFrançaisTürkçeEspañolPortuguêsItalianoDeutschThe Less I Know the Better Lyrics\nSomeone said they left together\nI ran out the door to get her\nShe was holding hands with Trevor\nNot the greatest feeling ever\nSaid, "Pull yourself together\nYou should try your luck with Heather"\nThen I heard they slept together\nOh, the less I know the better\nThe less I know the better\n\nOh, my love, can\'t you see yourself by my side?\nNo surprise, when you\'re on his shoulder like every night\nOh, my love, can\'t you see that you\'re on my mind?\nDon\'t suppose you could convince your lover to change his mind\nSo goodbye\n\nShe said, "It\'s not now or never\nWait ten years, we\'ll be together"\nI said, "Better late than never\nJust don\'t make me wait forever"\nDon\'t make me wait forever\nDon\'t make me wait forever\nYou might also like\nOh, my love, can\'t you see yourself by my side?\nI don\'t suppose you could convince your lover to change his mind\n\nI was doing f

In [43]:
def _clean_lyrics(raw_lyrics):
    """Strip Genius page residue from a raw lyrics string.

    Args:
        raw_lyrics: Lyrics text as returned by the Genius client. May be
            None or empty.

    Returns:
        The lyrics with the leading "... Lyrics" header, the trailing
        "<digits>Embed" footer, standalone "You might also like" lines and
        empty lines removed. An empty string is returned for missing lyrics.
    """
    if not raw_lyrics:
        return ''

    # The first line carries page metadata and ends with "<title> Lyrics"
    cleaned = re.sub(r'^.*?Lyrics', '', raw_lyrics)

    # The footer sits at the very end of the text. It is anchored so that an
    # "embed" inside an actual lyric (e.g. "embedded") is left untouched.
    cleaned = re.sub(r'(You might also like)?\d*Embed\s*$', '', cleaned,
                     flags=re.IGNORECASE)

    # Drop empty lines and the "You might also like" line that can appear
    # on its own in the middle of the lyrics.
    kept_lines = [
        line for line in cleaned.splitlines()
        if line and line.strip().lower() != 'you might also like'
    ]
    return '\n'.join(kept_lines)


# Column-oriented container that becomes the dataframe
data = {
    "id": [],
    "artist": [],
    "title": [],
    "lyrics": []
}

# Iterating through the list
for song in songs:
    # Skip falsy entries (e.g. None) in the list
    if not song:
        continue
    data["id"].append(song.id)
    data["artist"].append(song.artist)
    data["title"].append(song.title)
    data["lyrics"].append(_clean_lyrics(song.lyrics))

# Turning into dataframe and saving it without the index column
df = pd.DataFrame(data)
df.to_csv('corpuses\\tame impala lyrics.csv', index=False)

In [41]:
# Reload the saved lyrics CSV from disk and preview the first rows.
# NOTE(review): no dropna() is applied here, so rows with missing values
# (if the file has any) remain in df.
df = pd.read_csv("corpuses\\tame impala lyrics.csv")
df.head()

Unnamed: 0,id,artist,title,lyrics
0,2165830,Tame Impala,The Less I Know the Better,Someone said they left together\nI ran out the...
1,2165813,Tame Impala,"New Person, Same Old Mistakes","I can just hear them now\n""How could you let u..."
2,721026,Tame Impala,Let It Happen,"It's always around me, all this noise\nBut not..."
3,2165828,Tame Impala,Yes I’m Changing,"I was raging, it was late\nIn the world my dem..."
4,94120,Tame Impala,Feels Like We Only Go Backwards,"It feels like I only go backwards, baby\nEvery..."


## Tokenizer creation

In [50]:
# Flatten the lyrics of every song into one list of lines. Each line gets a
# trailing ' \n' so the line break survives as its own space-separated piece.
lyrics_corpus = []
for song_lyrics in df['lyrics']:
    # Missing lyrics are not strings (pandas reads empty CSV fields as NaN);
    # skip them instead of failing on .splitlines().
    if not isinstance(song_lyrics, str):
        continue

    # Keep only the non-empty lines of this song
    lyrics_corpus.extend(
        line + ' \n' for line in song_lyrics.splitlines() if line
    )

In [53]:
# Preview the first three corpus lines (each one ends with ' \n')
lyrics_corpus[:3]

['Someone said they left together \n',
 'I ran out the door to get her \n',
 'She was holding hands with Trevor \n']

In [55]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Word-level tokenizer with an out-of-vocabulary placeholder. The filter list
# contains punctuation and tab but not '\n'.
tokenizer = Tokenizer(
    num_words=2000,
    oov_token="OOV",
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t',
)

# Learn the vocabulary from the lyric lines
tokenizer.fit_on_texts(lyrics_corpus)

# This is the num_words value passed to the constructor above
total_words = tokenizer.num_words
print(f"Total words in tokenizer:{total_words}")

# Persist the fitted tokenizer so later sessions can reload it
with open('models\\tame_impala_tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

Total words in tokenizer:2000


In [58]:
# Reload the pickled tokenizer from disk.
# NOTE: unpickling can run arbitrary code; only do this with a file you trust.
with open('models\\tame_impala_tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Read the configured vocabulary cap once the file has been closed
total_words = tokenizer.num_words

## Model architecture

In [None]:
# Bring in the sequential api for the generator and discriminator
from tensorflow.keras.models import Sequential
# Bring in the layers for the neural network
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Reshape, LeakyReLU, Dropout, UpSampling2D

### Generator

### Discriminator

## Training loop

## Generating the output