# Import libraries

In [12]:
import pandas as pd
import sklearn
from collections import Counter, defaultdict
import preprocessing.preprocessing as pp
import math
import random
import re
import numpy as np

# Load data

In [21]:
# Load the lyrics CSV through the preprocessing module's loader.
df = pp.load_data("english_cleaned_lyrics.csv")
# df = pp.preprocess_data(df)
# Keep only the two columns used below. The explicit copy makes `df` an
# independent frame, so the later in-place column assignments (normalised
# lyrics, word counts) do not trigger pandas' SettingWithCopyWarning.
df = df[['lyrics', 'genre']].copy()
# Distinct genre labels, in order of first appearance.
genres = df['genre'].unique()

print(df)

                                                   lyrics    genre
0       Oh baby how you doing You know I'm gonna cut r...      Pop
1       playin everything so easy it's like you seem s...      Pop
2       If you search For tenderness It isn't hard to ...      Pop
3       Oh oh oh I oh oh oh I If I wrote a book about ...      Pop
4       Party the people the people the party it's pop...      Pop
...                                                   ...      ...
218205  I gotta say Boy after only just a couple of da...  Country
218206  I helped you find her diamond ring You made me...  Country
218207  Look at the couple in the corner booth Looks a...  Country
218208  When I fly off this mortal earth And I'm measu...  Country
218209  I heard from a friend of a friend of a friend ...  Country

[218210 rows x 2 columns]


# Build a 3-gram model for each music genre

In [22]:
def preprocess_text(text):
    """Normalise a lyric string for n-gram training.

    The text is lower-cased, then every character that is neither a word
    character nor whitespace is removed (punctuation is dropped, not
    replaced by a space).
    """
    lowered = text.lower()
    return re.sub(r'[^\w\s]', '', lowered)


# Normalise every lyric (lower-case, punctuation stripped) before training.
df['lyrics'] = df['lyrics'].map(preprocess_text)

def build_ngram_model(texts, n):
    """Build an n-gram successor table from an iterable of texts.

    Each text is split on whitespace. For every run of ``n`` consecutive
    words that is followed by another word, that following word is appended
    to the list stored under the run (as a tuple). Repeated successors are
    kept, so drawing uniformly from a list reproduces observed frequencies.

    Args:
        texts: Iterable of strings.
        n: Number of words in each key.

    Returns:
        A ``defaultdict(list)`` mapping n-word tuples to successor lists.
    """
    successors = defaultdict(list)
    for lyric in texts:
        tokens = lyric.split()
        # A text with at most n tokens has no (gram, successor) pair.
        for start in range(len(tokens) - n):
            stop = start + n
            follower = tokens[stop]
            successors[tuple(tokens[start:stop])].append(follower)
    return successors



# Number of context words used as the key of each model entry.
ngram_size = 3

# One successor model per genre, trained only on that genre's lyrics.
genre_models = {}
for genre in genres:
    in_genre = df['genre'] == genre
    genre_lyrics = df.loc[in_genre, 'lyrics']
    genre_models[genre] = build_ngram_model(genre_lyrics, ngram_size)

def generate_lyrics(model, n, num_words=50):
    """Generate text by randomly walking an n-gram successor model.

    The walk starts from a key of ``model`` chosen uniformly at random and
    repeatedly appends a successor drawn uniformly from the list stored
    under the last ``n`` generated words.

    Args:
        model: Mapping from an n-word tuple to the list of words observed
            after it (the structure returned by ``build_ngram_model``).
        n: Number of words in each key of ``model``.
        num_words: Maximum number of words in the generated text.

    Returns:
        The generated words joined by single spaces. The text is shorter
        than ``num_words`` when the walk reaches a gram with no recorded
        successor, and empty when ``model`` has no entries or ``num_words``
        is not positive.
    """
    if not model or num_words <= 0:
        return ''
    current_gram = random.choice(list(model.keys()))
    result = list(current_gram)
    for _ in range(num_words - n):
        # Look up with .get() instead of indexing: indexing a defaultdict
        # would insert an empty list for every dead-end gram, silently
        # growing the model and letting later calls start from a dead end.
        possibilities = model.get(current_gram)
        if not possibilities:
            break
        next_word = random.choice(possibilities)
        result.append(next_word)
        current_gram = tuple(result[-n:])
    # The seed gram alone already holds n words; honour num_words < n too.
    return ' '.join(result[:num_words])


# Example Pop

In [23]:
# Sample up to 200 words from the Pop model.
genre_to_generate = 'Pop'
selected_model = genre_models[genre_to_generate]
new_lyrics = generate_lyrics(selected_model, ngram_size, num_words=200)
print(new_lyrics)

dead she knelt she cried and she cried overnight cause what she is after when you find the ones i loved is like leaving the one they hate just come with me i like what i want ill do anything to have you back again so until then send me the blues i was a fool playing by the rules by the rules got nothing left holding my holding my breath feel like im losing my heart i can make you feel em bring your troubles well see if we can play it thug get it up gotta get outta here no i dont love you baby cant do ay ay ay ay ay ay its my party and ill fuck who i want to breathe about but safe to say that anybodys keepin score but you gotta come to terms oh its another day another time and place oh baby dont you cry let me see you know who you are yea this is how well be able to define the colors that you see more than you ever dreamed possible dont be afraid to touch i want it now i know what it feels like the change that


# Example Country

In [27]:
# Sample from the Country model with the default length (50 words).
genre_to_generate = 'Country'
selected_model = genre_models[genre_to_generate]
new_lyrics = generate_lyrics(selected_model, ngram_size)
print(new_lyrics)

for sinners such as i am or let me go dancing dancing ive seen life from both sides now from win and lose and still somehow its lifes illusions i recall i really dont know what it is and then one night as a time these tangled minds will all


# Example Rock

In [25]:
# Sample from the Rock model with the default length (50 words).
genre_to_generate = 'Rock'
selected_model = genre_models[genre_to_generate]
new_lyrics = generate_lyrics(selected_model, ngram_size)
print(new_lyrics)

of those his head went back this far and now its all so cut and dried im all cut up sting like amphetamines and ready to rock in the storm there to ease the tension i got start it all over do it and i will never be alone and


# Example Metal

In [26]:
# Sample up to 200 words from the Metal model.
genre_to_generate = 'Metal'
selected_model = genre_models[genre_to_generate]
new_lyrics = generate_lyrics(selected_model, ngram_size, num_words=200)
print(new_lyrics)

at the evil that men do lives on and on burning the sky slumped like a headless scarecrow cold and limp against the wall stand up or die while theres still time cause soon its gone so pale and pure hear the stories of friends i never knew i could feel this way but its hard to ignore when your hand reaches out i abandon you begging for more tickling in their ears the fear is gone we seem to fill up the emptiness within the emptiness be gone oh yielding evidence of greed safe now in the heart of the mountain made of skulls of my enemies let the sea run red with the blood of the innocent denegrates humanitys existance when education and peaceful protest cant bring their liberation the strategy for their rescue changes into militant intervention every action has an impact every life saved is a victory were burning out were fading away a picture is all that remains is still only the pain as present to know the feeling the loss of memory oh my wintertime and a longing for the reason to live 

# Compute the mean and std of the word count per genre to generate lyrics of realistic length

In [19]:
# Number of whitespace-separated words in each song.
df['word_count'] = [len(lyric.split()) for lyric in df['lyrics']]
# Mean and standard deviation of the song length, one row per genre.
word_counts_by_genre = df.groupby('genre')['word_count']
genre_stats = word_counts_by_genre.agg(['mean', 'std'])

# Generate 1000 songs per genre

In [20]:
# Number of synthetic songs to generate for each genre.
num_lyrics_per_genre = 1000
lyrics_data = []

for genre, model in genre_models.items():
    mean_word_count = genre_stats.loc[genre, 'mean']
    std_word_count = genre_stats.loc[genre, 'std']
    # The sample std is NaN for a genre with a single song; fall back to no
    # spread so the length draw below stays a valid number.
    if not np.isfinite(std_word_count):
        std_word_count = 0.0
    for _ in range(num_lyrics_per_genre):
        # Draw the song length from the genre's word-count distribution,
        # clamped to at least one word.
        num_words = max(int(np.random.normal(mean_word_count, std_word_count)), 1)
        # Pass the sampled length on; without it every song would silently
        # use generate_lyrics' default length.
        lyrics = generate_lyrics(model, ngram_size, num_words)
        lyrics_data.append({'genre': genre, 'lyrics': lyrics, 'artist': f'n-gram {ngram_size}'})


# Build the frame once from the collected records and persist it as CSV.
lyrics_df = pd.DataFrame(lyrics_data)
lyrics_df.to_csv('generated_lyrics.csv', index=False)