In [None]:
# Installing necessary packages
!pip install nltk rouge



In [None]:
# Importing necessary packages and libraries
import os
import pandas as pd
import random
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge import Rouge
from nltk.translate.bleu_score import SmoothingFunction
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
# Mounting Google Drive in Google Colab
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Listing all available artists based on filenames in a directory
def list_available_artists(directory):
    """Return the artist names that have a processed-lyrics CSV in ``directory``.

    A file counts as an artist file when its name has the form
    ``processed_LDA_lyrics_with_topic_<artist>.csv``. The artist name is
    whatever sits between that prefix and the ``.csv`` suffix, so names that
    themselves contain underscores (e.g. ``machine_gun_kelly``) are returned
    intact and round-trip to the same file name the loaders build.

    Args:
        directory: Path of the folder to scan.

    Returns:
        Alphabetically sorted list of artist names (sorted because
        ``os.listdir`` gives no ordering guarantee).
    """
    prefix = 'processed_LDA_lyrics_with_topic_'
    suffix = '.csv'
    artists = []
    for file in os.listdir(directory):
        # Requiring the suffix skips backups/temp files that merely share the prefix
        if not (file.startswith(prefix) and file.endswith(suffix)):
            continue
        # Slicing off the exact prefix/suffix instead of splitting on '_',
        # which would keep only the last underscore-separated piece
        name = file[len(prefix):-len(suffix)]
        if name:
            artists.append(name)
    return sorted(artists)

In [None]:
# Loading themes for a given artist
def load_themes(artist_name, directory):
    """Return the distinct values of the ``theme`` column for one artist.

    Args:
        artist_name: Artist whose CSV
            (``processed_LDA_lyrics_with_topic_<artist_name>.csv``) is read.
        directory: Folder containing the processed CSV files.

    Returns:
        The unique ``theme`` values as returned by ``Series.unique()``, or
        ``None`` when the artist's CSV does not exist.
    """
    csv_name = f'processed_LDA_lyrics_with_topic_{artist_name}.csv'
    csv_path = os.path.join(directory, csv_name)
    if os.path.exists(csv_path):
        theme_column = pd.read_csv(csv_path)['theme']
        return theme_column.unique()
    # No file for this artist
    return None

In [None]:
# Loading lyrics data for a specific artist and theme
def load_lyrics(artist_name, theme, directory):
    """Return every ``processed_lyrics`` entry of an artist tagged with ``theme``.

    Args:
        artist_name: Artist whose CSV
            (``processed_LDA_lyrics_with_topic_<artist_name>.csv``) is read.
        theme: Value the ``theme`` column must equal for a row to be kept.
        directory: Folder containing the processed CSV files.

    Returns:
        A list with the ``processed_lyrics`` value of each matching row (empty
        when no row matches), or ``None`` when the artist's CSV does not exist.
    """
    csv_path = os.path.join(
        directory, f'processed_LDA_lyrics_with_topic_{artist_name}.csv'
    )
    if not os.path.exists(csv_path):
        return None
    frame = pd.read_csv(csv_path)
    # Boolean mask selecting the rows that belong to the requested theme
    theme_mask = frame['theme'] == theme
    return frame.loc[theme_mask, 'processed_lyrics'].tolist()

In [None]:
# Generating lyrics in structured format
def generate_lyrics(model, tokenizer, lyrics, seed_text, max_length=100, device='cpu', max_words=100):
    """Sample text from ``model`` conditioned on a seed phrase and one existing lyric.

    The prompt is ``"<seed_text> <context_line>"`` where ``context_line`` is a
    randomly chosen entry of ``lyrics``. When ``lyrics`` holds no usable text
    the prompt falls back to ``seed_text`` alone instead of raising.

    Args:
        model: Causal language model exposing ``eval()`` and ``generate()``.
        tokenizer: Tokenizer exposing ``encode``, ``decode`` and ``eos_token_id``.
        lyrics: Candidate context lines; entries that are not non-blank strings
            (e.g. NaN from empty CSV cells) are ignored.
        seed_text: Phrase placed at the start of the prompt.
        max_length: Number of tokens to generate on top of the prompt.
        device: Device the prompt tensor is moved to.
        max_words: Cap on whitespace-separated words in the returned text;
            ``None`` disables the cap. Defaults to the previous fixed value 100.

    Returns:
        The decoded sequence (prompt followed by the sampled continuation),
        truncated to ``max_words`` words and joined with single spaces.
    """
    model.eval()

    # Missing CSV cells load as float NaN; interpolating one would put the
    # literal text "nan" into the prompt, so only real strings are eligible.
    source_lines = lyrics if lyrics is not None else []
    candidates = [line for line in source_lines if isinstance(line, str) and line.strip()]
    if candidates:
        context_line = random.choice(candidates)  # Adding context from existing lyrics
        prompt = f"{seed_text} {context_line}"
    else:
        prompt = seed_text
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

    # A whole-song context line can be longer than the model's position table
    # (config.n_positions, 1024 for GPT-2). Trimming the prompt from the right
    # keeps the seed text and leaves room for max_length new tokens.
    n_positions = getattr(getattr(model, 'config', None), 'n_positions', None)
    if n_positions:
        prompt_budget = max(1, n_positions - max_length)
        input_ids = input_ids[:, :prompt_budget]
    total_length = max_length + input_ids.shape[-1]
    if n_positions:
        total_length = min(total_length, n_positions)

    sample_outputs = model.generate(
        input_ids,
        # Single unpadded sequence: every position is a real token
        attention_mask=torch.ones_like(input_ids),
        max_length=total_length,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        top_k=50,
        top_p=0.90,
        temperature=0.8,
        no_repeat_ngram_size=2,
        repetition_penalty=1.5
    )
    # Decoding and formatting the output
    generated_text = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
    # NOTE(review): the decoded sequence starts with the prompt, so with a long
    # context line the word cap below can cut off all newly sampled text.
    # Limiting the output to max_words words
    words = generated_text.split()[:max_words]
    return ' '.join(words)

In [None]:
# Formatting the generated text: one capitalized line per sentence (split on ". ")
def format_lyrics_improved(lyrics):
    """Break ``lyrics`` into one capitalized line per ``". "``-separated piece.

    Args:
        lyrics: Text to format.

    Returns:
        The non-empty pieces, each passed through ``str.capitalize()`` (first
        character upper-cased, the rest lower-cased), joined with newlines.
    """
    formatted_lines = []
    for sentence in lyrics.split('. '):
        # Empty pieces (e.g. after a trailing ". ") produce no line
        if sentence:
            formatted_lines.append(sentence.capitalize())
    return "\n".join(formatted_lines)

In [None]:
# Calculating BLEU score with smoothing.
def calculate_bleu(reference_words, generated_words):
    """Score ``generated_words`` against ``reference_words`` with smoothed BLEU.

    Args:
        reference_words: Reference token lists, passed to ``sentence_bleu`` as
            its references argument.
        generated_words: Token list of the hypothesis being scored.

    Returns:
        The value returned by ``sentence_bleu`` using
        ``SmoothingFunction().method4`` as the smoothing function.
    """
    return sentence_bleu(
        reference_words,
        generated_words,
        smoothing_function=SmoothingFunction().method4,
    )

In [None]:
# Defining the main function, which generates lyrics matching the essence of the artist and theme chosen by the user
def main():
    """Interactively generate lyrics for a chosen artist/theme and score them.

    Steps: list the artists found in the data directory, prompt for an artist,
    a theme and a seed phrase, load the matching lyrics, sample text from the
    pretrained ``gpt2`` checkpoint, print it, then print BLEU and ROUGE scores
    against the last 20% of the theme's lyrics. Any invalid choice prints a
    message and returns early. Nothing is returned.
    """
    directory = '/content/drive/MyDrive/Data_processed/'
    # Reading name of the artist from the available list
    artists = list_available_artists(directory)
    print("Available artists:", artists)
    # Stripping stray whitespace so e.g. "halsey " still matches the list
    artist_name = input("Enter an artist from the list: ").strip()
    if artist_name not in artists:
        print("Artist not available. Please choose from the list.")
        return
    # Providing list of unique themes of the selected artist
    themes = load_themes(artist_name, directory)
    if themes is None:
        print("No themes available for the selected artist.")
        return
    # Reading theme provided by the user based on the list
    print("Available themes:", themes)
    theme = input("Select a theme from the list: ").strip()
    if theme not in themes:
        print("Theme not available. Please choose from the list.")
        return
    # Reading seed text which act as starting part of the lyrics
    seed_text = input("Enter a seed phrase to start the lyrics: ").strip()
    lyrics = load_lyrics(artist_name, theme, directory)
    if lyrics is not None:
        # Empty CSV cells load as NaN floats; they can neither seed the prompt
        # nor be tokenised as references, so only real text is kept
        lyrics = [line for line in lyrics if isinstance(line, str) and line.strip()]
    if not lyrics:
        print("No lyrics data found for the selected theme.")
        return
    # Holding out the last 20% as evaluation references and drawing the prompt
    # context from the rest, so the output is not scored against the very line
    # it was prompted with. With too few lyrics to split, all of them are used.
    split_index = int(len(lyrics) * 0.8)
    reference_data = lyrics[split_index:]
    context_pool = lyrics[:split_index] or lyrics
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    # Generating lyrics in proper format
    generated_lyrics = generate_lyrics(model, tokenizer, context_pool, seed_text, device=device)
    formatted_lyrics = format_lyrics_improved(generated_lyrics)
    print("Generated Lyrics:\n", formatted_lyrics)
    # Evaluating the lyrics using the BLEU and Rouge calculation
    generated_words = formatted_lyrics.split()
    reference_words = [ref.split() for ref in reference_data]
    bleu_score = calculate_bleu(reference_words, generated_words)
    print(f"BLEU Score: {bleu_score}")
    rouge = Rouge()
    try:
        # ROUGE is computed against the first held-out reference only
        rouge_scores = rouge.get_scores(' '.join(generated_words), ' '.join(reference_words[0]))
        print("ROUGE Scores:", rouge_scores)
    except (IndexError, ValueError):
        # IndexError: no reference; ValueError: rouge rejects an empty
        # hypothesis or reference string
        print("ROUGE Scores: Not enough reference data to evaluate ROUGE.")

In [None]:
# Entry point: run the interactive pipeline (artist/theme/seed prompts, generation,
# BLEU/ROUGE evaluation) only when this code is executed directly, not when imported
if __name__ == "__main__":
    main()

Available artists: ['cardi b', 'billie eilish', 'arctic monkeys', 'dj khaled', '21 savage', 'ariana grande', 'halsey', 'eminem', 'maroon 5', 'machine gun kelly', 'imagine dragons', 'justin bieber', 'ed sheeran', 'drake', 'lady gaga', 'travis scott', 'the beatles', 'post malone', 'queen', 'taylor swift', 'nirvana', 'pink floyd']
Enter an artist from the list: halsey
Available themes: ['Intense Emotion' 'Bold and Rebellious' 'Reflection and Depth']
Select a theme from the list: Intense Emotion
Enter a seed phrase to start the lyrics: I really need you
Generated Lyrics:
 I really need you i'm searching for something that i can't reach i don't like them innocent, i don't want no face fresh want them wearing leather, begging, let me be your taste test i like the sad eyes, bad guys, mouth full of white lies kiss me in the corridor, but quick to tell me goodbye you say that you're no good for me 'cause i'm always tugging at your sleeve and i swear i hate you when you leave but i like it anywa

In [None]:
from transformers import GPT2Model, GPT2Config
# Build a GPT-2 configuration object using the library's default hyperparameters
config = GPT2Config()
# Instantiate a bare GPT2Model from that configuration.
# NOTE(review): no from_pretrained call is made here, so presumably this model carries
# freshly initialised weights rather than the pretrained checkpoint used in main() —
# confirm that is intended; the cell only uses the model to display its configuration.
model = GPT2Model(config)
# Print the model's configuration
print(model.config)

GPT2Config {
  "activation_function": "gelu_new",
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.40.1",
  "use_cache": true,
  "vocab_size": 50257
}



## Hyperparameter Table

| Hyperparameter Name                | Value                          |
|------------------------------------|--------------------------------|
| Activation Function (Hidden Layer) | GELU (`gelu_new`)              |
| Activation Function (Output Layer) | Linear                         |
| Weight Initializer                 | Normal Distribution (std=0.02) |
| Number of Hidden Layers            | 12                             |
| Neurons in Hidden Layers           | 768                            |
| Loss Function                      | Categorical Cross-Entropy      |
| Optimizer                          | AdamW                          |
| Number of Epochs                   | 3                              |
| Batch Size                         | 32                             |
| Learning Rate                      | 5e-5                           |
| Evaluation Metric                  | BLEU, ROUGE                    |
| Dropout Rate                       | 0.1                            |