In [None]:
from langchain.prompts import PromptTemplate

# === Step 1: Collect the song description from the user ===
# Every answer is kept as the raw string typed by the user.
genre = input("üéµ Enter genre (e.g., pop, rock, EDM): ")
instruments = input("üé∏ Enter instruments (e.g., guitar, piano): ")
mood = input("üåà Enter mood or theme (e.g., sad, uplifting, romantic): ")
lyrics_prompt = input("üìù Describe what the lyrics should be about: ")

# === Step 2: LangChain prompt template ===
# The placeholder names in the template text must match input_variables.
prompt_template = PromptTemplate(
    template="A {mood} {genre} song with {instruments}. Inspired by the theme: {lyrics}",
    input_variables=["genre", "instruments", "mood", "lyrics"],
)

# === Step 3: Fill the template with the answers ===
# Note the free-text description is bound to the template's "lyrics" slot.
final_prompt = prompt_template.format(
    **{
        "genre": genre,
        "instruments": instruments,
        "mood": mood,
        "lyrics": lyrics_prompt,
    }
)

# Header and prompt on separate lines, exactly as two consecutive print calls.
print("\nüé§ Final Music Prompt:", final_prompt, sep="\n")


In [None]:

huggingface-cli login

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

# A single-turn conversation in the role/content chat format.
messages = [dict(role="user", content="Who are you?")]

# High-level text-generation pipeline around the instruct checkpoint.
pipe = pipeline(task="text-generation", model="mistralai/Mistral-7B-Instruct-v0.3")

# Last expression of the cell, so the notebook displays the pipeline's result.
pipe(messages)

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Tokenizer and model are loaded from the same Hub checkpoint, so name it once.
checkpoint = "mistralai/Mistral-7B-Instruct-v0.3"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

In [None]:
import os
import json

# Convert the per-artist lyric files into one chat-style training file.
dataset_dir = "song_lyrics_dataset/json1"
# The output holds one JSON object per line (JSON Lines). The name must match
# the "train.jsonl" file that the fine-tuning cell passes to load_dataset.
output_file = "train.jsonl"

# Songs need strictly more than this many whitespace-separated words to be kept.
min_lyric_words = 50

# Explicit UTF-8 keeps reading and writing independent of the platform's
# default encoding.
with open(output_file, "w", encoding="utf-8") as out_f:
    # os.listdir returns entries in arbitrary order; sorting makes the
    # generated file reproducible between runs.
    for file_name in sorted(os.listdir(dataset_dir)):
        if not file_name.endswith(".json"):
            continue

        with open(os.path.join(dataset_dir, file_name), "r", encoding="utf-8") as f:
            data = json.load(f)

        # The artist name is the file name minus the "Lyrics_" marker and extension.
        artist = file_name.replace("Lyrics_", "").replace(".json", "")

        # assumes each file holds a list of song dicts -- TODO confirm
        for song in data:
            # `or` also covers keys that are present but null/empty, which
            # would otherwise crash on .strip() or yield a title of 'None'.
            lyrics = (song.get("lyrics") or "").strip()
            title = song.get("title") or "Untitled"
            if len(lyrics.split()) <= min_lyric_words:
                continue

            prompt = f"Write a song like {artist}, titled '{title}'."
            formatted = {
                "messages": [
                    {"role": "user", "content": prompt},
                    {"role": "assistant", "content": lyrics}
                ]
            }
            out_f.write(json.dumps(formatted) + "\n")


In [None]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
from datasets import load_dataset
import torch

# MODEL
# The unversioned id "mistralai/Mistral-7B-Instruct" does not resolve on the
# Hub; use the same versioned checkpoint as the earlier cells of this notebook.
base_model = "mistralai/Mistral-7B-Instruct-v0.3"

# Load tokenizer & model (with 4-bit quantization)
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # important fix for padding

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    # Passing load_in_4bit directly to from_pretrained is deprecated; the
    # quantization settings belong in a BitsAndBytesConfig.
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
    torch_dtype=torch.float16
)

# Apply LoRA: only the attention query/value projections get trainable adapters.
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# Load dataset
# NOTE(review): this path must match the file written by the dataset
# preparation cell.
dataset = load_dataset("json", data_files="train.jsonl")["train"]


# Tokenization function
def tokenize(example):
    """Render one two-message chat example in [INST] format and tokenize it.

    Args:
        example: A dataset row whose "messages" list holds the user turn at
            index 0 and the assistant turn at index 1.

    Returns:
        The tokenizer output for the rendered text, truncated to 1024 tokens.
    """
    prompt = example["messages"][0]["content"]
    response = example["messages"][1]["content"]
    full_prompt = f"<s>[INST] {prompt} [/INST] {response}</s>"
    # The text already spells out <s> and </s>, so the tokenizer must not add
    # its own special tokens (that would duplicate the BOS token).
    # No padding here: padding every row to 1024 tokens wastes compute and
    # defeats packing=True; batches are padded later by the data collator.
    return tokenizer(
        full_prompt,
        truncation=True,
        max_length=1024,
        add_special_tokens=False,
    )


tokenized_dataset = dataset.map(tokenize, remove_columns=dataset.column_names)

# Training arguments
# Effective batch size per device is 2 * 4 = 8 sequences per optimizer step.
training_args = TrainingArguments(
    output_dir="./mistral-lyrics-finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    optim="paged_adamw_8bit",
    learning_rate=2e-5,
    lr_scheduler_type="cosine",
    warmup_steps=100,
    fp16=True,
    logging_dir="./logs",
    logging_steps=20,
    save_strategy="epoch",
    save_total_limit=2,
    report_to="none"
)

# Trainer
# NOTE(review): the keyword arguments below (tokenizer=, packing=) and the
# handling of an already-tokenized dataset depend on the installed TRL
# version -- pin and confirm.
trainer = SFTTrainer(
    model=model,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    args=training_args,
    packing=True  # token-efficient
)

# Start training
trainer.train()

# Save the fine-tuned model
# `model` is the PEFT-wrapped model, so this presumably stores the LoRA
# adapter rather than merged full weights -- verify before deploying.
model.save_pretrained("./mistral-lyrics-finetuned")
tokenizer.save_pretrained("./mistral-lyrics-finetuned")


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.prompts import PromptTemplate
import torch

# === Load fine-tuned model ===
# Model and tokenizer both come from the directory written by the training cell.
finetuned_dir = "./mistral-lyrics-finetuned"
model = AutoModelForCausalLM.from_pretrained(
    finetuned_dir,
    device_map="auto",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(finetuned_dir)
tokenizer.pad_token = tokenizer.eos_token  # to avoid padding issues

# Rebuild the song description. genre, instruments, mood and lyrics_prompt are
# not defined in this cell and must already exist in the kernel.
prompt_template = PromptTemplate(
    template="A {mood} {genre} song with {instruments}. Inspired by the theme: {lyrics}",
    input_variables=["genre", "instruments", "mood", "lyrics"],
)
formatted_prompt = prompt_template.format(
    **{
        "genre": genre,
        "instruments": instruments,
        "mood": mood,
        "lyrics": lyrics_prompt,
    }
)

# === Step 3: Mistral Inference Function ===
def generate_lyrics(full_prompt):
    """Generate lyrics for a prompt with the module-level model and tokenizer.

    Args:
        full_prompt: Plain-text description of the song to write.

    Returns:
        The newly generated text only (the prompt is not echoed back), with
        special tokens removed and surrounding whitespace stripped.
    """
    # No literal "<s>" here: the tokenizer is expected to prepend the BOS
    # token itself, so spelling it out would duplicate it.
    input_text = f"[INST] {full_prompt} [/INST]"
    # The model is loaded with device_map="auto", so follow the model's own
    # device instead of assuming "cuda".
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    output = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.9,
        top_p=0.95,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        # Explicit pad id avoids the "pad_token_id not set" fallback warning.
        pad_token_id=tokenizer.pad_token_id,
    )

    # generate() returns prompt tokens followed by the new tokens; keep only
    # the continuation so callers receive lyrics rather than prompt + lyrics.
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = output[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

# === Step 4: Run Model ===
# Generate once, then print the header and the text on separate lines.
lyrics_output = generate_lyrics(formatted_prompt)
print("\nüé§ Generated Lyrics:\n", lyrics_output, sep="\n")


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.prompts import PromptTemplate
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
import torch

# === Load fine-tuned Mistral model ===
print("üîÅ Loading fine-tuned Mistral...")
finetuned_dir = "./mistral-lyrics-finetuned"
model = AutoModelForCausalLM.from_pretrained(
    finetuned_dir,
    device_map="auto",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(finetuned_dir)
tokenizer.pad_token = tokenizer.eos_token

# === Load MusicGen model ===
print("üîÅ Loading MusicGen...")
# Use 'medium' or 'melody' for better quality
music_model = MusicGen.get_pretrained('facebook/musicgen-small')
# Clip length is given in seconds.
music_model.set_generation_params(duration=15)

# === Step 2: Create prompt ===
# genre, instruments, mood and lyrics_prompt are not defined in this cell and
# must already exist in the kernel.
prompt_template = PromptTemplate(
    template="A {mood} {genre} song with {instruments}. Inspired by the theme: {lyrics}",
    input_variables=["genre", "instruments", "mood", "lyrics"],
)
formatted_prompt = prompt_template.format(
    **{
        "genre": genre,
        "instruments": instruments,
        "mood": mood,
        "lyrics": lyrics_prompt,
    }
)

# === Step 3: Generate lyrics ===
def generate_lyrics(full_prompt):
    """Generate lyrics for a prompt with the module-level model and tokenizer.

    Args:
        full_prompt: Plain-text description of the song to write.

    Returns:
        The newly generated text only (the prompt is not echoed back), with
        special tokens removed and surrounding whitespace stripped.
    """
    # No literal "<s>" here: the tokenizer is expected to prepend the BOS
    # token itself, so spelling it out would duplicate it.
    input_text = f"[INST] {full_prompt} [/INST]"
    # The model is loaded with device_map="auto", so follow the model's own
    # device instead of assuming "cuda".
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.9,
        top_p=0.95,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        # Explicit pad id avoids the "pad_token_id not set" fallback warning.
        pad_token_id=tokenizer.pad_token_id,
    )
    # generate() returns prompt tokens followed by the new tokens; keep only
    # the continuation so callers receive lyrics rather than prompt + lyrics.
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = output[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

print("\nüé§ Generating lyrics...")
lyrics = generate_lyrics(formatted_prompt)
print("\nüé∂ Lyrics:\n", lyrics)

# === Step 4: Generate music from prompt ===
print("\nüéß Generating background music...")
wav = music_model.generate([formatted_prompt])  # input must be a list
audio_write("generated_music", wav[0].cpu(), sample_rate=32000)
print("‚úÖ Music saved as 'generated_music.wav'")


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.prompts import PromptTemplate
from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write
from bark import generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
import torch

# === Load fine-tuned Mistral model ===
print("üîÅ Loading fine-tuned Mistral...")
finetuned_dir = "./mistral-lyrics-finetuned"
model = AutoModelForCausalLM.from_pretrained(
    finetuned_dir,
    device_map="auto",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(finetuned_dir)
tokenizer.pad_token = tokenizer.eos_token

# === Load MusicGen model ===
print("üîÅ Loading MusicGen...")
music_model = MusicGen.get_pretrained('facebook/musicgen-small')
# Clip length is given in seconds.
music_model.set_generation_params(duration=15)

# === Load Bark (for vocals) ===
print("üîÅ Loading Bark models...")
preload_models()

# === Step 1: Take user input ===
# Every answer is kept as the raw string typed by the user.
genre = input("üéµ Enter genre (e.g., pop, rock, EDM): ")
instruments = input("üé∏ Enter instruments (e.g., guitar, piano): ")
mood = input("üåà Enter mood or theme (e.g., sad, uplifting, romantic): ")
lyrics_prompt = input("üìù Describe what the lyrics should be about: ")

# === Step 2: Create full prompt ===
# The free-text description is bound to the template's "lyrics" slot.
prompt_template = PromptTemplate(
    template="A {mood} {genre} song with {instruments}. Inspired by the theme: {lyrics}",
    input_variables=["genre", "instruments", "mood", "lyrics"],
)
formatted_prompt = prompt_template.format(
    **{
        "genre": genre,
        "instruments": instruments,
        "mood": mood,
        "lyrics": lyrics_prompt,
    }
)

# === Step 3: Generate lyrics ===
def generate_lyrics(full_prompt):
    """Generate lyrics for a prompt with the module-level model and tokenizer.

    Args:
        full_prompt: Plain-text description of the song to write.

    Returns:
        The newly generated text only (the prompt is not echoed back), with
        special tokens removed and surrounding whitespace stripped. Returning
        only the continuation matters here because the result is later sent
        to the vocal synthesizer.
    """
    # No literal "<s>" here: the tokenizer is expected to prepend the BOS
    # token itself, so spelling it out would duplicate it.
    input_text = f"[INST] {full_prompt} [/INST]"
    # The model is loaded with device_map="auto", so follow the model's own
    # device instead of assuming "cuda".
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.9,
        top_p=0.95,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id,
        # Explicit pad id avoids the "pad_token_id not set" fallback warning.
        pad_token_id=tokenizer.pad_token_id,
    )
    # generate() returns prompt tokens followed by the new tokens; keep only
    # the continuation so callers receive lyrics rather than prompt + lyrics.
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = output[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

print("\nüé§ Generating lyrics...")
lyrics = generate_lyrics(formatted_prompt)
print("\nüé∂ Lyrics:\n", lyrics)

# === Step 4: Generate background music ===
print("\nüéß Generating background music...")
music_wav = music_model.generate([formatted_prompt])
audio_write("generated_music", music_wav[0].cpu(), sample_rate=32000)
print("‚úÖ Background music saved as 'generated_music.wav'")

# === Step 5: Generate vocals from lyrics using Bark ===
print("\nüéôÔ∏è Generating vocals from lyrics...")
vocal_array = generate_audio(lyrics)
write_wav("vocal_output.wav", rate=24000, data=vocal_array)
print("‚úÖ Vocals saved as 'vocal_output.wav'")


In [None]:
from pydub import AudioSegment

# === Load both audio files ===
# Read the two stems written by the generation cell.
vocals = AudioSegment.from_wav("vocal_output.wav")
bgm = AudioSegment.from_wav("generated_music.wav")

# === Adjust levels if needed ===
# Lower the backing track by 5 dB so the vocals are clearer.
bgm = bgm.apply_gain(-5)

# === Overlay vocals onto music ===
# Vocals start at the very beginning of the backing track.
final_mix = bgm.overlay(vocals, position=0)

# === Export final mix ===
final_mix.export("final_song.wav", format="wav")
print("‚úÖ Final song saved as 'final_song.wav'")


In [None]:
from pydub import AudioSegment

# Load WAV file
# Read the mixed WAV produced by the previous cell.
song = AudioSegment.from_file("final_song.wav", format="wav")

# Re-encode the same audio as MP3 next to it.
song.export("final_song.mp3", format="mp3")
print("‚úÖ Converted and saved as 'final_song.mp3'")
