# Simple Evolutionary Algorithm

## Import Libraries

In [None]:
import torch
import pathlib
import requests
import zipfile
import glob
import pretty_midi
import random
from collections import Counter
from IPython.display import Audio, display
import time
import pygame

## Set the device to GPU if available

In [None]:
# Prefer the CUDA backend when PyTorch reports one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f"Using device: {device}")

## Constants

In [None]:
# Paths
# Folder the extracted dataset is expected in; MIDI files are searched for below it.
DATA_FOLDER_PATH = pathlib.Path("data/maestro-v3.0.0")
# Local file the downloaded ZIP archive is written to.
ZIP_FILE_PATH = pathlib.Path("data/maestro-v3.0.0-midi.zip")

# URLs
# Remote location of the ZIP archive, fetched when ZIP_FILE_PATH does not exist yet.
URL = "https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip"

## Download and Extract Dataset

In [None]:
# Fetch and unpack the dataset only when the extracted folder is missing, so
# re-running this cell does not re-extract the whole archive every time.
if DATA_FOLDER_PATH.exists():
    print(f"Dataset already available at {DATA_FOLDER_PATH}.")
else:
    DATA_FOLDER_PATH.parent.mkdir(parents=True, exist_ok=True)

    if not ZIP_FILE_PATH.exists():
        print(f"Downloading {URL}...")
        # Stream into a temporary name first: an interrupted transfer must not
        # leave a truncated file under ZIP_FILE_PATH, because later runs would
        # treat it as a finished download.
        partial_path = ZIP_FILE_PATH.with_name(ZIP_FILE_PATH.name + ".part")
        # (connect, read) timeouts keep a stalled connection from hanging forever;
        # the context manager releases the connection when the download ends.
        with requests.get(URL, stream=True, timeout=(10, 60)) as response:
            response.raise_for_status()

            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        partial_path.replace(ZIP_FILE_PATH)
        print("Download Completed.")

    try:
        print(f"Extracting {ZIP_FILE_PATH}...")
        with zipfile.ZipFile(ZIP_FILE_PATH, 'r') as zip_ref:
            # Extract next to the archive, i.e. into the parent of DATA_FOLDER_PATH.
            zip_ref.extractall(DATA_FOLDER_PATH.parent)
        print("Extraction completed.")
    except zipfile.BadZipFile:
        print("Error: The file is not a valid ZIP file or it is corrupted.")

## Functions

### Load a MIDI file and convert it into a sequence of note numbers

In [None]:
def midi_to_sequence(midi_to_seq):
    """Load a MIDI file and return the pitches of its non-drum notes.

    Args:
        midi_to_seq: Path of the MIDI file, passed to ``pretty_midi.PrettyMIDI``.

    Returns:
        A 1-D ``torch.long`` tensor on ``device`` holding the note pitches,
        instrument by instrument, in the order ``pretty_midi`` lists them.
    """
    midi_data = pretty_midi.PrettyMIDI(midi_to_seq)

    notes = [
        note.pitch
        for instrument in midi_data.instruments
        if not instrument.is_drum
        for note in instrument.notes
    ]

    # Pin the dtype: without it, a file with no notes would produce a float
    # tensor while every other file produces an integer one.
    return torch.tensor(notes, dtype=torch.long, device=device)

### Load MIDI files from the Dataset

In [None]:
def load_midi_files(num_files=None):
    """Load MIDI files found under ``DATA_FOLDER_PATH`` as note sequences.

    Args:
        num_files: Number of files to pick at random. A falsy value
            (``None`` or ``0``) loads every file found. Requests larger than
            the number of available files are reduced to what is available.

    Returns:
        A list with one ``midi_to_sequence`` result per loaded file.
    """
    # ``**`` only descends into sub-directories when recursive=True is given;
    # sorting makes the file order independent of the filesystem.
    pattern = str(DATA_FOLDER_PATH / '**' / '*.mid*')
    all_filenames = sorted(glob.glob(pattern, recursive=True))
    print('Total number of MIDI files:', len(all_filenames))

    if num_files:
        # random.sample raises when asked for more items than exist.
        sample_size = min(num_files, len(all_filenames))
        if sample_size < num_files:
            print(f'Requested {num_files} MIDI files but only {len(all_filenames)} are available.')
        all_filenames = random.sample(all_filenames, sample_size)
        print(f'Selected {sample_size} MIDI files for training.')
    else:
        print('Using the entire dataset for training.')

    return [midi_to_sequence(filename) for filename in all_filenames]

### Fitness Function to Evaluate the Sequence

In [None]:
def fitness_function(sequence, transition_probs):
    """Score a note sequence by summing the probabilities of its transitions.

    Args:
        sequence: 1-D tensor (or any plain sequence) of note numbers.
        transition_probs: Mapping from ``(note, next_note)`` pairs to a
            probability; pairs missing from the mapping contribute 0.

    Returns:
        The sum over all consecutive note pairs; 0 for sequences with fewer
        than two notes.
    """
    # Convert once instead of calling .item() per element, which costs one
    # host/device synchronisation per note for tensors on a GPU.
    notes = sequence.tolist() if hasattr(sequence, "tolist") else list(sequence)

    return sum(transition_probs.get(pair, 0) for pair in zip(notes, notes[1:]))

### Initialize Population from MIDI Sequence

In [None]:
def initialize_population_from_midi(midi_seq_from_pop, pop_size):
    """Build an initial population of random contiguous excerpts.

    Args:
        midi_seq_from_pop: 1-D tensor of note numbers to draw excerpts from.
        pop_size: Number of excerpts to create.

    Returns:
        A list of ``pop_size`` non-empty tensors. Each one is an independent
        copy of a random slice of the source, so later in-place edits cannot
        alter the source sequence.

    Raises:
        ValueError: If the source sequence is empty.
    """
    seq_length = len(midi_seq_from_pop)
    if seq_length == 0:
        raise ValueError("Cannot initialize a population from an empty MIDI sequence.")

    population = []

    for _ in range(pop_size):
        # torch.randint excludes its upper bound: start may be any valid index
        # and end may reach seq_length, so the last note can be included and
        # one-note sources work too. The indices are only used as Python ints,
        # so they are drawn on the CPU.
        start_idx = torch.randint(0, seq_length, (1,)).item()
        end_idx = torch.randint(start_idx + 1, seq_length + 1, (1,)).item()
        population.append(midi_seq_from_pop[start_idx:end_idx].clone())

    return population

### Select Parents Based on Fitness Scores

In [None]:
def select_parents(population, fitness_scores, num_parents):
    """Return the fittest individuals, best first.

    Args:
        population: List of individuals.
        fitness_scores: 1-D tensor with one score per individual, in the same
            order as ``population``.
        num_parents: Number of individuals to keep; capped at the population
            size.

    Returns:
        A list with the selected individuals ordered by descending score.
    """
    # torch.topk raises when k exceeds the number of scores.
    k = min(num_parents, len(population))
    _, indices = torch.topk(fitness_scores, k)

    # One bulk conversion instead of indexing the list with 0-d tensors.
    return [population[i] for i in indices.tolist()]

### Crossover to Generate Offspring

In [None]:
def crossover(parent1, parent2):
    """Combine two parents with a single-point crossover.

    Args:
        parent1: 1-D tensor supplying the notes before the cut point.
        parent2: 1-D tensor supplying the notes from the cut point onwards.

    Returns:
        A new tensor ``parent1[:point] + parent2[point:]`` for a random cut
        point. If either parent has fewer than two notes, a copy of
        ``parent1`` is returned instead.
    """
    min_len = min(len(parent1), len(parent2))

    # If parents are too short, return a copy of the first one so callers
    # that edit the child in place never modify the parent.
    if min_len < 2:
        return parent1.clone()

    # torch.randint excludes its upper bound, so this draws from
    # 1 .. min_len - 1 and is valid for min_len == 2 as well.
    point = torch.randint(1, min_len, (1,)).item()

    return torch.cat((parent1[:point], parent2[point:]))

### Create Offspring from Parents

In [None]:
def create_offspring(parents, num_offspring):
    """Produce ``num_offspring`` children from randomly paired parents.

    Every child comes from ``crossover`` applied to two distinct parents
    drawn with ``random.sample``.
    """
    def breed():
        first, second = random.sample(parents, 2)
        return crossover(first, second)

    return [breed() for _ in range(num_offspring)]

### Mutate a Sequence

In [None]:
def mutate(sequence, mutation_rate=0.1, pitch_range=None):
    """Return a copy of ``sequence`` with some notes replaced at random.

    Args:
        sequence: 1-D tensor of note numbers. It is not modified.
        mutation_rate: Probability of replacing each individual note.
        pitch_range: Optional ``(low, high)`` pair for the replacement notes,
            with ``high`` excluded. Defaults to the module-level
            ``note_range``.

    Returns:
        A new tensor with the same shape as ``sequence``.
    """
    low, high = note_range if pitch_range is None else pitch_range

    # Work on a copy: the input may share memory with a parent or with the
    # source sequence, which must not change.
    mutated = sequence.clone()

    # Create the helper tensors on the sequence's own device so the masked
    # assignment never mixes devices.
    mutation_mask = torch.rand(mutated.shape, device=mutated.device) < mutation_rate
    random_notes = torch.randint(low, high, mutated.shape, device=mutated.device)
    mutated[mutation_mask] = random_notes[mutation_mask]

    return mutated

### Mutate a Population of Sequences

In [None]:
def mutate_population(population, mutation_rate=0.1):
    """Apply ``mutate`` to every sequence and return the results as a list."""
    mutated = []
    for individual in population:
        mutated.append(mutate(individual, mutation_rate))
    return mutated

### Evolutionary Algorithm Using a MIDI Sequence

In [None]:
def evolutionary_algorithm_midi(num_generations, midi_seq_for_ea, pop_size, num_parents, mutation_rate):
    """Evolve note sequences derived from one MIDI sequence.

    Args:
        num_generations: Number of generations to run.
        midi_seq_for_ea: Source note sequence; it seeds the population and
            supplies the transition probabilities used as fitness.
        pop_size: Number of individuals in the population.
        num_parents: Number of fittest individuals carried over unchanged and
            used for breeding in each generation.
        mutation_rate: Per-note mutation probability applied to offspring.

    Returns:
        The individual with the highest fitness in the final population.
    """
    population = initialize_population_from_midi(midi_seq_for_ea, pop_size)
    transition_probs = calculate_transition_probabilities(midi_seq_for_ea)

    def evaluate(individuals):
        # One fitness score per individual, in population order.
        return torch.tensor(
            [fitness_function(seq, transition_probs) for seq in individuals],
            device=device,
        )

    for generation in range(1, num_generations + 1):
        fitness_scores = evaluate(population)
        parents = select_parents(population, fitness_scores, num_parents)
        offspring = create_offspring(parents, pop_size - num_parents)
        offspring = mutate_population(offspring, mutation_rate)

        population = parents + offspring

        if generation % 5 == 0 or generation == num_generations:
            best_fitness = fitness_scores.max().item()
            print(f"Generation {generation}, Best Fitness: {best_fitness}")

    # The scores from the loop describe the population *before* it was
    # replaced, so their indices do not match the final population. Score the
    # final population itself (this also covers num_generations == 0).
    final_scores = evaluate(population)

    return population[int(torch.argmax(final_scores))]

### Calculate Transition Probabilities Between Notes

In [None]:
def calculate_transition_probabilities(midi_seq_calc):
    """Estimate how often each note-to-note transition occurs.

    Args:
        midi_seq_calc: 1-D tensor (or any plain sequence) of note numbers.

    Returns:
        A dict mapping ``(note, next_note)`` pairs of plain Python numbers to
        their relative frequency among all consecutive pairs. Empty when the
        sequence has fewer than two notes.
    """
    # Convert to plain Python numbers first. Tensor elements hash by object
    # identity, so tuples of them would all count as distinct keys and could
    # never be found again with integer pairs.
    notes = midi_seq_calc.tolist() if hasattr(midi_seq_calc, "tolist") else list(midi_seq_calc)

    transition_counts = Counter(zip(notes, notes[1:]))
    total_transitions = sum(transition_counts.values())

    return {pair: count / total_transitions for pair, count in transition_counts.items()}

### Convert Sequence to MIDI File

In [None]:
def sequence_to_midi(sequence, output_file, note_duration=0.5, velocity=100, program=0):
    """Write a note sequence to a MIDI file as back-to-back notes.

    Args:
        sequence: 1-D tensor (or any plain sequence) of note numbers.
        output_file: Destination passed to ``PrettyMIDI.write``.
        note_duration: Length of every note; note ``i`` starts at
            ``i * note_duration`` and ends where the next one starts.
        velocity: Velocity given to every note.
        program: Program number given to the instrument.

    Returns:
        None. Nothing is written when the sequence is empty.
    """
    # Ensure the sequence has notes before building any MIDI objects
    if len(sequence) == 0:
        print("The sequence is empty. Skipping MIDI file creation.")
        return

    # One bulk conversion instead of one .item() call per note.
    pitches = sequence.tolist() if hasattr(sequence, "tolist") else list(sequence)

    instrument = pretty_midi.Instrument(program=program)
    for i, pitch in enumerate(pitches):
        instrument.notes.append(
            pretty_midi.Note(
                velocity=velocity,
                pitch=int(pitch),
                start=i * note_duration,
                end=(i + 1) * note_duration,
            )
        )

    # Add instrument and write the MIDI file
    midi_data = pretty_midi.PrettyMIDI()
    midi_data.instruments.append(instrument)
    midi_data.write(output_file)
    print(f"MIDI file created: {output_file}")

### Play MIDI File in the Notebook

In [None]:
# Render a MIDI file to audio and embed a player in the notebook output
def play_midi_file(midi_file):
    """Synthesize ``midi_file`` and display it as an audio widget.

    Any exception raised on the way is caught and reported with a printed
    message instead of being propagated.
    """
    sample_rate = 44100

    try:
        midi = pretty_midi.PrettyMIDI(midi_file)
        waveform = midi.synthesize(fs=sample_rate)
        display(Audio(waveform, rate=sample_rate))
        print("\n")
    except Exception as e:
        print(f"Could not play the file: {e}")

## Set Parameters

In [None]:
# Number of MIDI files to load; set this to None to use the entire dataset
num_files_to_load = 1

# One entry per loaded file, each being that file's midi_to_sequence result
midi_sequences = load_midi_files(num_files=num_files_to_load)

# Example note range for mutation: mutate() passes these two values as the
# low and high arguments of torch.randint when drawing replacement notes
note_range = (48, 72)

## Run Evolutionary Algorithm

In [None]:
# Evolve one melody per loaded MIDI sequence. enumerate() numbers the
# sequences from 1, replacing a hand-maintained counter; the number is used
# in the progress output and in the generated file name.
for current_sequence, midi_sequence in enumerate(midi_sequences, start=1):
    print(f"Sequence {current_sequence}: \n")

    best_sequence = evolutionary_algorithm_midi(
        num_generations=20,
        midi_seq_for_ea=midi_sequence,
        pop_size=50,
        num_parents=10,
        mutation_rate=0.1,
    )

    if best_sequence is not None:
        print(f"Best sequence length: {len(best_sequence)} notes.")
        output_midi_file = f"generated_music_{current_sequence}.mid"
        sequence_to_midi(best_sequence, output_midi_file)
        play_midi_file(output_midi_file)
    else:
        print("No best sequence generated.")

## Play Specific Generated MIDI File

In [None]:
def play_midi_file(midi_file):
    """Play a MIDI file through pygame and block until playback ends.

    NOTE: this rebinds the name ``play_midi_file``, replacing the
    notebook-audio version defined earlier in this file.

    Args:
        midi_file: File to hand to ``pygame.mixer.music.load``.
    """
    pygame.init()
    try:
        pygame.mixer.init()
        pygame.mixer.music.load(midi_file)
        pygame.mixer.music.play()

        # Poll until the mixer reports that playback has finished.
        while pygame.mixer.music.get_busy():
            time.sleep(1)
    finally:
        # Shut pygame down even when initialising, loading or playing raised.
        pygame.quit()

output_midi_file_path = "generated_music_3.mid"

# Play the generated MIDI file. Files are only written for as many sequences
# as were loaded, so check that this one exists instead of letting the player
# fail on a missing file.
if pathlib.Path(output_midi_file_path).exists():
    play_midi_file(output_midi_file_path)
else:
    print(f"File not found: {output_midi_file_path}")