# Simple Evolutionary Algorithm

## Import Libraries

- **`torch`**: Tensor operations with optional GPU acceleration, used to store note sequences as tensors and to draw the random numbers used by the evolutionary algorithm.
- **`pathlib`**: Handle filesystem paths in an object-oriented way, used for managing file paths.
- **`requests`**: Make HTTP requests, used to download the MIDI dataset from a URL.
- **`zipfile`**: Handle ZIP archives, used to extract the downloaded dataset.
- **`glob`**: Find path names matching a specified pattern, used to locate all MIDI files in the dataset.
- **`pretty_midi`**: A library for handling MIDI files and musical notes in Python. It is used for loading MIDI files, converting note names to numbers, and saving MIDI files.
- **`random`**: Generate random numbers, used in the evolutionary algorithm for selection and mutation processes.
- **`from collections import Counter`**: Count the frequency of elements in a list, used to analyze note distributions in MIDI sequences.
- **`from IPython.display import Audio, display`**: Play audio and display widgets in Jupyter Notebooks, used to play generated music directly in the notebook.
- **`time`**: Used to pause execution while the music is playing.
- **`pygame`**: A library for creating games. It is used to play the MIDI file sounds.

In [None]:
import torch
import pathlib
import requests
import zipfile
import glob
import pretty_midi
import random
from collections import Counter
from IPython.display import Audio, display
import time
import pygame

## Set the device to GPU if available

In [None]:
# Prefer the CUDA GPU whenever PyTorch reports one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f"Using device: {device}")

## Constants

In [None]:
# Remote location of the zipped MIDI dataset (file name: maestro-v3.0.0-midi.zip)
# that the download step fetches
URL = "https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip"

# Local path where the downloaded archive is written; the download step is
# skipped when a file already exists here
ZIP_FILE_PATH = pathlib.Path("data/maestro-v3.0.0-midi.zip")

# Folder that is searched for MIDI files once the archive has been extracted
# NOTE(review): assumes the archive unpacks into a top-level "maestro-v3.0.0" folder - confirm
DATA_FOLDER_PATH = pathlib.Path("data/maestro-v3.0.0")

## Download and Extract Dataset

In [None]:
# Make sure the folders that will hold the archive and the extracted dataset exist
# (exist_ok=True makes this safe to run repeatedly)
ZIP_FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
DATA_FOLDER_PATH.parent.mkdir(parents=True, exist_ok=True)

# Download the .zip file (dataset) only if it is not already on disk
if not ZIP_FILE_PATH.exists():
    print(f"Downloading {URL}...")

    # Stream into a temporary ".part" file so that an interrupted download never
    # leaves a truncated archive at ZIP_FILE_PATH (which later runs would trust)
    partial_zip_path = ZIP_FILE_PATH.with_name(ZIP_FILE_PATH.name + ".part")

    # The timeout stops the cell from hanging forever on a stalled connection;
    # the context manager releases the connection once streaming is finished
    with requests.get(URL, stream=True, timeout=30) as response:
        response.raise_for_status()

        with open(partial_zip_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

    # Only a fully written file is promoted to the final archive name
    partial_zip_path.replace(ZIP_FILE_PATH)
    print("Download Completed.")

# Extract the archive next to where the dataset folder is expected
if ZIP_FILE_PATH.exists():
    try:
        print(f"Extracting {ZIP_FILE_PATH}...")
        with zipfile.ZipFile(ZIP_FILE_PATH, 'r') as zip_ref:
            # Derive the target from DATA_FOLDER_PATH instead of repeating the 'data' literal
            zip_ref.extractall(DATA_FOLDER_PATH.parent)
        print("Extraction completed.")
    except zipfile.BadZipFile:
        print("Error: The file is not a valid ZIP file or it is corrupted.")
else:
    print("Error: ZIP file does not exist.")

## Functions

### Load a MIDI file and convert it into a sequence of note numbers

In [None]:
def midi_to_sequence(midi_to_seq):
    """Read a MIDI file and return the pitches of its non-drum notes.

    Args:
        midi_to_seq: Value handed to ``pretty_midi.PrettyMIDI`` to load the file.

    Returns:
        A tensor on ``device`` holding the note pitches, instrument by
        instrument, in the order the notes are stored on each instrument.
    """
    # Parse the file into a PrettyMIDI object
    parsed_midi = pretty_midi.PrettyMIDI(midi_to_seq)

    # Collect the pitch of every note, skipping instruments flagged as drums
    pitches = [
        midi_note.pitch
        for track in parsed_midi.instruments
        if not track.is_drum
        for midi_note in track.notes
    ]

    # Hand the pitches back as a PyTorch tensor on the selected device
    return torch.tensor(pitches, device=device)

### Load MIDI files from the Dataset

In [None]:
def load_midi_files(num_files=None):
    """Load MIDI files from the dataset folder as tensors of note pitches.

    Args:
        num_files: How many files to pick at random. ``None`` (or any falsy
            value, such as 0) loads every file that was found. A request
            larger than the number of available files is reduced to that number.

    Returns:
        A list of tensors, one per loaded file, keeping only files that
        produced more than one note.
    """
    # '**' only descends into nested folders when recursive=True; without it the
    # pattern silently matches exactly one directory level.
    # Sorting makes the candidate list (and therefore a seeded random.sample)
    # independent of the order in which the filesystem lists the files.
    all_filenames = sorted(
        glob.glob(str(DATA_FOLDER_PATH / '**/*.mid*'), recursive=True)
    )
    print('Total number of MIDI files:', len(all_filenames))

    # Check if user has provided X number of files
    if num_files:
        # random.sample raises ValueError when asked for more items than exist,
        # so cap the request at what is actually available
        if num_files > len(all_filenames):
            print(f'Requested {num_files} MIDI files but only {len(all_filenames)} are available.')
            num_files = len(all_filenames)

        # Select X number of files at random from the dataset for training
        all_filenames = random.sample(all_filenames, num_files)
        print(f'Selected {num_files} MIDI files for training.')
    else:
        # Select all files from the dataset for training
        print('Using the entire dataset for training.')

    # Note-pitch tensors of the files that are kept
    midi_seq_to_load = []

    # Iterate through the selected MIDI files
    for filename in all_filenames:
        # Convert MIDI file into a tensor of note pitches
        sequence = midi_to_sequence(filename)
        print(f"Loaded sequence length: {len(sequence)}")

        # A sequence needs at least two notes to contain a note-to-note transition
        if len(sequence) > 1:
            midi_seq_to_load.append(sequence)

    # Return the list of note-pitch tensors
    return midi_seq_to_load

### Fitness Function to Evaluate the Sequence

Evaluate the quality of the musical sequence based on its alignment with learned transition probabilities.

In [None]:
def fitness_function(sequence, transition_probs):
    """Score a note sequence by summing the scores of its consecutive note pairs.

    Args:
        sequence: Tensor of notes; each element is read with ``.item()``.
        transition_probs: Mapping from ``(note, next_note)`` tuples to a score.
            Pairs missing from the mapping contribute 0.

    Returns:
        The accumulated score (0 when the sequence has fewer than two notes).
    """
    total_score = 0

    # Walk the sequence as overlapping (current, following) pairs
    for current_note, following_note in zip(sequence[:-1], sequence[1:]):
        # Plain Python numbers are needed to look the pair up in the dictionary
        key = (current_note.item(), following_note.item())
        total_score += transition_probs.get(key, 0)

    return total_score

### Initialize Population from MIDI Sequence

Create an initial population for an evolutionary algorithm. Each individual in this population is a sequence of MIDI notes, generated from a given MIDI sequence.

In [None]:
def initialize_population_from_midi(midi_seq_from_pop, pop_size):
    """Build the initial population as random contiguous slices of a MIDI sequence.

    Args:
        midi_seq_from_pop: 1-D tensor of notes to cut subsequences from.
            Must contain at least two notes.
        pop_size: Number of individuals (subsequences) to create.

    Returns:
        A list of ``pop_size`` tensors. Each one is an independent copy of a
        non-empty slice of the source sequence.

    Raises:
        ValueError: If the source sequence has fewer than two notes.
    """
    # Length of the source sequence bounds the indices that may be drawn
    seq_length = len(midi_seq_from_pop)

    # Fail with a clear message instead of an opaque torch.randint range error
    if seq_length < 2:
        raise ValueError(
            f"Need a MIDI sequence with at least 2 notes, got {seq_length}."
        )

    population = []

    # Create the required number of "individuals" (subsequences)
    for _ in range(pop_size):
        # The indices are turned into Python ints straight away, so they are
        # drawn on the CPU rather than on the compute device
        start_idx = torch.randint(0, seq_length - 1, (1,)).item()

        # torch.randint excludes its upper bound and so does slicing, hence
        # seq_length + 1: this lets end_idx reach seq_length so that the final
        # note of the source can be part of an individual
        end_idx = torch.randint(start_idx + 1, seq_length + 1, (1,)).item()

        # Slicing returns a view; clone it so that later in-place edits of an
        # individual cannot write through to the source sequence
        population.append(midi_seq_from_pop[start_idx:end_idx].clone())

    # Return the generated subsequences as the initial population
    return population

### Select Parents Based on Fitness Scores

Select the top-performing individuals from the population based on their fitness scores. These individuals are considered as potential parents for the next generation in an evolutionary algorithm.

In [None]:
def select_parents(population, fitness_scores, num_parents):
    """Pick the individuals with the highest fitness scores.

    Args:
        population: List of individuals, aligned index-by-index with ``fitness_scores``.
        fitness_scores: 1-D tensor holding one score per individual.
        num_parents: How many top-scoring individuals to return.

    Returns:
        A list of the selected individuals, in the order ``torch.topk``
        reports their indices.
    """
    # Only the positions of the best scores matter, not the scores themselves
    best_positions = torch.topk(fitness_scores, num_parents).indices.tolist()

    selected = []
    for position in best_positions:
        selected.append(population[position])
    return selected

### Crossover to Generate Offspring

A single-point crossover operation between two parent sequences to generate a child sequence. This way genetic material is exchanged between parents to produce offspring.

In [None]:
def crossover(parent1, parent2):
    """Combine two parents into one child with a single-point crossover.

    The child takes ``parent1`` up to a random cut point and ``parent2`` from
    that point onwards.

    Args:
        parent1: 1-D tensor of notes supplying the head of the child.
        parent2: 1-D tensor of notes supplying the tail of the child.

    Returns:
        A new tensor. When either parent has fewer than two notes no cut is
        possible and a copy of ``parent1`` is returned.
    """
    # The cut point has to be valid for both parents
    min_len = min(len(parent1), len(parent2))

    # Parents too short to cut: return a copy, not the parent itself, so that
    # mutating the child in place can never alter the parent
    if min_len < 2:
        return parent1.clone()

    # torch.randint excludes its upper bound, so (1, min_len) yields cut points
    # 1 .. min_len - 1. The previous bound of min_len - 1 raised an error for
    # min_len == 2 and could never cut just before the last shared position.
    # The value becomes a Python int immediately, so it is drawn on the CPU.
    point = torch.randint(1, min_len, (1,)).item()

    # Head of parent1 followed by the tail of parent2
    child = torch.cat((parent1[:point], parent2[point:]))

    return child

### Create Offspring from Parents

Generate a specified number of offspring from a given set of parents using crossover operations. This way new individuals (*offspring*) are created to form the next generation.

In [None]:
def create_offspring(parents, num_offspring):
    """Produce children by repeatedly crossing two randomly chosen parents.

    Args:
        parents: List of parent sequences to draw from.
        num_offspring: Number of children to create.

    Returns:
        A list with ``num_offspring`` children, each the result of
        ``crossover`` applied to two distinct entries of ``parents``.
    """
    # For every child: draw two different parents, then cross them
    return [
        crossover(*random.sample(parents, 2))
        for _ in range(num_offspring)
    ]

### Mutate a Sequence

Introduce random changes (*mutations*) to a sequence of notes with a specified probability. This is used to maintain genetic diversity and explore new solutions.

In [None]:
def mutate(sequence, mutation_rate=0.1, pitch_range=None):
    """Return a copy of a note sequence with random notes replaced.

    Args:
        sequence: Tensor of notes. It is left unmodified.
        mutation_rate: Probability, per note, of being replaced.
        pitch_range: Optional ``(low, high)`` pair for replacement notes.
            ``low`` is inclusive and ``high`` is exclusive, as in
            ``torch.randint``. When omitted, the module-level ``note_range``
            is used, as before.

    Returns:
        A new tensor with the same shape, dtype and device as ``sequence``.
    """
    # Fall back to the notebook-wide setting when no explicit range is given
    low, high = note_range if pitch_range is None else pitch_range

    # Boolean mask that is True with probability mutation_rate for each note;
    # it is created on the sequence's own device so the tensors always match
    mutation_mask = torch.rand(sequence.size(), device=sequence.device) < mutation_rate

    # Candidate replacement notes, one per position
    random_notes = torch.randint(
        low, high, sequence.size(),
        dtype=sequence.dtype, device=sequence.device,
    )

    # Build a NEW tensor instead of assigning into `sequence`: the argument may
    # be shared with a parent or with the source MIDI data, and writing into it
    # would silently corrupt them
    return torch.where(mutation_mask, random_notes, sequence)

### Mutate a Population of Sequences


Apply mutation to each sequence in the population.

In [None]:
def mutate_population(population, mutation_rate=0.1):
    """Run ``mutate`` on every sequence of a population.

    Args:
        population: Iterable of note sequences.
        mutation_rate: Per-note mutation probability forwarded to ``mutate``.

    Returns:
        A list with the result of ``mutate`` for each sequence, in order.
    """
    mutated = []
    for individual in population:
        mutated.append(mutate(individual, mutation_rate))
    return mutated

### Calculate Transition Probabilities Between Notes

Compute the probabilities of transitions between consecutive notes in a MIDI sequence.

In [None]:
def calculate_transition_probabilities(midi_seq_calc):
    """Estimate how often each note-to-note transition occurs in a sequence.

    Args:
        midi_seq_calc: Tensor of notes; each element is read with ``.item()``.

    Returns:
        A dict mapping ``(note, next_note)`` tuples to the share of all
        consecutive pairs in the sequence that equal that tuple. The dict is
        empty when the sequence has fewer than two notes.
    """
    # Tally every pair of neighbouring notes
    pair_counts = Counter(
        (current_note.item(), following_note.item())
        for current_note, following_note in zip(midi_seq_calc[:-1], midi_seq_calc[1:])
    )

    # Number of neighbouring pairs that were seen in total
    pair_total = sum(pair_counts.values())

    # Turn each count into a relative frequency
    probabilities = {}
    for pair, count in pair_counts.items():
        probabilities[pair] = count / pair_total

    return probabilities


### Evolutionary Algorithm Using a MIDI Sequence

Implement the evolutionary algorithm to optimize a sequence of MIDI notes. The algorithm iterates over X number of generations, evolving a population of sequences to maximize the fitness score.

In [None]:
def evolutionary_algorithm_midi(num_generations, midi_seq_for_ea, pop_size, num_parents, mutation_rate):
    """Evolve note sequences derived from a MIDI sequence and return the fittest.

    Each generation scores the population, keeps the ``num_parents`` best
    individuals, refills the population with mutated crossover children and
    reports progress every 5th and on the last generation.

    Args:
        num_generations: Number of generations to run.
        midi_seq_for_ea: Tensor of notes used both to seed the population and
            to derive the transition probabilities that define fitness.
        pop_size: Number of individuals in the population.
        num_parents: Number of top individuals carried over each generation.
        mutation_rate: Per-note mutation probability applied to the children.

    Returns:
        The individual of the final population with the highest fitness.
    """
    # Initialise the population from the provided MIDI sequence
    population = initialize_population_from_midi(midi_seq_for_ea, pop_size)

    # Calculate the transition probabilities between the notes in the MIDI sequence
    transition_probs = calculate_transition_probabilities(midi_seq_for_ea)

    def score_population(individuals):
        # One fitness value per individual, aligned with the list order
        return torch.tensor(
            [fitness_function(seq, transition_probs) for seq in individuals],
            device=device,
        )

    # Iterate over the specified number of generations
    for generation in range(1, num_generations + 1):
        # Calculate the fitness score for each sequence in the population
        fitness_scores = score_population(population)

        # Select the parents for the next generation (top-performing sequences)
        parents = select_parents(population, fitness_scores, num_parents)
        # Generate the new offspring
        offspring = create_offspring(parents, pop_size - num_parents)
        # Mutate the newly generated offspring
        offspring = mutate_population(offspring, mutation_rate)

        # Combine the parents and their offspring to form the new population
        population = parents + offspring

        # Best score among the individuals evaluated in this generation
        best_fitness = fitness_scores.max().item()

        # Print a status message every 5th generation and for the last generation
        if generation % 5 == 0 or generation == num_generations:
            print(f"Generation {generation}, Best Fitness: {best_fitness}")

    # The scores computed inside the loop describe the population BEFORE it was
    # replaced, so their argmax does not point at the same individual in the new
    # list. Score the final population itself before picking the winner; this
    # also covers num_generations == 0, where the loop never runs.
    final_scores = score_population(population)
    best_seq = population[torch.argmax(final_scores).item()]

    # Return the sequence with the best fitness score
    return best_seq

### Convert Sequence to MIDI File

In [None]:
def sequence_to_midi(sequence, output_file, note_duration=0.5, velocity=100, program=0):
    """Write a note sequence to a MIDI file as equally long, back-to-back notes.

    Args:
        sequence: Notes to write; either a 1-D tensor or any sized iterable
            of values that ``int()`` accepts.
        output_file: Destination passed to ``PrettyMIDI.write``.
        note_duration: Length of every note in seconds (default 0.5, the
            previously hard-coded value).
        velocity: Volume of every note (default 100, as before).
        program: Instrument program number (default 0, as before).

    Returns:
        None. Nothing is written when the sequence is empty.
    """
    # len() is used on purpose: truth-testing a multi-element tensor raises
    if len(sequence) == 0:
        print("The sequence is empty. Skipping MIDI file creation.")
        return

    # Create an empty PrettyMIDI object and the instrument that holds the notes
    midi_data = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=program)

    # Lay the notes out one after another on a fixed time grid
    for i, note in enumerate(sequence):
        # int() handles both 0-d tensors and plain Python numbers
        pitch = int(note)

        midi_note = pretty_midi.Note(
            velocity=velocity,
            pitch=pitch,
            start=i * note_duration,
            end=(i + 1) * note_duration,
        )
        instrument.notes.append(midi_note)

    # Add the instrument to the PrettyMIDI object and create the MIDI file
    midi_data.instruments.append(instrument)
    midi_data.write(output_file)

    print(f"MIDI file created: {output_file}")

### Play MIDI File in the Notebook

In [None]:
def play_midi_file(midi_file):
    """Synthesize a MIDI file and show an audio player for it in the notebook.

    Any exception raised along the way is caught and reported with a printed
    message instead of being propagated.

    Args:
        midi_file: Value handed to ``pretty_midi.PrettyMIDI`` to load the file.
    """
    try:
        # Load the file and render it to an audio waveform
        loaded_midi = pretty_midi.PrettyMIDI(midi_file)
        waveform = loaded_midi.synthesize()

        # NOTE(review): rate=44100 presumably matches synthesize()'s default sampling rate - confirm
        player = Audio(waveform, rate=44100)
        display(player)
        print("\n")
    except Exception as e:
        print(f"Could not play the file: {e}")

## Set Parameters

In [None]:
# How many MIDI files to pick from the dataset
# (None means: use every file that is found)
num_files_to_load = 2

# Load the files; the result is a list of tensors,
# one note sequence per loaded MIDI file
midi_sequences = load_midi_files(num_files_to_load)

# Pitch bounds used when a note is replaced during mutation
# 48 is C3 and 72 is C5
note_range = (48, 72)

## Run Evolutionary Algorithm

In [None]:
# Counter of processed sequences
current_sequence = 1

# Iterate over the loaded MIDI sequences
for midi_sequence in midi_sequences:
    # Current sequence
    print(f"Sequence {current_sequence}: \n")

    # Find the best sequence, based on the current MIDI sequence
    best_sequence = evolutionary_algorithm_midi(
        num_generations=20,
        midi_seq_for_ea=midi_sequence,
        pop_size=50,
        num_parents=10,
        mutation_rate=0.5
    )

    # Check if there created best_sequence
    if best_sequence is not None:
        print(f"Best sequence length: {len(best_sequence)} notes.")

        # Filename for the generated MIDI file based on the current sequence
        output_midi_file = f"generated_music_{current_sequence}.mid"

        # Convert the best sequence to MIDI
        sequence_to_midi(best_sequence, output_midi_file)

        # Play the generated MIDI file
        play_midi_file(output_midi_file)
    else:
        print("No best sequence generated.")

    # Increment the sequence counter for the next iteration
    current_sequence += 1

## Play Specific Generated MIDI File

In [None]:
def play_specific_midi_file(midi_file):
    """Play a MIDI file through pygame and block until playback has finished.

    Args:
        midi_file: File handed to ``pygame.mixer.music.load``.

    Raises:
        Whatever pygame raises when the mixer cannot be initialised or the
        file cannot be loaded or played; pygame is shut down first.
    """
    pygame.init()
    try:
        pygame.mixer.init()
        pygame.mixer.music.load(midi_file)
        pygame.mixer.music.play()

        # Poll once per second until the mixer reports that playback is over
        while pygame.mixer.music.get_busy():
            time.sleep(1)
    finally:
        # Always shut pygame down, even when loading or playing fails or the
        # cell is interrupted, so it is not left initialised
        pygame.quit()

# The run above writes generated_music_<n>.mid for n = 1 .. number of loaded
# sequences. With num_files_to_load = 2 there is never a file number 3, so
# point at the first generated file.
output_midi_file_path = "generated_music_1.mid"

# Play the generated MIDI file, but only if it exists, so that a missing file
# gives a readable message instead of a pygame error
if pathlib.Path(output_midi_file_path).exists():
    play_specific_midi_file(output_midi_file_path)
else:
    print(f"MIDI file not found: {output_midi_file_path}")