In [1]:
import sqlite3
import pandas as pd
import os
from midi_player import MIDIPlayer
from midi_player.stylers import basic, cifka_advanced
# import musicbrainzngs
from magenta.models.music_vae import data

# Move two levels up so that the `plagdet.*` imports below resolve.
# NOTE(review): a relative chdir is not idempotent — re-running this cell moves two more levels up.
os.chdir('../')
os.chdir('../')

from plagdet.src.defaults import *
from plagdet.scripts.synthetic_data.monophonic_melody_extractor import MonophonicMelodyExtractor
from plagdet.scripts.synthetic_data.pair_generator import SyntheticDataGenerator
from plagdet.scripts.synthetic_data.utils import calculate_total_bars, get_tempo_and_time_signature, calculate_bars_for_three_minutes


Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.[0m
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.[0m
  from numba.decorators import jit as optional_jit
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Display the MAESTRO dataset root (presumably provided by the star import from plagdet.src.defaults — confirm).
MAESTRO_PATH

'plagdet/data/midi_databases/maestro'

In [4]:
import logging
from plagdet.src.utils.log import configure_logging

logger = logging.getLogger(__name__)
configure_logging()

def produce_monophonic_dataset(evaluable_directory_path: str) -> None:
    """Convert every MIDI file under a source directory to a monophonic melody file.

    Results are appended to the CSV tracker at ``PROCESSED_MELODY_TRACKER`` so that
    files already attempted for the same source root are skipped on later runs.

    Args:
        evaluable_directory_path: Name of a module-level constant (for example
            ``'MAESTRO_PATH'``) that holds the directory to scan. The name itself
            is stored in the tracker's ``raw_root`` column.

    Side effects:
        Creates ``MONOPHONIC_MIDIS_PATH`` if needed, writes ``*_mono.mid`` files into
        it and rewrites the tracker CSV. The tracker is written even when processing
        is interrupted by an exception, so completed work is not lost.
    """
    extractor = MonophonicMelodyExtractor()
    # NOTE(review): eval() executes arbitrary code; only ever pass trusted constant
    # names here. Kept because the argument's contract is "an evaluable expression".
    directory_path = eval(evaluable_directory_path)

    # The destination is a fixed constant, so it is referenced directly instead of
    # being eval()'d; its name is still recorded in the tracker.
    evaluable_destination_path = 'MONOPHONIC_MIDIS_PATH'
    destination_path = MONOPHONIC_MIDIS_PATH

    # Ensure the destination directory exists
    os.makedirs(destination_path, exist_ok=True)

    log_columns = ['raw_root', 'raw_relative_path', 'success', 'mono_root', 'mono_relative_path']
    if os.path.exists(PROCESSED_MELODY_TRACKER):
        log = pd.read_csv(PROCESSED_MELODY_TRACKER)
    else:
        log = pd.DataFrame(columns=log_columns)

    # Key on (root, relative path): two source databases may share relative paths.
    already_processed = set(zip(log['raw_root'], log['raw_relative_path']))
    new_rows = []

    try:
        for root, _, files in os.walk(directory_path):
            for file in files:
                # Check if the file is a MIDI file
                if not file.lower().endswith(('.mid', '.midi')):
                    logger.info(f'Skipping {file} as it is not a MIDI file')
                    continue

                path = os.path.join(root, file)
                relative_path = os.path.relpath(path, directory_path)
                if (evaluable_directory_path, relative_path) in already_processed:
                    continue

                logger.info(f'Processing {path}')
                # NOTE(review): the output name uses only the base file name, so two
                # inputs with the same name in different sub-folders overwrite each other.
                dest = os.path.join(destination_path, f'{os.path.splitext(file)[0]}_mono.mid')
                success = extractor.make_file_valid(path, dest)

                new_rows.append({
                    'raw_root': evaluable_directory_path,
                    'raw_relative_path': relative_path,
                    'success': success,
                    'mono_root': evaluable_destination_path,
                    'mono_relative_path': os.path.relpath(dest, destination_path) if success else None,
                })
                already_processed.add((evaluable_directory_path, relative_path))

                if success:
                    logger.info(f'{file} successfully made monophonic')
    finally:
        # Build the new rows once (no per-file concat) and persist whatever was
        # completed, even if the loop above was interrupted.
        if new_rows:
            new_log = pd.DataFrame(new_rows, columns=log_columns)
            log = new_log if log.empty else pd.concat([log, new_log], ignore_index=True)
        log.to_csv(PROCESSED_MELODY_TRACKER, index=False)

produce_monophonic_dataset('MAESTRO_PATH')

[2024-09-03 21:18:44] [INFO ] Skipping LICENSE as it is not a MIDI file
[2024-09-03 21:18:44] [INFO ] Skipping maestro-v3.0.0.csv as it is not a MIDI file
[2024-09-03 21:18:44] [INFO ] Skipping maestro-v3.0.0.json as it is not a MIDI file
[2024-09-03 21:18:44] [INFO ] Skipping README as it is not a MIDI file
[2024-09-03 21:18:44] [INFO ] Processing plagdet/data/midi_databases/maestro\2004\MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi
[2024-09-03 21:18:44] [INFO ] plagdet/data/midi_databases/maestro\2004\MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi is longer than 3 minutes, extracting 90 bars.
[2024-09-03 21:18:46] [INFO ] Adjusted tempo to original: 120.0 BPM
[2024-09-03 21:18:46] [INFO ] MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi successfully made monophonic
[2024-09-03 21:18:46] [INFO ] Processing plagdet/data/midi_databases/maestro\2004\MIDI-Unprocessed_SMF_02_R1_

In [12]:
# NOTE(review): hard-coded absolute local path; prefer building it from the repo-relative MAESTRO_PATH.
path = r'C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\maestro'

# Walk the whole tree and keep the LAST MIDI file encountered.
filee = None
for root, dirs, files in os.walk(path):
    for file in files:
        # Case-insensitive extension match, consistent with produce_monophonic_dataset.
        if file.lower().endswith(('.mid', '.midi')):
            filee = os.path.join(root, file)

# Fail with a clear message instead of a NameError further down.
if filee is None:
    raise FileNotFoundError(f'No MIDI files found under {path}')

import mido
midi = mido.MidiFile(filee)
# Print all set tempo events
print("Set Tempo Events:")
for i, track in enumerate(midi.tracks):
    for msg in track:
        if msg.type == 'set_tempo':
            # msg.tempo is converted to BPM by mido
            tempo = mido.tempo2bpm(msg.tempo)
            print(f"Track {i}: Tempo set to {tempo:.2f} BPM")


MIDIPlayer(filee, 500, styler=cifka_advanced, title='First Monophonic Melody')


Set Tempo Events:
Track 0: Tempo set to 120.00 BPM


In [5]:
path = r'C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\synthetic_dataset\monophonic_midis\melodies'
# Names of all MIDI files sitting directly in the melodies directory
midi_files = [f for f in os.listdir(path) if f.endswith(('.mid', '.midi'))]

# Play whichever entry the directory listing returned first
file = os.path.join(path, midi_files[0])

MIDIPlayer(file, 500, styler=cifka_advanced, title='First Monophonic Melody')


Does the melody extraction always set a file to 120 BPM?

In [2]:
# Sanity check: the working directory should be the repository root after the chdir calls.
os.getcwd()

'c:\\Users\\cunn2\\OneDrive\\DSML\\Project\\thesis-repo'

In [5]:
from ast import literal_eval
# Display the constant directly; evaluating its name as a string yields the same value.
MONOPHONIC_MIDIS_PATH


'plagdet/data/synthetic_dataset/monophonic_midis/melodies'

In [None]:
import os

os.chdir('../')
os.chdir('../')
from plagdet.src.defaults import *



# Preview the "<stem>_mono.mid" name that would be derived for each file under aa_midis.
# NOTE(review): hard-coded absolute local path, and every file is listed (no .mid/.midi filter).
for root, dirs, files in os.walk(r'C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis'):
    for file in files:
        print(os.path.join(root, f'{os.path.splitext(file)[0]}_mono.mid'))

C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\Heartbreaker_mono.mid
C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\No-Diggity_mono.mid
C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\No-Scrubs_mono.mid
C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\Oh-Why_mono.mid
C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\Shape-of-You_mono.mid
C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\You-Drive-Me-Crazy_mono.mid


In [None]:
# NOTE(review): hard-coded absolute local path.
file = r'C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\No-Scrubs.mid'
# Tempo and time signature of the original file (recorded output: 93 BPM, 4/4).
print(get_tempo_and_time_signature(file))
# The (tempo, time signature) tuple is unpacked straight into calculate_bars_for_three_minutes.
print(calculate_bars_for_three_minutes(*get_tempo_and_time_signature(file)))
# Prints a 3-tuple; the meaning of its fields is not visible in this notebook — see plagdet.scripts.synthetic_data.utils.
print(calculate_total_bars(file))

MIDIPlayer(file, 500, styler=cifka_advanced, title='My Player')

(93.000186000372, (4, 4))
69.750139500279
(86.73177083333333, 346.9270833333333, 93.000186000372)


In [None]:
# NOTE(review): `file` is not defined in this cell; it relies on a value left in the kernel by another cell.
generator = SyntheticDataGenerator()

dest = 'segment.midi'
# Meaning of the arguments 5 and 2 is not visible here — presumably start bar and bar count; confirm against extract_bars.
generator.extract_bars(file, dest, 5, 2)
# The recorded output shows the segment reporting 120 BPM although the original logs ~93 BPM.
print(get_tempo_and_time_signature(dest))

MIDIPlayer(dest, 500, styler=cifka_advanced, title='My Player')


[2024-09-03 13:18:41] [INFO ] Original file: Tempo: 93.000186000372, Time Signature: (4, 4)


(120, (4, 4))


In [None]:
file = r'C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\No-Scrubs.mid'

from mido import MidiFile
print("Original file tempo:")
print(get_tempo_and_time_signature(file))

extractor = MonophonicMelodyExtractor()
extractor.make_file_valid(file, 'melody.midi')

print("\nModified file tempo:")
print(get_tempo_and_time_signature('melody.midi'))

# Add this: Print all tempo changes in the original and modified files
def print_all_tempos(midi_file):
    """Print, track by track, the BPM of every ``set_tempo`` message in ``midi_file``."""
    parsed = MidiFile(midi_file)
    print(f"Tempos in {midi_file}:")
    for track_number, track in enumerate(parsed.tracks):
        tempo_events = (event for event in track if event.type == 'set_tempo')
        for event in tempo_events:
            # event.tempo is divided into 60,000,000 to obtain beats per minute
            print(f"Track {track_number}: Tempo {60000000 / event.tempo:.2f} BPM")

print("\nOriginal file tempos:")
print_all_tempos(file)

print("\nModified file tempos:")
print_all_tempos('melody.midi')

MIDIPlayer('melody.midi', 500, styler=cifka_advanced, title='My Player')

Original file tempo:
(93.000186000372, (4, 4))


[2024-09-03 13:18:42] [INFO ] Original file: Tempo: 93.000186000372, Time Signature: (4, 4), Ticks per beat: 480



Modified file tempo:
(120.0, (4, 4))

Original file tempos:
Tempos in C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\No-Scrubs.mid:
Track 0: Tempo 93.00 BPM

Modified file tempos:
Tempos in melody.midi:
Track 0: Tempo 120.00 BPM


In [None]:
# NOTE(review): the recorded output of this cell ends with "TypeError: attribute must be an integer",
# so as written it fails; the cause lies inside adjust_tempo, which is not visible here.
# `extractor` and `file` come from earlier cells (hidden kernel state).
extractor.adjust_tempo(file, 120, 'melody.midi')
MIDIPlayer('melody.midi', 500, styler=cifka_advanced, title='My Player')


[2024-09-03 13:18:42] [INFO ] File before configuration: Tempo: 93.000186000372, Time Signature: (4, 4)


TypeError: attribute must be an integer

In [None]:
MIDIPlayer(file, 500, styler=cifka_advanced, title='My Player')


In [None]:
# file = r'C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\copyright_midis\532\metamidi_Ed_Sheeran_Shape_of_You_2.mid'
# NOTE(review): hard-coded absolute local path.
file = r'C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\No-Scrubs.mid'
# NOTE(review): this player is not the cell's last expression, so the notebook will not render it.
MIDIPlayer(file, 500, styler=cifka_advanced, title='My Player')
extractor = MonophonicMelodyExtractor()
extractor.make_file_valid(file, 'melody.midi')
# Recorded output: the produced melody reports 120.0 BPM while the original logs ~93 BPM.
print(get_tempo_and_time_signature('melody.midi'))
MIDIPlayer('melody.midi', 500, styler=cifka_advanced, title='My Player')


[2024-09-03 13:06:10] [INFO ] Original file: Tempo: 93.000186000372, Time Signature: (4, 4), Ticks per beat: 480


(120.0, (4, 4))


In [None]:
# NOTE(review): hard-coded absolute local path.
file = r'C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\Shape-of-You.mid'

print(get_tempo_and_time_signature(file))
print(calculate_bars_for_three_minutes(*get_tempo_and_time_signature(file)))
print(calculate_total_bars(file))

extractor = MonophonicMelodyExtractor()
# NOTE(review): the recorded output shows make_file_valid() rejecting `melody_index`
# with a TypeError, so this call does not match the current signature.
extractor.make_file_valid(file, 'melody.midi', melody_index=5)


(100.0, (4, 4))
75.0
(23.083854166666665, 92.33541666666666, 100.0)


TypeError: make_file_valid() got an unexpected keyword argument 'melody_index'

In [None]:
MIDIPlayer('melody.midi', 500, styler=cifka_advanced, title='My Player')

In [None]:
# NOTE(review): hard-coded absolute local path.
file = r'C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\Oh-Why.mid'

extractor = MonophonicMelodyExtractor()
# NOTE(review): the recorded output reports 0 melodies found and then an IndexError, so this cell fails as written.
extractor.make_file_valid(file, 'melody.midi', melody_index=1)
MIDIPlayer('melody.midi', 500, styler=cifka_advanced, title='My Player')

C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\Oh-Why.mid is longer than 3 minutes. Truncating to 90 bars.
Melody index 1 is out of range. The file has 0 melodies. Using melody at index -1.


IndexError: list index out of range

In [None]:
# NOTE(review): hard-coded absolute local path.
file = r'C:\Users\cunn2\OneDrive\DSML\Project\thesis-repo\plagdet\data\midi_databases\aa_midis\Shape-of-You.mid'

extractor = MonophonicMelodyExtractor()
# Writes 'melody_1.midi', which the plagiarism-generation cell below reads as its target file.
extractor.file_to_melody_file(file, 'melody_1.midi', melody_index=0)
MIDIPlayer('melody_1.midi', 500, styler=cifka_advanced, title='My Player')

In [None]:
import random
import copy
from typing import List
import pretty_midi

def extract_two_bar_segment(melody: "pretty_midi.Instrument", start_bar: int,
                            seconds_per_bar: float = 4.0) -> List["pretty_midi.Note"]:
    """Return copies of the notes that start inside a two-bar window.

    pretty_midi stores note times in seconds, so the window is computed in seconds.

    Args:
        melody: Instrument whose ``notes`` are searched.
        start_bar: Zero-based index of the first bar of the window.
        seconds_per_bar: Duration of one bar in seconds. The default of 4.0 keeps
            the previous behaviour (4/4 at 60 BPM); pass the real value for other tempi.

    Returns:
        Deep copies of the matching notes, so callers may mutate them without
        touching ``melody``.
    """
    start_time = start_bar * seconds_per_bar
    end_time = (start_bar + 2) * seconds_per_bar

    return [copy.deepcopy(note) for note in melody.notes if start_time <= note.start < end_time]


def replace_segment(target_melody: "pretty_midi.Instrument", segment: List["pretty_midi.Note"],
                    replace_bar: int, seconds_per_bar: float = 4.0):
    """Replace two bars of ``target_melody`` with (copies of) ``segment``.

    Notes that start inside the two-bar window are removed, which mirrors how
    ``extract_two_bar_segment`` selects notes. The segment is shifted so that its
    earliest note lands on the start of the window.

    Args:
        target_melody: Instrument modified in place.
        segment: Notes to insert; they are copied, not moved.
        replace_bar: Zero-based index of the first bar to overwrite.
        seconds_per_bar: Duration of one bar in seconds (default 4.0 = 4/4 at 60 BPM).
    """
    start_time = replace_bar * seconds_per_bar
    end_time = (replace_bar + 2) * seconds_per_bar

    # Remove existing notes that start in the 2-bar window
    target_melody.notes = [
        note for note in target_melody.notes
        if not (start_time <= note.start < end_time)
    ]

    if segment:
        # Align on the earliest note; the list is not guaranteed to be time-ordered.
        offset = start_time - min(note.start for note in segment)
        for note in segment:
            new_note = copy.deepcopy(note)
            new_note.start += offset
            new_note.end += offset
            target_melody.notes.append(new_note)

    target_melody.notes.sort(key=lambda x: x.start)


def apply_subtle_disturbance(segment: List["pretty_midi.Note"]):
    """Randomly nudge the pitch and timing of every note in ``segment`` in place.

    Pitch moves by -1, 0 or +1 semitone and is kept inside the MIDI range 0-127.
    Timing moves by up to 25 ms either way, but never before time 0.
    """
    for note in segment:
        # Subtle pitch shift (-1 to +1 semitones), clamped to valid MIDI pitches
        note.pitch = min(127, max(0, note.pitch + random.randint(-1, 1)))

        # Subtle timing shift (-25 to +25 milliseconds), never before time zero
        shift = max(random.uniform(-0.025, 0.025), -note.start)
        note.start += shift
        note.end += shift


def _seconds_per_bar(midi: "pretty_midi.PrettyMIDI") -> float:
    """Return the length of one bar in seconds from the file's first tempo and time signature."""
    _, tempi = midi.get_tempo_changes()
    tempo_bpm = float(tempi[0]) if len(tempi) else 120.0
    signatures = midi.time_signature_changes
    if signatures:
        quarter_notes_per_bar = signatures[0].numerator * 4.0 / signatures[0].denominator
    else:
        quarter_notes_per_bar = 4.0  # no time signature recorded: assume 4/4
    return quarter_notes_per_bar * 60.0 / tempo_bpm


def _count_full_bars(melody: "pretty_midi.Instrument", seconds_per_bar: float) -> int:
    """Return how many complete bars the melody's notes span."""
    if not melody.notes:
        return 0
    return int(max(note.end for note in melody.notes) // seconds_per_bar)


def create_plagiarized_melody(source_file: str, target_file: str, output_file: str):
    """Copy a disturbed two-bar segment of one melody into another and save the result.

    Args:
        source_file: MIDI file the segment is taken from.
        target_file: MIDI file that receives the segment.
        output_file: Path the modified target is written to.

    Raises:
        ValueError: if either file has no instrument or spans fewer than two full bars.
    """
    source_midi = pretty_midi.PrettyMIDI(source_file)
    target_midi = pretty_midi.PrettyMIDI(target_file)

    if not source_midi.instruments or not target_midi.instruments:
        raise ValueError("Both MIDI files must contain at least one instrument")

    source_melody = source_midi.instruments[0]  # Assuming the first instrument is the melody
    target_melody = target_midi.instruments[0]

    # Bar lengths come from each file's own tempo and time signature.
    source_spb = _seconds_per_bar(source_midi)
    target_spb = _seconds_per_bar(target_midi)
    source_bars = _count_full_bars(source_melody, source_spb)
    target_bars = _count_full_bars(target_melody, target_spb)
    if source_bars < 2 or target_bars < 2:
        raise ValueError("Both melodies must span at least two full bars")

    # Extract a random 2-bar segment from the source
    start_bar = random.randint(0, source_bars - 2)
    segment = extract_two_bar_segment(source_melody, start_bar, source_spb)

    # Rescale to the target's bar length so two source bars still fill two target bars.
    time_scale = target_spb / source_spb
    if time_scale != 1.0:
        for note in segment:
            note.start *= time_scale
            note.end *= time_scale

    # Apply subtle disturbance to the segment
    apply_subtle_disturbance(segment)

    # Replace a random 2-bar segment in the target
    replace_bar = random.randint(0, target_bars - 2)
    replace_segment(target_melody, segment, replace_bar, target_spb)

    # Save the modified target melody
    target_midi.write(output_file)

# Usage
# Inputs are the files written by earlier cells ('melody.midi' and 'melody_1.midi'), relative to the working directory.
# NOTE(review): no random seed is set, so the generated output differs between runs.
source_file = "melody.midi"
target_file = "melody_1.midi"
output_file = "plag.midi"

create_plagiarized_melody(source_file, target_file, output_file)

In [None]:
MIDIPlayer('melody_1.midi', 500, styler=cifka_advanced, title='My Player')

In [None]:
import random
from music21 import converter, stream, note, chord, duration

def extract_two_bar_segment(midi_stream):
    """Pick two consecutive measures at random from ``midi_stream`` and return them in a new Stream.

    NOTE(review): this redefines ``extract_two_bar_segment`` from the earlier
    pretty_midi cell with a different signature; whichever cell ran last wins.

    Raises:
        ValueError: if ``measureOffsetMap()`` reports fewer than two entries.
    """
    # Assuming 4/4 time signature
    measures = midi_stream.measureOffsetMap()
    if len(measures) < 2:
        raise ValueError("MIDI file is too short")
    
    start_measure = random.randint(0, len(measures) - 2)
    segment = stream.Stream()
    for i in range(2):
        # NOTE(review): measureOffsetMap() presumably returns a mapping keyed by offset;
        # if so, this integer index is not "the i-th measure" — verify against the music21 docs.
        segment.append(measures[start_measure + i])
    return segment

def apply_perturbation(segment):
    """Randomly alter the notes of ``segment`` in place and return the same object.

    For each Note: transpose by a random -2..+2 semitones, rescale the duration by
    0.8-1.2 with probability 0.2, and remove the note with probability 0.1. For each
    note or rest visited, insert a new sixteenth note (MIDI pitch 60-72) at the same
    offset with probability 0.1.

    NOTE(review): elements are removed from and inserted into ``segment`` while it is
    being iterated via ``recurse()``; this may skip or revisit elements — confirm.
    """
    for element in segment.recurse().notesAndRests:
        if isinstance(element, note.Note):
            # Random pitch change (-2 to +2 semitones)
            element.pitch.transpose(random.randint(-2, 2), inPlace=True)
            
            # Random note duration change
            if random.random() < 0.2:
                element.duration = duration.Duration(element.duration.quarterLength * random.uniform(0.8, 1.2))
            
            # Random note deletion
            if random.random() < 0.1:
                segment.remove(element)
        
        # Random note insertion
        if random.random() < 0.1:
            new_note = note.Note()
            new_note.pitch.midi = random.randint(60, 72)  # C4 to C5
            new_note.duration = duration.Duration(0.25)  # Sixteenth note
            segment.insert(element.offset, new_note)
    
    return segment

def insert_segment(target_stream, segment):
    """Clear two randomly chosen consecutive measures of ``target_stream`` and insert ``segment`` there.

    ``target_stream`` is modified in place; nothing is returned.
    """
    measures = target_stream.measureOffsetMap()
    # NOTE(review): randint raises ValueError when fewer than two measures are reported.
    insert_measure = random.randint(0, len(measures) - 2)
    
    # Remove existing content in the target measures
    for i in range(2):
        # NOTE(review): same integer-index-into-offset-map concern as in extract_two_bar_segment — verify.
        for element in measures[insert_measure + i]:
            target_stream.remove(element)
    
    # Insert the new segment
    # NOTE(review): could not confirm that music21 streams provide `insertAtMeasure`; verify before relying on this.
    target_stream.insertAtMeasure(insert_measure, segment)

def create_plagiarized_melody(source_file, target_file, output_file):
    """Parse two files with music21, copy a perturbed two-bar segment from source to target, and write MIDI.

    NOTE(review): this redefines ``create_plagiarized_melody`` from the earlier
    pretty_midi cell; whichever cell ran last wins.

    Args:
        source_file: Path of the file the segment is taken from.
        target_file: Path of the file that receives the segment.
        output_file: Path the modified target is written to in MIDI format.
    """
    source_stream = converter.parse(source_file)
    target_stream = converter.parse(target_file)
    
    segment = extract_two_bar_segment(source_stream)
    perturbed_segment = apply_perturbation(segment)
    insert_segment(target_stream, perturbed_segment)
    
    target_stream.write('midi', output_file)

# Usage
# NOTE(review): these are placeholder paths; the recorded output shows the call
# failing with FileNotFoundError. Point them at real files before running.
source_file = "path/to/source/melody.mid"
target_file = "path/to/target/melody.mid"
output_file = "path/to/output/plagiarized_melody.mid"

create_plagiarized_melody(source_file, target_file, output_file)

FileNotFoundError: Cannot find file in path/to/source/melody.mid