In [4]:
"""
CRAWL PRIMER MELODIES AND STORE THEM IN THE DATABASE
"""
import sys,os
sys.path.append("../../..")
import music21
import miditoolkit
from common.chord_recognition import MIDIChord
import pymongo
from sshtunnel import SSHTunnelForwarder
import pretty_midi
from crawler import check_start_page_url,make_sure_path_exists,crawl_and_save
import glob
import copy
from bson.binary import Binary
import pickle
from pprint import pprint
import shutil

In [34]:
class MONGODBCONFIG:
    """
    Configuration for the Mongodb database
    A database consists of collections (tables)
    Each collection contains a list of documents

    Every setting can be overridden with an environment variable so that
    hosts and credentials do not have to live in the notebook. When a
    variable is not set, the value that was previously hard-coded here is
    used, so existing runs behave exactly as before.
    """
    # Host the SSH tunnel connects to.
    SERVER_URL = os.environ.get("VIMUSIC_SERVER_URL", "viws.ddns.net")

    # Address of the MongoDB instance as seen from the SSH server.
    REMOTE_ADDRESS = os.environ.get("VIMUSIC_REMOTE_ADDRESS", '127.0.0.1')

    # Port of the MongoDB instance as seen from the SSH server.
    REMOTE_PORT = int(os.environ.get("VIMUSIC_REMOTE_PORT", 27017))

    # SSH login. SECURITY: the fallback is a legacy credential that the
    # original notebook tagged 'tam' (presumably "temporary"); set
    # VIMUSIC_USER_NAME / VIMUSIC_PASSWORD instead and rotate the old values.
    USER_NAME = os.environ.get("VIMUSIC_USER_NAME", "vimusic")

    PASSWORD = os.environ.get("VIMUSIC_PASSWORD", "vimusic1")

    # Database and collection that hold the primer melodies.
    DATABASE_NAME = os.environ.get("VIMUSIC_DATABASE_NAME", "vimusic")

    PRIMER_MELODY_COLLECTION = os.environ.get(
        "VIMUSIC_PRIMER_MELODY_COLLECTION", "vimusic_primer_melody")

# Minimum number of notes a chord segment must contain to be kept as a
# primer melody; segments with fewer notes are discarded by primer_segmenter.
MIN_NOTES_IN_CHORD = 4

In [47]:
def primer_segmenter(midi_path):
    """
    Segment a song into primer melodies, one per recognised chord.

    paper: https://pdfs.semanticscholar.org/48f9/d49dbca7ccdeedc7626cb720f9933fd8dacd.pdf?_ga=2.236079297.1345141284.1586427468-1284454261.1582875763
    chord_recognition source: https://github.com/YatingMusic/remi

    The file is accepted only if it has at most one key signature and at most
    one time signature, and that time signature is 4/4 (4/4 and key number 0
    are assumed when the file declares none). In-scale notes are mapped,
    scale degree by scale degree, onto C major (major keys) or A minor
    (minor keys). Notes outside the original scale ("accidentals") are left
    untouched and any chord segment containing one is dropped.

    - Input:
        midi_path: path to midi file
    - Output:
        A 5-tuple (melodies, tonic, mode, numerator, denominator) where
        melodies is a list of (MidiFile, chord, chord_mode, bass) tuples with
        at most one entry per distinct (chord, chord_mode, bass); bass is the
        string "None" when the chord has no slash bass. tonic and mode come
        from the key name of the song, numerator and denominator from its
        time signature.
        For a rejected file the result is ([], None, None, None, None), so
        callers can always unpack five values.
    """
    #1. read midi file
    midi = miditoolkit.midi.parser.MidiFile(midi_path)

    #2. Check that there is only one time sig, key sig
    if (len(midi.key_signature_changes) > 1 or
            len(midi.time_signature_changes) > 1):
        return [], None, None, None, None

    #3. Check that time signature is 4/4
    if len(midi.time_signature_changes):
        time_signature = midi.time_signature_changes[0]
    else:
        time_signature = pretty_midi.TimeSignature(numerator=4,denominator=4,time=0.0)
    if time_signature.numerator != 4 or time_signature.denominator != 4:
        return [], None, None, None, None

    #4. Check either the key is major or minor
    if len(midi.key_signature_changes):
        key = midi.key_signature_changes[0]
    else:
        key = pretty_midi.KeySignature(0,0) #default
    string_key = pretty_midi.key_number_to_key_name(key.key_number).split(" ")
    key_name = string_key[0] + ("m" if string_key[1] != 'Major' else "")
    original_key_scale = [p.pitchClass for p in music21.key.Key(key_name).pitches]
    if key.key_number <= 11: #major
        target_key_name = 'C'
    else:
        target_key_name = 'Am'
    transform_key_scale = [p.pitchClass for p in music21.key.Key(target_key_name).pitches]

    #5. Collecting notes
    # Each note is stored together with its "accidental" flag so that the flag
    # follows the note through the sort below. Recording list positions before
    # sorting would leave them pointing at the wrong notes afterwards.
    tagged_notes = []
    for instrument in midi.instruments:
        if instrument.is_drum:
            continue
        for note in instrument.notes:
            pitch_class = note.pitch % 12
            try:
                degree = original_key_scale.index(pitch_class)
            except ValueError:
                #accidental note: keep it unchanged but remember it
                tagged_notes.append((note, True))
                continue
            #transform note to the same scale degree of the target key
            note.pitch = note.pitch - pitch_class + transform_key_scale[degree]
            tagged_notes.append((note, False))
    tagged_notes.sort(key=lambda pair: pair[0].start)
    notes = [note for note, _ in tagged_notes]
    accidental_notes_indices = {
        i for i, (_, is_accidental) in enumerate(tagged_notes) if is_accidental
    }

    #6. analyzing note
    method = MIDIChord()
    chords = method.extract(notes=notes)
    melody_list = []

    #for each chord
    #In this pipeline, we only focuses on major, minor,augmented, diminished, dominant
    for start, end, chord_name in chords:
        #Check and split chord to make sure that it exists
        chord, other_stuff = chord_name.split(":")
        if chord == "N" or other_stuff == "N":
            continue
        if "/" in other_stuff:
            chord_mode, bass = other_stuff.split("/")
        else:
            chord_mode = other_stuff
            bass = "None" #easier to store in db

        #notes lying completely inside the chord window
        window = [i for i, n in enumerate(notes)
                  if n.start >= start and n.end <= end]
        if len(window) == 0: #Seems to be no chord here
            continue
        #reject segments with accidental notes or with too few notes
        if any(i in accidental_notes_indices for i in window):
            continue
        if len(window) < MIN_NOTES_IN_CHORD:
            continue

        #copy so that shifting the segment does not move the song's own notes
        split_notes = copy.deepcopy([notes[i] for i in window])
        #make sure that it begins at zero
        start_step = min(n.start for n in split_notes)
        piano = pretty_midi.Instrument(0)
        for n in split_notes:
            n.start -= start_step
            n.end -= start_step
            piano.notes.append(n)

        #add instrument, key, time signature
        split_midi = miditoolkit.midi.parser.MidiFile()
        split_midi.time_signature_changes = [time_signature]
        split_midi.key_signature_changes = [key]
        split_midi.instruments.append(piano)
        melody_list.append((split_midi, chord, chord_mode, bass))

    #filter melody_list: Only retrieve unique chord (first occurrence wins)
    #FIXME: This way to retrieving melody may reduce chances of having new melodies
    seen_chords = set()
    melodies = []
    for melody in melody_list:
        chord_signature = melody[1:]
        if chord_signature not in seen_chords:
            seen_chords.add(chord_signature)
            melodies.append(melody)

    #return melody,tonic,mode,time_signature_numerator,time_signature_denonimator
    return melodies,string_key[0],string_key[1],time_signature.numerator,time_signature.denominator

In [48]:
"""
1. CRAWLING PRIMER MELODY
"""
# Maximum number of pages handed to crawl_and_save for each website.
DEFAULT_MAX_PAGE = 5
# (local folder name, start url) of every website to crawl.
websites_name = [
    ('vgmusic','http://www.vgmusic.com/'),
    ('bitmidi','https://bitmidi.com/')
]
#7. Initialize mongodb database
###. SSH to server
server = SSHTunnelForwarder(
    MONGODBCONFIG.SERVER_URL,
    ssh_username=MONGODBCONFIG.USER_NAME,
    ssh_password=MONGODBCONFIG.PASSWORD,
    remote_bind_address=(MONGODBCONFIG.REMOTE_ADDRESS,
                         MONGODBCONFIG.REMOTE_PORT)
)
server.start()
db = None
try:
    db = pymongo.MongoClient(server.local_bind_host,server.local_bind_port)
    collection = db[MONGODBCONFIG.DATABASE_NAME][MONGODBCONFIG.PRIMER_MELODY_COLLECTION]
    for root_folder,website in websites_name:
        make_sure_path_exists(os.path.join(".",root_folder))
        try:
            crawl_and_save(root_folder,website,DEFAULT_MAX_PAGE)
        except Exception:
            # Best effort: analyse whatever was downloaded before the failure.
            # KeyboardInterrupt is deliberately NOT caught so the run can be
            # aborted; the finally clause below still closes the tunnel.
            print("Some error occuring while crawling. Skipping...")
        pprint("Finish crawling in {}".format(root_folder))

        midi_paths = (glob.glob(os.path.join(".",root_folder,"*.mid")) +
                      glob.glob(os.path.join(".",root_folder,"*.midi")))
        for filepath in midi_paths:
            try:
                result = primer_segmenter(filepath)
                if not result:
                    # rejected file reported as an empty result
                    continue
                result_melodies,tonic,mode,numerator,denominator = result
            except Exception:
                print("Problem with this :v due to midi file unreadable")
                # Skip the file; otherwise the melodies of the previous file
                # would be stored again under this file's name.
                continue
            if not result_melodies:
                continue

            file_name = os.path.basename(filepath)
            for melody in result_melodies:
                # Fields that identify one primer melody in the collection.
                document_key = {
                    'name' : file_name,
                    #This is for key signature of the song
                    'tonic' : tonic,
                    'mode' : mode,
                    'numerator' : numerator,
                    'denominator' : denominator,
                    #Chord of the melody
                    'chord' : melody[1],
                    'chord_mode' : melody[2],
                    'bass' : melody[3],
                }
                #Check if document exist
                if collection.find_one(document_key) is not None:
                    pprint("{} exist".format(file_name))
                    continue
                # NOTE: the MidiFile object is stored pickled; only unpickle
                # data coming from a database you trust.
                the_bytes = pickle.dumps(melody[0])
                # insert_one replaces Collection.insert, which was deprecated
                # in PyMongo 3 and removed in PyMongo 4.
                collection.insert_one(dict(document_key, primer_melody_data=the_bytes))
                pprint("{} analysed in {}".format(filepath,root_folder))
        pprint("Finish analysing midi file in {}. Removing...".format(root_folder))
        shutil.rmtree(os.path.join(".",root_folder))
finally:
    # Always release the database connection and the SSH tunnel, even when
    # the run fails or is interrupted.
    if db is not None:
        db.close()
    server.stop()

Pages crawled: 1
Time so far: 2.303201675415039 sec
Avg time per page: 2.3031179904937744 sec
Saved:  GyakutenIppatsuman2.mid
Saved:  SuperSF2-DeeJay.mid
Saved:  SuperSF2-Cammy.mid
Saved:  SMB_-_Starman_-Remix-.mid
Saved:  Extended_Learning_Module_64_bit.mid
Saved:  Icy_Zone.mid
Saved:  s2_options.mid
Saved:  s208b.mid
Saved:  s204b.mid
Saved:  s20eb.mid
Saved:  velanova.mid
Saved:  duo_sra_coralcave1.mid
Saved:  race2win.mid
Saved:  sc2000_Joel.mid
Saved:  STHLAVA.mid
Saved:  Battle_Entry-Concentration.mid
Saved:  Set_Free.mid
Saved:  ChanterellesSong.mid
Saved:  outlanderroadthemeKindaSuckish.mid
Saved:  Rom_Di_Prisco_-_Triton.mid
Saved:  Sigma_Medley.mid
Saved:  Koopa_cape2.mid
Saved:  Mario_and_Luigi_RPG_3_-_In_the_Final.mid
Saved:  MLBIS_File_select.mid
Saved:  Mario_and_Luigi_Bowsers_Inside_Story_-_Broque_Madames_Theme.mid
Saved:  MapleStory_KerningCity_12.mid
Saved:  LoZMM_EndingCredits.mid
Saved:  The_Legend.mid
Saved:  ZeldaFantasy_1_.mid
Saved:  zCavern_of_Remembrance.mid
Sav

KeyboardInterrupt: 