# Pipeline

## MIDI → User Model → Used MIDI

### Ideal User

In [1]:
from Clarinet.utils.generatedata import useFolder

# Apply the "Ideal" user type to the original MIDI collection via useFolder
# and write the results to the matching queries folder.
collection_dir = "Data/Midi/Collection/Original Collection"
# Named user_type (not `type`) so the builtin `type` stays usable in later cells.
user_type = "Ideal"
output_folder = f"Data/Midi/Queries/{user_type} Queries"

# No extra positional or keyword options are passed for this user type.
args = []
kwargs = {}

useFolder(collection_dir, output_folder, user_type, *args, **kwargs)

100%|██████████| 909/909 [03:58<00:00,  3.80it/s]


### Noisy User

In [1]:
from Clarinet.utils.generatedata import useFolder

# Apply the "Noisy" user type to the original MIDI collection via useFolder
# and write the results to the matching queries folder.
collection_dir = "Data/Midi/Collection/Original Collection"
# Named user_type (not `type`) so the builtin `type` stays usable in later cells.
user_type = "Noisy"
output_folder = f"Data/Midi/Queries/{user_type} Queries"

# Tunable options forwarded to useFolder as keyword arguments.
# NOTE(review): their exact meaning is defined by the Noisy user model in
# Clarinet; presumably -1 selects "all channels" and 0 disables a noise
# source -- confirm against the library.
channel = -1
pitch = 0
extra = 0
delete = 0
velocity = 0
length = 0

args = []
kwargs = dict(
    channel=channel,
    pitch=pitch,
    extra=extra,
    delete=delete,
    velocity=velocity,
    length=length,
)

useFolder(collection_dir, output_folder, user_type, *args, **kwargs)

100%|██████████| 909/909 [03:00<00:00,  5.04it/s]


# Text Conversion
## Collection

In [None]:
from Clarinet.utils.generatedata import midiFolder2Text

# Convert the original MIDI collection to its text representation.
collection_dir = "Data/Midi/Collection/Original Collection"
# Written under Data/Text/Collection/ so the output lands in the same
# directory the Evaluation cell uses as its text collection_dir.
output_folder = "Data/Text/Collection/Original Collection"
channel = 0
# NOTE(review): -1 presumably means "no limit" for both values -- confirm
# against midiFolder2Text.
num_files = -1
num_notes = -1

midiFolder2Text(
    collection_dir,
    output_folder=output_folder,
    num_files=num_files,
    num_notes=num_notes,
    channel=channel,
)

## Queries

In [2]:
from Clarinet.utils.generatedata import midiFolder2QueryText

# Convert the MIDI queries of every user type to text queries.
types = ["Ideal", "Noisy"]
channel = 0
num_queries = 30
num_notes = 30

# One input folder and one output folder per user type, in the same order.
query_folders = [f"Data/Midi/Queries/{user_type} Queries" for user_type in types]
output_folders = [f"Data/Text/Queries/{user_type} Queries" for user_type in types]

# Both lists are derived from `types`, so zip pairs them one-to-one.
for query_dir, output_dir in zip(query_folders, output_folders):
    midiFolder2QueryText(
        query_dir,
        output_folder=output_dir,
        num_queries=num_queries,
        num_notes=num_notes,
        channel=channel,
    )

100%|██████████| 30/30 [00:03<00:00,  7.73it/s]
100%|██████████| 30/30 [00:03<00:00,  9.47it/s]


# Evaluation

In [None]:
import subprocess
import sys
from Clarinet.utils.fast import fast

collection_dir = "Data/Text/Collection/Original Collection"  # Collection dir is always in text form

types = ["Ideal", "Noisy"]

query_length = 5
stride_length = 1
num_queries = 2
num_processes = 4

# One query folder and one results folder per user type, in the same order.
query_folders = [f"Data/Text/Queries/{user_type} Queries" for user_type in types]
output_folders = [f"Results/Queries/{user_type} Queries" for user_type in types]


def run_fasteval(query_dir, collection_dir, query_length, stride_length, output_dir="", num_queries="-1"):
    """Run ``fasteval.py`` as a subprocess for one query folder.

    Args:
        query_dir: Folder of text queries, passed as ``-q``.
        collection_dir: Text collection folder, passed as ``-c``.
        query_length: Passed as ``-l`` (converted to ``str``).
        stride_length: Passed as ``-s`` (converted to ``str``).
        output_dir: Passed as ``-o``; defaults to an empty string.
        num_queries: Passed as ``-n`` (converted to ``str``).

    A non-zero exit status is reported on stderr instead of raising, so one
    failed run does not abort the remaining evaluations.
    """
    # Use the interpreter running this notebook rather than whatever
    # "python" resolves to on PATH, so the same environment is used.
    interpreter = sys.executable or "python"
    command = [
        interpreter, "fasteval.py",
        "-q", query_dir,
        "-l", str(query_length),
        "-c", collection_dir,
        "-s", str(stride_length),
        "-o", output_dir,
        "-n", str(num_queries),
    ]
    completed = subprocess.run(command)
    if completed.returncode != 0:
        print(
            f"fasteval.py exited with status {completed.returncode} for {query_dir}",
            file=sys.stderr,
        )


# Argument tuples in the positional order of run_fasteval's parameters.
# query_folders and output_folders are both derived from `types`, so they
# always pair one-to-one.
inputs = [
    (query_dir, collection_dir, query_length, stride_length, output_dir, num_queries)
    for query_dir, output_dir in zip(query_folders, output_folders)
]

fast(run_fasteval, inputs, num_processes=num_processes)

# Analysis
### Compile Results

In [None]:
from Clarinet.evaluation import compile

metrics = [
    "Recall@1",
    "Recall@3",
    "Recall@5",
    "Recall@10",
    "Mean Rank",
    "Margin of Error",
]
naming = {
    "Pitch": [0, 0.05, 0.1, 0.2, 0.3],
    "Extra": [0],
    "Deleted": [0],
}
# NOTE(review): the Evaluation cell writes its results under
# "Results/Queries/{type} Queries"; confirm this is the intended directory.
result_dirs = ["Results/Noisy Queries"]

# Compile every metric for every results directory (directories in the
# outer position, metrics in the inner one).
jobs = ((results_root, metric_name) for results_root in result_dirs for metric_name in metrics)
for results_root, metric_name in jobs:
    compile(results_root, naming, metric_name)

### Plot Trends

In [None]:
from Clarinet.evaluation import trends
import os

# Analysis folders whose CSV files should be plotted.
folders = ["Results/Analysis/Noisy Queries"]

for folder in folders:
    # Gather every .csv file below the folder (recursively) before plotting.
    csv_files = [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(folder)
        for name in names
        if name.endswith(".csv")
    ]
    # Plot the trend for each collected CSV file.
    for csv_path in csv_files:
        trends(csv_path, "Pitch")