# Pipeline
MIDI → User Model → Used Midi → Query Text

### Imports

In [2]:
from Clarinet.utils.generatedata import genTextQueries,genMidiQueries

# Ideal User

In [3]:
from Clarinet.utils.generatedata import genTextQueries

# Generate text queries from the MIDI collection using the "Ideal" user model.

# Folder holding the source MIDI collection.
collection_dir = "Data/Midi/Collection/Original Collection"

# Named `user_type` rather than `type`: notebook cells share one global
# namespace, so binding `type` here would shadow the builtin `type()` for
# every cell executed afterwards.
user_type = "Ideal"
output_folder = f"Data/Midi/Queries/{user_type} Queries"

# If delete_data is True, all files in output_folder are deleted.
# If delete_data is False, all files in output_folder are moved to discarded.
delete_data = True
num_notes = 15
num_queries = 2

# Extra positional / keyword arguments forwarded unchanged to genTextQueries.
args = []
kwargs = {"channel": [0]}

genTextQueries(
    collection_dir,
    user_type,
    output_folder,
    delete_data,
    num_notes,
    num_queries,
    *args,
    **kwargs,
)

100%|██████████| 2/2 [00:00<00:00, 13.62it/s]
100%|██████████| 1/1 [00:00<00:00,  6.36it/s]


# Noisy User

In [6]:
from Clarinet.utils.generatedata import genTextQueries

# Generate text queries from the MIDI collection using the "Noisy" user model.

# Folder holding the source MIDI collection.
collection_dir = "Data/Midi/Collection/Original Collection"

# Named `user_type` rather than `type`: notebook cells share one global
# namespace, so binding `type` here would shadow the builtin `type()` for
# every cell executed afterwards.
user_type = "Noisy"
output_folder = f"Data/Midi/Queries/{user_type} Queries"

# If delete_data is True, all files in output_folder are deleted.
# If delete_data is False, all files in output_folder are moved to discarded.
delete_data = True
num_notes = 15
# NOTE(review): -1 presumably means "no limit" -- confirm against genTextQueries.
num_queries = -1

# Extra positional arguments for genTextQueries (none). Forwarded below so this
# call has the same shape as the one in the Ideal User cell.
args = []

# Parameter grid: every key maps to a list of candidate values. The Evaluation
# cell expands this dict with itertools.product, one run per combination.
# NOTE(review): the values look like per-note noise rates -- confirm the exact
# meaning of each key against genTextQueries.
kwargs = {
    "channel": [0],
    "pitch": [0, 0.05, 0.1, 0.2, 0.3],
    "extra": [0, 0.05, 0.1, 0.2, 0.3],
    "delete": [0, 0.05, 0.1, 0.15, 0.2],
    "velocity": [0],
    "length": [0],
}

genTextQueries(
    collection_dir,
    user_type,
    output_folder,
    delete_data,
    num_notes,
    num_queries,
    *args,
    **kwargs,
)


100%|██████████| 909/909 [01:00<00:00, 14.98it/s]
 12%|█▏        | 111/909 [00:07<00:51, 15.58it/s]
  1%|          | 1/125 [01:07<2:20:10, 67.83s/it]


KeyboardInterrupt: 

# Collection to Text
Skip this step if the collection has already been converted to text.

In [None]:
from Clarinet.utils.generatedata import midiFolder2Text

# Convert the MIDI collection folder to its text representation.

collection_dir = "Data/Midi/Collection/Original Collection"

# Deliberately NOT stored in `output_folder`: the Evaluation cell derives its
# query folder from the global `output_folder` set by the query-generation
# cell. Overwriting it here would make the evaluation read the collection text
# folder as if it were the query folder whenever this optional cell is run.
# NOTE(review): the Evaluation cell reads the text collection from
# collection_dir.replace("Midi","Text"), i.e.
# "Data/Text/Collection/Original Collection", which differs from the path
# written here -- confirm which location is intended.
collection_text_dir = "Data/Text/Original Collection"

channel = 0
# NOTE(review): -1 presumably means "no limit" for both values -- confirm
# against midiFolder2Text.
num_files = -1
num_notes = -1

midiFolder2Text(
    collection_dir,
    output_folder=collection_text_dir,
    num_files=num_files,
    num_notes=num_notes,
    channel=channel,
)

# Evaluation

In [None]:
import subprocess
from Clarinet.utils.fast import fast
from tqdm import tqdm
import itertools

# --- Evaluation settings ----------------------------------------------------

# Number of worker processes handed to `fast`.
# CPUs/4 (Check Clarinet.evaluation.evaluate.py, line 15)
num_processes = 4

# Values forwarded to fasteval.py as -l (query length) and -s (stride length).
query_length = -1
stride_length = 1

# Values forwarded to fasteval.py as -n (number of queries to evaluate) and -a.
query_num = -1
collection_num = -1

# Both locations are derived from the variables left behind by the earlier
# cells, with "Midi" swapped for "Text" in the path.
query_folder = output_folder.replace("Midi", "Text")  # Root of the query folders to evaluate
collection_dir = collection_dir.replace("Midi", "Text")  # Always in TEXT form

# Reuse the parameter grid defined by the query-generation cell. The
# self-assignment changes nothing, but raises NameError right here if that
# cell has not been run.
kwargs = kwargs

# True when the query path contains "Text/"; run_fasteval then adds the -t flag.
dont_convert = "Text/" in query_folder

def run_fasteval(query_dir, collection_dir, query_length, stride_length, output_dir="", query_num="-1", dont_convert=False, collection_num=-1):
    """Run ``fasteval.py`` in a child ``python3`` process for one query folder.

    Every argument is forwarded as a command-line option of the script:

    * ``query_dir``       -> ``-q``
    * ``query_length``    -> ``-l`` (stringified)
    * ``collection_dir``  -> ``-c``
    * ``stride_length``   -> ``-s`` (stringified)
    * ``output_dir``      -> ``-o``
    * ``query_num``       -> ``-n`` (stringified)
    * ``dont_convert``    -> adds the bare ``-t`` flag when true
    * ``collection_num``  -> ``-a`` (stringified)

    The call blocks until the child process exits. Its exit status is not
    inspected and nothing is returned.
    """
    command = [
        "python3", "fasteval.py",
        "-q", query_dir,
        "-l", str(query_length),
        "-c", collection_dir,
        "-s", str(stride_length),
        "-o", output_dir,
        "-n", str(query_num),
    ]
    # The optional switch sits between -n and -a, exactly where the script has
    # always received it.
    if dont_convert:
        command.append("-t")
    command += ["-a", str(collection_num)]
    subprocess.run(command)


# Save query folders in the form Data/Noisy Queries/pitch/extra/deleted
#
# Expand the parameter grid in `kwargs` (name -> list of candidate values) into
# one fasteval run per combination, then execute the runs with `fast`.

keys = list(kwargs.keys())
values = list(kwargs.values())

inputs = []

for combo in itertools.product(*values):
    # Bound to `params`, not `kwargs`: rebinding the global grid to a single
    # combination of scalars would make the next execution of this cell fail
    # in itertools.product(*values) and would lose the grid for later cells.
    params = dict(zip(keys, combo))

    # One folder level per parameter, e.g. "Pitch 0.05".
    name_list = [f"{key.capitalize()} {value}" for key, value in params.items()]
    query_dir = f"{query_folder}/{'/'.join(name_list)}"

    # Results mirror the query folder layout under "Results".
    output_dir = query_dir.replace('Data/Text/Queries', 'Results')

    # Tuple order matches run_fasteval's positional parameters.
    inputs.append((query_dir, collection_dir, query_length, stride_length, output_dir, query_num, dont_convert, collection_num))

fast(run_fasteval, inputs, num_processes=num_processes)

# Analysis
### Compile Results

In [None]:
# Aliased on import so the builtin `compile()` is not shadowed in the
# notebook's shared global namespace.
from Clarinet.evaluation import compile as compile_results

# Metrics to compile; one compile call is made per (result_dir, metric) pair.
metrics = ["Recall@1", "Recall@3", "Recall@5", "Recall@10", "Mean Rank", "Margin of Error"]

# NOTE(review): the Evaluation cell names result folders with key.capitalize()
# of the generation kwargs ("Delete ...", plus Channel/Velocity/Length levels),
# while this mapping uses "Deleted" and only three keys -- confirm this matches
# what the compile function expects.
naming = {"Pitch": [0, 0.05, 0.1, 0.2, 0.3], "Extra": [0], "Deleted": [0]}

result_dirs = ["Results/Noisy Queries"]

# Compile every metric for every result directory.
for result_dir in result_dirs:
    for metric in metrics:
        compile_results(result_dir, naming, metric)

### Plot Trends

In [None]:
from Clarinet.evaluation import trends
import os

# Folders whose compiled CSV results should be plotted.
folders = ["Results/Analysis/Noisy Queries"]

for folder in folders:
    # Gather every .csv below `folder` (recursively) before plotting anything,
    # so the list is complete before the first `trends` call runs.
    csv_files = [
        os.path.join(root, file)
        for root, _subdirs, files in os.walk(folder)
        for file in files
        if file.endswith(".csv")
    ]

    # Plot the trend of each compiled CSV against the "Pitch" parameter.
    for csv_file in csv_files:
        trends(csv_file, "Pitch")