# Pipeline

## Query Generation

### Ideal Queries

In [None]:
from Clarinet.utils.generatedata import midiFolder2Queries
import os

# Source MIDI collection and the folder that receives the generated queries.
collection_dir = "Data/Midi/Original Collection"
output_folder = "Data/Midi/Ideal Queries"

# Extraction settings, forwarded unchanged to midiFolder2Queries.
# NOTE(review): -1 presumably means "no limit" for files/notes -- confirm
# against midiFolder2Queries.
channel = 0
num_files = -1
num_notes = -1

midiFolder2Queries(
    collection_dir,
    output_folder=output_folder,
    num_files=num_files,
    num_notes=num_notes,
    channel=channel,
)

### Noisy Queries

In [None]:
from Clarinet.utils.generatedata import useFolder

# Folder of queries to transform and the kind of transformation to apply.
query_dir = "Data/Midi/Ideal Queries"
# Named query_type (not `type`) so the builtin type() is not shadowed for
# the remainder of the notebook session.
query_type = "Noisy"
output_folder = f"Data/Midi/{query_type} Queries"

# Extra positional / keyword arguments forwarded verbatim to useFolder.
# NOTE(review): the numeric values look like per-kind noise settings --
# confirm their meaning against useFolder.
args = []
kwargs = {
    "channel": -1,
    "pitch": 0,
    "extra": 0,
    "delete": 0,
    "velocity": 0,
    "length": 0,
}

useFolder(query_dir, query_type, output_folder, *args, **kwargs)

## Text Conversion
Skip this step if the collection is already in text format.

In [None]:
from Clarinet.utils.generatedata import midiFolder2Text

# MIDI collection to convert.
collection_dir = "Data/Midi/Original Collection"
# Written under Data/ so that the evaluation step, which reads the text
# collection from "Data/Text/Original Collection", finds the converted files.
output_folder = "Data/Text/Original Collection"

# Conversion settings, forwarded unchanged to midiFolder2Text.
# NOTE(review): -1 presumably means "no limit" for files/notes -- confirm
# against midiFolder2Text.
channel = 0
num_files = -1
num_notes = -1

midiFolder2Text(
    collection_dir,
    output_folder=output_folder,
    num_files=num_files,
    num_notes=num_notes,
    channel=channel,
)

# Evaluation

In [None]:
import subprocess
from Clarinet.utils.fast import fast

# The collection is always supplied in text form.
collection_dir = "Data/Text/Original Collection"

# Query folders to evaluate and, position by position, where their results go.
query_folders = ["Data/Midi/Original Queries"]
output_folders = ["Results/Midi/Original Queries"]

# Settings handed to every fasteval run.
query_length = 5
stride_length = 1
query_num = 2

# Handed to `fast` as its num_processes argument.
num_processes = 4



def run_fasteval(query_dir,collection_dir,query_length,stride_length,output_dir="",query_num="-1"):
    """Run ``fasteval.py`` in a child process for one query folder.

    The script is looked up relative to the current working directory and is
    started with the interpreter that is running this code, so it sees the
    same environment (installed packages) as the caller.

    Args:
        query_dir: Passed to the script as ``-q``.
        collection_dir: Passed to the script as ``-c``.
        query_length: Passed as ``-l`` (converted with ``str``).
        stride_length: Passed as ``-s`` (converted with ``str``).
        output_dir: Passed as ``-o``; defaults to the empty string.
        query_num: Passed as ``-n`` (converted with ``str``).

    Returns:
        None. The child's exit status is not inspected.
    """
    # Local import keeps this function self-contained when it is shipped to
    # worker processes.
    import sys

    command = [
        # sys.executable instead of a bare "python": the latter depends on
        # PATH and may resolve to a different interpreter (or to none).
        sys.executable, "fasteval.py",
        "-q", query_dir,
        "-l", str(query_length),
        "-c", collection_dir,
        "-s", str(stride_length),
        "-o", output_dir,
        "-n", str(query_num),
    ]
    subprocess.run(command)

# One argument tuple per query folder, in run_fasteval's positional order.
inputs = []

for index, query_dir in enumerate(query_folders):
    # Query folders without a matching entry in output_folders are run with
    # an empty output directory.
    output_dir = output_folders[index] if index < len(output_folders) else ""
    inputs.append(
        (query_dir, collection_dir, query_length, stride_length, output_dir, query_num)
    )

# NOTE(review): `fast` presumably calls run_fasteval once per tuple across
# num_processes workers -- confirm against Clarinet.utils.fast.
fast(run_fasteval, inputs, num_processes=num_processes)

# Analysis
## Compile Results

In [None]:
from Clarinet.evaluation import compile
# Metrics to compile, one compile() call per (result directory, metric) pair.
metrics = [
    "Recall@1",
    "Recall@3",
    "Recall@5",
    "Recall@10",
    "Mean Rank",
    "Margin of Error",
]
# NOTE(review): presumably maps each noise kind to the levels that appear in
# the result names -- confirm against Clarinet.evaluation.compile.
naming = {
    "Pitch": [0, 0.05, 0.1, 0.2, 0.3],
    "Extra": [0],
    "Deleted": [0],
}
result_dirs = ["Results/Noisy Queries"]

for result_dir in result_dirs:
    for metric in metrics:
        compile(result_dir, naming, metric)

## Plot Trends

In [None]:
from Clarinet.evaluation import trends
import os

# Folders whose compiled CSV files should be plotted.
folders = ["Results/Analysis/Noisy Queries"]

for folder in folders:
    # Gather every .csv file anywhere below this folder.
    csv_files = [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(folder)
        for name in names
        if name.endswith(".csv")
    ]
    # Plot each collected file against the "Pitch" parameter.
    for csv_file in csv_files:
        trends(csv_file, "Pitch")