# Ablation Study Calculations
This notebook contains all the code needed to run the ablation study and to compute its results.

In [15]:
import sys
import os
import json
from dotenv import load_dotenv
from typing import Literal

# Pull variables from a local .env file (if any) into the process environment
# before reading configuration from it.
load_dotenv()

# Data directory configured through the DATA_DIR_PATH environment variable;
# this is None when the variable is not set.
DATA_DIR = os.environ.get("DATA_DIR_PATH")

In [9]:
# Make the project's `src` package importable from this notebook.
notebook_dir = os.getcwd()  # Current working directory of the notebook
# Two levels up from the notebook: the directory that CONTAINS the `src` package
# (the imports below are written as `from src....`), not `src` itself.
src_dir = os.path.abspath(os.path.join(notebook_dir, '..', '..'))
# Guard against duplicates so that re-running this cell does not keep growing sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)

from src.pipelines.pipeline_runner import run_data_through_generator
from src.utils import load_json_to_pipelinedata, save_objects_as_json

## Complete the ablation runs

In [21]:
# Pipeline components that can be switched off, one per ablation run.
AblationTarget = Literal["query-rewriter", "sparse-retriever", "reranker", "sample-questions", "one-shot"]

# What do you wish to get rid of in this run?
what_to_ablate: AblationTarget = "query-rewriter"

# Both directories are relative to the notebook's working directory.
datasets_dir = os.path.join("..", "..", "data", "datasets")
complete_runs_dir = os.path.join("..", "..", "data", "complete_runs")

# Per-target settings: (input dataset, output file, flag to enable in ablation_params).
ablation_configs = {
    "query-rewriter": (
        os.path.join(datasets_dir, "ablation", "retrieved-(no-query-rewrite).json"),
        os.path.join(complete_runs_dir, "ablation-llama3-no-query-rewrite.json"),
        "no_rewriter",
    ),
    "sparse-retriever": (
        os.path.join(datasets_dir, "ablation", "retrieved-(no-sparse-retrieval).json"),
        os.path.join(complete_runs_dir, "ablation-llama3-no-sparse-retriever.json"),
        "no_sparse_retriever",
    ),
    "reranker": (
        # NOTE(review): this is the same input file as the "query-rewriter" ablation.
        # Confirm that is intentional and not a copy-paste slip.
        os.path.join(datasets_dir, "ablation", "retrieved-(no-query-rewrite).json"),
        os.path.join(complete_runs_dir, "ablation-llama3-no-reranker.json"),
        "no_reranker",
    ),
    "sample-questions": (
        os.path.join(datasets_dir, "main", "test-prompts-rewritten-retrieved.json"),
        os.path.join(complete_runs_dir, "ablation-llama3-no-sample-questions.json"),
        "no_sample_questions",
    ),
    "one-shot": (
        os.path.join(datasets_dir, "main", "test-prompts-rewritten-retrieved.json"),
        os.path.join(complete_runs_dir, "ablation-llama3-no-one-shot.json"),
        "no_one_shot",
    ),
}

# Fail loudly on an unknown target: otherwise input_file/output_file would be
# undefined, or silently reused from an earlier run of this cell.
if what_to_ablate not in ablation_configs:
    raise ValueError(
        f"Unknown ablation target {what_to_ablate!r}; expected one of {sorted(ablation_configs)}"
    )

input_file, output_file, ablation_flag = ablation_configs[what_to_ablate]

# Only the flag for the selected target is set to True; all others stay False.
ablation_params = {
    "no_rewriter": False,
    "no_sparse_retriever": False,
    "no_reranker": False,
    "no_sample_questions": False,
    "no_one_shot": False,
}
ablation_params[ablation_flag] = True

# Read the JSON explicitly as UTF-8 rather than relying on the platform default encoding.
with open(input_file, 'r', encoding="utf-8") as file:
    data = json.load(file)

# Convert the raw JSON into the project's pipeline data objects.
incomplete_pipeline_data = load_json_to_pipelinedata(data)

# Pass the loaded data to the generator step together with the chosen ablation flags.
res = run_data_through_generator(incomplete_pipeline_data, "llama3-70b-8192", ablation_params=ablation_params, verbose=False)

# Saving is currently disabled; uncomment to write `res` to output_file.
# save_objects_as_json(res, output_file, rewrite=True)




False


## Ablation study automatic evaluation