# Imports & Preparation Code


In [None]:
import json
import os
from collections import defaultdict
from glob import glob
from statistics import mean
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import colors as mcolors
from tqdm import tqdm

from ..g_index_final import Benchmark
from ..utils import AVAILABLE_TEMPLATES, Dataset, DatasetDetails, get_template,cache_dd


# Lift pandas' display limits so DataFrames are shown in full (no row/column cap).
for _display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(_display_option, None)

# Shared plot defaults (presumably consumed by plotting cells elsewhere in the
# notebook — they are not used inside this cell).
figsize = (8, 8)
alpha = 0.85

# Apply the seaborn theme first: the rcParams overrides below touch some of the
# same keys (e.g. "axes.edgecolor", "axes.grid") and must win.
sns.set_style("whitegrid")
_rc_overrides = {
    "lines.color": "black",
    "patch.edgecolor": "white",
    "axes.edgecolor": "0.15",
    "axes.linewidth": 1.25,
    "axes.grid": True,
    # Inactive overrides, kept for reference:
    #   "text.color": "white",
    #   "axes.facecolor": "white",
    #   "axes.edgecolor": "lightgray",
    #   "axes.labelcolor": "white",
    #   "xtick.color": "white",
    #   "ytick.color": "white",
    #   "grid.color": "grey",
    #   "figure.facecolor": "black",
    #   "figure.edgecolor": "black",
    "savefig.facecolor": "white",
    "savefig.edgecolor": "white",
}
plt.rcParams.update(_rc_overrides)

In [None]:
# Directories (relative to the working directory) that are searched for
# experiment and template JSON files.
EXP_DIR = '../experiments/'
TEMPLATE_DIR = '../templates/'

# `glob` is imported as the function itself (`from glob import glob`), so it is
# called directly; `glob.glob(...)` would raise AttributeError on the function.
# Each result is a (possibly empty) list of matching paths.
exp_files = glob(EXP_DIR + "*.json")
template_files = glob(TEMPLATE_DIR + "*.json")

In [None]:
# Model display names and the hex colours paired with them position by position.
models = ['gpt2-774M', 'gpt2-345M', 'gpt2-1.5B', 'gptneo-2.7B']
colors = ["#375E97", "#FB6542", "#FFBB00", "#3F681C"]

# Display name -> hex colour.
model_colormap = dict(zip(models, colors))

# Maps the identifiers on the left to the display names used in `models`.
name_correction = {
    'gpt2-medium': 'gpt2-345M',
    'gpt2-large': 'gpt2-774M',
    'gpt2-xl': 'gpt2-1.5B',
    'EleutherAI/gpt-neo-2.7B': 'gptneo-2.7B',
}

# Simulations

## G-index vs. Training Samples (for varying PTheta)

### Setting up Simulation Variables

In [None]:
# Scalar settings for the sweep.
num_domains = 16
iters_per_n_samples = 20  # repetitions per value in n_samples

# Sample counts to sweep: 20, 40, ..., 640 (32 values).
n_samples = list(range(20, 33 * 20, 20))

# PTheta values to sweep: start at 0.2, step 0.2, stop before 1.2, rounded to
# two decimals to strip floating-point noise from np.arange.
PThetaValues = np.arange(0.2, 1.2, 0.2).round(2).tolist()

# One task-domain entry per name in `atemps`, each with 5 samples.
# NOTE(review): `atemps` is not defined in the visible part of this notebook —
# confirm it is assigned before this cell runs.
TaskDomains = [
    {"name": template_name, "num_samples": 5}
    for template_name in atemps
]

# Result of cache_dd(); handed to the benchmark as "dd_cache" in the simulation cell.
dd_cache = cache_dd()

### Running the Simulations

In [None]:

# Sweep every PTheta value and every entry of n_samples, building one benchmark
# per repetition and recording its G-index.
# NOTE(review): `available_templates`, `resplit`, `ModifiedBenchmark`, `td` and
# `atemps` are not defined in the visible part of this notebook — confirm they
# exist before this cell runs (`td` may be intended to be `TaskDomains`).
results_by_pt_train_samples = defaultdict(dict)
for pt in PThetaValues:
    # Same PTheta for every name returned by available_templates().
    pt_dict = dict.fromkeys(available_templates(), pt)
    # Parallel lists: entry i of "g_index" was measured at entry i of "n_samples".
    nsamples_dict = {"n_samples": [], "g_index": []}
    for ns in tqdm(n_samples):
        for itp in range(iters_per_n_samples):
            # resplit receives ns * num_domains and num_domains; its outputs are
            # paired position by position with the names in `atemps`.
            domain_sample_counts = resplit(ns * num_domains, num_domains)
            sample_distribution_dict = [
                {"name": template, "num_samples": count}
                for template, count in zip(atemps, domain_sample_counts)
            ]
            default_values_dict = {
                "CurriculaDomains": sample_distribution_dict,
                "taskDomains": td,
                "P": 1e-3,
                "E": 1e2,
                "PTheta": pt_dict,
                "use_dd_cache": True,
                "dd_cache": dd_cache,
            }
            bmark = ModifiedBenchmark(**default_values_dict)
            nsamples_dict["n_samples"].append(ns)
            nsamples_dict["g_index"].append(bmark.GetExperimentIndices().GIndex)

    # Store the collected runs under their PTheta value.
    results_by_pt_train_samples[pt] = nsamples_dict

# What is G-index?

Definition here


![](../images/benchmark_t.png)