# Experiment: Check how the model influences accuracy

Parameters:
- model size (number of weights / depth)
- embedding size
- training set

In [1]:
# https://huggingface.co/models?pipeline_tag=text-generation&p=1

# Hugging Face model identifiers compared in this experiment, one per line so
# that adding or removing a checkpoint shows up as a single-line diff.
MODELS = [
    'sshleifer/tiny-gpt2',
    'lvwerra/gpt2-imdb',
    'LorenzoDeMattei/GePpeTto',
    'microsoft/CodeGPT-small-py',
    'minimaxir/magic-the-gathering',
    'rinna/japanese-gpt2-medium',
    'sberbank-ai/rugpt3small_based_on_gpt2',
    'shtoshni/gpt2-chess-uci',
    'uer/gpt2-chinese-poem',
    'ceostroff/harry-potter-gpt2-fanfiction',
    'chrisliu298/arxiv_ai_gpt2',
    'microsoft/DialoGPT-medium',
    'gpt2',
    'gpt2-medium',
    'gpt2-large',
    'distilgpt2',
]

# TASK_NAMES = ['mnist-add-10', 'mnist-add-100', 'mnist', 'bit-xor-100']

# MODEL_SIZE = {
#     "gpt2": ...,
#     "gpt2-medium": 2_046_043,
#     'tiny-gpt2': 3_911,
# }

In [2]:
# Disabled experiment launcher, kept for reference. When uncommented it queues
# three repetitions of every (task, model) pair and runs each one through
# ../run.py with the 'exp-model-size' tag. It needs TASK_NAMES, which is also
# commented out in the cell that defines MODELS.
# import os
# from tqdm.notebook import tqdm

# jobs = []
# for _ in range(3):
#     for task in TASK_NAMES:
#         for model in MODELS:
#             jobs.append((_, task, model))

# with tqdm(leave=False) as progress:
#     pass
#     for _, task, model in jobs:
#         progress.set_description(f"{task}: {model}")
#         os.system(f"python3 ../run.py --tag exp-model-size --task-name {task} --model {model} ../configs/exp-model-size/gpt2.json")
    

## Collect experiment results

In [3]:
import wandb
import pandas as pd


api = wandb.Api()

# Runs are looked up by "<entity>/<project-name>".
runs = api.runs("dl-project2/universal-computation-engine")

# Keep one tuple per run that carries both the experiment tag and the
# 'completed' tag; the field order matches the DataFrame columns below.
results = []
for run in runs:
    if 'exp-model-size' not in run.tags or 'completed' not in run.tags:
        continue
    cfg = run.config
    results.append((
        run.name,
        cfg['task'] + "-" + str(cfg['n']),   # task label, e.g. "mnist-10"
        cfg['model_name'],
        run.summary.get('Final Accuracy'),   # None when the run never logged it
        cfg.get('model_weights'),            # None when the run did not record it
        cfg.get('model_all_weights'),
    ))

df = pd.DataFrame(
    results,
    columns=['name', 'task', 'model_name', 'final_accuracy', 'trained_weights', 'all_weights'],
)
# Share of the model's weights that was trained, in percent.
df['%_weights'] = 100 * df['trained_weights'] / df['all_weights']
df

Unnamed: 0,name,task,model_name,final_accuracy,trained_weights,all_weights,%_weights
0,05-28-fpt-20210528-1241,mnist-10,sshleifer/tiny-gpt2,0.11490,84.0,102778.0,0.081730
1,05-28-fpt-20210528-1237,mnist-10,distilgpt2,0.91650,40714.0,81933322.0,0.049692
2,05-28-fpt-20210528-1234,bit-xor-10,sshleifer/tiny-gpt2,0.50059,102.0,102796.0,0.099226
3,05-28-fpt-20210528-1226,bit-xor-10,gpt2,0.94996,62228.0,124463636.0,0.049997
4,05-28-fpt-20210528-1218,bit-xor-10,uer/gpt2-chinese-poem,0.99968,62228.0,103190036.0,0.060304
...,...,...,...,...,...,...,...
97,05-27-fpt-20210527-1114,mnist-add-10,gpt2,1.98910,,,
98,05-27-fpt-20210527-1045,mnist-add-10,gpt2,1.99940,,,
99,05-27-fpt-20210527-1038,mnist-add-10,gpt2,2.68520,,,
100,05-27-fpt-20210527-1025,mnist-add-10,gpt2,1.90330,,,


In [4]:
from IPython.display import display

# Show one summary table per task: number of runs, mean and std of the final
# metric, mean total weight count and max trained weight count per model.
# Tasks are visited in sorted order because iterating a bare `set` of strings
# can yield a different order from one kernel session to the next, which would
# reshuffle the displayed tables between otherwise identical runs.
for task in sorted(df.task.unique()):
    # NOTE(review): for 'mnist-add' tasks the logged 'Final Accuracy' appears to
    # be an error-like metric (lower is better), hence the ascending sort there
    # — confirm against the training script.
    lower_is_better = 'mnist-add' in task
    df_task = (
        df[df.task == task]
        .groupby(['task', 'model_name'])
        .agg({'final_accuracy': ['size', 'mean', 'std'], 'all_weights': 'mean', 'trained_weights': ['max']})
        .sort_values(by=('final_accuracy', 'mean'), ascending=lower_is_better)
    )
    display(df_task)

Unnamed: 0_level_0,Unnamed: 1_level_0,final_accuracy,final_accuracy,final_accuracy,all_weights,trained_weights
Unnamed: 0_level_1,Unnamed: 1_level_1,size,mean,std,mean,max
task,model_name,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
mnist-add-10,ceostroff/harry-potter-gpt2-fanfiction,9,1.795033,0.142095,125112667.0,1497691.0
mnist-add-10,chrisliu298/arxiv_ai_gpt2,1,1.9036,,775154011.0,1308251.0
mnist-add-10,distilgpt2,2,1.90685,0.048013,82585435.0,1479259.0
mnist-add-10,gpt2,9,2.045522,0.288062,125112667.0,1497691.0
mnist-add-10,gpt2-medium,5,2.11652,0.506292,355720283.0,2046043.0
mnist-add-10,sberbank-ai/rugpt3small_based_on_gpt2,3,2.156867,0.37079,125904475.0,2284123.0
mnist-add-10,lvwerra/gpt2-imdb,4,2.177775,0.239575,125112667.0,1497691.0
mnist-add-10,microsoft/CodeGPT-small-py,3,2.398633,0.185609,124916059.0,1497691.0
mnist-add-10,uer/gpt2-chinese-poem,3,2.420567,0.31296,103839067.0,1497691.0
mnist-add-10,LorenzoDeMattei/GePpeTto,4,2.472475,0.519733,109555291.0,1497691.0


Unnamed: 0_level_0,Unnamed: 1_level_0,final_accuracy,final_accuracy,final_accuracy,all_weights,trained_weights
Unnamed: 0_level_1,Unnamed: 1_level_1,size,mean,std,mean,max
task,model_name,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
mnist-10,microsoft/DialoGPT-medium,1,0.9772,,354850826.0,128010.0
mnist-10,ceostroff/harry-potter-gpt2-fanfiction,2,0.96285,0.011667,124460554.0,59146.0
mnist-10,gpt2,2,0.9594,0.004384,124460554.0,59146.0
mnist-10,rinna/japanese-gpt2-medium,1,0.9585,,336155658.0,128010.0
mnist-10,sberbank-ai/rugpt3small_based_on_gpt2,2,0.95375,0.010394,125252362.0,59146.0
mnist-10,microsoft/CodeGPT-small-py,2,0.95065,0.034436,124263946.0,59146.0
mnist-10,lvwerra/gpt2-imdb,2,0.94605,0.009405,124460554.0,59146.0
mnist-10,gpt2-medium,1,0.9458,,354850826.0,128010.0
mnist-10,uer/gpt2-chinese-poem,2,0.942,0.038749,103186954.0,59146.0
mnist-10,LorenzoDeMattei/GePpeTto,2,0.9316,0.013435,108903178.0,59146.0


Unnamed: 0_level_0,Unnamed: 1_level_0,final_accuracy,final_accuracy,final_accuracy,all_weights,trained_weights
Unnamed: 0_level_1,Unnamed: 1_level_1,size,mean,std,mean,max
task,model_name,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
bit-xor-10,sberbank-ai/rugpt3small_based_on_gpt2,1,0.99991,,125255444.0,62228.0
bit-xor-10,microsoft/CodeGPT-small-py,1,0.99979,,124267028.0,62228.0
bit-xor-10,uer/gpt2-chinese-poem,1,0.99968,,103190036.0,62228.0
bit-xor-10,gpt2,1,0.94996,,124463636.0,62228.0
bit-xor-10,lvwerra/gpt2-imdb,2,0.699395,0.283571,124463636.0,62228.0
bit-xor-10,minimaxir/magic-the-gathering,2,0.575805,0.104334,1687188.0,8340.0
bit-xor-10,distilgpt2,1,0.50163,,81936404.0,43796.0
bit-xor-10,sshleifer/tiny-gpt2,2,0.50078,0.000269,102796.0,102.0
bit-xor-10,shtoshni/gpt2-chess-uci,1,0.4995,,85532180.0,62228.0


In [5]:
# Disabled debug helper: prints the raw result tuples of one task/model pair
# (r[1] is the task label, r[2] the model name).
# for r in results:
#     if r[1] == 'mnist-add-10' and r[2] == 'ceostroff/harry-potter-gpt2-fanfiction':
#         print(r)


In [6]:
# Disabled debug helper: inspects whichever `run` object was left over from the
# results-collection loop, so it only works after that cell has been executed.
# run.config
# run.summary.keys()
# run.name