In [4]:
# Notebook-wide setup: plotting backend, imports, and one-off resources.
# Alternative backend (separate interactive windows), left disabled:
#%matplotlib qt
%matplotlib inline
import os
import sys
import numpy as np
import torch
import matplotlib.pyplot as plt
import json
import torchtext
import torch.nn as nn
import nltk
import pandas as pd
# Fetch nltk's 'punkt' package; per the recorded output this does nothing
# more than report "already up-to-date" when the package is present.
nltk.download('punkt')
# Handle on matplotlib's 'viridis' colormap.
cmap = plt.get_cmap('viridis')
from mpl_toolkits.mplot3d import Axes3D

[nltk_data] Downloading package punkt to /home/jupyter/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [6]:
import os

# Root of the local project checkout. The default is the path the notebook was
# originally written against; set the ADG_ROOTDIR environment variable to run
# it on another machine without editing this cell.
# os.path.join(path, '') leaves the default untouched and appends the trailing
# separator when an override lacks it, which the concatenation below needs.
rootdir = os.path.join(
    os.environ.get('ADG_ROOTDIR', '/home/stephane/automatic-data-generation/'), '')
dataroot = rootdir + 'data/'

# Later cells use paths relative to the project root (e.g. 'results_grid'),
# so make it the working directory. Raises if the directory does not exist.
os.chdir(rootdir)

# Grid search 1 : general

python -m grid results_grid "python automatic_data_generation/train_and_eval_cvae.py -ng 2000 -ep 50" --dataset-size:int 125 250 500 1000 --seed:int 1 2 3 --temperature:float 0.5 1 2 --embedding-dimension:int 100 300 --hidden-size:int 256 512 --n_parallel 8

In [45]:
from pathlib import Path
import shutil

# Directory holding the results of grid search 1 (the first argument of the
# `python -m grid` command recorded above).
results_dir = 'results_grid'

# Load every '.pkl' file found in the results directory.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only point it
# at result files produced by your own runs.
runs = []
for filename in os.listdir(results_dir):
    filepath = Path(results_dir) / filename
    if filename.endswith('.pkl'):
        runs.append(torch.load(filepath))

# Keep only the runs for one dataset size.
runs = [run for run in runs if (run['args']['dataset_size'] == 125)]
if not runs:
    # Without this guard the cell dies on runs[0] with a bare IndexError.
    raise ValueError("no run with dataset_size == 125 found in '{}'".format(results_dir))

# One entry per run for every argument stored with the first run.
data = {}
for arg_name in runs[0]['args'].keys():
    data[arg_name] = [run['args'][arg_name] for run in runs]

# Short run identifier: last path component of the 'pickle' argument without
# its '.pkl' extension. str.rstrip('.pkl') is not used because it strips any
# trailing '.', 'p', 'k' or 'l' characters rather than the suffix, which would
# mangle names that happen to end in one of those characters.
pickle_names = [run['args']['pickle'].split('/')[-1] for run in runs]
data['pickle'] = np.array([name[:-len('.pkl')] if name.endswith('.pkl') else name
                           for name in pickle_names])

# Metrics read from each run's 'delexicalised_metrics' entry.
data['bleu_quality']   = np.array([run['delexicalised_metrics']['bleu_scores']['quality']['avg'] for run in runs])
data['bleu_diversity'] = np.array([run['delexicalised_metrics']['bleu_scores']['diversity']['avg'] for run in runs])
data['accuracy']       = np.array([run['delexicalised_metrics']['intent_accuracy'] for run in runs])
data['originality']    = np.array([run['delexicalised_metrics']['originality']['avg'] for run in runs])
data['transfer']       = np.array([run['delexicalised_metrics']['transfer']['avg'] for run in runs])

# Ranking score: accuracy to the 10th power times originality
# (bleu_quality is deliberately left out of the product).
data['metric'] = data['accuracy']**10 * data['originality'] #* data['bleu_quality']

# Tabulate the selected columns, best score first.
df = pd.DataFrame(data, columns=['pickle', 'dataset_size', 'n_epochs', 'seed', #'x1', 'x2', 'k1', 'k2',
                                 'hidden_size', 'embedding_dimension', 'temperature',
                                 'bleu_quality', 'bleu_diversity', 'accuracy', 'originality', 'transfer', 'metric'])
df = df.sort_values(by = 'metric', ascending=False)
print(len(runs))
df.head(100)

36


Unnamed: 0,pickle,dataset_size,n_epochs,seed,hidden_size,embedding_dimension,temperature,bleu_quality,bleu_diversity,accuracy,originality,transfer,metric
23,42754,125,50,2,512,300,0.5,0.673257,0.034804,0.938,0.222829,0.386131,0.11749
7,22314,125,50,1,512,300,0.5,0.672509,0.034813,0.969,0.144285,0.290224,0.105307
15,13588,125,50,3,512,300,0.5,0.678157,0.063655,0.944,0.168844,0.329153,0.094887
16,14347,125,50,2,512,100,0.5,0.667885,0.030559,0.942,0.163037,0.51263,0.0897
19,49592,125,50,2,256,100,0.5,0.61276,0.05346,0.9325,0.16025,0.567055,0.079669
2,24607,125,50,3,256,100,0.5,0.616589,0.061802,0.952,0.129616,0.46762,0.079255
6,38011,125,50,1,512,100,0.5,0.607011,0.063375,0.9205,0.169388,0.529006,0.073981
35,34433,125,50,3,512,100,0.5,0.571414,0.079712,0.893,0.228991,0.525362,0.073847
29,32580,125,50,2,256,300,0.5,0.687051,0.030944,0.953,0.108083,0.437325,0.066786
31,26031,125,50,2,512,300,2.0,0.69431,0.036054,0.9575,0.096807,0.385464,0.062704


# Grid search 2 : encoder vs decoder

python -m grid results_grid_2 "python automatic_data_generation/train_and_eval_cvae.py -ng 2000 -ep 50 --temperature 0.5" --dataset-size:int 125 250 500 1000 --seed:int 1 2 3 --temperature:float 0.5 --embedding-dimension:int 100 300 --hidden-size-encoder 256 512 --hidden-size-decoder 256 512 --num-layers-encoder 1 2 --num-layers-decoder 1 2 --n_parallel 5

In [55]:
from pathlib import Path
import shutil

# Directory holding the results of grid search 2 (the first argument of the
# `python -m grid` command recorded above).
results_dir = 'results_grid_2'

# Load every '.pkl' file found in the results directory.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only point it
# at result files produced by your own runs.
runs = []
for filename in os.listdir(results_dir):
    filepath = Path(results_dir) / filename
    if filename.endswith('.pkl'):
        runs.append(torch.load(filepath))

# Keep only the runs for one dataset size.
runs = [run for run in runs if (run['args']['dataset_size'] == 250)]
if not runs:
    # Without this guard the cell dies on runs[0] with a bare IndexError.
    raise ValueError("no run with dataset_size == 250 found in '{}'".format(results_dir))

# One entry per run for every argument stored with the first run.
data = {}
for arg_name in runs[0]['args'].keys():
    data[arg_name] = [run['args'][arg_name] for run in runs]

# Short run identifier: last path component of the 'pickle' argument without
# its '.pkl' extension. str.rstrip('.pkl') is not used because it strips any
# trailing '.', 'p', 'k' or 'l' characters rather than the suffix, which would
# mangle names that happen to end in one of those characters.
pickle_names = [run['args']['pickle'].split('/')[-1] for run in runs]
data['pickle'] = np.array([name[:-len('.pkl')] if name.endswith('.pkl') else name
                           for name in pickle_names])

# Metrics read from each run's 'delexicalised_metrics' entry.
data['bleu_quality']   = np.array([run['delexicalised_metrics']['bleu_scores']['quality']['avg'] for run in runs])
data['bleu_diversity'] = np.array([run['delexicalised_metrics']['bleu_scores']['diversity']['avg'] for run in runs])
data['accuracy']       = np.array([run['delexicalised_metrics']['intent_accuracy']['avg'] for run in runs])
data['originality']    = np.array([run['delexicalised_metrics']['originality']['avg'] for run in runs])
data['transfer']       = np.array([run['delexicalised_metrics']['transfer']['metric']['avg'] for run in runs])

# Ranking score: accuracy to the 10th power times originality
# (bleu_quality is deliberately left out of the product).
data['metric'] = data['accuracy']**10 * data['originality'] #* data['bleu_quality']

# Tabulate the selected columns, best score first.
df = pd.DataFrame(data, columns=['pickle', 'dataset_size', 'n_epochs', 'seed', #'x1', 'x2', 'k1', 'k2',
                                 'hidden_size_encoder', 'hidden_size_decoder', 'num_layers_encoder', 'num_layers_decoder',
                                 'bleu_quality', 'bleu_diversity', 'accuracy', 'originality', 'transfer', 'metric'])
df = df.sort_values(by = 'metric', ascending=False)
print(len(runs))
df.head(1000)

48


Unnamed: 0,pickle,dataset_size,n_epochs,seed,hidden_size_encoder,hidden_size_decoder,num_layers_encoder,num_layers_decoder,bleu_quality,bleu_diversity,accuracy,originality,transfer,metric
7,12519,250,50,2,256,512,1,1,0.638807,0.060557,0.935353,0.498346,0.304125,0.2554398
4,21592,250,50,2,512,512,1,1,0.662608,0.078846,0.951351,0.397274,0.279878,0.2412659
20,11228,250,50,1,512,512,1,1,0.528193,0.163881,0.877784,0.75745,0.541289,0.2056975
40,36685,250,50,1,256,256,1,1,0.667367,0.073008,0.972072,0.231009,0.272707,0.1740263
21,17767,250,50,3,512,512,1,1,0.54746,0.107754,0.907091,0.450109,0.445288,0.1697557
11,41793,250,50,1,256,512,1,1,0.573059,0.100616,0.896644,0.487992,0.415171,0.1639125
42,49857,250,50,2,256,512,1,1,0.518425,0.144624,0.864179,0.699652,0.533157,0.162524
1,20504,250,50,3,256,512,1,1,0.588241,0.083043,0.924229,0.351438,0.350621,0.1598246
13,49998,250,50,2,256,256,1,1,0.631828,0.051716,0.972812,0.206802,0.252737,0.1569798
0,23023,250,50,1,512,512,1,1,0.55731,0.095963,0.884699,0.509475,0.49333,0.14965


## Get all the seeds of the best run

In [28]:
# Position (within `runs`) of the run whose 'metric' value is the largest.
irun = np.argmax(data['metric'])
print(irun)

# Show the first ten (intent, utterance) pairs generated by that run.
run = runs[irun]
generated = run['generated']
list(zip(generated['intents'], generated['utterances']))[:10]

11


[('PlayMusic', 'play the album  short and sweet  by enduser  with last fm  '),
 ('RateBook',
  'would rate current  photograph  a four  and a best rating of 6  '),
 ('RateBook', 'rate this  book  a 3  '),
 ('PlayMusic', 'play talking to the universe  on zvooq  '),
 ('GetWeather', 'what is the weather in northway  vanuatu  '),
 ('BookRestaurant',
  'book a reservation for eight  at venetian theatre  in two am  '),
 ('GetWeather', 'what is the ocean breeze park  forecast in iowa  '),
 ('SearchCreativeWork', 'find a painting  called all the wrong reasons  '),
 ('AddToPlaylist', 'put this album  on hits of the 70s  '),
 ('RateBook',
  'i think following  saga  should have a rating value of three  and a best rating of 6  ')]

In [38]:
# Display the 'classifications' entry stored in the dev logs of `run`
# (a nested dict of intent -> intent -> count, judging by the recorded output).
# NOTE(review): `run` is whatever an earlier cell last bound -- presumably the
# best run; which level is the true vs. predicted intent is not visible here.
run['logs']['dev']['classifications']

{'BookRestaurant': {'BookRestaurant': 4309,
  'GetWeather': 520,
  'PlayMusic': 0,
  'RateBook': 37,
  'SearchCreativeWork': 0,
  'SearchScreeningEvent': 98,
  'AddToPlaylist': 36,
  'None': 0},
 'GetWeather': {'BookRestaurant': 316,
  'GetWeather': 4448,
  'PlayMusic': 110,
  'RateBook': 9,
  'SearchCreativeWork': 0,
  'SearchScreeningEvent': 107,
  'AddToPlaylist': 10,
  'None': 0},
 'PlayMusic': {'BookRestaurant': 21,
  'GetWeather': 0,
  'PlayMusic': 4582,
  'RateBook': 43,
  'SearchCreativeWork': 0,
  'SearchScreeningEvent': 47,
  'AddToPlaylist': 307,
  'None': 0},
 'RateBook': {'BookRestaurant': 2,
  'GetWeather': 4,
  'PlayMusic': 0,
  'RateBook': 4878,
  'SearchCreativeWork': 40,
  'SearchScreeningEvent': 72,
  'AddToPlaylist': 4,
  'None': 0},
 'SearchCreativeWork': {'BookRestaurant': 0,
  'GetWeather': 3,
  'PlayMusic': 22,
  'RateBook': 28,
  'SearchCreativeWork': 4938,
  'SearchScreeningEvent': 5,
  'AddToPlaylist': 4,
  'None': 0},
 'SearchScreeningEvent': {'BookRestauran

In [85]:
ibest = np.argmax(data['metric'])
iruns = [ibest]
best = runs[ibest]
best_args = [value for arg, value in best['args'].items() if arg not in ['seed','pickle']]

for irun, run in enumerate(runs):
    if irun==ibest:
        continue
    args = [value for arg, value in run['args'].items() if arg not in ['seed','pickle']]
    #print(args)
    if args == best_args:
        iruns.append(irun)

pickles = [runs[i]['args']['pickle'].rstrip('.pkl') for i in iruns]
print(pickles)

# Save selected runs

savedir = results_dir + '/selected_runs_{}'.format(best['args']['dataset_size'])
if os.path.exists(savedir):
    ! sudo rm -r {savedir} # shutil.rmtree(savedir)
! sudo mkdir {savedir}

for pickle in pickles:
    ! sudo cp -r {pickle} {savedir}
    ! sudo cp {pickle}.pkl {savedir}

print('done')

['results_grid/04810', 'results_grid/27623', 'results_grid/13167']
done
