In [62]:
#%matplotlib qt
%matplotlib inline
import os
import sys
import numpy as np
import torch
import matplotlib.pyplot as plt
import json
import torchtext
import torch.nn as nn
import nltk
import pandas as pd

nltk.download('punkt')
cmap = plt.get_cmap('viridis')
from mpl_toolkits.mplot3d import Axes3D

[nltk_data] Downloading package punkt to /home/jupyter/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [6]:
import os
rootdir = '/home/stephane/automatic-data-generation/'
dataroot= rootdir + 'data/'
os.chdir(rootdir)

# Grid search 1 : general

python -m grid results_grid "python automatic_data_generation/train_and_eval_cvae.py -ng 2000 -ep 50" --dataset-size:int 125 250 500 1000 --seed:int 1 2 3 --temperature:float 0.5 1 2 --embedding-dimension:int 100 300 --hidden-size:int 256 512 --n_parallel 8

In [45]:
from pathlib import Path
import shutil
results_dir = 'results_grid'
runs = []

for filename in os.listdir(results_dir):
    filepath = Path(results_dir)/Path(filename)
    if filename.endswith('.pkl'):
        runs.append(torch.load(filepath))
        
runs = [run for run in runs if (run['args']['dataset_size']==125)]
        
data = {}
for arg_name in runs[0]['args'].keys():
    data[arg_name] = [run['args'][arg_name] for run in runs]
data['pickle']   = np.array([run['args']['pickle'].split('/')[-1].rstrip('.pkl') for run in runs])
    
data['bleu_quality']   = np.array([run['delexicalised_metrics']['bleu_scores']['quality']['avg'] for run in runs])
data['bleu_diversity'] = np.array([run['delexicalised_metrics']['bleu_scores']['diversity']['avg'] for run in runs])
data['accuracy']       = np.array([run['delexicalised_metrics']['intent_accuracy'] for run in runs])
data['originality']       = np.array([run['delexicalised_metrics']['originality']['avg'] for run in runs])
data['transfer']   = np.array([run['delexicalised_metrics']['transfer']['avg'] for run in runs])

data['metric'] = data['accuracy']**10 * data['originality'] #* data['bleu_quality']

df = pd.DataFrame(data, columns=['pickle', 'dataset_size', 'n_epochs', 'seed', #'x1', 'x2', 'k1', 'k2', 
                                 'hidden_size', 'embedding_dimension', 'temperature',
                                 'bleu_quality', 'bleu_diversity', 'accuracy', 'originality', 'transfer', 'metric'])
df = df.sort_values(by = 'metric', ascending=False)
print(len(runs))
df.head(100)

36


Unnamed: 0,pickle,dataset_size,n_epochs,seed,hidden_size,embedding_dimension,temperature,bleu_quality,bleu_diversity,accuracy,originality,transfer,metric
23,42754,125,50,2,512,300,0.5,0.673257,0.034804,0.938,0.222829,0.386131,0.11749
7,22314,125,50,1,512,300,0.5,0.672509,0.034813,0.969,0.144285,0.290224,0.105307
15,13588,125,50,3,512,300,0.5,0.678157,0.063655,0.944,0.168844,0.329153,0.094887
16,14347,125,50,2,512,100,0.5,0.667885,0.030559,0.942,0.163037,0.51263,0.0897
19,49592,125,50,2,256,100,0.5,0.61276,0.05346,0.9325,0.16025,0.567055,0.079669
2,24607,125,50,3,256,100,0.5,0.616589,0.061802,0.952,0.129616,0.46762,0.079255
6,38011,125,50,1,512,100,0.5,0.607011,0.063375,0.9205,0.169388,0.529006,0.073981
35,34433,125,50,3,512,100,0.5,0.571414,0.079712,0.893,0.228991,0.525362,0.073847
29,32580,125,50,2,256,300,0.5,0.687051,0.030944,0.953,0.108083,0.437325,0.066786
31,26031,125,50,2,512,300,2.0,0.69431,0.036054,0.9575,0.096807,0.385464,0.062704


# Grid search 2 : encoder vs decoder

python -m grid results_grid_2 "python automatic_data_generation/train_and_eval_cvae.py -ng 2000 -ep 50 --temperature 0.5" --dataset-size:int 125 250 500 1000 --seed:int 1 2 3 --temperature:float 0.5 --embedding-dimension:int 100 300 --hidden-size-encoder 256 512 --hidden-size-decoder 256 512 --num-layers-encoder 1 2 --num-layers-decoder 1 2 --n_parallel 5

In [68]:
from pathlib import Path
import shutil
results_dir = 'results_grid_2'
runs = []

for filename in os.listdir(results_dir):
    filepath = Path(results_dir)/Path(filename)
    if filename.endswith('.pkl'):
        runs.append(torch.load(filepath))
        
runs = [run for run in runs if (run['args']['dataset_size']==500)]
        
data = {}
for arg_name in runs[0]['args'].keys():
    data[arg_name] = [run['args'][arg_name] for run in runs]
data['pickle']   = np.array([run['args']['pickle'].split('/')[-1].rstrip('.pkl') for run in runs])
    
data['bleu_quality']   = np.array([run['delexicalised_metrics']['bleu_scores']['quality']['avg'] for run in runs])
data['bleu_diversity'] = np.array([run['delexicalised_metrics']['bleu_scores']['diversity']['avg'] for run in runs])
data['accuracy']       = np.array([run['delexicalised_metrics']['intent_accuracy']['avg'] for run in runs])
data['originality']       = np.array([run['delexicalised_metrics']['originality']['avg'] for run in runs])
data['transfer']   = np.array([run['delexicalised_metrics']['transfer']['metric']['avg'] for run in runs])

data['metric'] = data['accuracy']**10 * data['originality'] #* data['bleu_quality']

df = pd.DataFrame(data, columns=['pickle', 'dataset_size', 'n_epochs', 'seed', 'embedding_dimension',#'x1', 'x2', 'k1', 'k2', 
                                 'hidden_size_encoder', 'hidden_size_decoder', 'num_layers_encoder', 'num_layers_decoder',
                                 'bleu_quality', 'bleu_diversity', 'accuracy', 'originality', 'transfer', 'metric'])
df = df.sort_values(by = 'metric', ascending=False)
print(len(runs))
df.head(1000)

48


Unnamed: 0,pickle,dataset_size,n_epochs,seed,embedding_dimension,hidden_size_encoder,hidden_size_decoder,num_layers_encoder,num_layers_decoder,bleu_quality,bleu_diversity,accuracy,originality,transfer,metric
2,15832,500,50,1,100,512,256,1,1,0.611668,0.097473,0.974891,0.487479,0.235702,0.3780215
0,10861,500,50,3,100,256,256,1,1,0.626343,0.10918,0.965018,0.521307,0.253679,0.3651299
41,16756,500,50,2,100,256,256,1,1,0.654596,0.094833,0.972463,0.459773,0.203465,0.3477562
32,31140,500,50,3,100,512,256,1,1,0.590995,0.127982,0.966964,0.473487,0.225481,0.3383837
30,12051,500,50,1,100,256,256,1,1,0.664477,0.082993,0.965074,0.472456,0.272588,0.3311073
25,21959,500,50,2,100,512,256,1,1,0.647282,0.10284,0.955318,0.442412,0.285821,0.280097
17,6875,500,50,1,300,256,256,1,1,0.686757,0.045655,0.989671,0.253479,0.11737,0.2284814
5,8722,500,50,3,300,512,256,1,1,0.60865,0.101259,0.997937,0.217189,0.045993,0.2127495
29,1654,500,50,2,300,512,256,1,1,0.660595,0.044978,0.991207,0.224995,0.108662,0.2059758
35,5967,500,50,3,300,256,256,1,1,0.607678,0.065273,0.983011,0.238574,0.085404,0.2010054


# Grid search 3 : transfer

In [82]:
from pathlib import Path
import shutil
results_dir = 'results_grid_3'
runs = []

for filename in os.listdir(results_dir):
    filepath = Path(results_dir)/Path(filename)
    if filename.endswith('.pkl'):
        runs.append(torch.load(filepath))
        
runs = [run for run in runs if (run['args']['dataset_size']==250)]
        
data = {}
for arg_name in runs[0]['args'].keys():
    data[arg_name] = [run['args'][arg_name] for run in runs]
data['pickle']   = np.array([run['args']['pickle'].split('/')[-1].rstrip('.pkl') for run in runs])
    
data['bleu_quality']   = np.array([run['delexicalised_metrics']['bleu_scores']['quality']['avg'] for run in runs])
data['bleu_diversity'] = np.array([run['delexicalised_metrics']['bleu_scores']['diversity']['avg'] for run in runs])
data['accuracy']       = np.array([run['delexicalised_metrics']['intent_accuracy']['avg'] for run in runs])
data['originality']       = np.array([run['delexicalised_metrics']['originality']['avg'] for run in runs])
data['transfer']   = np.array([run['delexicalised_metrics']['transfer']['metric']['avg'] for run in runs])

data['metric'] = data['accuracy']**10 #* data['originality'] #* data['bleu_quality']

df = pd.DataFrame(data, columns=['pickle', 'dataset_size', 'n_epochs', 'seed', 'embedding_dimension',#'x1', 'x2', 'k1', 'k2', 
                                 'none_size', 'cosine_threshold',
                                 'bleu_quality', 'bleu_diversity', 'accuracy', 'originality', 'transfer', 'metric'])
df = df.sort_values(by = 'metric', ascending=False)
print(len(runs))
df.head(1000)

28


Unnamed: 0,pickle,dataset_size,n_epochs,seed,embedding_dimension,none_size,cosine_threshold,bleu_quality,bleu_diversity,accuracy,originality,transfer,metric
13,31310,250,50,2,100,500,0.95,0.666482,0.058162,0.989843,0.464415,0.143502,0.9029525
10,11870,250,50,2,100,250,0.95,0.637181,0.037398,0.987437,0.377183,0.152615,0.8812404
11,11922,250,50,1,100,125,0.95,0.623607,0.051992,0.986772,0.334068,0.166623,0.8753237
25,952,250,50,2,100,125,0.95,0.673238,0.034711,0.978018,0.270446,0.096345,0.8007007
21,48834,250,50,2,100,125,0.9,0.591681,0.043198,0.956525,0.380788,0.170883,0.6411544
6,23047,250,50,1,100,250,0.95,0.645437,0.065808,0.946041,0.4473,0.182426,0.5742489
17,43521,250,50,1,100,125,0.9,0.639747,0.070236,0.945408,0.345341,0.193693,0.5704159
8,45045,250,50,1,100,250,0.9,0.548576,0.073697,0.903201,0.536315,0.331323,0.361279
2,25629,250,50,1,100,125,0.8,0.571981,0.071293,0.899668,0.354612,0.281371,0.3473926
26,8860,250,50,2,100,250,0.9,0.650276,0.067675,0.892524,0.541025,0.289169,0.3207757


# Get all the seeds of the best run

In [81]:
irun = np.argmax(data['metric'])
print(irun)
run = runs[irun]
list(zip(run['generated']['intents'], runs[irun]['generated']['delexicalised']))[:20]

8


[('AddToPlaylist', 'add _artist_ to _playlist_owner_ _playlist_ list'),
 ('None',
  'was it _weatherforecasttemperaturename_ in _weatherforecastcountry_ _weatherforecaststartdatetime_'),
 ('SearchCreativeWork', 'find the _object_type_ _object_name_ play on'),
 ('PlayMusic', 'play the _sort_ sort by _artist_ on _service_'),
 ('None', 'any chance it it will be _forecast_temperature_name_'),
 ('GetWeather', 'what are the weather forecast for _city_ _state_'),
 ('PlayMusic',
  '_party_size_description_ play my _playlist_ playlist on _service_'),
 ('SearchScreeningEvent', 'what are the _object_type_ for _location_name_'),
 ('None', 'show me the weather forecast for the city of _city_'),
 ('AddToPlaylist', 'add this _music_item_ to _playlist_'),
 ('PlayMusic',
  'play the _sort_ by _artist_ from _music_item_ from the _year_'),
 ('SearchScreeningEvent',
  'is _movie_name_ being played at the _location_name_'),
 ('AddToPlaylist', 'add this _music_item_ to _playlist_'),
 ('SearchCreativeWork', 

In [38]:
run['logs']['dev']['classifications']

{'BookRestaurant': {'BookRestaurant': 4309,
  'GetWeather': 520,
  'PlayMusic': 0,
  'RateBook': 37,
  'SearchCreativeWork': 0,
  'SearchScreeningEvent': 98,
  'AddToPlaylist': 36,
  'None': 0},
 'GetWeather': {'BookRestaurant': 316,
  'GetWeather': 4448,
  'PlayMusic': 110,
  'RateBook': 9,
  'SearchCreativeWork': 0,
  'SearchScreeningEvent': 107,
  'AddToPlaylist': 10,
  'None': 0},
 'PlayMusic': {'BookRestaurant': 21,
  'GetWeather': 0,
  'PlayMusic': 4582,
  'RateBook': 43,
  'SearchCreativeWork': 0,
  'SearchScreeningEvent': 47,
  'AddToPlaylist': 307,
  'None': 0},
 'RateBook': {'BookRestaurant': 2,
  'GetWeather': 4,
  'PlayMusic': 0,
  'RateBook': 4878,
  'SearchCreativeWork': 40,
  'SearchScreeningEvent': 72,
  'AddToPlaylist': 4,
  'None': 0},
 'SearchCreativeWork': {'BookRestaurant': 0,
  'GetWeather': 3,
  'PlayMusic': 22,
  'RateBook': 28,
  'SearchCreativeWork': 4938,
  'SearchScreeningEvent': 5,
  'AddToPlaylist': 4,
  'None': 0},
 'SearchScreeningEvent': {'BookRestauran

In [85]:
ibest = np.argmax(data['metric'])
iruns = [ibest]
best = runs[ibest]
best_args = [value for arg, value in best['args'].items() if arg not in ['seed','pickle']]

for irun, run in enumerate(runs):
    if irun==ibest:
        continue
    args = [value for arg, value in run['args'].items() if arg not in ['seed','pickle']]
    #print(args)
    if args == best_args:
        iruns.append(irun)

pickles = [runs[i]['args']['pickle'].rstrip('.pkl') for i in iruns]
print(pickles)

# Save selected runs

savedir = results_dir + '/selected_runs_{}'.format(best['args']['dataset_size'])
if os.path.exists(savedir):
    ! sudo rm -r {savedir} # shutil.rmtree(savedir)
! sudo mkdir {savedir}

for pickle in pickles:
    ! sudo cp -r {pickle} {savedir}
    ! sudo cp {pickle}.pkl {savedir}

print('done')

['results_grid/04810', 'results_grid/27623', 'results_grid/13167']
done
