# Random graph experiments I: Color

## Graph and data generation module ##

In [1]:
import os
# Cap the OpenMP, OpenBLAS and vecLib thread-count variables at one thread.
# Shell equivalent: `export OMP_NUM_THREADS=1` (and likewise for the other two).
# NOTE(review): presumably this has to run before numpy is imported so the
# numerical backends pick the values up - confirm.
os.environ.update({
  "OMP_NUM_THREADS": "1",
  "OPENBLAS_NUM_THREADS": "1",
  "VECLIB_MAXIMUM_THREADS": "1",
})

In [2]:
import sys, time, random
import numpy as np
from model.MNLogit_bfgs_torch import MNLogit

In [5]:
# Sorted names in 'features-fitness' that contain 'FULL' and split into more
# than three '&'-separated fields (i.e. contain at least three '&').
filenames = sorted(
  name
  for name in os.listdir('features-fitness')
  if 'FULL' in name and name.count('&') >= 3
)

In [6]:
# Display the selected archive names; `fit` addresses datasets by their
# position in this sorted list.
filenames

['FULL&nodes=1.0e+05&er_edges=5.0e+05&choice_edges=2.0e+04&color_ratio=1.0e-01&checkpoint=1.00&time=2019-08-03T04:10:11&seed=QBQR1G.npz&extract_time=2019-08-12T23:53:34.npz',
 'FULL&nodes=1.0e+05&er_edges=5.0e+05&choice_edges=2.0e+04&color_ratio=1.0e-02&checkpoint=1.00&time=2019-08-03T04:05:24&seed=12URDV0.npz&extract_time=2019-08-12T23:53:27.npz',
 'FULL&nodes=1.0e+05&er_edges=5.0e+05&choice_edges=2.0e+04&color_ratio=1.0e-03&checkpoint=1.00&time=2019-08-03T04:10:15&seed=7UU4YX.npz&extract_time=2019-08-12T23:55:38.npz',
 'FULL&nodes=1.0e+05&er_edges=5.0e+05&choice_edges=2.0e+04&color_ratio=1.0e-04&checkpoint=1.00&time=2019-08-03T04:05:24&seed=1HSTEW3.npz&extract_time=2019-08-12T22:21:46.npz',
 'FULL&nodes=1.0e+05&er_edges=5.0e+05&choice_edges=2.0e+04&color_ratio=2.0e-01&checkpoint=1.00&time=2019-08-03T04:05:23&seed=1X20RE4.npz&extract_time=2019-08-12T23:53:22.npz',
 'FULL&nodes=1.0e+05&er_edges=5.0e+05&choice_edges=2.0e+04&color_ratio=2.0e-02&checkpoint=1.00&time=2019-08-03T04:10:14&se

In [7]:
def fit(kwargs, num_threads=16, num_runs=1):
  """Fit MNLogit model(s) on one feature archive and summarise the result.

  Args:
    kwargs: dict with two keys:
      'id'       - index into the module-level `filenames` list;
      'sampling' - sampling scheme name. Its first character selects the
                   'Xs_<c>', 'ys_<c>' and 'sws_<c>' arrays of the archive.
    num_threads: forwarded unchanged to the MNLogit constructor.
    num_runs: number of leading entries of the selected arrays to fit, one
      model per entry. Defaults to 1, the previously hard-coded value.
      Assumes the archive holds at least `num_runs` entries - TODO confirm.

  Returns:
    dict with
      'results' - list of `get_model_info()` dicts, one per run, with the
                  'se' and 'weights' entries converted to plain lists;
      'dataset' - the key=value fields parsed from the file name, plus
                  'sampling'.
  """
  dataset_id = kwargs['id']
  sampling = kwargs['sampling']
  c = sampling[0]
  filename = filenames[dataset_id]

  # np.load on an .npz returns a lazy NpzFile that keeps the archive open and
  # re-reads an array on every key lookup. Read each array once and close the
  # file so pool workers do not leak file handles.
  with np.load('features-fitness/' + filename) as dat:
    Xs = dat['Xs_'+c]
    ys = dat['ys_'+c]
    sws = dat['sws_'+c]

  results = {'results':[]}
  # Drop every '.npz' (one also sits inside the seed field), skip the leading
  # tag, and parse the remaining '&'-separated key=value fields. maxsplit=1
  # keeps a value intact even if it contains '='.
  dataset = {}
  for field in filename.replace('.npz','').split('&')[1:]:
    parts = field.split('=', 1)
    dataset[parts[0]] = parts[1]
  dataset['sampling'] = sampling
  results['dataset'] = dataset

  for i in range(num_runs):
    m = MNLogit(num_threads)
    m.data(Xs[i], ys[i], sws=sws[i])
    m.fit(max_num_iter=500, clip=1.0, clip_norm_ord=2)
    info = m.get_model_info()
    # Convert to plain lists (presumably array-likes; the result is later
    # dumped as JSON - verify against MNLogit.get_model_info).
    info['se'] = list(info['se'])
    info['weights'] = list(info['weights'])
    results['results'].append(info)

  print("Dataset {},{} done".format(dataset_id,sampling))
  return results

In [9]:
from multiprocessing import Pool
from itertools import product

# One job per (dataset index, sampling scheme) pair: 12 datasets x 'uniform'.
args = [
  {'id': dataset_idx, 'sampling': scheme}
  for dataset_idx, scheme in product(range(12), ['uniform'])
]

# Run the fits on 4 worker processes; `x` holds the result dicts in the
# same order as `args`.
with Pool(4) as pool:
  x = pool.map(fit, args)

Dataset 2,uniform done
Dataset 0,uniform done
Dataset 1,uniform done
Dataset 3,uniform done
Dataset 4,uniform done
Dataset 5,uniform done
Dataset 6,uniform done
Dataset 7,uniform done
Dataset 8,uniform done
Dataset 10,uniform done
Dataset 9,uniform done
Dataset 11,uniform done


In [10]:
# Display the fitted results: one dict per dataset, each with the parsed
# 'dataset' fields and the list of per-run 'results'.
x

[{'dataset': {'checkpoint': '1.00',
   'choice_edges': '2.0e+04',
   'color_ratio': '1.0e-01',
   'er_edges': '5.0e+05',
   'extract_time': '2019-08-12T23:53:34',
   'nodes': '1.0e+05',
   'sampling': 'uniform',
   'seed': 'QBQR1G',
   'time': '2019-08-03T04:10:11'},
  'results': [{'avg_loss': 10.616915020542102,
    'num_iter': 25,
    'se': [0.01009304900790987, 0.005100069045224399],
    'weights': [1.963151015618758, 0.9978449893068309]}]},
 {'dataset': {'checkpoint': '1.00',
   'choice_edges': '2.0e+04',
   'color_ratio': '1.0e-02',
   'er_edges': '5.0e+05',
   'extract_time': '2019-08-12T23:53:27',
   'nodes': '1.0e+05',
   'sampling': 'uniform',
   'seed': '12URDV0',
   'time': '2019-08-03T04:05:24'},
  'results': [{'avg_loss': 10.93456919998882,
    'num_iter': 25,
    'se': [0.019622638153771688, 0.005000659005714443],
    'weights': [1.973420683002613, 0.9925549910632095]}]},
 {'dataset': {'checkpoint': '1.00',
   'choice_edges': '2.0e+04',
   'color_ratio': '1.0e-03',
   'er

In [11]:
import json

# Persist the list of fit summaries as a single JSON document.
with open('fitted-synthetic-color-degree-full.json', 'w') as out_file:
  json.dump(x, out_file)

In [None]:
# fit({'id':66, 'sampling':'stratified'}, num_threads=64)

In [None]:
# import json
# with open('fitted-synthetic-color-degree.json','r') as f:
#   x = json.load(f)

In [None]:
from collections import Counter
xs = []
ys_s = []
ys_u = []
# np.random.shuffle(x)
# Collect (color_ratio, first fitted weight) points for the plot.
# NOTE(review): only datasets fitted with 'importance' sampling contribute;
# if `x` only holds other sampling schemes, xs / ys_s stay empty - confirm
# that `x` is the intended result set.
for d in x:
  # The sampling scheme is a per-dataset property, so test it once per dataset.
  if d['dataset']['sampling'] != 'importance':
    continue
  color_ratio = float(d['dataset']['color_ratio'])
  # Use at most the first 20 runs; slicing avoids the IndexError a fixed
  # range(20) raised when a dataset holds fewer runs.
  for run in d['results'][:20]:
    xs.append(color_ratio)
    ys_s.append(run['weights'][0])

In [None]:
import matplotlib.pyplot as plt

# Scatter of fitted weights[0] against color ratio: markers only, no line.
plt.plot(xs, ys_s, marker='.', linestyle='none', markersize=1)
# Thin grey horizontal reference line at y = 2 across the plotted x range.
plt.plot([7e-5, 0.7], [2, 2], color='grey', linewidth=0.5)
plt.ylim(1, 3)
plt.xscale('log')
# The title call stays last so the cell's displayed value is unchanged.
plt.title('weight of color, 23pos:1neg, color+fitness, log(N+1/s+1)')

In [None]:
# Name (without the trailing '.npz') of one feature archive, assembled from
# its '&'-separated key=value fields.
filename = '&'.join([
  'nodes=1.0e+05',
  'er_edges=5.0e+05',
  'choice_edges=2.0e+04',
  'color_ratio=1.0e-02',
  'checkpoint=1.00',
  'time=2019-08-02T14:58:30',
  'seed=NGMCEM.npz',
  'extract_time=2019-08-02T23:28:35',
])
# Left open on purpose: later cells read arrays out of `dat`.
dat = np.load("features/" + filename + ".npz")

In [None]:
# Read the 'sws_s' array out of the archive under its own name; it is passed
# as the `sws` argument of the stratified fits in the following cell.
# NOTE(review): presumably kept separate so it can be swapped for re-computed
# values - confirm.
new_sws = dat['sws_s']

In [None]:
sampling = 'stratified'
c = sampling[0]
# `dat` comes from np.load on an .npz archive, which re-reads an array from
# the file on every key lookup; fetch the loop-invariant arrays once.
Xs_c = dat['Xs_'+c]
ys_c = dat['ys_'+c]
# Fit one model per entry for the first 15 entries and print its summary.
for i in range(15):
  m = MNLogit(num_threads=64)
  m.data(Xs_c[i], ys_c[i], sws=new_sws[i])
  m.fit(max_num_iter=500, clip=1.0, clip_norm_ord=2)
  print(m.get_model_info())