In [1]:
import numpy as np
import random
import dadi
import pickle
import os, sys
sys.path.append(os.path.join(os.getcwd(), '..')) # this is the ml_dadi dir
import data_manip, ml_models
from data_manip import generating_data
from ml_models import rfr_train, mlpr_train

In [2]:
# generate a list of theta values to run scaling and add variance
theta_list = [1,10000,1000,100] # order of increase variance

In [3]:
# designate demographic model, sample size, and extrapolation grid 
func = dadi.Demographics2D.split_mig
ns = [20,20]
pts_l = [40, 50, 60]
logs = [True, True, False, False]

In [4]:
# Generate parameter list for training:
train_params = [(nu1, nu2, T, m) for nu1 in np.linspace(-2, 2, 5)
                            for nu2 in np.linspace(-2, 2, 5)
                            for T in np.linspace(0.1, 2, 10)
                            for m in np.linspace(1, 10, 10)]
# print training set info 
print('n_samples training: ', len(train_params))
print('Range of training params:', min(train_params), 'to', max(train_params))
print('Theta list:', theta_list)
# Make a list of training data dictionaries, one dictionary for each theta case
list_train_dict = generating_data(train_params, theta_list, func, ns, pts_l, logs)
pickle.dump(list_train_dict, open('data/new_func/train_data', 'wb'), 2)

n_samples training:  2500
Range of training params: (-2.0, -2.0, 0.1, 1.0) to (2.0, 2.0, 2.0, 10.0)
Theta list: [1, 10000, 1000, 100]


In [5]:
# Load datasets for training
list_train_dict = pickle.load(open('data/new_func/train_data','rb'))

In [6]:
# Train RFR and save the list of trained RFR into pickle file
list_rfr = [rfr_train(train_dict) for train_dict in list_train_dict]
pickle.dump(list_rfr, open('data/new_func/list_rfr', 'wb'), 2)

In [7]:
# Train MLPR and save the list of trained MLPR into pickle file
list_mlpr = [mlpr_train(train_dict, max_iter=1000) 
                for train_dict in list_train_dict]
pickle.dump(list_mlpr, open('data/new_func/list_mlpr', 'wb'), 2)

In [8]:
# Generate Test Datasets
test_params = []
for i in range(100):
    nu1 = random.random() * 4 - 2
    nu2 = random.random() * 4 - 2
    T = random.random() * 1.9 + 0.1
    m = random.random() * 9 + 1
    params = (nu1, nu2, T, m)
    test_params.append(params)
# print testing set info 
print('n_samples testing: ', len(test_params))
print('Range of testing params:', min(test_params), 'to', max(test_params))
print('Theta list:', theta_list)
# Make a list of test data dictionaries, one dictionary for each theta case
list_test_dict = generating_data(test_params, theta_list, func, ns, pts_l, logs)
# Save testing set as a pickle file
pickle.dump(list_test_dict, open('data/new_func/test_data', 'wb'), 2)

n_samples testing:  200
Range of testing params: (-1.9975195534627979, 0.2677594022856864, 1.9745935510695751, 5.71439269421244) to (1.996530658159866, 0.848968117511478, 1.5548772787189087, 7.899324008073518)
Theta list: [1, 10000, 1000, 100]
