## Callin Switzer
April 2019

Multiprocessing with multiple arguments

In [9]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import os
import pandas as pd
import seaborn as sns
from scipy.integrate import odeint
import random
import time
from datetime import datetime
import sys
from multiprocessing import Pool, cpu_count
import simUtils_twoTorque # note that this is a custom-written file 
import importlib
import functools
import sqlite3
from collections import OrderedDict

# record the Python interpreter version used for this run
print(sys.version)

3.6.7 (default, Feb 28 2019, 07:28:18) [MSC v.1900 64 bit (AMD64)]


In [10]:
# Timestamp the run so the saved output shows when this notebook was executed.
now = datetime.now()
print("last run on " + str(now))

# Folder that receives the generated SQLite database.
pythonMadeData = r"D:/Dropbox/AcademiaDropbox/mothMachineLearning_dataAndFigs/PythonGeneratedData_twoTorque_2"

# os.makedirs also creates any missing parent folders (os.mkdir would raise
# FileNotFoundError), and exist_ok=True removes the check-then-create race of
# the separate os.path.exists() test.
os.makedirs(pythonMadeData, exist_ok=True)

last run on 2019-11-01 09:38:34.508923


In [11]:
# Seed NumPy's global RNG; the np.random.uniform draws in the loops below use it.
np.random.seed(12345)
# Reload the custom module so recent edits to the file take effect without a
# kernel restart; assigning to `_` keeps the module repr out of the cell output.
_ = importlib.reload(simUtils_twoTorque)

In [13]:
# save global options

# Built from (key, value) pairs instead of a dict literal: OrderedDict({...})
# only keeps this order when plain dicts are themselves ordered, and the order
# matters because globalList (below) hands the values on purely by position.
globalDict = OrderedDict([
    ("bhead", 0.507),
    ("ahead", 0.908),
    ("bbutt", 0.1295),
    ("abutt", 1.7475),
    ("rho", 1),
    ("rhoA", 0.00118),
    ("muA", 0.000186),
    ("L1", 0.908),
    ("L2", 1.7475),
    ("L3", 0.908),
    ("K", 23000),
    ("c", 14075.8),
    ("g", 980.0),
    ("betaR", 0.0),
    ("nstep", 2),
    ("nrun", 1000000),  # (max) number of  trajectories.
])

# Calculated variables (appended after the base options, in this order)
globalDict["m1"] = globalDict["rho"]*(4/3)*np.pi*(globalDict["bhead"]**2)*globalDict["ahead"]
globalDict["m2"] = globalDict["rho"]*(4/3)*np.pi*(globalDict["bbutt"]**2)*globalDict["abutt"]
globalDict["echead"] = globalDict["ahead"]/globalDict["bhead"]
globalDict["ecbutt"] = globalDict["abutt"]/globalDict["bbutt"]
globalDict["I1"] = (1/5)*globalDict["m1"]*(globalDict["bhead"]**2)*(1 + globalDict["echead"]**2)
globalDict["I2"] = (1/5)*globalDict["m2"]*(globalDict["bbutt"]**2)*(1 + globalDict["ecbutt"]**2)
globalDict["S_head"] = np.pi*globalDict["bhead"]**2
globalDict["S_butt"] = np.pi*globalDict["bbutt"]**2

# time points handed to the simulation: nstep evenly spaced values on [0, 0.02]
t = np.linspace(0, 0.02, num=globalDict["nstep"], endpoint=True)

# convert dict to list, since @jit works better with lists
# (positional: element i is the i-th key inserted above)
globalList = list(globalDict.values())


# ranges for control variables
rangeDict = {"Fmin": 0,
             "Fmax": 44300,
             "alphaMin": 0,
             "alphaMax": 2*np.pi,
             "tau0Min": -1000000,  # refref: this is 10x more than I set for the previous one
             "tau0Max": 1000000,
             "tau_wMin": -100000,
             "tau_wMax": 100000}

# ranges for controls: one [min, max] row per control, in the order
# F, alpha, tau0, tau_w
ranges = np.array([[rangeDict["Fmin"], rangeDict["Fmax"]],
                   [rangeDict["alphaMin"], rangeDict["alphaMax"]],
                   [rangeDict["tau0Min"], rangeDict["tau0Max"]],
                   [rangeDict["tau_wMin"], rangeDict["tau_wMax"]]])

# ranges for initial conditions: one [min, max] row per state variable;
# a [0, 0] row pins that variable to zero
IC_ranges = np.array([[0, 0],
                      [-1500, 1500],
                      [0, 0],
                      [-1500, 1500],
                      [0, 2*np.pi],
                      [-25, 25],
                      [0, 2*np.pi],
                      [-25, 25]])

In [14]:
# start loop here:
## refref: I ran this loop twice to generate training data and once for test data

# Each pass simulates "nrun" random trajectories in parallel and stores them
# as one new table in twoTorqueData.db.
dataType = "trainingData_"
for ii in range(10):
    print(ii)

    # generate random ICs and controls
    # random F, alpha, tau, tau_w: one row per trajectory, one column per control
    FAlphaTau_list = np.random.uniform(ranges[:, 0], ranges[:, 1],
                                       size=(globalDict["nrun"], ranges.shape[0]))

    # random initial conditions for state 0: one row per trajectory
    state0_ICs = np.random.uniform(IC_ranges[:, 0], IC_ranges[:, 1],
                                   size=(globalDict["nrun"], IC_ranges.shape[0]))

    # run simulations in parallel, "nrun"s at a time
    # Leave one core free, but never request fewer than one worker:
    # Pool(0) raises ValueError on a single-core machine.
    # The with-block terminates the workers if map() raises, so a failed
    # batch does not leave processes behind.
    with Pool(max(1, cpu_count() - 1)) as p:
        stt = time.time()
        # Each call receives one run index from range(nrun) as its positional
        # argument (presumably used to pick a row of the arrays below --
        # confirm in simUtils_twoTorque).
        bb = p.map(functools.partial(simUtils_twoTorque.flyBug_listInput_TwoTorque, t=t,
                                     state0_ICs=state0_ICs,
                                     FAlphaTau_list=FAlphaTau_list,
                                     globalList=globalList), range(globalDict["nrun"]))
        print("time for one run:", time.time() - stt)
        p.close()
        p.join()

    # reshape to put into a pd data frame
    # NOTE(review): the column-major reshape depends on the array layout
    # returned by flyBug_listInput_TwoTorque -- confirm against that function.
    bb2 = np.array(bb).reshape(globalDict["nrun"], -1, order="F")
    bb3 = np.hstack([bb2, FAlphaTau_list])

    simDF = pd.DataFrame(bb3, columns=["x_0", "xd_0", "y_0", "yd_0",
                                       "theta_0", "thetad_0", "phi_0", "phid_0",
                                       "x_f", "xd_f", "y_f", "yd_f",
                                       "theta_f", "thetad_f", "phi_f", "phid_f",
                                       "F", "alpha", "tau0", "tau_w"])

    # write to database,
    # makes a new database if it doesn't already exist
    con1 = sqlite3.connect(os.path.join(pythonMadeData, "twoTorqueData.db"))
    try:
        # get table names from database
        # (a failure here now propagates instead of being swallowed by a bare
        # except, which left tableNames undefined or stale for the write below)
        cursorObj = con1.cursor()
        cursorObj.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tableNames = cursorObj.fetchall()
        cursorObj.close()

        # refref: name changed from "trainingData_" to "testingData_" when I generated new data
        # The numeric suffix is the count of ALL tables already in the database,
        # so it keeps increasing across data types.
        simDF.to_sql(dataType + str(len(tableNames)).zfill(2), con1, if_exists="fail", index=False)
    finally:
        # close connection, even if the query or the write failed
        con1.close()


0
time for one run: 309.3429124355316
1
time for one run: 294.9301688671112
2
time for one run: 301.78392601013184
3
time for one run: 297.3791446685791
4
time for one run: 300.8784501552582
5
time for one run: 293.39779138565063
6
time for one run: 301.97912764549255
7
time for one run: 299.20177245140076
8
time for one run: 317.4520170688629
9
time for one run: 303.86609506607056


In [15]:
# start loop for test data

# Each pass simulates "nrun" random trajectories in parallel and stores them
# as one new table in twoTorqueData.db.
dataType = "testingData_"
for ii in range(5):
    print(ii)

    # generate random ICs and controls
    # random F, alpha, tau, tau_w: one row per trajectory, one column per control
    FAlphaTau_list = np.random.uniform(ranges[:, 0], ranges[:, 1],
                                       size=(globalDict["nrun"], ranges.shape[0]))

    # random initial conditions for state 0: one row per trajectory
    state0_ICs = np.random.uniform(IC_ranges[:, 0], IC_ranges[:, 1],
                                   size=(globalDict["nrun"], IC_ranges.shape[0]))

    # run simulations in parallel, "nrun"s at a time
    # Leave one core free, but never request fewer than one worker:
    # Pool(0) raises ValueError on a single-core machine.
    # The with-block terminates the workers if map() raises, so a failed
    # batch does not leave processes behind.
    with Pool(max(1, cpu_count() - 1)) as p:
        stt = time.time()
        # Each call receives one run index from range(nrun) as its positional
        # argument (presumably used to pick a row of the arrays below --
        # confirm in simUtils_twoTorque).
        bb = p.map(functools.partial(simUtils_twoTorque.flyBug_listInput_TwoTorque, t=t,
                                     state0_ICs=state0_ICs,
                                     FAlphaTau_list=FAlphaTau_list,
                                     globalList=globalList), range(globalDict["nrun"]))
        print("time for one run:", time.time() - stt)
        p.close()
        p.join()

    # reshape to put into a pd data frame
    # NOTE(review): the column-major reshape depends on the array layout
    # returned by flyBug_listInput_TwoTorque -- confirm against that function.
    bb2 = np.array(bb).reshape(globalDict["nrun"], -1, order="F")
    bb3 = np.hstack([bb2, FAlphaTau_list])

    simDF = pd.DataFrame(bb3, columns=["x_0", "xd_0", "y_0", "yd_0",
                                       "theta_0", "thetad_0", "phi_0", "phid_0",
                                       "x_f", "xd_f", "y_f", "yd_f",
                                       "theta_f", "thetad_f", "phi_f", "phid_f",
                                       "F", "alpha", "tau0", "tau_w"])

    # write to database,
    # makes a new database if it doesn't already exist
    con1 = sqlite3.connect(os.path.join(pythonMadeData, "twoTorqueData.db"))
    try:
        # get table names from database
        # (a failure here now propagates instead of being swallowed by a bare
        # except, which left tableNames undefined or stale for the write below)
        cursorObj = con1.cursor()
        cursorObj.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tableNames = cursorObj.fetchall()
        cursorObj.close()

        # The numeric suffix is the count of ALL tables already in the database,
        # so testing tables are numbered after any tables that already exist.
        simDF.to_sql(dataType + str(len(tableNames)).zfill(2), con1, if_exists="fail", index=False)
    finally:
        # close connection, even if the query or the write failed
        con1.close()

0
time for one run: 297.110356092453
1
time for one run: 323.5797622203827
2
time for one run: 298.08820819854736
3
time for one run: 298.9751365184784
4
time for one run: 329.3194799423218


In [16]:
# get table names in database
con1 = sqlite3.connect(os.path.join(pythonMadeData, "twoTorqueData.db"))
try:
    cursorObj = con1.cursor()
    res = cursorObj.execute("SELECT name FROM sqlite_master WHERE type='table';")
    # each result row is a 1-tuple; keep just the name
    tableNames = [row[0] for row in res]
finally:
    # release the database file even if the query fails
    con1.close()
print(tableNames)

['trainingData_00', 'trainingData_01', 'trainingData_02', 'trainingData_03', 'trainingData_04', 'trainingData_05', 'trainingData_06', 'trainingData_07', 'trainingData_08', 'trainingData_09', 'testingData_10', 'testingData_11', 'testingData_12', 'testingData_13', 'testingData_14']


In [17]:
# Combine testing Data into a single Table
testTables = [name for name in tableNames if name.startswith("testingData_")]
if not testTables:
    # Stop BEFORE the DROP below: with nothing to combine, the CREATE statement
    # would be invalid SQL and an existing `test` table would already be gone.
    raise ValueError("no 'testingData_' tables in tableNames; refusing to drop and rebuild 'test'")

sqlStatement = "CREATE TABLE test AS " + " UNION ALL ".join(["SELECT * FROM " + name for name in testTables])
print(sqlStatement)

con1 = sqlite3.connect(os.path.join(pythonMadeData, "twoTorqueData.db"))
try:
    con1.execute("DROP TABLE IF EXISTS test")
    con1.execute(sqlStatement)
finally:
    # release the database file even if a statement fails
    con1.close()

CREATE TABLE test AS SELECT * FROM testingData_10 UNION ALL SELECT * FROM testingData_11 UNION ALL SELECT * FROM testingData_12 UNION ALL SELECT * FROM testingData_13 UNION ALL SELECT * FROM testingData_14


In [18]:
# Combine Training Data into a single Table
trainTables = [name for name in tableNames if name.startswith("trainingData_")]
if not trainTables:
    # Stop BEFORE the DROP below: with nothing to combine, the CREATE statement
    # would be invalid SQL and an existing `train` table would already be gone.
    raise ValueError("no 'trainingData_' tables in tableNames; refusing to drop and rebuild 'train'")

sqlStatement = "CREATE TABLE train AS " + " UNION ALL ".join(["SELECT * FROM " + name for name in trainTables])
print(sqlStatement)

con1 = sqlite3.connect(os.path.join(pythonMadeData, "twoTorqueData.db"))
try:
    con1.execute("DROP TABLE IF EXISTS train")
    con1.execute(sqlStatement)
finally:
    # release the database file even if a statement fails
    con1.close()

CREATE TABLE train AS SELECT * FROM trainingData_00 UNION ALL SELECT * FROM trainingData_01 UNION ALL SELECT * FROM trainingData_02 UNION ALL SELECT * FROM trainingData_03 UNION ALL SELECT * FROM trainingData_04 UNION ALL SELECT * FROM trainingData_05 UNION ALL SELECT * FROM trainingData_06 UNION ALL SELECT * FROM trainingData_07 UNION ALL SELECT * FROM trainingData_08 UNION ALL SELECT * FROM trainingData_09


In [19]:
# print the max row number
def largestRowNumber(cursor, table_name, print_out=False):
    """Return the largest ``rowid`` stored in ``table_name``.

    This equals the table's row count only when the rowids run 1..N without
    gaps; for an empty table the query yields NULL and ``None`` is returned.

    Parameters
    ----------
    cursor : database cursor used to run the query.
    table_name : str
        Table to inspect. It is interpolated directly into the SQL text, so
        pass trusted identifiers only.
    print_out : bool, optional
        When True, also print the value as "Total rows: <n>".
    """
    query = "SELECT max(rowid) from  {}".format(table_name)
    cursor.execute(query)
    first_row = cursor.fetchone()
    max_rowid = first_row[0]

    if not print_out:
        return max_rowid

    print('\nTotal rows: {}'.format(max_rowid))
    return max_rowid

# Report the largest rowid of each combined table.
con1 = sqlite3.connect(os.path.join(pythonMadeData, "twoTorqueData.db"))
try:
    cursorObj = con1.cursor()
    largestRowNumber(cursorObj, "train", print_out=True)
    largestRowNumber(cursorObj, "test", print_out=True)
finally:
    # release the database file even if a query fails (e.g. a table is missing)
    con1.close()


Total rows: 10000000

Total rows: 5000000


In [20]:
# drop intermediate, smaller training datasets
# (tableNames is the list captured before the combined tables were created)
sqlStatement = "".join(["DROP TABLE IF EXISTS " + name + "; " for name in tableNames if name.startswith("trainingData_")])
print(sqlStatement)

con1 = sqlite3.connect(os.path.join(pythonMadeData, "twoTorqueData.db"))
try:
    con1.executescript(sqlStatement)
finally:
    # release the database file even if the script fails
    con1.close()

DROP TABLE IF EXISTS trainingData_00; DROP TABLE IF EXISTS trainingData_01; DROP TABLE IF EXISTS trainingData_02; DROP TABLE IF EXISTS trainingData_03; DROP TABLE IF EXISTS trainingData_04; DROP TABLE IF EXISTS trainingData_05; DROP TABLE IF EXISTS trainingData_06; DROP TABLE IF EXISTS trainingData_07; DROP TABLE IF EXISTS trainingData_08; DROP TABLE IF EXISTS trainingData_09; 


In [21]:
# drop intermediate, smaller testing datasets
# (tableNames is the list captured before the combined tables were created)
sqlStatement = "".join(["DROP TABLE IF EXISTS " + name + "; " for name in tableNames if name.startswith("testingData_")])
print(sqlStatement)

con1 = sqlite3.connect(os.path.join(pythonMadeData, "twoTorqueData.db"))
try:
    con1.executescript(sqlStatement)
finally:
    # release the database file even if the script fails
    con1.close()

DROP TABLE IF EXISTS testingData_10; DROP TABLE IF EXISTS testingData_11; DROP TABLE IF EXISTS testingData_12; DROP TABLE IF EXISTS testingData_13; DROP TABLE IF EXISTS testingData_14; 


In [22]:
# get table names in database
con1 = sqlite3.connect(os.path.join(pythonMadeData, "twoTorqueData.db"))
try:
    cursorObj = con1.cursor()
    res = cursorObj.execute("SELECT name FROM sqlite_master WHERE type='table';")
    # each result row is a 1-tuple; keep just the name
    tableNames = [row[0] for row in res]
finally:
    # release the database file even if the query fails
    con1.close()
print(tableNames)

['test', 'train']


In [None]:
# # check data

# con1 = sqlite3.connect(os.path.join(pythonMadeData, "twoTorqueData.db"))
# trainDF = pd.read_sql_query('''
#                             SELECT * FROM train
#                             LIMIT 5
#                             ''', 
#                             con1)

# con1.close()
# trainDF

In [23]:
# display the folder that holds twoTorqueData.db
pythonMadeData

'D:/Dropbox/AcademiaDropbox/mothMachineLearning_dataAndFigs/PythonGeneratedData_twoTorque_2'