## Callin Switzer
### 16 Jan 2020
### Simulate data for training neural network 
### This uses the "one torque" (or "underactuated") model

In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import os
import pandas as pd
import seaborn as sns
from scipy.integrate import odeint
import random
import time
from datetime import datetime
import sys
from multiprocessing import Pool, cpu_count
import simUtils_one_torque # note that this is a custom-written file 
import importlib
import functools
import sqlite3
from collections import OrderedDict

print(sys.version)

3.6.5 |Anaconda, Inc.| (default, Mar 29 2018, 13:32:41) [MSC v.1900 64 bit (AMD64)]


In [2]:
# Record when this notebook was last executed (shown in the cell output).
now = datetime.now()
print("last run on " + str(now))

# Output location: the directory that holds the SQLite database of simulated
# trajectories, and the database file name inside that directory.
pythonMadeData = r"D:/Dropbox/AcademiaDropbox/mothMachineLearning_dataAndFigs/PythonGeneratedData_oneTorque"
databaseFileName = "onetorqueDataV3.db"

# makedirs (rather than mkdir) also creates any missing parent directories of
# this nested path, and exist_ok=True removes the check-then-create race of
# the os.path.exists + os.mkdir pair.
os.makedirs(pythonMadeData, exist_ok=True)

last run on 2020-01-29 14:35:01.205262


In [3]:
# Seed NumPy's global RNG so the np.random draws made in later cells are repeatable.
np.random.seed(12345)
# Re-import the custom simulation module (picks up edits made to the file since
# it was first imported); assigning to `_` keeps the module repr out of the output.
_ = importlib.reload(simUtils_one_torque)

In [4]:
# Global simulation options.
#
# NOTE: the insertion order of globalDict matters. globalList (below) is built
# from globalDict.values() and is handed to the simulation code as a plain
# list, so a consumer can only identify entries by position. The OrderedDict
# is therefore built from a sequence of (key, value) pairs: its order is then
# guaranteed by OrderedDict itself instead of depending on how the interpreter
# orders a dict literal.
globalDict = OrderedDict([
    ("bhead", 0.5),
    ("ahead", 0.9),
    ("bbutt", 0.75),
    ("abutt", 1.9),
    ("rho_head", 0.9),
    ("rho_butt", 0.4),
    ("rhoA", 0.00118),
    ("muA", 0.000186),
    ("L1", 0.9),
    ("L2", 1.9),
    ("L3", 0.75),
    ("K", 23000),
    ("c", 14075.8),
    ("g", 980.0),
    ("betaR", 0.0),
    ("nstep", 2),       # return start and endpoints
    ("nrun", 1000000),  # (max) number of trajectories
])

# Calculated variables (appended after the base options, in this order).
# m = rho * (4/3)*pi*b^2*a, i.e. density times the volume of a spheroid with
# semi-axes a, b, b.
globalDict["m1"] = globalDict["rho_head"]*(4/3)*np.pi*(globalDict["bhead"]**2)*globalDict["ahead"]
globalDict["m2"] = globalDict["rho_butt"]*(4/3)*np.pi*(globalDict["bbutt"]**2)*globalDict["abutt"]
# ec = a / b (ratio of the two semi-axes)
globalDict["echead"] = globalDict["ahead"]/globalDict["bhead"]
globalDict["ecbutt"] = globalDict["abutt"]/globalDict["bbutt"]
# I = (1/5)*m*b^2*(1 + ec^2), which equals (1/5)*m*(a^2 + b^2) since ec = a/b
globalDict["I1"] = (1/5)*globalDict["m1"]*(globalDict["bhead"]**2)*(1 + globalDict["echead"]**2)
globalDict["I2"] = (1/5)*globalDict["m2"]*(globalDict["bbutt"]**2)*(1 + globalDict["ecbutt"]**2)
# S = pi*b^2 (area of a circle of radius b)
globalDict["S_head"] = np.pi*globalDict["bhead"]**2
globalDict["S_butt"] = np.pi*globalDict["bbutt"]**2

# Time points handed to the simulation: nstep evenly spaced values on
# [0, 0.02]; with nstep == 2 these are just the start and the end point.
t = np.linspace(0, 0.02, num=globalDict["nstep"], endpoint=True)

# convert dict to list, since @jit works better with lists
globalList = list(globalDict.values())


# ranges for control variables
rangeDict = {
    "Fmin": 0,
    "Fmax": 44300,
    "alphaMin": 0,
    "alphaMax": 2*np.pi,
    "tau0Min": -100000,
    "tau0Max": 100000,
}

# ranges for controls: one [min, max] row each for F, alpha, tau0
ranges = np.array([
    [rangeDict["Fmin"], rangeDict["Fmax"]],
    [rangeDict["alphaMin"], rangeDict["alphaMax"]],
    [rangeDict["tau0Min"], rangeDict["tau0Max"]],
])

# ranges for initial conditions: one [min, max] row per state variable
IC_ranges = np.array([
    [0, 0],          # x
    [-1500, 1500],   # xdot
    [0, 0],          # y
    [-1500, 1500],   # ydot
    [0, 2*np.pi],    # theta
    [-25, 25],       # theta dot
    [0, 2*np.pi],    # phi
    [-25, 25],       # phi dot
])

In [14]:
def generateData(dataType, nrun):
    '''
    Simulate batches of trajectories and store each batch as a new table in
    the SQLite database at os.path.join(pythonMadeData, databaseFileName).

    Every loop iteration draws globalDict["nrun"] random control triples
    (F, alpha, tau0) and random initial states, evaluates
    simUtils_one_torque.flyBug_listInput_oneTorque for each of them in a
    process pool, and writes the result to a table named
    dataType + <zero-padded number of tables already in the database>.

    Params:
        dataType (str): prefix for the table name - "trainingData_" or
                        "testingData_"
        nrun (int): number of runs of the for-loop, i.e. number of batches
                    (tables) written. The number of trajectories per batch
                    is globalDict["nrun"], not this argument.

    Raises:
        sqlite3.Error: if the existing table names cannot be read. This used
            to be swallowed by a bare except, which then failed with a
            NameError on the undefined tableNames.
    '''
    dbPath = os.path.join(pythonMadeData, databaseFileName)
    nTraj = globalDict["nrun"]
    # leave two cores free, but never request fewer than one worker
    # (Pool raises ValueError for a process count below 1)
    nWorkers = max(1, cpu_count() - 2)

    for ii in range(nrun):
        print(ii)

        # generate random ICs and controls
        # random F, alpha, tau
        FAlphaTau_list = np.random.uniform(ranges[:, 0], ranges[:, 1],
                                           size=(nTraj, ranges.shape[0]))

        # random initial conditions for state 0
        state0_ICs = np.random.uniform(IC_ranges[:, 0], IC_ranges[:, 1],
                                       size=(nTraj, IC_ranges.shape[0]))

        # run simulations in parallel, "nrun"s at a time; the with-block
        # releases the worker processes even if a simulation raises
        with Pool(nWorkers) as p:
            stt = time.time()
            bb = p.map(functools.partial(simUtils_one_torque.flyBug_listInput_oneTorque, t=t,
                                          state0_ICs = state0_ICs,
                                          FAlphaTau_list= FAlphaTau_list,
                                          globalList = globalList), range(nTraj))
            print("time for one run:", time.time() - stt)

        # reshape to put into a pd data frame
        bb2 = np.array(bb).reshape(nTraj, -1, order = "F")
        bb3 = np.hstack([bb2, FAlphaTau_list])

        simDF = pd.DataFrame(bb3, columns =  ["x_0", "xd_0","y_0","yd_0",
                                             "theta_0","thetad_0","phi_0","phid_0",
                                             "x_f", "xd_f","y_f","yd_f",
                                             "theta_f","thetad_f","phi_f","phid_f",
                                                  "F", "alpha", "tau0"])

        # write to database,
        # makes a new database if it doesn't already exist
        con1 = sqlite3.connect(dbPath)
        try:
            # get table names from database; 'table' is single-quoted because
            # double quotes denote identifiers in SQL
            cursorObj = con1.cursor()
            cursorObj.execute("SELECT name FROM sqlite_master WHERE type='table'")
            tableNames = cursorObj.fetchall()
            cursorObj.close()

            # the numeric suffix is the count of ALL tables currently in the
            # database (not only those sharing this prefix);
            # if_exists="fail" refuses to overwrite an existing table
            simDF.to_sql(dataType + str(len(tableNames)).zfill(2), con1, if_exists = "fail", index = False)
        finally:
            # close connection, also when reading or writing failed
            con1.close()

In [15]:
# generate training data: one batch, written to a new table whose name
# starts with "trainingData_"
dataType = "trainingData_"
generateData(dataType, 1)

0
time for one run: 132.79464149475098


In [17]:
# generate testing data: one batch, written to a new table whose name
# starts with "testingData_"
dataType = "testingData_"
generateData(dataType, 1)

0
time for one run: 131.2130799293518


In [16]:
# List the name of every table currently stored in the database.
con1 = sqlite3.connect(os.path.join(pythonMadeData, databaseFileName))
cursorObj = con1.cursor()
res = cursorObj.execute("SELECT name FROM sqlite_master WHERE type='table';")
# each result row is a 1-tuple holding the table name
tableNames = [tableName for (tableName,) in res]
con1.close()
print(tableNames)

['test', 'train', 'trainingData_02']


In [8]:
 # Combine testing Data into a single Table
# Pick the source tables BEFORE touching the database: with no "testingData_"
# table in tableNames the statement would be the malformed
# "CREATE TABLE test AS ", and dropping "test" first would lose the
# previously combined data.
testingTables = [name for name in tableNames if name.startswith("testingData_")]
if not testingTables:
    raise ValueError('no "testingData_" tables in tableNames; existing "test" table left untouched')

sqlStatement = "CREATE TABLE test AS " + " UNION ALL ".join("SELECT * FROM " + name for name in testingTables)
print(sqlStatement)

con1 = sqlite3.connect(os.path.join(pythonMadeData, databaseFileName))
try:
    con1.execute("DROP TABLE IF EXISTS test")
    con1.execute(sqlStatement)
finally:
    # release the database file even if one of the statements fails
    con1.close()

CREATE TABLE test AS SELECT * FROM testingData_12


In [9]:
# Combine Training Data into a single Table
#
# Pick the source tables BEFORE touching the database: with no "trainingData_"
# table in tableNames the statement would be the malformed
# "CREATE TABLE train AS ", and dropping "train" first would lose the
# previously combined data.
trainingTables = [name for name in tableNames if name.startswith("trainingData_")]
if not trainingTables:
    raise ValueError('no "trainingData_" tables in tableNames; existing "train" table left untouched')

sqlStatement = "CREATE TABLE train AS " + " UNION ALL ".join("SELECT * FROM " + name for name in trainingTables)
print(sqlStatement)

con1 = sqlite3.connect(os.path.join(pythonMadeData, databaseFileName))
try:
    con1.execute("DROP TABLE IF EXISTS train")
    con1.execute(sqlStatement)
finally:
    # release the database file even if one of the statements fails
    con1.close()

CREATE TABLE train AS SELECT * FROM trainingData_00 UNION ALL SELECT * FROM trainingData_01 UNION ALL SELECT * FROM trainingData_02 UNION ALL SELECT * FROM trainingData_03 UNION ALL SELECT * FROM trainingData_04 UNION ALL SELECT * FROM trainingData_05 UNION ALL SELECT * FROM trainingData_06 UNION ALL SELECT * FROM trainingData_07 UNION ALL SELECT * FROM trainingData_08 UNION ALL SELECT * FROM trainingData_09 UNION ALL SELECT * FROM trainingData_10 UNION ALL SELECT * FROM trainingData_11


In [10]:
# print the max row number
def largestRowNumber(cursor, table_name, print_out=False):
    """ Return the largest rowid found in `table_name`.

    The value is None when the table has no rows, and it equals the row
    count only if the rowids run contiguously from 1. With `print_out`
    true the value is also printed under the label "Total rows".
    """
    query = "SELECT max(rowid) from  {}".format(table_name)
    cursor.execute(query)
    # an aggregate query always yields exactly one single-column row
    (max_rowid,) = cursor.fetchone()
    if print_out:
        print('\nTotal rows: {}'.format(max_rowid))
    return max_rowid

# Report the largest rowid of each combined table, "train" first.
con1 = sqlite3.connect(os.path.join(pythonMadeData, databaseFileName))
cursorObj = con1.cursor()
for combinedTable in ("train", "test"):
    largestRowNumber(cursorObj, combinedTable, print_out=True)
con1.close()


Total rows: 2000100

Total rows: 1000000


In [11]:
# Remove the per-batch "trainingData_" tables listed in tableNames.
con1 = sqlite3.connect(os.path.join(pythonMadeData, databaseFileName))
# one "DROP TABLE IF EXISTS <name>; " per matching table, joined into a script
sqlStatement = "".join(
    "DROP TABLE IF EXISTS " + tableName + "; "
    for tableName in tableNames
    if tableName.startswith("trainingData_")
)
print(sqlStatement)
con1.executescript(sqlStatement)
con1.close()

DROP TABLE IF EXISTS trainingData_00; DROP TABLE IF EXISTS trainingData_01; DROP TABLE IF EXISTS trainingData_02; DROP TABLE IF EXISTS trainingData_03; DROP TABLE IF EXISTS trainingData_04; DROP TABLE IF EXISTS trainingData_05; DROP TABLE IF EXISTS trainingData_06; DROP TABLE IF EXISTS trainingData_07; DROP TABLE IF EXISTS trainingData_08; DROP TABLE IF EXISTS trainingData_09; DROP TABLE IF EXISTS trainingData_10; DROP TABLE IF EXISTS trainingData_11; 


In [12]:
# Remove the per-batch "testingData_" tables listed in tableNames.
con1 = sqlite3.connect(os.path.join(pythonMadeData, databaseFileName))
# one "DROP TABLE IF EXISTS <name>; " per matching table, joined into a script
sqlStatement = "".join(
    "DROP TABLE IF EXISTS " + tableName + "; "
    for tableName in tableNames
    if tableName.startswith("testingData_")
)
print(sqlStatement)
con1.executescript(sqlStatement)
con1.close()

DROP TABLE IF EXISTS testingData_12; 


In [13]:
# List the name of every table left in the database after the clean-up.
con1 = sqlite3.connect(os.path.join(pythonMadeData, databaseFileName))
cursorObj = con1.cursor()
res = cursorObj.execute("SELECT name FROM sqlite_master WHERE type='table';")
# each result row is a 1-tuple holding the table name
tableNames = [tableName for (tableName,) in res]
con1.close()
print(tableNames)

['test', 'train']
