## Input

In [1]:
# Number of rows generated for the main table.
num_testdata = 100

# Names of the two tables written into the database.
table_main = 'main'
table_keywords = 'keywords'

# Path of the SQLite database file that receives the generated tables.
db = 'test.db'

## Init

In [2]:
import pandas as pd

In [3]:
# Column names of the main table. The leading 'id' entry is skipped when the
# rows and the dataframe are built (both use header[1:]).
header = [
    'id',
    'simid',
    'mediawiki',
    'path',
    'date',
    'moddate',
    'type',
    'tags',
]

# Column names of the keywords table: one (main_id, key, val) triple per row.
header_keywords = [
    'main_id',
    'key',
    'val',
]

### random time

In [4]:
from random import randrange
from datetime import timedelta, datetime
import time

def random_date(start, end):
    """
    Pick a random datetime in the half-open interval [start, end).

    The result is ``start`` plus a whole number of seconds; any microsecond
    part of ``end - start`` is ignored when sizing the interval.

    Raises ValueError (from ``randrange``) when ``end`` is not at least one
    full second after ``start``.
    """
    span = end - start
    # Whole seconds covered by the span (86400 seconds per day).
    span_seconds = span.days * 86400 + span.seconds
    offset = timedelta(seconds=randrange(span_seconds))
    return start + offset

# Bounds of the interval from which random dates are drawn:
# d1 is the earliest possible value, d2 the exclusive upper limit.
d1 = datetime.strptime(
    '1/1/2016 1:30 PM',
    '%m/%d/%Y %I:%M %p',
)
d2 = datetime.strptime(
    '1/1/2018 4:50 AM',
    '%m/%d/%Y %I:%M %p',
)


### random tags

In [5]:
def random_tags():
    """Return a comma-separated string of distinct, randomly chosen tags.

    Between 0 and 4 tags are drawn (with replacement) from a fixed list.
    Duplicate draws are collapsed, so the result may hold fewer tags than
    draws. Tags appear in the order they were first drawn, which keeps the
    output reproducible for a given random seed. An empty string is returned
    when nothing is drawn.
    """
    possible_tags=['important','pic','caco3','ubi','abc','rna','dna','surfactants','MetaDynamics']
    num_tags=randrange(0,5)  # number of draws: 0..4
    tags=[]
    for _ in range(num_tags):
        # randrange excludes its stop value, so the stop must be len(...)
        # itself; using len(...)-1 would never select the last tag.
        tag = possible_tags[randrange(len(possible_tags))]
        if tag not in tags:  # keep each tag once, in first-drawn order
            tags.append(tag)
    return ",".join(tags)

### random type

In [6]:
def random_type():
    """Return one randomly chosen type string.

    The empty string is one of the candidates, so the result may be ''.
    """
    possible_types=[
        '',
        'LAMMPS',
        'GROMACS',
        'PYTHON',
        'OVERVIEW',
    ]
    # randrange excludes its stop value, so the stop must be len(...) itself;
    # using len(...)-1 would never select the last entry ('OVERVIEW').
    i = randrange(len(possible_types))
    return possible_types[i]

## create data main

In [7]:
# Build num_testdata rows for the main table. Each row follows header[1:]
# (the leading 'id' column is left out); columns without a generated value
# are filled with ''.
data=[]
for idx in range(num_testdata):
    label = "MK{:04d}".format(idx+1)  # MK0001, MK0002, ...
    stamp = random_date(d1, d2)
    main = {
        'simid': label,
        'mediawiki': label,
        'date': stamp.strftime('%Y/%m/%d'),
        # modification date stored as a unix timestamp (local time)
        'moddate': time.mktime(stamp.timetuple()),
        'tags': random_tags(),
        'type': random_type(),
    }
    data.append([main.get(col, '') for col in header[1:]])
    

In [8]:
# Assemble the generated rows into the main table; header[1:] matches the
# rows, which do not carry the leading 'id' column.
df_main = pd.DataFrame(data=data, columns=header[1:])
df_main

Unnamed: 0,simid,mediawiki,path,date,moddate,type,tags
0,MK0001,MK0001,,2017/04/05,1.491417e+09,,"abc,dna"
1,MK0002,MK0002,,2017/10/24,1.508822e+09,LAMMPS,
2,MK0003,MK0003,,2016/12/06,1.481000e+09,PYTHON,
3,MK0004,MK0004,,2017/11/04,1.509770e+09,,"important,caco3,abc"
4,MK0005,MK0005,,2016/01/28,1.453998e+09,,
5,MK0006,MK0006,,2016/10/29,1.477730e+09,PYTHON,
6,MK0007,MK0007,,2016/05/15,1.463304e+09,,
7,MK0008,MK0008,,2017/08/24,1.503584e+09,,
8,MK0009,MK0009,,2016/01/26,1.453838e+09,,"surfactants,important"
9,MK0010,MK0010,,2016/06/24,1.466733e+09,LAMMPS,"pic,ubi"


## create data keywords

In [9]:
def random_keywords():
    """Return a dict of randomly chosen keyword -> value pairs.

    Between 0 and 4 (key, value) pairs are drawn with replacement from a
    fixed list. When the same key is drawn more than once, the last drawn
    value wins, so the dict may hold fewer entries than draws. An empty dict
    is returned when nothing is drawn.
    """
    candidates=[
        ['linker','a'],
        ['linker','b'],
        ['linker','c'],
        ['linker','d'],
        ['pH','7'],
        ['pH','8'],
        ['pH','9'],
        ['Temp','300'],
        ['Temp','500'],
        ['concentration','1'],
        ['concentration','2'],
    ]
    num_keys=randrange(0,5)  # number of draws: 0..4
    keywords={}
    for _ in range(num_keys):
        # randrange excludes its stop value, so the stop must be len(...)
        # itself; using len(...)-1 would never select the last pair.
        key, val = candidates[randrange(len(candidates))]
        keywords[key]=val
    return keywords

In [10]:
# Build the rows of the keywords table: one [main_id, key, val] row for every
# keyword attached to a row of df_main (main_id is that row's index label).
data=[]
for i in df_main.index:
    keywords=random_keywords()
    # dict.items() works on both Python 2 and Python 3; dict.iteritems() was
    # removed in Python 3 and raises AttributeError there.
    for k,v in keywords.items():
        data.append([i,k,v])

In [11]:
# Assemble the [main_id, key, val] rows into the keywords table.
df_keywords = pd.DataFrame(data=data, columns=header_keywords)
df_keywords

Unnamed: 0,main_id,key,val
0,0,Temp,300
1,1,pH,7
2,1,Temp,500
3,3,linker,c
4,4,pH,7
5,4,linker,a
6,5,concentration,1
7,5,Temp,500
8,6,pH,8
9,6,concentration,1


## Save table

In [12]:
from sqlalchemy import create_engine
import os

In [13]:
# Start from a clean slate: delete a database file left over from an
# earlier run, if there is one.
stale_db_present = os.path.exists(db)
if stale_db_present:
    os.remove(db)

### save main

In [14]:
# Select what the next cell writes: the main table name and its dataframe.
table, df = table_main, df_main

# Keyword arguments forwarded to DataFrame.to_sql; entries in `kwargs`
# take precedence over `defaults`.
defaults = {'if_exists': 'replace'}
kwargs = {}
kwargs = dict(defaults, **kwargs)

In [15]:
# Connect to the SQLite file named by `db` and write the selected dataframe
# into the selected table.
db_url = 'sqlite:///{}'.format(db)
engine = create_engine(db_url)
df.to_sql(table, engine, **kwargs)

### save keywords

In [16]:
# Select what the next cell writes: the keywords table name and its dataframe.
table, df = table_keywords, df_keywords

# Keyword arguments forwarded to DataFrame.to_sql; entries in `kwargs`
# take precedence over `defaults`.
defaults = {'if_exists': 'replace'}
kwargs = {}
kwargs = dict(defaults, **kwargs)

In [17]:
# Connect to the SQLite file named by `db` and write the selected dataframe
# into the selected table.
db_url = 'sqlite:///{}'.format(db)
engine = create_engine(db_url)
df.to_sql(table, engine, **kwargs)