In [5]:
import os
import pandas as pd
import numpy as np
import json
import uuid
import random
from time import time

In [6]:
# random task_id
cur_id = -1
def gen_task_id():
    global cur_id
    cur_id += 1
    return cur_id

# random start time in num milliseconds
def gen_start_time(mu):
    return int(time()*1e3 + random.gauss(mu, 400))

def gen_end_time(start, mu):
    return int(start + random.gauss(mu, 400))

def gen_music(mu):
    roll = random.gauss(mu, 0.5)
    if roll <= 0.5:
        return 0
    else:
        return 1
    
def gen_interruptions(mu):
    return np.abs(int(random.gauss(mu, 3)))

def gen_workspace_volume(mu):
    roll = random.gauss(mu, 0.5)
    if roll > 1:
        return 1
    if roll < 0:
        return 0
    return roll
    
def gen_meetings(mu):
    return np.abs(int(random.gauss(mu, 3)))

def gen_breaks(mu):
    return np.abs(int(random.gauss(mu, 5)))

def gen_progress(mu):
    val = np.abs(random.gauss(mu, 0.2))
    if val > 1:
        val = 1
    return val

def gen_row(time_mu, 
            music_mu, 
            interruptions_mu, 
            volume_mu, 
            meetings_mu, 
            breaks_mu, 
            progress_mu):
    start = gen_start_time(time_mu)
    return np.array([
        gen_task_id(),
        start,
        gen_end_time(start, time_mu),
        gen_music(music_mu),
        gen_interruptions(interruptions_mu),
        gen_workspace_volume(volume_mu),
        gen_meetings(meetings_mu),
        gen_breaks(breaks_mu),
        gen_progress(progress_mu),
    ])

def gen_quiet_row():
    return gen_row(time_mu=500,
                    music_mu=0.2,
                    interruptions_mu=3,
                    volume_mu=0.1,
                    meetings_mu=2,
                    breaks_mu=6,
                    progress_mu=0.7
                   )

def gen_loud_row():
    return gen_row(time_mu=500,
                    music_mu=0.7,
                    interruptions_mu=7,
                    volume_mu=0.7,
                    meetings_mu=7,
                    breaks_mu=1,
                    progress_mu=0.2
                   )

In [7]:
columns = ['task_id',
           'start_time',
           'end_time',
           'music',
           'num_interruptions',
           'workspace_volume', 
           'num_meetings',
           'num_breaks',
           'progress']

In [8]:
loud_dataset = np.array([gen_loud_row() for x in range(1000)])
loud_df = pd.DataFrame(data=loud_dataset, columns=columns)

quiet_dataset = np.array([gen_quiet_row() for x in range(1000)])
quiet_df = pd.DataFrame(data=quiet_dataset, columns=columns)

In [10]:
quiet_df.head()

Unnamed: 0,task_id,start_time,end_time,music,num_interruptions,workspace_volume,num_meetings,num_breaks,progress
0,1000.0,1541291000000.0,1541291000000.0,0.0,7.0,0.210556,1.0,8.0,0.696915
1,1001.0,1541291000000.0,1541291000000.0,0.0,0.0,0.0,5.0,1.0,0.581728
2,1002.0,1541291000000.0,1541291000000.0,0.0,6.0,0.0,0.0,3.0,0.814499
3,1003.0,1541291000000.0,1541291000000.0,0.0,2.0,0.0,4.0,6.0,0.630473
4,1004.0,1541291000000.0,1541291000000.0,0.0,6.0,0.172996,2.0,9.0,0.549313


In [18]:
DATA_DIR = "data"

In [19]:
loud_df.to_json(os.path.join(DATA_DIR, "loud.json"))
loud_df.to_csv(os.path.join(DATA_DIR, "loud.csv"), index=False)
quiet_df.to_json(os.path.join(DATA_DIR, "quiet.json"))
quiet_df.to_csv(os.path.join(DATA_DIR, "quiet.csv"), index=False)

In [12]:
# need to disable date conversion to keep using unix time
test_load_df = pd.read_json("quiet.json", convert_dates=False)

In [13]:
test_load_df.head()

Unnamed: 0,end_time,music,num_breaks,num_interruptions,num_meetings,progress,start_time,task_id,workspace_volume
0,1541290912929,0,8,7,1,0.696915,1541290912671,1000,0.210556
1,1541290912395,0,1,0,5,0.581728,1541290912476,1001,0.0
10,1541290912698,1,3,0,1,0.541119,1541290911785,1010,0.536674
100,1541290912090,0,5,1,0,0.769778,1541290911632,1100,0.485778
101,1541290912101,0,4,0,2,0.552534,1541290911623,1101,0.008379
