In [1]:
import pandas as pd
import numpy as np

In [2]:
# Parameters of the synthetic time-series
start_date = "2017-10-01"  # first calendar day to generate (YYYY-MM-DD)
total_days = 200           # number of consecutive days, counted from start_date

In [3]:
# Build the per-day frame in a single constructor call:
#   date  - one 'YYYY-MM-DD' string per consecutive day, beginning at start_date
#   count - number of records to generate for that day, an integer in [5, 50)
# NOTE(review): no RNG seed is set in the cells shown, so each run yields different counts.
df = pd.DataFrame(
    {
        'date': pd.date_range(start_date, periods = total_days).strftime('%Y-%m-%d'),
        'count': np.random.randint(5, 50, total_days),
    },
    columns = ['date', 'count'],  # pin the column order explicitly
)

df.head()

Unnamed: 0,date,count
0,2017-10-01,24
1,2017-10-02,42
2,2017-10-03,33
3,2017-10-04,31
4,2017-10-05,42


In [4]:
# Expand the frequencies: emit each day's row `count` times so the frame holds one
# record per generated event (sum of `count` rows in total), in the original day order.
# Repeating the index labels and selecting them with .loc is vectorized; it avoids
# building one temporary DataFrame per day through iterrows()/concat and does not
# depend on the DataFrame constructor broadcasting a single row to a longer index.
# reset_index(drop = True) yields a fresh 0..N-1 index, as ignore_index = True did.
df = df.loc[df.index.repeat(df['count'])].reset_index(drop = True)

In [5]:
import random
def generate_timestamp(column):
    """Append a random time of day to a date string.

    Parameters
    ----------
    column : str
        Text placed at the start of the result (the notebook passes
        'YYYY-MM-DD' date strings).

    Returns
    -------
    str
        ``'<column> HH:MM:SS'`` where each time field is zero-padded to two
        digits; the hour is drawn from 0-23 and the minute and second from
        0-59 with ``random.randint``.
    """
    # The three draws happen in hour, minute, second order.
    hour = random.randint(0, 23)
    minute = random.randint(0, 59)
    second = random.randint(0, 59)
    return '{0:s} {1:02d}:{2:02d}:{3:02d}'.format(column, hour, minute, second)
# Turn every date string into a '<date> HH:MM:SS' timestamp string, then order the rows
# by that string (zero-padded fields make the lexical order chronological)
df['date'] = df['date'].map(generate_timestamp)
df = df.sort_values(by = 'date')

In [6]:
# Sanity check: preview the first five rows of the frame
df.head(n = 5)

Unnamed: 0,date,count
2,2017-10-01 00:34:49,24
5,2017-10-01 01:08:10,24
18,2017-10-01 01:10:59,24
14,2017-10-01 03:19:49,24
20,2017-10-01 04:15:22,24


In [7]:
# Mood labels to sample from at random. Entries may be repeated: sampling picks
# uniformly among list entries, so a label listed more often is drawn more often.
mood = [
    'happy',
    'neutral',
    'sad',
]

In [8]:
# Draw one mood label per row (independent draws, with replacement)
df['mood'] = np.random.choice(list(mood), size = len(df))
# Every record carries the same device id, stored as the string '1'
df['device'] = '1'

df.head()

Unnamed: 0,date,count,mood,device
2,2017-10-01 00:34:49,24,neutral,1
5,2017-10-01 01:08:10,24,sad,1
18,2017-10-01 01:10:59,24,neutral,1
14,2017-10-01 03:19:49,24,neutral,1
20,2017-10-01 04:15:22,24,sad,1


In [9]:
# Clean up: remove the per-day frequency column from the frame in place
del df['count']

In [220]:
# Write the generated records to smiley.csv (path relative to the working directory).
# With to_csv's defaults the DataFrame index is written too, as the leading unnamed column.
df.to_csv(path_or_buf = "smiley.csv")