In [24]:
import sys
import numpy as np
import datetime
import dill

from constant import *
from environment import *
from behavior import *
from utils import utils
from utils.chronometer import Chronometer

sys.path.append('../Nurture/server/notification/')
from nurture.learning.agents import *
from nurture.learning.state import State

In [37]:
# environment setup
# Reward handed to the agent for each possible user reaction to a sent
# notification; the simulation loop looks the sampled reaction up in this dict.
rewardCriteria = {
        ANSWER_NOTIFICATION_ACCEPT: 1,
        ANSWER_NOTIFICATION_IGNORE: 0,
        ANSWER_NOTIFICATION_DISMISS: -5,
}
# Simulated user built from the MTurk survey result files listed below.
# filterFunc is true only for rows whose 'rawWorkerID' ends in a character
# whose code point is congruent to 2 modulo 3.
# NOTE(review): presumably rows for which filterFunc is true are kept, i.e. a
# fixed one-third subset of the workers is used here - confirm in MTurkSurveyUser.
environment = MTurkSurveyUser(
    filePaths=[
            'survey/ver2_mturk/results/01_1st_Batch_3137574_batch_results.csv',
            'survey/ver2_mturk/results/02_Batch_3148398_batch_results.csv',
            'survey/ver2_mturk/results/03_Batch_3149214_batch_results.csv',
    ],
    filterFunc=(lambda r: ord(r['rawWorkerID'][-1]) % 3 == 2),
)

# Source of the (location, activity) pair the simulation loop queries for
# every time point, built from four behavior data files.
behavior = ExtraSensoryBehavior([
    'behavior/data/2.txt',
    'behavior/data/4.txt',
    'behavior/data/5.txt',
    'behavior/data/6.txt',
])
# The training loop runs while numDaysPassed is below this value.
simulationLengthDay = 140
# Minutes the chronometer is advanced per simulation step.
stepWidthMinutes = 1

behavior/data/2.txt
behavior/data/4.txt
behavior/data/5.txt
behavior/data/6.txt


No record for day=0, location=0, activity=2, notification=0
No record for day=0, location=0, activity=2, notification=1
No record for day=0, location=0, activity=3, notification=0
No record for day=0, location=0, activity=3, notification=1
No record for day=0, location=0, activity=4, notification=0
No record for day=0, location=0, activity=4, notification=1
No record for day=0, location=1, activity=2, notification=0
No record for day=0, location=1, activity=2, notification=1
No record for day=0, location=1, activity=3, notification=0
No record for day=0, location=1, activity=3, notification=1
No record for day=0, location=1, activity=4, notification=0
No record for day=0, location=1, activity=4, notification=1
No record for day=1, location=0, activity=2, notification=0
No record for day=1, location=0, activity=2, notification=1
No record for day=1, location=0, activity=3, notification=0
No record for day=1, location=0, activity=3, notification=1
No record for day=1, location=0, activit

In [26]:
# Print record counts per location and activity for the loaded behavior data.
behavior.printSummary()

# of total records: 36270
# of `nan` in location: 8512
# of `nan` in Activity: 18430
Location:
    home: 19375
    work: 8155
  others: 8740
Activity:
  stationary: 32565
     walking: 1885
     running: 118
     driving: 1204
   commuting: 498


In [38]:
# simulation setup
# Clock driving the simulation. skipFunc is true for hours before 10 and for
# hours from 22 onward.
# NOTE(review): presumably the chronometer skips time points for which
# skipFunc is true, leaving a 12-hour simulated day - confirm in Chronometer.
chronometer = Chronometer(skipFunc=(lambda hour, _m, _d: hour < 10 or hour >= 22))

# Time (minute, hour, day count) at which the last notification was sent; the
# simulation loops overwrite these whenever a notification goes out.
lastNotificationMinute = 0
lastNotificationHour = 0
lastNotificationNumDays = 0

# statistics
# The simulation loops append one dict per simulated step to this list.
simulationResults = []
# NOTE(review): totalReward and numSteps are not read or updated by any other
# cell visible in this notebook - possibly leftovers.
totalReward = 0.
numSteps = 0

In [28]:
# helper functions
def _get_time_of_day(currentHour, currentMinute):
    return currentHour / 24. + currentMinute / 24. / 60.

def _get_day_of_week(currentDay, currentHour, currentMinute):
    return currentDay / 7. + currentHour / 7. / 24. + currentMinute / 7. / 24. / 60.

def _get_motion(original_activity):
    """Translate a simulator activity code into the matching State motion value.

    Driving and commuting both translate to State.MOTION_DRIVING. An activity
    that is not one of the five codes listed below raises KeyError.
    """
    motion_by_activity = dict([
        (utils.STATE_ACTIVITY_STATIONARY, State.MOTION_STATIONARY),
        (utils.STATE_ACTIVITY_WALKING, State.MOTION_WALKING),
        (utils.STATE_ACTIVITY_RUNNING, State.MOTION_RUNNING),
        (utils.STATE_ACTIVITY_DRIVING, State.MOTION_DRIVING),
        (utils.STATE_ACTIVITY_COMMUTE, State.MOTION_DRIVING),
    ])
    return motion_by_activity[original_activity]

def _printResults(results):
    notificationEvents = [r for r in results if r['decision']]
    numNotifications = len(notificationEvents)
    numAcceptedNotis = len([r for r in notificationEvents if r['reward'] > 0])
    numDismissedNotis = len([r for r in notificationEvents if r['reward'] < 0])

    answerRate = numAcceptedNotis / numNotifications if numNotifications > 0 else 0.
    dismissRate = numDismissedNotis / numNotifications if numNotifications > 0 else 0.
    numActionedNotis = numAcceptedNotis + numDismissedNotis
    responseRate = numAcceptedNotis / numActionedNotis if numActionedNotis > 0 else 0.

    totalReward = sum([r['reward'] for r in results])

    expectedNumDeliveredNotifications = sum([r['probOfAnswering'] for r in results])
    deltaDays = results[-1]['context']['numDaysPassed'] - results[0]['context']['numDaysPassed'] + 1

    print("  reward=%f / step=%d (%f)" % (totalReward, len(results), totalReward / len(results)))
    print("  %d notifications have been sent (%.1f / day):" % (numNotifications, numNotifications / deltaDays))
    print("    - %d are answered (%.2f%%)"  % (numAcceptedNotis, answerRate * 100.))
    print("    - %d are dismissed (%.2f%%)"  % (numDismissedNotis, dismissRate * 100.))
    print("    - response rate: %.2f%%"  % (responseRate * 100.))
    print("  Expectation of total delivered notifications is %.2f" % expectedNumDeliveredNotifications)

def _filterByWeek(results, week):
    startDay = week * 7
    endDay = startDay + 7
    return [r for r in results
            if startDay <= r['context']['numDaysPassed'] < endDay]

In [49]:
# Inspection cell: show the value behind the COMMUTE activity constant.
utils.STATE_ACTIVITY_COMMUTE

4

In [None]:
# DQNAgent is otherwise only imported by a later cell of this notebook; import
# it here so this cell also runs on a fresh kernel executed top to bottom.
from tensorforce.agents import DQNAgent

# Training run: simulate `simulationLengthDay` days, letting the agent decide at
# every step whether to send a notification and learn from the sampled reward.
numDaysPassed, currentHour, currentMinute, currentDay = chronometer.forward(stepWidthMinutes)
toBePrintedWeek = 0
agent = TensorForceDQNAgent()
# Replace the wrapper's native agent with a hand-tuned DQN configuration.
agent.agent = DQNAgent(
        states=dict(type='float', shape=(15,)),
        actions=dict(type='int', num_actions=2),
        network=[
            dict(type='dense', size=20),
            dict(type='dense', size=20)
        ],
        batched_observe=False,
        actions_exploration={
            'type': 'epsilon_decay',
            'initial_epsilon': 0.3,
            'final_epsilon': 0.05,
            'timesteps': 80000,
        },
)
#agent = QLearningAgent()

while numDaysPassed < simulationLengthDay:
    # get environment info (user context)
    lastNotificationTime = utils.getDeltaMinutes(
            numDaysPassed, currentHour, currentMinute,
            lastNotificationNumDays, lastNotificationHour, lastNotificationMinute,
    )
    #stateLastNotification = utils.getLastNotificationState(lastNotificationTime)
    stateLocation, stateActivity = behavior.getLocationActivity(
            currentHour, currentMinute, currentDay)
    probAnsweringNotification, probIgnoringNotification, probDismissingNotification = (
            environment.getResponseDistribution(
                currentHour, currentMinute, currentDay,
                stateLocation, stateActivity, lastNotificationTime,
            )
    )
    probAnsweringNotification, probIgnoringNotification, probDismissingNotification = utils.normalize(
            probAnsweringNotification, probIgnoringNotification, probDismissingNotification)

    # prepare observations
    # ringerMode and screenStatus are not simulated; they are drawn at random.
    state = State(
        timeOfDay=_get_time_of_day(currentHour, currentMinute),
        dayOfWeek=_get_day_of_week(currentDay, currentHour, currentMinute),
        motion=_get_motion(stateActivity),
        location=stateLocation,
        notificationTimeElapsed=lastNotificationTime,
        ringerMode=np.random.choice(a=State.allRingerModeValues()),
        screenStatus=np.random.choice(a=State.allScreenStatusValues()),
    )

    # small hack - some agent keeps track of time
    # Best effort only: agents without the attribute raise AttributeError and an
    # unset (e.g. None) value raises TypeError. Anything else, including
    # KeyboardInterrupt, is no longer swallowed.
    # NOTE(review): the pickling cell sets `agent.last_send_notification`, a
    # different attribute name - confirm which one the agent actually reads.
    try:
        agent.last_notification_time -= datetime.timedelta(minutes=stepWidthMinutes)
    except (AttributeError, TypeError):
        pass

    # get action
    sendNotification = agent.get_action(state)

    # calculate reward
    if not sendNotification:
        reward = 0
    else:
        # Sample the user's reaction from the normalized response distribution.
        userReaction = np.random.choice(
                a=[ANSWER_NOTIFICATION_ACCEPT, ANSWER_NOTIFICATION_IGNORE, ANSWER_NOTIFICATION_DISMISS],
                p=[probAnsweringNotification, probIgnoringNotification, probDismissingNotification],
        )
        reward = rewardCriteria[userReaction]
        lastNotificationNumDays = numDaysPassed
        lastNotificationHour = currentHour
        lastNotificationMinute = currentMinute
    agent.feed_reward(reward)

    # log this session
    simulationResults.append({
            'context': {
                'numDaysPassed': numDaysPassed,
                'hour': currentHour,
                'minute': currentMinute,
                'day': currentDay,
                'location': stateLocation,
                'activity': stateActivity,
                'lastNotification': lastNotificationTime,
            },
            'probOfAnswering': probAnsweringNotification,
            'probOfIgnoring': probIgnoringNotification,
            'probOfDismissing': probDismissingNotification,
            'decision': sendNotification,
            'reward': reward,
    })

    # get the next decision time point
    numDaysPassed, currentHour, currentMinute, currentDay = chronometer.forward(stepWidthMinutes)

    # print current state
    # Once the clock has moved into a new week, summarise the week just finished.
    currentWeek = numDaysPassed // 7
    if currentWeek > toBePrintedWeek:
        print()
        print("===== end of week %d ====" % toBePrintedWeek)
        _printResults(_filterByWeek(simulationResults, toBePrintedWeek))
        toBePrintedWeek = currentWeek

In [40]:
# Re-print the weekly summary for every full week of the simulated period.
numTotalWeeks = simulationLengthDay // 7
for i in range(numTotalWeeks):
    print()
    print("===== end of week %d ====" % i)
    _printResults(_filterByWeek(simulationResults, i))


===== end of week 0 ====
  reward=-370.000000 / step=5040 (-0.073413)
  463 notifications have been sent (66.1 / day):
    - 195 are answered (42.12%)
    - 113 are dismissed (24.41%)
    - response rate: 63.31%
  Expectation of total delivered notifications is 1949.20

===== end of week 1 ====
  reward=-257.000000 / step=5040 (-0.050992)
  397 notifications have been sent (56.7 / day):
    - 178 are answered (44.84%)
    - 87 are dismissed (21.91%)
    - response rate: 67.17%
  Expectation of total delivered notifications is 1952.30

===== end of week 2 ====
  reward=-208.000000 / step=5040 (-0.041270)
  277 notifications have been sent (39.6 / day):
    - 112 are answered (40.43%)
    - 64 are dismissed (23.10%)
    - response rate: 63.64%
  Expectation of total delivered notifications is 2042.30

===== end of week 3 ====
  reward=-250.000000 / step=5040 (-0.049603)
  278 notifications have been sent (39.7 / day):
    - 110 are answered (39.57%)
    - 72 are dismissed (25.90%)
    -

In [None]:
# Debug cell: print the reward of every logged step, one per line.
# NOTE(review): this emits one line per simulated step, which floods the
# notebook output for long runs - consider clearing the output before sharing.
for r in simulationResults:
    print(r['reward'])

In [41]:
# Save the trained native TensorForce agent under /tmp/jp_test4/.
# NOTE(review): the later cells restore from /tmp/jp_test2/ and /tmp/jp_test/,
# not from this path - confirm which checkpoint is the intended one.
agent.agent.save_model('/tmp/jp_test4/')
#print(agent.agent)

'/tmp/jp_test4/-100800'

In [44]:
# Reset the agent's bookkeeping and pickle it as the server's initial model.
agent.last_send_notification = datetime.datetime.now()
agent.num_steps = 0
agent.on_pickle_save()
# Use a context manager so the file is flushed and closed even if dill fails.
with open('../Nurture/server/notification/local_data/models/initial/tf-dqn-tuned.p', 'wb') as modelFile:
    dill.dump(agent, modelFile)

In [9]:
# Build a fresh agent wrapper, load previously saved weights into a DQN with a
# lower exploration rate, and pickle it as the server's initial model.
# NOTE(review): this writes to the same .p file as the cell that pickles
# `agent`, so whichever cell runs last wins - confirm that is intended.
agent2 = TensorForceDQNAgent()
from tensorforce.agents import DQNAgent
agent2.agent = DQNAgent(
        states=dict(type='float', shape=(15,)),
        actions=dict(type='int', num_actions=2),
        network=[
            dict(type='dense', size=20),
            dict(type='dense', size=20)
        ],
        batched_observe=False,
        actions_exploration={
            'type': 'epsilon_decay',
            'initial_epsilon': 0.1,
            'final_epsilon': 0.015,
            'timesteps': 3000,
        },
)
agent2.agent.restore_model('/tmp/jp_test2/')
agent2.on_pickle_save()
# Use a context manager so the file is flushed and closed even if dill fails.
with open('../Nurture/server/notification/local_data/models/initial/tf-dqn-tuned.p', 'wb') as modelFile:
    dill.dump(agent2, modelFile)

[tf-dqn] generate_initial_model()
[tf-dqn] _get_native_agent()
[tf-dqn] done importing
[tf-dqn] figured out number of dimensions 15
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
[tf-dqn] spawn the agent
[tf-dqn] generate_initial_model() done
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Restoring parameters from /tmp/jp_test2/-100800
[tf-dqn] on_pickle_save()
[tf-dqn] on_pickle_save() done


In [None]:
# Evaluation run: restore a saved model into a fresh agent and simulate 21 days.
# NOTE(review): this cell reuses the global `chronometer`, `simulationResults`
# and lastNotification* variables. If they still hold the state of a previous
# run, numDaysPassed presumably already exceeds 21 and the loop body never
# executes - the simulation-setup cell likely has to be re-run first; confirm.
numDaysPassed, currentHour, currentMinute, currentDay = chronometer.forward(stepWidthMinutes)
toBePrintedWeek = 0

agent2 = TensorForceDQNAgent()
agent2.agent = DQNAgent(
        states=dict(type='float', shape=(15,)),
        actions=dict(type='int', num_actions=2),
        network=[
            dict(type='dense', size=20),
            dict(type='dense', size=20)
        ],
        batched_observe=False,
        actions_exploration={
            'type': 'epsilon_decay',
            'initial_epsilon': 0.1,
            'final_epsilon': 0.015,
            'timesteps': 3000,
        },
)
agent2.agent.restore_model('/tmp/jp_test/')
#agent2.agent.model.network = agent.agent.model.network
#agent2.agent.model.target_network = agent.agent.model.target_network
#agent = QLearningAgent()
#agent2 = agent

while numDaysPassed < 21:
    # get environment info (user context)
    lastNotificationTime = utils.getDeltaMinutes(
            numDaysPassed, currentHour, currentMinute,
            lastNotificationNumDays, lastNotificationHour, lastNotificationMinute,
    )
    #stateLastNotification = utils.getLastNotificationState(lastNotificationTime)
    stateLocation, stateActivity = behavior.getLocationActivity(
            currentHour, currentMinute, currentDay)
    probAnsweringNotification, probIgnoringNotification, probDismissingNotification = (
            environment.getResponseDistribution(
                currentHour, currentMinute, currentDay,
                stateLocation, stateActivity, lastNotificationTime,
            )
    )
    probAnsweringNotification, probIgnoringNotification, probDismissingNotification = utils.normalize(
            probAnsweringNotification, probIgnoringNotification, probDismissingNotification)

    # prepare observations
    # ringerMode and screenStatus are not simulated; they are drawn at random.
    state = State(
        timeOfDay=_get_time_of_day(currentHour, currentMinute),
        dayOfWeek=_get_day_of_week(currentDay, currentHour, currentMinute),
        motion=_get_motion(stateActivity),
        location=stateLocation,
        notificationTimeElapsed=lastNotificationTime,
        ringerMode=np.random.choice(a=State.allRingerModeValues()),
        screenStatus=np.random.choice(a=State.allScreenStatusValues()),
    )

    # small hack - some agent keeps track of time
    # Best effort only: agents without the attribute raise AttributeError and an
    # unset (e.g. None) value raises TypeError. Anything else, including
    # KeyboardInterrupt, is no longer swallowed.
    try:
        agent2.last_notification_time -= datetime.timedelta(minutes=stepWidthMinutes)
    except (AttributeError, TypeError):
        pass

    # get action
    sendNotification = agent2.get_action(state)

    # calculate reward
    if not sendNotification:
        reward = 0
    else:
        # Sample the user's reaction from the normalized response distribution.
        userReaction = np.random.choice(
                a=[ANSWER_NOTIFICATION_ACCEPT, ANSWER_NOTIFICATION_IGNORE, ANSWER_NOTIFICATION_DISMISS],
                p=[probAnsweringNotification, probIgnoringNotification, probDismissingNotification],
        )
        reward = rewardCriteria[userReaction]
        lastNotificationNumDays = numDaysPassed
        lastNotificationHour = currentHour
        lastNotificationMinute = currentMinute
    agent2.feed_reward(reward)

    # log this session
    simulationResults.append({
            'context': {
                'numDaysPassed': numDaysPassed,
                'hour': currentHour,
                'minute': currentMinute,
                'day': currentDay,
                'location': stateLocation,
                'activity': stateActivity,
                'lastNotification': lastNotificationTime,
            },
            'probOfAnswering': probAnsweringNotification,
            'probOfIgnoring': probIgnoringNotification,
            'probOfDismissing': probDismissingNotification,
            'decision': sendNotification,
            'reward': reward,
    })

    # get the next decision time point
    numDaysPassed, currentHour, currentMinute, currentDay = chronometer.forward(stepWidthMinutes)

    # print current state
    # Once the clock has moved into a new week, summarise the week just finished.
    currentWeek = numDaysPassed // 7
    if currentWeek > toBePrintedWeek:
        print()
        print("===== end of week %d ====" % toBePrintedWeek)
        _printResults(_filterByWeek(simulationResults, toBePrintedWeek))
        toBePrintedWeek = currentWeek

In [109]:
# Re-print the weekly summary for the first three weeks of logged results.
# NOTE(review): numTotalWeeks is assigned but not used in this cell; the loop
# is hard-coded to 3 weeks, presumably to match the 21-day limit of the
# evaluation loop - confirm and derive one from the other.
numTotalWeeks = simulationLengthDay // 7
for i in range(3):
    print()
    print("===== end of week %d ====" % i)
    _printResults(_filterByWeek(simulationResults, i))


===== end of week 0 ====
  reward=-815.000000 / step=5040 (-0.161706)
  838 notifications have been sent (119.7 / day):
    - 300 are answered (35.80%)
    - 223 are dismissed (26.61%)
    - response rate: 57.36%
  Expectation of total delivered notifications is 1794.70

===== end of week 1 ====
  reward=-822.000000 / step=5040 (-0.163095)
  840 notifications have been sent (120.0 / day):
    - 298 are answered (35.48%)
    - 224 are dismissed (26.67%)
    - response rate: 57.09%
  Expectation of total delivered notifications is 1766.70

===== end of week 2 ====
  reward=-739.000000 / step=5040 (-0.146627)
  853 notifications have been sent (121.9 / day):
    - 326 are answered (38.22%)
    - 213 are dismissed (24.97%)
    - response rate: 60.48%
  Expectation of total delivered notifications is 1827.70


In [87]:
# Inspection cell: list the components of a TensorForce agent's model.
# NOTE(review): `d` is not defined in any cell visible in this notebook, so this
# cell fails on a fresh kernel - presumably `d` was an agent created in a
# since-deleted cell; confirm or remove.
d.model.get_components()

{'distribution': <tensorforce.core.distributions.categorical.Categorical at 0x7fc611c07400>,
 'distribution_action': <tensorforce.core.distributions.categorical.Categorical at 0x7fc611c07400>,
 'network': <tensorforce.core.networks.network.LayeredNetwork at 0x7fc69c6e87f0>,
 'target_distribution': <tensorforce.core.distributions.categorical.Categorical at 0x7fc611c07fd0>,
 'target_distribution_action': <tensorforce.core.distributions.categorical.Categorical at 0x7fc611c07fd0>,
 'target_network': <tensorforce.core.networks.network.LayeredNetwork at 0x7fc611c076a0>}

In [103]:
# Save agent2's native TensorForce model under /tmp/test_new_model/.
agent2.agent.save_model('/tmp/test_new_model/')

'/tmp/test_new_model/-206639'