In [1]:
import numpy as np
import pandas as pd
import pymc3 as pm
import matplotlib.pyplot as plt
import seaborn as sns
import csv
import math

%matplotlib inline



In [2]:
from os import listdir
from os.path import isfile, join
path = '../RetweetDataAOAS/retweet_data/'
root_tweet_names = [f for f in listdir(path) if isfile(join(path, f))]
num_root_tweets = len(root_tweet_names)

In [3]:
# Produces a dictionary of dataframes for each tweetfile, with initial 
# preprocessing
fields = ['RetweetCount', 'UserId', 'ScreenName', 'FollowerCount', 
          'DistanceFromRoot','Time', 'ParentScreenName', 'Text']
tweet_dfs = []
for i in range(num_root_tweets):
    tweet_df = pd.read_csv(path+root_tweet_names[i], sep="\t", header=None, 
                         quoting=csv.QUOTE_NONE, names=fields, encoding = "ISO-8859-1")
    
    tweet_df['Time'] = pd.to_datetime(tweet_df['Time'])
#     tweet_df["UserId"] = pd.to_numeric(tweet_df["UserId"], errors = 'ignore')
    
    screen_name_index = {}
    for index, row in tweet_df.iterrows():
        screen_name_index[row['ScreenName']] = index
    tweet_df['ParentDfIndex'] = tweet_df['ParentScreenName'].map(screen_name_index)
    
    tweet_dfs.append(tweet_df)

In [4]:
# Returns a dictionary of reaction times S_j^x keyed by user id
# Also modify the df to include a ReactionTime column
# S_j^x = T_j^x - T_P(j)^x is the time of the user's tweet minus its parent

def generate_reaction_times(tweet_df):
    reaction_times = {}
    for index, row in tweet_df.iterrows():
#         if index > 0: # ignore root tweet
        reaction_time = row['Time'] - tweet_df.at[row['ParentDfIndex'],
                                                  'Time']
        reaction_times[row['UserId']] = reaction_time
        tweet_df.loc[index,"ReactionTime"] = reaction_time
    return reaction_times

In [5]:
# modifies the dataframe to include M_j^x
# M_j^x for tweet x and user j is the number of j's followers who retweet x

def generate_number_of_follower_who_retweet(tweet_df):
    number_of_follower_who_retweet = {}
    
    for index, row in tweet_df.iterrows():
        parent_user_id = tweet_df.at[row['ParentDfIndex'], 'UserId']
        if parent_user_id not in number_of_follower_who_retweet:
            number_of_follower_who_retweet[parent_user_id] =1
        else:
            number_of_follower_who_retweet[parent_user_id] += 1
    
    # add to dataFrame
    for index, row in tweet_df.iterrows():
        if row['UserId'] in number_of_follower_who_retweet:
            count = number_of_follower_who_retweet[row['UserId']]
            tweet_df.loc[index,"FollowersRetweeted"] = count
        else: 
            tweet_df.loc[index,"FollowersRetweeted"] = 0

    return number_of_follower_who_retweet

In [6]:
log_s_j_x = []
for i in range(num_tweets):
    s_j_x = sorted(generate_reaction_times(tweet_dfs[i]).values())
    generate_number_of_follower_who_retweet(tweet_dfs[i])
    log_s_j_x.append([np.log(i.seconds) for i in s_j_x])

In [7]:
# count number of unique users

t = tweet_dfs[0]['UserId']
for i in range(1,len(tweet_dfs)):
    t = pd.concat([t, tweet_dfs[i]['UserId']])
t.nunique()

11604

In [8]:
tweet_dfs[1].head(20)

Unnamed: 0,RetweetCount,UserId,ScreenName,FollowerCount,DistanceFromRoot,Time,ParentScreenName,Text,ParentDfIndex,ReactionTime,FollowersRetweeted
0,0,15097615,hilaryr,40041,0,2012-04-12 03:05:42,hilaryr,oh and @AnnDRomney welcome to Twitter. You wi...,0,00:00:00,79.0
1,1,236026761,DylanByers,4332,1,2012-04-12 03:05:50,hilaryr,RT @hilaryr: oh and @AnnDRomney welcome to Twi...,0,00:00:08,0.0
2,2,21316253,ZekeJMiller,12002,1,2012-04-12 03:06:04,hilaryr,RT @hilaryr: oh and @AnnDRomney welcome to Twi...,0,00:00:22,0.0
3,3,21234528,EmilyABC,4070,1,2012-04-12 03:06:10,hilaryr,! RT @hilaryr: oh and @AnnDRomney welcome to T...,0,00:00:28,2.0
4,4,18430529,COWBOYSDODGERS,110,1,2012-04-12 03:06:42,hilaryr,RT @hilaryr: oh and @AnnDRomney welcome to Twi...,0,00:01:00,0.0
5,5,19024627,joshgreenman,3221,1,2012-04-12 03:06:48,hilaryr,RT @hilaryr: oh and @AnnDRomney welcome to Twi...,0,00:01:06,0.0
6,6,19954098,carpdd,289,1,2012-04-12 03:07:19,hilaryr,RT @hilaryr: oh and @AnnDRomney welcome to Twi...,0,00:01:37,0.0
7,7,442649949,WillDarrell,22,1,2012-04-12 03:07:36,hilaryr,RT @hilaryr: oh and @AnnDRomney welcome to Twi...,0,00:01:54,0.0
8,8,19523851,janoid,396,1,2012-04-12 03:08:17,hilaryr,With friends like you...RT @hilaryr oh and @An...,0,00:02:35,0.0
9,9,28628850,greggutfeld,76996,1,2012-04-12 03:09:02,hilaryr,oh that's precious! RT @hilaryr oh and @AnnDRo...,0,00:03:20,34.0


In [11]:
# currently working pymc3 model 
with pm.Model() as twitter_model:
    # global model parameters
    # Time-related hyperparameters
    alpha = pm.Normal('alpha', mu=0, sd=100)
    sigma_squared_delta = pm.InverseGamma('sigma_squared_delta', alpha=2, beta=2)
    log_a_tau = pm.Normal('log_a_tau', mu=0, sd=10)
    b_tau = pm.Gamma('b_tau', alpha=1, beta=.002)
    
    # Graph-related hyperparameters
    # sigma_squared_b = pm.InverseGamma('sigma_squared_b', alpha=0.5, beta=0.5, testval=10000)
    # beta_0 = pm.Normal('beta_0', mu=0, tau=1/sigma_squared_b)
    # beta_F = pm.Normal('beta_F', mu=0, tau=1/sigma_squared_b)
    # beta_d = pm.Normal('beta_d', mu=0, tau=1/sigma_squared_b)
    
    # log-normal model for reaction times, nonrecursive...
    a_tau = pm.Deterministic('a_tau', pm.math.exp(log_a_tau))
    for i in range(num_tweets):
        t_x = pm.InverseGamma('tau_x_squared_' + str(i), alpha=a_tau, beta=b_tau)
        a_x = pm.Normal('alpha_x_' + str(i), mu=alpha, tau=1/sigma_squared_delta)        
        l_x = pm.Normal('log_s_j_x_' + str(i), mu=a_x, tau=t_x**0.5, observed=log_s_j_x[i])


In [10]:
# pymc3 model in development
with pm.Model() as twitter_model:
    # global model parameters
    # Time-related hyperparameters
    alpha = pm.Normal('alpha', mu=0, sd=100)
    sigma_squared_delta = pm.InverseGamma('sigma_squared_delta', alpha=2, beta=2)
    log_a_tau = pm.Normal('log_a_tau', mu=0, sd=10)
    b_tau = pm.Gamma('b_tau', alpha=1, beta=.002)
    
    # Graph-related hyperparameters
    # binom model for graph structure
    sigma_squared_b = pm.InverseGamma('sigma_squared_b', alpha=0.5, beta=0.5, testval=10000)
    beta_0 = pm.Normal('beta_0', mu=0, sd=100)
    beta_F = pm.Normal('beta_F', mu=0, sd=100)
    beta_d = pm.Normal('beta_d', mu=0, sd=100)
    
    # log-normal model for reaction times, nonrecursive...
    a_tau = pm.Deterministic('a_tau', pm.math.exp(log_a_tau))
    for i in range(num_root_tweets):
        t_x = pm.InverseGamma('tau_x_squared_' + str(i), alpha=a_tau, beta=b_tau)
        a_x = pm.Normal('alpha_x_' + str(i), mu=alpha, tau=1/sigma_squared_delta)        
        l_x = pm.Normal('log_s_j_x_' + str(i), mu=a_x, tau=t_x**0.5, observed=log_s_j_x[i])
        
        # binom model for user
        for j, row in tweet_dfs[i].iterrows():
        f_j_x = row['FollowerCount']
        d_j_x = row['DistanceFromRoot']
        b_j_x = beta_0 + beta_F*math.log(f_j_x + 1) + beta_d*math.log()
        mu_j = beta_0 + beta_f*

IndentationError: expected an indented block (<ipython-input-10-e7bcd7598d51>, line 26)

In [13]:
# Run and fit our model
with twitter_model:
    trace = pm.sample(300, tune=300)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [alpha_x_51, tau_x_squared_51_log__, alpha_x_50, tau_x_squared_50_log__, alpha_x_49, tau_x_squared_49_log__, alpha_x_48, tau_x_squared_48_log__, alpha_x_47, tau_x_squared_47_log__, alpha_x_46, tau_x_squared_46_log__, alpha_x_45, tau_x_squared_45_log__, alpha_x_44, tau_x_squared_44_log__, alpha_x_43, tau_x_squared_43_log__, alpha_x_42, tau_x_squared_42_log__, alpha_x_41, tau_x_squared_41_log__, alpha_x_40, tau_x_squared_40_log__, alpha_x_39, tau_x_squared_39_log__, alpha_x_38, tau_x_squared_38_log__, alpha_x_37, tau_x_squared_37_log__, alpha_x_36, tau_x_squared_36_log__, alpha_x_35, tau_x_squared_35_log__, alpha_x_34, tau_x_squared_34_log__, alpha_x_33, tau_x_squared_33_log__, alpha_x_32, tau_x_squared_32_log__, alpha_x_31, tau_x_squared_31_log__, alpha_x_30, tau_x_squared_30_log__, alpha_x_29, tau_x_squared_29_log__, alpha_x_28, tau_x_squared_28_log__, alpha_x_27

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/usr/lib/python2.7/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    169     pkg_name = mod_name.rpartition('.')[0]
    170     main_globals = sys.modules["__main__"].__dict__
    171     if alter_argv:
    172         sys.argv[0] = fname
    173     return _run_code(code, main_globals, None,
--> 174                      "__main__", fname, loader, pkg_name)
        fname = '/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = ''
    175 
    176 def run_module(mod_name, init_globals=None,
    177                run_name=None, alter_sys=False):
    178     """Execute a module's code without importing it

...........................................................................
/usr/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x7f7bb3289830, file "/...2.7/dist-packages/ipykernel_launcher.py", line 5>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}, init_globals=None, mod_name='__main__', mod_fname='/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0x7f7bb3289830, file "/...2.7/dist-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         self.io_loop = ioloop.IOLoop.current()
    477         try:
--> 478             self.io_loop.start()
        self.io_loop.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    479         except KeyboardInterrupt:
    480             pass
    481 
    482 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    276         if self.control_stream:
    277             self.control_stream.on_recv(self.dispatch_control, copy=False)
    278 
    279         def make_dispatcher(stream):
    280             def dispatcher(msg):
--> 281                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    282             return dispatcher
    283 
    284         for s in self.shell_streams:
    285             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(300, tune=300)', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 5, 2, 18, 30, 5, 604427, tzinfo=tzutc()), u'msg_id': u'19AE13CA535C428586371752A24ECA0D', u'msg_type': u'execute_request', u'session': u'37034FF6E2E4426FA9A0277E54949633', u'username': u'username', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'19AE13CA535C428586371752A24ECA0D', 'msg_type': u'execute_request', 'parent_header': {}})
    227             self.log.warn("Unknown message type: %r", msg_type)
    228         else:
    229             self.log.debug("%s: %s", msg_type, msg)
    230             self.pre_handler_hook()
    231             try:
--> 232                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['37034FF6E2E4426FA9A0277E54949633']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(300, tune=300)', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 5, 2, 18, 30, 5, 604427, tzinfo=tzutc()), u'msg_id': u'19AE13CA535C428586371752A24ECA0D', u'msg_type': u'execute_request', u'session': u'37034FF6E2E4426FA9A0277E54949633', u'username': u'username', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'19AE13CA535C428586371752A24ECA0D', 'msg_type': u'execute_request', 'parent_header': {}}
    233             except Exception:
    234                 self.log.error("Exception in message handler:", exc_info=True)
    235             finally:
    236                 self.post_handler_hook()

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['37034FF6E2E4426FA9A0277E54949633'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(300, tune=300)', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2018, 5, 2, 18, 30, 5, 604427, tzinfo=tzutc()), u'msg_id': u'19AE13CA535C428586371752A24ECA0D', u'msg_type': u'execute_request', u'session': u'37034FF6E2E4426FA9A0277E54949633', u'username': u'username', u'version': u'5.2'}, 'metadata': {}, 'msg_id': u'19AE13CA535C428586371752A24ECA0D', 'msg_type': u'execute_request', 'parent_header': {}})
    392         if not silent:
    393             self.execution_count += 1
    394             self._publish_execute_input(code, parent, self.execution_count)
    395 
    396         reply_content = self.do_execute(code, silent, store_history,
--> 397                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    398 
    399         # Flush output before sending the reply.
    400         sys.stdout.flush()
    401         sys.stderr.flush()

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(300, tune=300)', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(300, tune=300)'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=(u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(300, tune=300)',), **kwargs={'silent': False, 'store_history': True})
    528             )
    529         self.payload_manager.write_payload(payload)
    530 
    531     def run_cell(self, *args, **kwargs):
    532         self._last_traceback = None
--> 533         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = (u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(300, tune=300)',)
        kwargs = {'silent': False, 'store_history': True}
    534 
    535     def _showtraceback(self, etype, evalue, stb):
    536         # try to preserve ordering of tracebacks and print statements
    537         sys.stdout.flush()

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(300, tune=300)', store_history=True, silent=False, shell_futures=True)
   2713                 self.displayhook.exec_result = result
   2714 
   2715                 # Execute the user code
   2716                 interactivity = "none" if silent else self.ast_node_interactivity
   2717                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2718                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2719                 
   2720                 self.last_execution_succeeded = not has_raised
   2721 
   2722                 # Reset this so later displayed values do not modify the

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.With object>], cell_name='<ipython-input-13-dcdbb1979a0a>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<ExecutionResult object at 7f7b58130850, executi..._before_exec=None error_in_exec=None result=None>)
   2817 
   2818         try:
   2819             for i, node in enumerate(to_run_exec):
   2820                 mod = ast.Module([node])
   2821                 code = compiler(mod, cell_name, "exec")
-> 2822                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7f7b46f045b0, file "<ipython-input-13-dcdbb1979a0a>", line 2>
        result = <ExecutionResult object at 7f7b58130850, executi..._before_exec=None error_in_exec=None result=None>
   2823                     return True
   2824 
   2825             for i, node in enumerate(to_run_interactive):
   2826                 mod = ast.Interactive([node])

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7f7b46f045b0, file "<ipython-input-13-dcdbb1979a0a>", line 2>, result=<ExecutionResult object at 7f7b58130850, executi..._before_exec=None error_in_exec=None result=None>)
   2877         outflag = 1  # happens in more places, so it's easier as default
   2878         try:
   2879             try:
   2880                 self.hooks.pre_run_code_hook()
   2881                 #rprint('Running code', repr(code_obj)) # dbg
-> 2882                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7f7b46f045b0, file "<ipython-input-13-dcdbb1979a0a>", line 2>
        self.user_global_ns = {'In': ['', u"import numpy as np\nimport pandas as pd\nimpor...ath\n\nget_ipython().magic(u'matplotlib inline')", u"from os import listdir\nfrom os.path import is...n(path, f))]\nnum_tweets = len(root_tweet_names)", u'# Produces a dictionary of dataframes for each...ame_index)\n    \n    tweet_dfs.append(tweet_df)', u'# Returns a dictionary of reaction times S_j^x...ime"] = reaction_time\n    return reaction_times', u'# modifies the dataframe to include M_j^x\n# M...= 0\n\n    return number_of_follower_who_retweet', u'log_s_j_x = []\nfor i in range(num_tweets):\n ...s_j_x.append([np.log(i.seconds) for i in s_j_x])', u"# count number of unique users\n\nt = tweet_df...concat([t, tweet_dfs[i]['UserId']])\nt.nunique()", u'tweet_dfs[1].head(20)', u"# currently working pymc3 model \nwith pm.Mode...i), mu=a_x, tau=t_x**0.5, observed=log_s_j_x[i])", u"# pymc3 model in development\nwith pm.Model() ...ta_d*math.log()\n        mu_j = beta_0 + beta_f*", u"# currently working pymc3 model \nwith pm.Mode...i), mu=a_x, tau=t_x**0.5, observed=log_s_j_x[i])", u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(1000, tune=1000, cores=4)', u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(300, tune=300)'], 'Out': {7: 11604, 8:     RetweetCount     UserId       ScreenName Fol...      0.0  
19     00:01:43                 0.0  }, '_':     RetweetCount     UserId       ScreenName Fol...      0.0  
19     00:01:43                 0.0  , '_7': 11604, '_8':     RetweetCount     UserId       ScreenName Fol...      0.0  
19     00:01:43                 0.0  , '__': 11604, '___': '', '__builtin__': <module '__builtin__' (built-in)>, '__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Automatically created module for IPython interactive environment', ...}
        self.user_ns = {'In': ['', u"import numpy as np\nimport pandas as pd\nimpor...ath\n\nget_ipython().magic(u'matplotlib inline')", u"from os import listdir\nfrom os.path import is...n(path, f))]\nnum_tweets = len(root_tweet_names)", u'# Produces a dictionary of dataframes for each...ame_index)\n    \n    tweet_dfs.append(tweet_df)', u'# Returns a dictionary of reaction times S_j^x...ime"] = reaction_time\n    return reaction_times', u'# modifies the dataframe to include M_j^x\n# M...= 0\n\n    return number_of_follower_who_retweet', u'log_s_j_x = []\nfor i in range(num_tweets):\n ...s_j_x.append([np.log(i.seconds) for i in s_j_x])', u"# count number of unique users\n\nt = tweet_df...concat([t, tweet_dfs[i]['UserId']])\nt.nunique()", u'tweet_dfs[1].head(20)', u"# currently working pymc3 model \nwith pm.Mode...i), mu=a_x, tau=t_x**0.5, observed=log_s_j_x[i])", u"# pymc3 model in development\nwith pm.Model() ...ta_d*math.log()\n        mu_j = beta_0 + beta_f*", u"# currently working pymc3 model \nwith pm.Mode...i), mu=a_x, tau=t_x**0.5, observed=log_s_j_x[i])", u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(1000, tune=1000, cores=4)', u'# Run and fit our model\nwith twitter_model:\n    trace = pm.sample(300, tune=300)'], 'Out': {7: 11604, 8:     RetweetCount     UserId       ScreenName Fol...      0.0  
19     00:01:43                 0.0  }, '_':     RetweetCount     UserId       ScreenName Fol...      0.0  
19     00:01:43                 0.0  , '_7': 11604, '_8':     RetweetCount     UserId       ScreenName Fol...      0.0  
19     00:01:43                 0.0  , '__': 11604, '___': '', '__builtin__': <module '__builtin__' (built-in)>, '__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Automatically created module for IPython interactive environment', ...}
   2883             finally:
   2884                 # Reset our crash handler in place
   2885                 sys.excepthook = old_excepthook
   2886         except SystemExit as e:

...........................................................................
/media/storage/AppData/Dropbox (Personal)/MIT/2018S/6.882-fp/andrew/<ipython-input-13-dcdbb1979a0a> in <module>()
      1 # Run and fit our model
      2 with twitter_model:
----> 3     trace = pm.sample(300, tune=300)

...........................................................................
/usr/local/lib/python2.7/dist-packages/pymc3/sampling.py in sample(draws=600, step=<pymc3.step_methods.hmc.nuts.NUTS object>, init='auto', n_init=200000, start=[{'alpha': array(0.8255756292120513), 'alpha_x_0': array(-0.17272451040733272), 'alpha_x_1': array(-0.642479993291452), 'alpha_x_10': array(0.30966083801768085), 'alpha_x_11': array(0.4201478307703719), 'alpha_x_12': array(0.15649445872263823), 'alpha_x_13': array(-0.30245931165626305), 'alpha_x_14': array(-0.7715458322606361), 'alpha_x_15': array(0.5038502945949337), 'alpha_x_16': array(0.9520822434297227), ...}, {'alpha': array(0.14073512651293574), 'alpha_x_0': array(0.7216821498584356), 'alpha_x_1': array(0.11880665999443374), 'alpha_x_10': array(-0.5101913778178075), 'alpha_x_11': array(0.2816812562688684), 'alpha_x_12': array(0.27088186728065766), 'alpha_x_13': array(0.580414738922884), 'alpha_x_14': array(0.46504540239788517), 'alpha_x_15': array(0.22794983621268394), 'alpha_x_16': array(0.39699470776314194), ...}], trace=None, chain_idx=0, chains=2, njobs=2, tune=300, nuts_kwargs=None, step_kwargs=None, progressbar=True, model=<pymc3.model.Model object>, random_seed=[611552271, 661875906], live_plot=False, discard_tuned_samples=True, live_plot_kwargs=None, compute_convergence_checks=True, **kwargs={})
    414     parallel = njobs > 1 and chains > 1 and not has_population_samplers
    415     if parallel:
    416         _log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, njobs))
    417         _print_step_hierarchy(step)
    418         try:
--> 419             trace = _mp_sample(**sample_args)
        trace = None
        sample_args = {'chain': 0, 'chains': 2, 'draws': 600, 'live_plot': False, 'live_plot_kwargs': None, 'model': <pymc3.model.Model object>, 'njobs': 2, 'progressbar': True, 'random_seed': [611552271, 661875906], 'start': [{'alpha': array(0.8255756292120513), 'alpha_x_0': array(-0.17272451040733272), 'alpha_x_1': array(-0.642479993291452), 'alpha_x_10': array(0.30966083801768085), 'alpha_x_11': array(0.4201478307703719), 'alpha_x_12': array(0.15649445872263823), 'alpha_x_13': array(-0.30245931165626305), 'alpha_x_14': array(-0.7715458322606361), 'alpha_x_15': array(0.5038502945949337), 'alpha_x_16': array(0.9520822434297227), ...}, {'alpha': array(0.14073512651293574), 'alpha_x_0': array(0.7216821498584356), 'alpha_x_1': array(0.11880665999443374), 'alpha_x_10': array(-0.5101913778178075), 'alpha_x_11': array(0.2816812562688684), 'alpha_x_12': array(0.27088186728065766), 'alpha_x_13': array(0.580414738922884), 'alpha_x_14': array(0.46504540239788517), 'alpha_x_15': array(0.22794983621268394), 'alpha_x_16': array(0.39699470776314194), ...}], ...}
    420         except pickle.PickleError:
    421             _log.warn("Could not pickle model, sampling singlethreaded.")
    422             _log.debug('Pickling error:', exec_info=True)
    423             parallel = False

...........................................................................
/usr/local/lib/python2.7/dist-packages/pymc3/sampling.py in _mp_sample(**kwargs={'draws': 600, 'live_plot': False, 'live_plot_kwargs': None, 'model': <pymc3.model.Model object>, 'step': <pymc3.step_methods.hmc.nuts.NUTS object>, 'trace': None, 'tune': 300})
    944 
    945     chain_nums = list(range(chain, chain + chains))
    946     pbars = [kwargs.pop('progressbar')] + [False] * (chains - 1)
    947     jobs = (delayed(_sample)(*args, **kwargs)
    948             for args in zip(chain_nums, pbars, rseed, start))
--> 949     traces = Parallel(n_jobs=njobs)(jobs)
        traces = undefined
        njobs = 2
        jobs = <generator object <genexpr>>
    950     return MultiTrace(traces)
    951 
    952 
    953 def stop_tuning(step):

...........................................................................
/usr/local/lib/python2.7/dist-packages/joblib/parallel.py in __call__(self=Parallel(n_jobs=2), iterable=<generator object <genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=2)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Wed May  2 14:32:19 2018
PID: 27567                                   Python 2.7.12: /usr/bin/python
...........................................................................
/usr/local/lib/python2.7/dist-packages/joblib/parallel.py in __call__(self=<joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _sample>
        args = (0, True, 611552271, {'alpha': array(0.8255756292120513), 'alpha_x_0': array(-0.17272451040733272), 'alpha_x_1': array(-0.642479993291452), 'alpha_x_10': array(0.30966083801768085), 'alpha_x_11': array(0.4201478307703719), 'alpha_x_12': array(0.15649445872263823), 'alpha_x_13': array(-0.30245931165626305), 'alpha_x_14': array(-0.7715458322606361), 'alpha_x_15': array(0.5038502945949337), 'alpha_x_16': array(0.9520822434297227), ...})
        kwargs = {'draws': 600, 'live_plot': False, 'live_plot_kwargs': None, 'model': <pymc3.model.Model object>, 'step': <pymc3.step_methods.hmc.nuts.NUTS object>, 'trace': None, 'tune': 300}
        self.items = [(<function _sample>, (0, True, 611552271, {'alpha': array(0.8255756292120513), 'alpha_x_0': array(-0.17272451040733272), 'alpha_x_1': array(-0.642479993291452), 'alpha_x_10': array(0.30966083801768085), 'alpha_x_11': array(0.4201478307703719), 'alpha_x_12': array(0.15649445872263823), 'alpha_x_13': array(-0.30245931165626305), 'alpha_x_14': array(-0.7715458322606361), 'alpha_x_15': array(0.5038502945949337), 'alpha_x_16': array(0.9520822434297227), ...}), {'draws': 600, 'live_plot': False, 'live_plot_kwargs': None, 'model': <pymc3.model.Model object>, 'step': <pymc3.step_methods.hmc.nuts.NUTS object>, 'trace': None, 'tune': 300})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/usr/local/lib/python2.7/dist-packages/pymc3/sampling.py in _sample(chain=0, progressbar=True, random_seed=611552271, start={'alpha': array(0.8255756292120513), 'alpha_x_0': array(-0.17272451040733272), 'alpha_x_1': array(-0.642479993291452), 'alpha_x_10': array(0.30966083801768085), 'alpha_x_11': array(0.4201478307703719), 'alpha_x_12': array(0.15649445872263823), 'alpha_x_13': array(-0.30245931165626305), 'alpha_x_14': array(-0.7715458322606361), 'alpha_x_15': array(0.5038502945949337), 'alpha_x_16': array(0.9520822434297227), ...}, draws=600, step=<pymc3.step_methods.hmc.nuts.NUTS object>, trace=None, tune=300, model=<pymc3.model.Model object>, live_plot=False, live_plot_kwargs=None, **kwargs={})
    521                             tune, model, random_seed)
    522     if progressbar:
    523         sampling = tqdm(sampling, total=draws)
    524     try:
    525         strace = None
--> 526         for it, strace in enumerate(sampling):
        it = undefined
        strace = None
        sampling =   0%|          | 0/600 [00:00<?, ?it/s]
    527             if live_plot:
    528                 if live_plot_kwargs is None:
    529                     live_plot_kwargs = {}
    530                 if it >= skip_first:

...........................................................................
/usr/local/lib/python2.7/dist-packages/tqdm/_tqdm.py in __iter__(self=  0%|          | 0/600 [00:00<?, ?it/s])
    950             except AttributeError:
    951                 raise TqdmDeprecationWarning("""\
    952 Please use `tqdm_gui(...)` instead of `tqdm(..., gui=True)`
    953 """, fp_write=getattr(self.fp, 'write', sys.stderr.write))
    954 
--> 955             for obj in iterable:
        obj = undefined
        iterable = <generator object _iter_sample>
    956                 yield obj
    957                 # Update and possibly print the progressbar.
    958                 # Note: does not call self.update(1) for speed optimisation.
    959                 n += 1

...........................................................................
/usr/local/lib/python2.7/dist-packages/pymc3/sampling.py in _iter_sample(draws=600, step=<pymc3.step_methods.hmc.nuts.NUTS object>, start={'alpha': array(0.8255756292120513), 'alpha_x_0': array(-0.17272451040733272), 'alpha_x_1': array(-0.642479993291452), 'alpha_x_10': array(0.30966083801768085), 'alpha_x_11': array(0.4201478307703719), 'alpha_x_12': array(0.15649445872263823), 'alpha_x_13': array(-0.30245931165626305), 'alpha_x_14': array(-0.7715458322606361), 'alpha_x_15': array(0.5038502945949337), 'alpha_x_16': array(0.9520822434297227), ...}, trace=None, chain=0, tune=300, model=<pymc3.model.Model object>, random_seed=611552271)
    619         step.tune = bool(tune)
    620         for i in range(draws):
    621             if i == tune:
    622                 step = stop_tuning(step)
    623             if step.generates_stats:
--> 624                 point, states = step.step(point)
        point = {'alpha': array(0.8255756292120513), 'alpha_x_0': array(-0.17272451040733272), 'alpha_x_1': array(-0.642479993291452), 'alpha_x_10': array(0.30966083801768085), 'alpha_x_11': array(0.4201478307703719), 'alpha_x_12': array(0.15649445872263823), 'alpha_x_13': array(-0.30245931165626305), 'alpha_x_14': array(-0.7715458322606361), 'alpha_x_15': array(0.5038502945949337), 'alpha_x_16': array(0.9520822434297227), ...}
        states = undefined
        step.step = <bound method NUTS.step of <pymc3.step_methods.hmc.nuts.NUTS object>>
    625                 if strace.supports_sampler_stats:
    626                     strace.record(point, states)
    627                 else:
    628                     strace.record(point)

...........................................................................
/usr/local/lib/python2.7/dist-packages/pymc3/step_methods/arraystep.py in step(self=<pymc3.step_methods.hmc.nuts.NUTS object>, point={'alpha': array(0.8255756292120513), 'alpha_x_0': array(-0.17272451040733272), 'alpha_x_1': array(-0.642479993291452), 'alpha_x_10': array(0.30966083801768085), 'alpha_x_11': array(0.4201478307703719), 'alpha_x_12': array(0.15649445872263823), 'alpha_x_13': array(-0.30245931165626305), 'alpha_x_14': array(-0.7715458322606361), 'alpha_x_15': array(0.5038502945949337), 'alpha_x_16': array(0.9520822434297227), ...})
    217     def step(self, point):
    218         self._logp_dlogp_func.set_extra_values(point)
    219         array = self._logp_dlogp_func.dict_to_array(point)
    220 
    221         if self.generates_stats:
--> 222             apoint, stats = self.astep(array)
        apoint = undefined
        stats = undefined
        self.astep = <bound method NUTS.astep of <pymc3.step_methods.hmc.nuts.NUTS object>>
        array = array([ 0.30117455,  4.76200522, -0.02489327,  4...5,
       -0.24021095,  0.48260943,  0.82557563])
    223             point = self._logp_dlogp_func.array_to_full_dict(apoint)
    224             return point, stats
    225         else:
    226             apoint = self.astep(array)

...........................................................................
/usr/local/lib/python2.7/dist-packages/pymc3/step_methods/hmc/base_hmc.py in astep(self=<pymc3.step_methods.hmc.nuts.NUTS object>, q0=array([ 0.30117455,  4.76200522, -0.02489327,  4...5,
       -0.24021095,  0.48260943,  0.82557563]))
    112         start = self.integrator.compute_state(q0, p0)
    113 
    114         if not np.isfinite(start.energy):
    115             self.potential.raise_ok()
    116             raise ValueError('Bad initial energy: %s. The model '
--> 117                              'might be misspecified.' % start.energy)
        start.energy = inf
    118 
    119         adapt_step = self.tune and self.adapt_step_size
    120         step_size = self.step_adapt.current(adapt_step)
    121         self.step_size = step_size

ValueError: Bad initial energy: inf. The model might be misspecified.
___________________________________________________________________________