In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot

import sys
sys.path.append("../")

from analytics.classification_aux import em2_classification_cached
from classification import twopoisson_em
from classification import visualization

from analytics.classification_aux import extract_train_test, print_results, compare_results
from aux.events_io import load_events, store_events
from classification.nhst_testing import *

##########################################

import logging
# `imp` was deprecated in Python 3.4 and removed in 3.12; `importlib.reload`
# is the supported equivalent.
from importlib import reload

# Reload `logging` so that `basicConfig` below takes effect even if handlers
# were already installed on the root logger earlier in this kernel session
# (`basicConfig` is a no-op when the root logger already has handlers).
reload(logging)
fmt = '[%(process)4d][%(asctime)s][%(levelname)-5s][%(module)s:%(lineno)d/%(funcName)s] %(message)s'
logging.basicConfig(format=fmt, level=logging.INFO, datefmt='%I:%M:%S')

%matplotlib inline

In [2]:
# Per-user covariates table (tab-separated); later filtered by its `id` column.
user2features = pd.read_csv(
    "../../data/badges/covariates.tsv",
    sep="\t",
)

# Raw samples for the research-assistant badge (tab-separated); passed to
# `load_events` in the next cell.
samples = pd.read_csv(
    "../../data/badges/tagedits/researchassistant.tsv",
    sep="\t",
)

In [3]:
# Build the per-user event lookup from the raw samples table. `plot_user`
# indexes it by user id, and the plotting helpers read the "action",
# "max_time" and badge-name keys of each entry.
# NOTE(review): the exact structure returned by `load_events` is defined in
# aux.events_io and is not visible here — confirm against that module.
id2events = load_events(samples)

In [18]:
import matplotlib
from scipy.stats import chi2
from matplotlib import pyplot
import seaborn as sns; sns.set()


# Reference points on the same time axis as the entries of events["action"]
# (they are compared against action times in `extract_position`).
# NOTE(review): presumably days since the start of the data set, given the
# "#days between actions" axis label — confirm against the data preparation.
TAGEDITOR_INTRO = 762.209
TAGEDITOR_MOD = 923.0
RESEARCHASSISTANT_INTRO = 1286.274



def pyplot_reset():
    """Discard the current matplotlib figure and restore default rcParams.

    Clears the current axes, clears the current figure, closes it, and then
    overwrites ``matplotlib.rcParams`` with ``matplotlib.rcParamsDefault``.
    """
    # Order matters: axes first, then the figure, then close it.
    for teardown in (pyplot.cla, pyplot.clf, pyplot.close):
        teardown()
    matplotlib.rcParams.update(matplotlib.rcParamsDefault)
    

def extract_position(events, v):
    """Return the index of the last action that happened strictly before ``v``.

    The index refers to the actions in time order, i.e. it equals the number
    of entries of ``events["action"]`` that are smaller than ``v``, minus one.

    Parameters
    ----------
    events : mapping
        Must provide ``"action"``, a sequence of action timestamps (any order).
    v : number
        Timestamp to locate, on the same scale as the action timestamps.

    Returns
    -------
    int
        Zero-based position, or ``-1`` when no action precedes ``v``
        (including when there are no actions at all).
    """
    actions = np.asarray(events["action"])
    # Vectorised count instead of the builtin `sum`, which would iterate the
    # array element by element; `int(...)` yields a plain Python int.
    return int(np.count_nonzero(v > actions)) - 1
    

def _plot_time_marker(events, v, text, color):
    """Draw a thick vertical line with a rotated caption at time ``v``.

    The x position is the index of the last action before ``v`` (see
    ``extract_position``) shifted by 0.5, so the line falls between that
    action and the next one on the action-number axis.
    """
    pos = extract_position(events, v) + 0.5
    pyplot.axvline(x=pos, color=color, lw=3)
    ymin, _ = pyplot.gca().get_ylim()
    # Anchor the caption at twice the lower y-limit so it starts slightly
    # above the bottom edge of the axes.
    pyplot.text(pos, ymin*2, text,
                rotation=90, fontsize=25, color=color,
                verticalalignment="bottom", horizontalalignment="right")


def plot_badge(events, l, color="black"):
    """Mark the moment the user was awarded badge ``l`` on the current axes.

    Parameters
    ----------
    events : mapping
        Per-user events; ``events[l]`` is a sequence whose first element is
        used as the awarding time.
    l : str
        Key of the badge in ``events``.
    color : str
        Colour of the line and its caption.

    Does nothing when the user has no entry for ``l`` or the entry is empty.
    """
    if l not in events:
        return
    award_times = events[l]
    # An empty list means the badge was never awarded: nothing to draw
    # (previously this raised IndexError).
    if len(award_times) == 0:
        return
    _plot_time_marker(events, award_times[0], r" badge awarding, $b_u$", color)


def plot_badge_intro(events, l, v, color="blue"):
    """Mark time ``v`` on the current axes with a line captioned ``l``.

    Parameters
    ----------
    events : mapping
        Per-user events, used to convert ``v`` into an action-number position.
    l : str
        Caption text (may contain mathtext/LaTeX).
    v : number
        Timestamp to mark, on the same scale as the action timestamps.
    color : str
        Colour of the line and its caption.
    """
    _plot_time_marker(events, v, " %s" % (l), color)
    

def plot_events(events):
    """Plot the gaps between a user's consecutive actions as a step curve.

    The x-axis is the action number (actions in time order) and the y-axis,
    on a log scale, is the time until the following action. The timestamps in
    ``events["max_time"]`` are appended to the actions so that the last
    action also gets a gap.

    Parameters
    ----------
    events : mapping
        Must provide ``"action"`` (sequence of action timestamps) and
        ``"max_time"`` (sequence of closing timestamps).
        NOTE(review): the x/y lengths only line up when ``"max_time"`` holds
        exactly one value — confirm this is guaranteed by ``load_events``.
    """
    # Sort once; everything below relies on this order.
    actions = np.asarray(sorted(events["action"]))
    n_actions = len(actions)

    # `list(...)` around max_time also accepts tuples/arrays, not only lists.
    timeline = sorted(list(actions) + list(events["max_time"]))
    diffs = np.diff(timeline)

    # With no gaps at all there is nothing to draw (and `diffs[-1]` would
    # raise IndexError); the axes are still formatted below.
    if len(diffs) > 0:
        xs = list(range(n_actions + 1))
        # Repeat the last gap so the "steps-post" curve draws its final step.
        pyplot.plot(xs, list(diffs) + [diffs[-1]],
                    drawstyle="steps-post", lw=3, color="salmon")

    # One tick for every 10th action, labelled with the action number.
    positions = list(range(0, n_actions, 10))
    labels = ["%i" % (i) for i in positions]
    pyplot.xticks(positions, labels, rotation=0, horizontalalignment="right")

    pyplot.yscale("log")
    pyplot.grid(True)
    pyplot.xlabel("action number", fontsize=25)
    pyplot.ylabel("#days between actions", fontsize=25)
    pyplot.xlim((-2, n_actions + 1))
    pyplot.gca().tick_params(axis='both', which='major', labelsize=20)



def plot_user(uid):
    """Plot the inter-action gaps of user ``uid`` with the badge markers.

    Looks the user up in the notebook-level ``id2events`` mapping, draws the
    gap curve via ``plot_events``, then marks when the user was awarded the
    ``"research_assistant"`` badge and when that badge was introduced
    (``RESEARCHASSISTANT_INTRO``).

    Parameters
    ----------
    uid : hashable
        Key of the user in ``id2events``; a missing key raises ``KeyError``.
    """
    events = id2events[uid]
    plot_events(events)

    # Optional overlays, currently disabled:
    #plot_badge(events, "tag_editor")
    #plot_badge(events, "research_assistant*")
    #plot_badge_intro(events, "TAGEDITOR", TAGEDITOR_INTRO, color="blue")
    #plot_badge_intro(events, "TAGEDITORM", TAGEDITOR_MOD, color="blue")
    plot_badge(events, "research_assistant", color="black")
    plot_badge_intro(events, r"badge introduction, $\tau$", RESEARCHASSISTANT_INTRO, color="black")
    


In [19]:
# Render the sample-user figure and export it as a PDF.
# Start from a clean figure and default rcParams.
pyplot_reset()
# fonttype 42 embeds TrueType fonts in PDF/PS output; usetex renders all text
# through LaTeX (the captions use $...$ math).
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42  
matplotlib.rcParams['text.usetex'] = True       

# User 1288 is the example user; the same id is looked up in the covariates
# table in the next cell.
plot_user(1288)

# NOTE(review): this import would normally live in the top import cell.
from analytics.plots import pyplot_savefig

# Leave room for the enlarged axis labels.
pyplot.gcf().subplots_adjust(bottom=0.15, left=0.15)# right=0.85, left=0.15)
# NOTE(review): the same rcParams are applied a second time here — presumably
# because plotting or importing analytics.plots may alter them. Confirm before
# removing the duplication.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42     
matplotlib.rcParams['text.usetex'] = True       

# Project helper; per the captured log it saves the plot to this path.
pyplot_savefig("../../data/sample_user.pdf")

[30517][10:48:18][INFO ][plots:26/savefig] [plots.savefig] Saving plot to ../../data/sample_user.pdf


In [20]:
# Covariate row(s) of the example user (id 1288, the user plotted above).
# The name `u` is read by the LaTeX-printing cell below.
u = user2features.loc[user2features["id"] == 1288]
u

Unnamed: 0,id,reputation,location,views,upvotes,downvotes,age,comments,c0,c1,c2,c3,c4,s0,s1,s2,s3,s4
959,1288,11.931076,"Charlotte, NC",11.542309,9.575539,9.1072,40.0,8.085795,3.683458,3.035432,2.926652,2.379401,2.409712,2.932914,3.527206,3.681607,3.763656,3.322067


In [7]:
# Emit the example user's covariates as LaTeX table rows.
# `u` comes from the covariate-lookup cell and must hold exactly one row.
assert len(u) == 1, "expected exactly one covariate row for the sample user"

# Scalars are pulled out with `.iloc[0]`: calling int() directly on a
# one-element Series is deprecated in recent pandas versions.
# views/comments/upvotes/downvotes are mapped back with exp(x) - 1.
# NOTE(review): this presumes the table stores them as log(1 + count) — confirm.
latex_fields = [
    ("id", int(u["id"].iloc[0])),
    ("age", int(u["age"].iloc[0])),
    ("location", u["location"].iloc[0]),
    ("views", int(np.exp(u["views"].iloc[0]) - 1)),
    ("comments", int(np.exp(u["comments"].iloc[0]) - 1)),
    ("upvotes", int(np.exp(u["upvotes"].iloc[0]) - 1)),
    ("downvotes", int(np.exp(u["downvotes"].iloc[0]) - 1)),
]
for field_name, field_value in latex_fields:
    print("\\small{%s} & \\small{" % field_name, field_value, "}\\\\")

\small{id} & \small{ 1288 }\\
\small{age} & \small{ 40 }\\
\small{location} & \small{ Charlotte, NC }\\
\small{views} & \small{ 102981 }\\
\small{comments} & \small{ 3246 }\\
\small{upvotes} & \small{ 14407 }\\
\small{downvotes} & \small{ 9019 }\\
