# User Success Rate Visualization

In [1]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
import altair as alt
from matplotlib import pyplot

In [2]:
# Load the raw export for a single user. Later cells rely on its
# Date, WordCount, Event and Goal columns.
user_csv_path = "rawdata/morgandanielle.csv"
single_user = pd.read_csv(user_csv_path)

## Only NaNoWriMo events
First, we'll look only at entries from NaNoWriMo's official November events (2011 onward).

In [3]:
# Keep only rows logged under a November "NaNoWriMo <year>" event from 2011 on.
# The year is the second space-separated token of the event name; the prefix
# test runs first so the year is only parsed for matching names.
is_nano_2011_plus = [
    event.startswith("NaNoWriMo") and int(event.split(" ")[1]) >= 2011
    for event in single_user['Event']
]
nano_only = single_user[is_nano_2011_plus]

In [4]:
# Order rows chronologically within each event. reset_index() keeps the
# original row labels in an 'index' column and gives the frame a fresh
# 0..n-1 index.
nano_sorted = nano_only.sort_values(by=['Event', 'Date']).reset_index()

# Build the per-event running total on the *sorted* frame itself.
# GroupBy.cumsum returns a Series aligned to nano_sorted's index, so each
# total lands on its own row regardless of the row order in the raw file
# (the previous version summed the unsorted frame and relied on the two
# orderings happening to match).
nano_sorted['CumulativeWC'] = nano_sorted.groupby("Event")['WordCount'].cumsum()

In [5]:
# Inspect the sorted frame, including the CumulativeWC column added above.
nano_sorted

Unnamed: 0,index,Date,WordCount,Project,Genre,Event,Goal,CumulativeWC
0,392,2011-11-01,4172,The Redemption of the Renegades,Adventure,NaNoWriMo 2011,"50,000 WORDS",4172
1,393,2011-11-02,2708,The Redemption of the Renegades,Adventure,NaNoWriMo 2011,"50,000 WORDS",6880
2,394,2011-11-03,2155,The Redemption of the Renegades,Adventure,NaNoWriMo 2011,"50,000 WORDS",9035
3,395,2011-11-04,686,The Redemption of the Renegades,Adventure,NaNoWriMo 2011,"50,000 WORDS",9721
4,396,2011-11-05,1991,The Redemption of the Renegades,Adventure,NaNoWriMo 2011,"50,000 WORDS",11712
...,...,...,...,...,...,...,...,...
245,25,2021-11-26,3125,TPOP Book 3: The Magnetism of Madness,Science Fiction,NaNoWriMo 2021,"50,000 WORDS",97174
246,26,2021-11-27,2957,TPOP Book 3: The Magnetism of Madness,Science Fiction,NaNoWriMo 2021,"50,000 WORDS",100131
247,27,2021-11-28,1681,TPOP Book 3: The Magnetism of Madness,Science Fiction,NaNoWriMo 2021,"50,000 WORDS",101812
248,28,2021-11-29,1866,TPOP Book 3: The Magnetism of Madness,Science Fiction,NaNoWriMo 2021,"50,000 WORDS",103678


In [6]:
def justmonthday(x):
    """Return the "MM-DD" part of a "YYYY-MM-DD" date string.

    The input is parsed with the "%Y-%m-%d" format, so a string in any
    other layout raises ValueError.
    """
    parsed = datetime.strptime(x, "%Y-%m-%d")
    # Formatting a datetime with a format spec delegates to strftime.
    return f"{parsed:%m-%d}"

# Add a year-less "MM-DD" label to every row; line_plot uses it as the x
# value so that events from different years share one axis.
nano_sorted['DateNoYear'] = list(map(justmonthday, nano_sorted['Date']))

In [7]:
def line_plot(df, goal=50000):
    """Plot cumulative word count per event with a dashed goal line.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the 'DateNoYear', 'CumulativeWC' and 'Event' columns;
        one line is drawn per distinct 'Event' value.
    goal : int or float, default 50000
        Height of the dashed horizontal reference rule. The default keeps
        the original 50,000-word line; pass another value for events whose
        target differs.

    Returns
    -------
    The layered Altair chart (event lines plus the goal rule).
    """
    lines = (
        alt.Chart(df)
        .mark_line()
        .encode(
            x='DateNoYear',
            y='CumulativeWC',
            color='Event',
        )
    )
    # The rule carries no data of its own; alt.datum pins it to a constant
    # y value.
    goal_rule = (
        alt.Chart()
        .mark_rule(strokeDash=[12, 6], size=2)
        .encode(y=alt.datum(goal))
    )

    return lines + goal_rule

# One line per NaNoWriMo event, drawn over the shared month-day axis.
line_plot(nano_sorted)

## Camp NaNoWriMo Events

In [8]:
# Keep only rows logged under a "Camp NaNoWriMo <month> <year>" event from
# 2011 on. The year is the fourth space-separated token of the event name;
# the prefix test runs first so the year is only parsed for matching names.
is_camp_2011_plus = [
    event.startswith("Camp") and int(event.split(" ")[3]) >= 2011
    for event in single_user['Event']
]
camp_only = single_user[is_camp_2011_plus]

In [9]:
# Group rows by event and order them chronologically inside each event.
# reset_index() keeps the original row labels in an 'index' column and
# gives the frame a fresh 0..n-1 index.
camp_sorted = camp_only.sort_values(by=['Event', 'Date']).reset_index()

# Per-event running total. GroupBy.cumsum returns a Series aligned to
# camp_sorted's own index, so no positional re-alignment through
# apply(...).reset_index() is needed and the result keeps its shape even
# when only a single event is present.
camp_sorted['CumulativeWC'] = camp_sorted.groupby("Event")['WordCount'].cumsum()

In [10]:
# Inspect the sorted camp frame, including the CumulativeWC column added above.
camp_sorted

Unnamed: 0,index,Date,WordCount,Project,Genre,Event,Goal,CumulativeWC
0,306,2014-03-31,0,Ad Libitum,Mainstream,Camp NaNoWriMo April 2014,"25,000 WORDS",0
1,307,2014-04-01,0,Ad Libitum,Mainstream,Camp NaNoWriMo April 2014,"25,000 WORDS",0
2,308,2014-04-02,0,Ad Libitum,Mainstream,Camp NaNoWriMo April 2014,"25,000 WORDS",0
3,309,2014-04-03,3060,Ad Libitum,Mainstream,Camp NaNoWriMo April 2014,"25,000 WORDS",3060
4,310,2014-04-04,2647,Ad Libitum,Mainstream,Camp NaNoWriMo April 2014,"25,000 WORDS",5707
...,...,...,...,...,...,...,...,...
155,40,2021-07-14,474,The Physics of Phantoms (Rewrite),Science Fiction,Camp NaNoWriMo July 2021,"20,000 WORDS",11357
156,41,2021-07-19,1431,The Physics of Phantoms (Rewrite),Science Fiction,Camp NaNoWriMo July 2021,"20,000 WORDS",12788
157,42,2021-07-23,1066,The Physics of Phantoms (Rewrite),Science Fiction,Camp NaNoWriMo July 2021,"20,000 WORDS",13854
158,43,2021-07-25,604,The Physics of Phantoms (Rewrite),Science Fiction,Camp NaNoWriMo July 2021,"20,000 WORDS",14458


In [11]:
# Add a year-less "MM-DD" label to every camp row; line_plot uses it as the
# x value so that sessions from different years share one axis.
camp_sorted['DateNoYear'] = list(map(justmonthday, camp_sorted['Date']))

In [12]:
# April camp sessions only.
# NOTE(review): line_plot draws its dashed rule at 50000, while the camp
# goals shown in the table above are 25,000 / 20,000 words - confirm the
# reference line is meaningful for these charts.
april_camp = camp_sorted[camp_sorted.Event.str.contains("April")]
line_plot(april_camp)

In [13]:
# July camp sessions only.
# NOTE(review): line_plot draws its dashed rule at 50000, while the camp
# goals shown in the table above are 25,000 / 20,000 words - confirm the
# reference line is meaningful for these charts.
july_camp = camp_sorted[camp_sorted.Event.str.contains("July")]
line_plot(july_camp)

## Overall, how often are goals met?

And has this changed over time?

In [14]:
def _fraction_of_goal(event_rows):
    """Return total words written in one event divided by that event's goal.

    The goal is taken from the first row of the group. Goal strings look
    like "50,000 WORDS": the first whitespace-separated token is the number,
    with commas as thousands separators.
    """
    goal_text = event_rows['Goal'].iloc[0]
    goal_words = int(goal_text.split()[0].replace(",", ""))
    return sum(event_rows['WordCount']) / goal_words


# One value per event, indexed by event name; >= 1 means the goal was reached.
perc_goal = single_user.groupby("Event").apply(_fraction_of_goal)

In [22]:
def event_to_date(event):
    """Map an event name to the first day of the month it is held in.

    The last whitespace-separated token is read as the year. Names starting
    with "NaNoWriMo" map to November 1; otherwise names containing "April"
    map to April 1 and names containing "July" map to July 1. Any other
    name yields None.

    Raises ValueError if the last token is not an integer.
    """
    year = int(event.split()[-1])
    if event.startswith("NaNoWriMo"):
        month = 11
    elif "April" in event:
        month = 4
    elif "July" in event:
        month = 7
    else:
        # Unrecognised event: no date can be derived.
        return None
    return datetime(year, month, 1)
    
def short_string(event_name):
    """Build a compact chart label for an event, e.g. "Nano 14" or "CN-A 14".

    The label ends with the last two characters of the final
    whitespace-separated token of the name (the two-digit year for names
    ending in a four-digit year).

    Returns
    -------
    str or None
        "CN-A <yy>" for names containing "Camp" and "April",
        "CN-J <yy>" for names containing "Camp" and "July",
        None for any other name containing "Camp",
        "Nano <yy>" for every name that does not contain "Camp".
    """
    # Take the LAST two characters: slicing with [:-2] would keep the
    # century ("20") and give every event the same, useless suffix.
    year = event_name.split()[-1][-2:]
    if "Camp" in event_name:
        if "April" in event_name:
            return "CN-A " + year
        if "July" in event_name:
            return "CN-J " + year
        # Camp session in a month this notebook has no abbreviation for.
        return None
    return "Nano " + year

In [28]:
# Flatten the per-event goal fractions into plain lists once, then derive
# every column of the plotting frame from them.
event_names = perc_goal.index.tolist()
goal_fractions = perc_goal.tolist()
goal_reached = [fraction >= 1 for fraction in goal_fractions]

source = pd.DataFrame({
    'event': list(map(short_string, event_names)),
    'time': list(map(event_to_date, event_names)),
    'perc_goal': goal_fractions,
    'goal': ["Goal Met" if reached else "Goal Not Met" for reached in goal_reached],
    'color': ["#ffc014" if reached else "#996eb5" for reached in goal_reached],
})

# scale=None makes Altair use the hex strings in 'color' literally instead
# of mapping them through a colour scale.
bars = (
    alt.Chart(source)
    .mark_bar()
    .encode(
        x='time',
        y='perc_goal',
        color=alt.Color('color', scale=None),
    )
)

# Reuse the bar encodings so each label sits at its bar, nudged 10px upward.
text = bars.mark_text(align='center', dy=-10).encode(text='event')

(bars + text).properties(width=1000)