In [1]:
import bay12_scraper

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
%matplotlib notebook

In [4]:
import os


In [5]:
# Absolute path of the scraper output directory, resolved from the current
# working directory (one level up, then into 'output').
p_data = os.path.abspath(os.path.join(os.pardir, 'output'))

----

# Exploring thread types

In [6]:
# Load the thread table from the scraper output directory.
# The first line of the CSV is the header row.
p_threads = os.path.join(p_data, 'threads.csv')
df_threads = pd.read_csv(
    p_threads,
    header=0,
    encoding='utf-8',
)

In [7]:
# Preview the first rows to check that the columns were parsed as expected.
df_threads.head()

Unnamed: 0,url,thread_num,thread_name,thread_label,thread_replies
0,http://www.bay12forums.com/smf/index.php?topic...,134925,Mafia Setup Discussion and Review,other,598
1,http://www.bay12forums.com/smf/index.php?topic...,45016,Games Threshold Discussion and List [Vote for ...,other,5703
2,http://www.bay12forums.com/smf/index.php?topic...,126856,"Mafia Tools and Utilities (lurkertracker, etc)...",other,38
3,http://www.bay12forums.com/smf/index.php?topic...,88720,New Player's Guide to the Subforum - New to Ma...,other,961
4,http://www.bay12forums.com/smf/index.php?topic...,144075,Notable Games Archive: The New Thread,other,40


In [8]:
# Per-label totals: how many threads carry each label and how many replies
# they have in total.
df_thread_stats = df_threads.groupby('thread_label')['thread_replies'].agg(['count', 'sum'])
df_thread_stats.columns = ['threads', 'posts']
df_thread_stats['posts'] += df_thread_stats['threads']  # counting original post (1) + replies

# Set "usefulness" measure; labels not listed below keep the default of 0.
df_thread_stats['usefulness'] = 0
usf = {
    3: ['beginners-mafia', 'vanilla'],
    2: ['classic', 'vengeful', 'supernatural'],
    1: ['closed-setup', 'paranormal', 'kotm'] + ['byor', 'bastard', 'cybrid'],
}
for k, v in usf.items():
    # A single .loc write with a boolean mask modifies the frame itself
    # (chained `df.col[labels] = ...` may write to a temporary copy), and
    # labels that do not occur in the data are simply skipped instead of
    # raising.
    df_thread_stats.loc[df_thread_stats.index.isin(v), 'usefulness'] = k

# Set and sort index: most useful first, then alphabetically by label.
df_thread_stats = (
    df_thread_stats
    .reset_index()
    .set_index(['usefulness', 'thread_label'])
    .sort_index(ascending=[False, True])
)

In [9]:
# Render the table with in-cell bars so thread and post counts can be
# compared across labels at a glance.
df_thread_stats.style.bar()

Unnamed: 0_level_0,Unnamed: 1_level_0,threads,posts
usefulness,thread_label,Unnamed: 2_level_1,Unnamed: 3_level_1
3,beginners-mafia,62,26812
3,vanilla,14,6317
2,classic,36,17390
2,supernatural,10,5949
2,vengeful,13,2222
1,bastard,35,21169
1,byor,40,32254
1,closed-setup,52,22593
1,cybrid,7,1899
1,kotm,9,4373


---------

# Exploring roles for "usefulness=3" games

In [10]:
# Load the roles table from the scraper output directory.
# The first line of the CSV is the header row.
p_roles = os.path.join(p_data, 'roles.csv')
df_roles = pd.read_csv(
    p_roles,
    header=0,
    encoding='utf-8',
)

In [11]:
# Preview the first rows to check that the columns were parsed as expected.
df_roles.head()

Unnamed: 0,thread_num,user,role,num_posts,replaced_by
0,170712,BlackHeartKabal,replaced,9,randomgenericusername
1,170712,Dorsidwarf,observer,1,
2,170712,FallacyofUrist,game-master,43,
3,170712,IcyTea31,town,62,
4,170712,Maximum Spin,mafia,88,


We need to first figure out the roles for those who have been replaced.

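The mapping is `old_player -> new_player @ role`; we want to apply the new player's role to the old player too. Since a replacement can itself be replaced, the lookup is repeated until no `replaced` role remains.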

In [12]:
# Resolve the placeholder role 'replaced': every replaced player inherits the
# role of the player named in `replaced_by` (same thread).  Replacements can
# be chained (A -> B -> C), so the lookup is repeated until no placeholder is
# left.  `replacement_depth` counts how many lookups a row needed.
df_roles_fixed = df_roles.copy()
df_roles_fixed['replacement_depth'] = 0

cols_fixed = list(df_roles.columns) + ['replacement_depth']

# A replacement chain cannot have more links than there are rows, so this
# bound is only hit by a cyclic chain (A -> B -> A).  In that case the loop
# stops and the affected rows keep role == 'replaced' for later inspection.
max_passes = len(df_roles_fixed)
n_passes = 0

tbr = (df_roles_fixed['role'] == 'replaced')  # rows "to be resolved"
while tbr.any() and n_passes < max_passes:
    n_passes += 1

    # Self-join: attach the current role of each row's replacement player.
    # Rows without a matching replacement get NaN in `role_repl`.
    df_roles_fixed = df_roles_fixed.merge(
        df_roles_fixed, how='left',
        left_on=['thread_num', 'replaced_by'],
        right_on=['thread_num', 'user'],
        suffixes=('', '_repl')
    )[cols_fixed + ['role_repl']]

    # The merge returns a fresh index, so rebuild the mask on the new frame.
    tbr = (df_roles_fixed['role'] == 'replaced')

    # Single .loc writes modify the frame itself; chained
    # `df[col][mask] = ...` may write to a temporary copy and leave the
    # frame unchanged, which would make this loop spin forever.
    df_roles_fixed.loc[tbr, 'role'] = df_roles_fixed.loc[tbr, 'role_repl']
    df_roles_fixed.loc[tbr, 'replacement_depth'] += 1
    df_roles_fixed = df_roles_fixed[cols_fixed]

    # Re-evaluate after the update so the loop ends as soon as nothing is
    # left to resolve (a replacement may itself have been replaced).
    tbr = (df_roles_fixed['role'] == 'replaced')
    

In [13]:
# Sanity check: count the rows still carrying the placeholder role
# 'replaced' (first element of the shape); zero means all were resolved.
df_roles_fixed.loc[df_roles_fixed['role'].eq('replaced')].shape

(0, 6)

In [14]:
# Preview the resolved table, including the added `replacement_depth` column.
df_roles_fixed.head()

Unnamed: 0,thread_num,user,role,num_posts,replaced_by,replacement_depth
0,170712,BlackHeartKabal,town,9,randomgenericusername,1
1,170712,Dorsidwarf,observer,1,,0
2,170712,FallacyofUrist,game-master,43,,0
3,170712,IcyTea31,town,62,,0
4,170712,Maximum Spin,mafia,88,,0


In [15]:
# Per-role totals.  'games' is the number of rows per role (rows with a
# non-null num_posts), 'posts' is the sum of their post counts.
df_post_stats = (
    df_roles_fixed
    .groupby('role')['num_posts']
    .agg(['count', 'sum'])
    .rename(columns={'count': 'games', 'sum': 'posts'})
)

In [16]:
# Render the per-role totals with in-cell bars for a quick visual comparison.
df_post_stats.style.bar()

Unnamed: 0_level_0,games,posts
role,Unnamed: 1_level_1,Unnamed: 2_level_1
game-master,80,3592
ic,78,958
mafia,182,5950
observer,453,1757
town,631,20779
unknown,13,56


In [17]:
# Restrict the per-role totals to the two roles being compared.
df_mt_stats = df_post_stats.loc[
    ['mafia', 'town'],    # rows
    ['games', 'posts'],   # columns
]

In [18]:
# Normalise each column by its own total, giving the mafia/town share of
# rows ('games') and of posts.
df_mt_stats / df_mt_stats.sum(axis=0)

Unnamed: 0_level_0,games,posts
role,Unnamed: 1_level_1,Unnamed: 2_level_1
mafia,0.223862,0.222605
town,0.776138,0.777395
