# Comparing the different Polymath projects

We start by setting up, loading the urls of the discussions, and putting everything in a frame.

In [1]:
import yaml
import matplotlib
matplotlib.use('nbagg')
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (20, 20)

from bs4 import BeautifulSoup
from collections import Counter
import json
import pandas as pd
from pandas import DataFrame
from urllib.parse import urlparse

from comment_thread import *
from author_network import *

In [2]:
def process_polymath(project, split=False):
    message = "Polymath {}".format(project[-1]) if project.startswith("pm") else "Mini Polymath {}".format(project[-1])
    SETTINGS = {
    'msg' : message,
    'filename' : message.replace(" ", ""),
    'source' : project,
    'urls' : [],
    'type' : '',
    'parser' : 'html5lib',
    'cmap' : plt.cm.Paired, # not a string, this is an object
    'vmin' : 1,
    'vmax' : 100}
    
    print("loading data")
    with open("DATA/"+ SETTINGS['source'] + ".txt", "r") as input:
        soup = BeautifulSoup(input.readline(), "html.parser")
    
    items = soup.find("ul").find_all("li")
    urls = [item.find("a").get("href") for item in items]
    titles = [item.text for item in items]
    
    if project == "pm1":
        research = [title.startswith(" (") for title in titles]
    else:
        research = ["Discussion" not in title for title in titles]
        
    pm_frame = DataFrame({
        'url': urls,
        'title' : titles,
        'blog' : [urlparse(url).netloc.split('.')[0].title() for url in urls],
        'research' : research},
        columns = ['title', 'url', 'blog', 'research'])
        
    pm_frame['thread'] = [THREAD_TYPES[blog](url) for (url, blog) in zip(pm_frame['url'], pm_frame['blog'])]
    pm_frame['number of comments'] = pm_frame['thread'].apply(lambda x: len(x.node_name.keys()))
    
    indices = pm_frame.index
    threads = pm_frame.thread
    pm_frame['mthread (single)'] = pm_frame['thread'].apply(MultiCommentThread)
    pm_frame['mthread (accumulated)'] = Series([MultiCommentThread(*threads[0:i+1]) for i in indices],
                                               index=indices)
    pm_frame['network'] = pm_frame['mthread (accumulated)'].apply(AuthorNetwork)
    
    if split and not pm_frame['research'].all():
        r_indices = pm_frame[pm_frame['research']].index
        d_indices = pm_frame[~pm_frame['research']].index
        r_threads = pm_frame[pm_frame['research']].thread
        d_threads = pm_frame[~pm_frame['research']].thread
        pm_frame['r_mthread (accumulated)'] = Series([MultiCommentThread(*r_threads[0:i+1]) for i in r_indices],
                                                 index=r_indices)
        pm_frame['d_mthread (accumulated)'] = Series([MultiCommentThread(*d_threads[0:i+1]) for i in d_indices],
                                                 index=d_indices)
        pm_frame['r_network'] = pm_frame[pm_frame['research']]['r_mthread (accumulated)'].apply(AuthorNetwork)
        pm_frame['d_network'] = pm_frame[~pm_frame['research']]['d_mthread (accumulated)'].apply(AuthorNetwork)
        pm_frame = pm_frame.reindex_axis(['title', 'url', 'blog', 'research', 'number of comments',
                                      'thread', 'mthread (single)',
                                      'mthread (accumulated)', 'network',
                                      'r_mthread (accumulated)', 'r_network',
                                      'd_mthread (accumulated)', 'd_network'],
                                     axis=1)
    else:
        pm_frame = pm_frame.reindex_axis(['title', 'url', 'blog', 'research', 'number of comments',
                                      'thread', 'mthread (single)',
                                      'mthread (accumulated)', 'network'],
                                     axis=1)

    pm_frame.index = pd.MultiIndex.from_tuples([(SETTINGS['msg'], i) for i in indices],
                                               names=['Project', 'Ord'])
    
    return pm_frame

In [3]:
PM3_FRAME = process_polymath("pm3", split=True)
PM3_FRAME

loading data


Unnamed: 0_level_0,Unnamed: 1_level_0,title,url,blog,research,number of comments,thread,mthread (single),mthread (accumulated),network,r_mthread (accumulated),r_network,d_mthread (accumulated),d_network
Project,Ord,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Polymath 3,0,"The polynomial Hirsch conjecture, a proposal ...",http://gilkalai.wordpress.com/2009/07/17/the-p...,Gilkalai,True,31,<comment_thread.CommentThreadGilkalai object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10e2...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f1...,,
Polymath 3,1,"The polynomial Hirsch conjecture, a proposal ...",http://gilkalai.wordpress.com/2009/07/28/polym...,Gilkalai,True,2,<comment_thread.CommentThreadGilkalai object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10e2...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f1...,,
Polymath 3,2,The polynomial Hirsch conjecture - how to imp...,http://gilkalai.wordpress.com/2009/07/30/the-p...,Gilkalai,True,11,<comment_thread.CommentThreadGilkalai object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10e2...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f3...,,
Polymath 3,3,The Polynomial Hirsch Conjecture: Discussion ...,http://gilkalai.wordpress.com/2009/08/09/the-p...,Gilkalai,False,109,<comment_thread.CommentThreadGilkalai object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10e2...,,,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10fd...
Polymath 3,4,The Polynomial Hirsch Conjecture: Discussion ...,http://gilkalai.wordpress.com/2009/10/06/the-p...,Gilkalai,False,13,<comment_thread.CommentThreadGilkalai object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10e2...,,,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f1...
Polymath 3,5,"Plans for polymath3 (Dec 8, 2009) Inactive.",http://gilkalai.wordpress.com/2009/12/08/plans...,Gilkalai,True,2,<comment_thread.CommentThreadGilkalai object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10e2...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f1...,,
Polymath 3,6,The Polynomial Hirsch Conjecture: The Crux of...,http://gilkalai.wordpress.com/2010/06/19/the-p...,Gilkalai,True,3,<comment_thread.CommentThreadGilkalai object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f3...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f3...,,
Polymath 3,7,"Polynomial Hirsch Conjecture (Sep 29, 2010) ...",http://gilkalai.wordpress.com/2010/09/29/polym...,Gilkalai,True,112,<comment_thread.CommentThreadGilkalai object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10e2...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f1...,,
Polymath 3,8,"The Polynomial Hirsch Conjecture 2 (Oct 3, 20...",http://gilkalai.wordpress.com/2010/10/03/polym...,Gilkalai,True,103,<comment_thread.CommentThreadGilkalai object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f3...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f1...,,
Polymath 3,9,Polymath3 : Polynomial Hirsch Conjecture 3 (O...,http://gilkalai.wordpress.com/2010/10/10/polym...,Gilkalai,True,98,<comment_thread.CommentThreadGilkalai object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f3...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x10f1...,,


In [4]:
PM4_FRAME = process_polymath("pm4", split=True)
PM4_FRAME

loading data


Unnamed: 0_level_0,Unnamed: 1_level_0,title,url,blog,research,number of comments,thread,mthread (single),mthread (accumulated),network,r_mthread (accumulated),r_network,d_mthread (accumulated),d_network
Project,Ord,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Polymath 4,0,Proposal for the project and initial research...,http://polymathprojects.org/2009/07/27/proposa...,Polymathprojects,True,130,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x112c...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x113e...,,
Polymath 4,1,Research thread II Opened Aug 9. Inactive.,http://polymathprojects.org/2009/08/09/researc...,Polymathprojects,True,143,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x112c...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x112c...,,
Polymath 4,2,Research thread III Opened Aug 13. Inactive.,http://polymathprojects.org/2009/08/13/researc...,Polymathprojects,True,114,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x112c...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x113e...,,
Polymath 4,3,Research thread IV Opened Aug 28. Inactive.,http://polymathprojects.org/2009/08/28/researc...,Polymathprojects,True,64,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x112c...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x113e...,,
Polymath 4,4,Research thread V Opened Oct 27. Active.,http://polymathprojects.org/2009/10/27/researc...,Polymathprojects,True,40,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x113a...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x113a...,,
Polymath 4,5,Discussion thread I Opened July 28. Inactive.,http://polymathprojects.org/2009/07/28/determi...,Polymathprojects,False,52,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x112c...,,,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x113e...
Polymath 4,6,Discussion thread II Opened Jun 29. Active.,http://polymathprojects.org/2010/06/29/draft-v...,Polymathprojects,False,31,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x113a...,,,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x113e...


In [5]:
PM5_FRAME = process_polymath("pm5", split=True)
PM5_FRAME

loading data


Unnamed: 0_level_0,Unnamed: 1_level_0,title,url,blog,research,number of comments,thread,mthread (single),mthread (accumulated),network
Project,Ord,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Polymath 5,0,Zeroth Post (Dec 17 2009 - Jan 6 2010).,http://gowers.wordpress.com/2009/12/17/erdoss-...,Gowers,True,148,<comment_thread.CommentThreadGowers object at ...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1279...
Polymath 5,1,First Post (Jan 6 - Jan 12).,http://gowers.wordpress.com/2010/01/06/erdss-d...,Gowers,True,125,<comment_thread.CommentThreadGowers object at ...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1279...
Polymath 5,2,Second Post (Jan 9 - Jan 11).,http://gowers.wordpress.com/2010/01/09/erds-di...,Gowers,True,100,<comment_thread.CommentThreadGowers object at ...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1279...
Polymath 5,3,Third Post (Jan 11 - Jan 14).,http://gowers.wordpress.com/2010/01/11/the-erd...,Gowers,True,103,<comment_thread.CommentThreadGowers object at ...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1279...
Polymath 5,4,Fourth Post (Jan 14 - 16).,http://gowers.wordpress.com/2010/01/14/the-erd...,Gowers,True,103,<comment_thread.CommentThreadGowers object at ...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1279...
Polymath 5,5,Fifth Post (Jan 16-19).,http://gowers.wordpress.com/2010/01/16/the-erd...,Gowers,True,101,<comment_thread.CommentThreadGowers object at ...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1279...
Polymath 5,6,First Theoretical Post (Jan 19-21),http://gowers.wordpress.com/2010/01/19/edp1-th...,Gowers,True,98,<comment_thread.CommentThreadGowers object at ...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1279...
Polymath 5,7,Second Theoretical Post (Jan 21-26),http://gowers.wordpress.com/2010/01/21/edp2-a-...,Gowers,True,103,<comment_thread.CommentThreadGowers object at ...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1279...
Polymath 5,8,Third Theoretical Post (Jan 26 -?),http://gowers.wordpress.com/2010/01/26/edp3-a-...,Gowers,True,103,<comment_thread.CommentThreadGowers object at ...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1279...
Polymath 5,9,Fourth Theoretical Post (Jan 30- Feb 2),http://gowers.wordpress.com/2010/01/30/edp4-fo...,Gowers,True,109,<comment_thread.CommentThreadGowers object at ...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1279...


In [6]:
PM6_FRAME = process_polymath("pm6", split=True)
PM6_FRAME

loading data


Unnamed: 0_level_0,Unnamed: 1_level_0,title,url,blog,research,number of comments,thread,mthread (single),mthread (accumulated),network
Project,Ord,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Polymath 6,0,Blog post by Tom Sanders about the ideas and d...,http://polymathprojects.org/2011/02/05/polymat...,Polymathprojects,True,16,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x129e...


In [7]:
PM7_FRAME = process_polymath("pm7", split=True)
PM7_FRAME

loading data


Unnamed: 0_level_0,Unnamed: 1_level_0,title,url,blog,research,number of comments,thread,mthread (single),mthread (accumulated),network
Project,Ord,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Polymath 7,0,"First research thread (June 3, 2012) Inactive",http://polymathprojects.org/2012/06/03/polymat...,Polymathprojects,True,102,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1376...
Polymath 7,1,"Current discussion thread (June 9, 2012) Active",http://polymathprojects.org/2012/06/09/polymat...,Polymathprojects,True,40,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1376...
Polymath 7,2,"Research thread 1 (June 12, 2012) Inactive",http://polymathprojects.org/2012/06/12/polymat...,Polymathprojects,True,89,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x129e...
Polymath 7,3,"Research thread 2 (June 15, 2012) Inactive",http://polymathprojects.org/2012/06/15/polymat...,Polymathprojects,True,95,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x129c...
Polymath 7,4,"Research thread 3 (June 24, 2012) Inactive",http://polymathprojects.org/2012/06/24/polymat...,Polymathprojects,True,97,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x1376...
Polymath 7,5,"Research thread 4 (September 10, 2012) Inactive",http://polymathprojects.org/2012/09/10/polymat...,Polymathprojects,True,102,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x129c...
Polymath 7,6,"Research thread 5 (August 9, 2013)",http://polymathprojects.org/2013/08/09/polymat...,Polymathprojects,True,5,<comment_thread.CommentThreadPolymath object a...,<comment_thread.MultiCommentThread object at 0...,<comment_thread.MultiCommentThread object at 0...,<author_network.AuthorNetwork object at 0x13a8...


In [9]:
PM8_FRAME = process_polymath("pm8", split=True)
PM8_FRAME

loading data


KeyError: 'Sbseminar'

In [None]:
PM3_FRAME['network'].iloc[-1].draw_graph(title="Author network for {}".format("Polymath 3"))

In [None]:
PM3_FRAME['network'].iloc[-1].plot_author_activity_pie(project="Polymath 3")
PM3_FRAME['network'].iloc[-1].plot_degree_centrality(project="Polymath 3")

In [None]:
PM3_FRAME['mthread (accumulated)'].iloc[-1].plot_activity('thread', intervals=1, max_span=timedelta(500),
                                                         project="Polymath 3")
PM3_FRAME['mthread (accumulated)'].iloc[-1].plot_growth(drop_last=2,
                                                         project="Polymath 3")

In [None]:
PM4_FRAME['network'].iloc[-1].draw_graph(title="Author network for {}".format("Polymath 4"))

In [None]:
PM4_FRAME['network'].iloc[-1].plot_author_activity_pie(project="Polymath 4")
PM4_FRAME['network'].iloc[-1].plot_degree_centrality(project="Polymath 4")

In [None]:
PM4_FRAME['mthread (accumulated)'].iloc[-1].plot_activity('thread', intervals=1, max_span=timedelta(800),
                                                         project="Polymath 4")
PM4_FRAME['mthread (accumulated)'].iloc[-1].plot_growth(drop_last=15,
                                                         project="Polymath 4")

In [None]:
PM5_FRAME['network'].iloc[-1].draw_graph(title="Author network for {}".format("Polymath 5"))

In [None]:
PM5_FRAME['network'].iloc[-1].plot_author_activity_pie(project="Polymath 5")
PM5_FRAME['network'].iloc[-1].plot_degree_centrality(project="Polymath 5")

In [None]:
PM5_FRAME['mthread (accumulated)'].iloc[-1].plot_activity('thread', intervals=1, max_span=timedelta(1500),
                                                         project="Polymath 5")
PM5_FRAME['mthread (accumulated)'].iloc[-1].plot_growth(drop_last=0,
                                                         project="Polymath 5")

In [None]:
PM7_FRAME['network'].iloc[-1].draw_graph(title="Author network for {}".format("Polymath 7"))

In [None]:
PM7_FRAME['network'].iloc[-1].plot_author_activity_pie(project="Polymath 7")
PM7_FRAME['network'].iloc[-1].plot_degree_centrality(project="Polymath 7")

In [None]:
PM7_FRAME['mthread (accumulated)'].iloc[-1].plot_activity('thread', intervals=1, max_span=timedelta(1500),
                                                         project="Polymath 7")
PM7_FRAME['mthread (accumulated)'].iloc[-1].plot_growth(drop_last=0,
                                                         project="Polymath 7")