In [4]:
import pickle
import gzip 
import plotly.plotly as py
import plotly.graph_objs as go
import plotly
import plotly.io as pio
import numpy as np

# Load the precomputed analysis results.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only point these paths at result files produced by this project.
# The `with` blocks close each gzip handle as soon as its object is loaded.
with gzip.open("../Results/comments_analysis.pickle.gz","rb") as comments_file:
    comments_analysis = pickle.load(comments_file)
with gzip.open("../Results/submissions_analysis.pickle.gz","rb") as submissions_file:
    submissions_analysis = pickle.load(submissions_file)

In [58]:
# Print LaTeX table rows built from the 'votes_received' -> 'per_user'
# statistics stored in submissions_analysis: a header row, then one row
# per subreddit, each terminated by the LaTeX row separator.
print(" & ".join(["Subreddit", "median", "mean", "std"]), "\\\\")
for subreddit in ['politics','SandersForPresident','The_Donald','Conservative']:
    per_user = submissions_analysis[subreddit]['votes_received']['per_user']
    cells = [subreddit] + [str(per_user[stat]) for stat in ('median', 'mean', 'std')]
    print(" & ".join(cells), "\\\\")

Subreddit & median & mean & std \\
politics & 4.0 & 2544.31 & 33872.34 \\
SandersForPresident & 10.0 & 596.18 & 7591.17 \\
The_Donald & 25.0 & 3332.15 & 39356.37 \\
Conservative & 2.0 & 351.75 & 9739.48 \\


In [59]:
# Print one row per subreddit: the deleted-post count and its share, then
# the deleted-comment count and its share. Each share is
# deleted / (deleted + the matching 'posts' / 'comments' count), rounded
# to two decimals.
for subreddit in ['politics','The_Donald','SandersForPresident','Conservative']:
    post_counts = submissions_analysis[subreddit]['counts']
    deleted_posts = post_counts['deleted_posts']
    deleted_post_share = round(deleted_posts / (deleted_posts + post_counts['posts']), 2)

    deleted_comments = comments_analysis[subreddit]['counts']['deleted_comments']
    # NOTE(review): the denominator reads 'comments' from submissions_analysis,
    # not comments_analysis - confirm this is intended and not a copy-paste slip.
    comment_total = deleted_comments + submissions_analysis[subreddit]['counts']['comments']
    deleted_comment_share = round(deleted_comments / comment_total, 2)

    print(subreddit, "&",
          deleted_posts, deleted_post_share,
          "&",
          deleted_comments, deleted_comment_share)

politics & 289141 0.26 & 5522276 0.11
The_Donald & 786541 0.22 & 3586808 0.12
SandersForPresident & 40418 0.2 & 480172 0.13
Conservative & 20327 0.15 & 237281 0.15


In [7]:
def plot_sample(vals,dict_key,title,x_label,y_label,sample_size=100000,filename='line-mode',seed=None):
    """Plot log(rank) against log(value) for a random sample from each subreddit.

    For every subreddit in the fixed colour table that has at least
    ``sample_size`` entries under ``vals[subreddit]['values'][dict_key]``,
    ``sample_size`` values are drawn at random (with replacement, which is
    numpy's default for ``choice``), sorted in descending order and drawn as
    one line: x is log(rank) with ranks starting at 1, y is log(value).
    Non-positive values are plotted at y = 0 because their log is undefined.

    Parameters
    ----------
    vals : mapping
        Indexed as ``vals[subreddit]['values'][dict_key]``; each entry must be
        a sequence of numbers.
    dict_key : str
        Which value series to plot.
    title : str
        Figure title.
    x_label, y_label : str
        Axis titles.
    sample_size : int, optional
        Number of values drawn per subreddit. Subreddits with fewer values
        than this are left out of the figure.
    filename : str, optional
        Name under which ``py.iplot`` stores the figure. Calls that share a
        name overwrite each other's figure, so pass distinct names to keep
        several plots.
    seed : int or None, optional
        When given, sampling uses a dedicated ``RandomState(seed)`` so the
        figure is reproducible; otherwise numpy's global random state is used.
    """
    colors = {
        'The_Donald': '(139,0,0)',
        'Conservative': '(255,0,0)',
        'politics': '(0,191,255)',
        'SandersForPresident': '(0,0,255)',
    }

    # Filter in a single pass. Removing entries from a list while iterating
    # over it skips the element that follows each removal, which let
    # undersized subreddits slip through unchecked.
    subreddits = [
        subreddit for subreddit in colors
        if len(vals[subreddit]['values'][dict_key]) >= sample_size
    ]

    rng = np.random if seed is None else np.random.RandomState(seed)

    data = []
    for subreddit in subreddits:
        sample = rng.choice(vals[subreddit]['values'][dict_key], sample_size)
        ranked = np.sort(sample)[::-1]  # descending: rank 1 is the largest value

        X = np.log(np.arange(1, len(ranked) + 1))
        # log() is undefined for values <= 0; those points stay at 0.
        Y = np.zeros(len(ranked))
        positive = ranked > 0
        Y[positive] = np.log(ranked[positive])

        trace = go.Scatter(
            x = X.tolist(),
            y = Y.tolist(),
            mode = 'lines',
            name = subreddit,
            line = {
                'color' : ('rgb' + colors[subreddit]),
            }
        )

        data.append(trace)

    axis_font = dict(
        family='Courier New, monospace',
        size=22,
        color='black'
    )

    layout = go.Layout(
            title = title,
            xaxis = dict(
                title = x_label,
                titlefont = dict(axis_font)
            ),
            yaxis = dict(
                title = y_label,
                titlefont = dict(axis_font)
            )
        )

    fig = go.Figure(data=data, layout=layout)

    display(py.iplot(fig, filename=filename))

In [8]:
# Rank plot of the 'votes' series from the submissions analysis.
plot_sample(
    submissions_analysis,
    dict_key='votes',
    title='Votes Received Per Submission',
    x_label='log(rank)',
    y_label='log(# votes)',
)

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~soli__/0 or inside your plot.ly account where it is named 'line-mode'


In [9]:
# Rank plot of the 'comments' series from the submissions analysis.
plot_sample(
    submissions_analysis,
    dict_key='comments',
    title='Comments Received Per Submission',
    x_label='log(rank)',
    y_label='log(# comments)',
)

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~soli__/0 or inside your plot.ly account where it is named 'line-mode'


In [14]:
# Rank plot of the 'sum_votes' series from the submissions analysis,
# using a smaller sample of 15000 values per subreddit.
plot_sample(
    submissions_analysis,
    dict_key='sum_votes',
    title='Votes Received Per User (Submissions)',
    x_label='log(rank)',
    y_label='log(# votes)',
    sample_size=15000,
)

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~soli__/0 or inside your plot.ly account where it is named 'line-mode'


In [15]:
# Rank plot of the 'sum_comments' series from the submissions analysis,
# using a smaller sample of 15000 values per subreddit.
plot_sample(
    submissions_analysis,
    dict_key='sum_comments',
    title='Comments Received Per User (Submissions)',
    x_label='log(rank)',
    y_label='log(# comments)',
    sample_size=15000,
)

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~soli__/0 or inside your plot.ly account where it is named 'line-mode'


In [19]:
# Rank plot of the 'sum_votes' series from the comments analysis,
# using a sample of 50000 values per subreddit.
plot_sample(
    comments_analysis,
    dict_key='sum_votes',
    title='Votes Received Per User (Comments)',
    x_label='log(rank)',
    y_label='log(# votes)',
    sample_size=50000,
)

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~soli__/0 or inside your plot.ly account where it is named 'line-mode'


In [17]:
# Rank plot of the 'votes' series from the comments analysis.
plot_sample(
    comments_analysis,
    dict_key='votes',
    title='Votes Received Per Comments',
    x_label='log(rank)',
    y_label='log(# votes)',
)

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~soli__/0 or inside your plot.ly account where it is named 'line-mode'
