In [1]:
import calculate
import requests
from multiprocessing import Process, Manager
from decimal import Decimal

In [2]:
def get_story(story_id, stories, timeout=10):
    """Fetch one Hacker News item, attach its author's karma, and collect it.

    Args:
        story_id: Integer item id; interpolated into the item URL with ``%d``.
        stories: List-like collector. The enriched item dict is appended to
            it (get_top_stories_with_user_karma passes a Manager list proxy
            so that appends made in child processes are visible to the parent).
        timeout: Seconds to wait for the item request before giving up.
            Without a timeout a stalled connection would block this worker,
            and the parent's ``join()``, indefinitely.

    Returns:
        The same ``stories`` collector that was passed in.

    Raises:
        requests.exceptions.RequestException: on timeout, connection failure,
            or a non-2xx HTTP status.
    """
    url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    story_data = resp.json()
    # The body can be JSON null (-> None) or an item flagged as deleted;
    # neither carries usable story fields, so collect nothing for them.
    if not story_data or story_data.get('deleted'):
        return stories
    author = story_data.get('by')
    # Only look the author up when the item actually names one; the user
    # endpoint may itself answer with null, hence the ``or {}`` guard.
    user_data = get_user(author) if author else None
    story_data['user_karma'] = (user_data or {}).get('karma') or 0
    stories.append(story_data)
    return stories

In [3]:
def get_user(user_id, timeout=10):
    """Fetch a Hacker News user profile as decoded JSON.

    Args:
        user_id: User name; interpolated into the user URL with ``%s``.
        timeout: Seconds to wait for the response. Added so that a stalled
            connection raises instead of blocking the caller forever.

    Returns:
        Whatever the endpoint's JSON body decodes to. Callers should be
        prepared for ``None`` when the body is JSON ``null``.

    Raises:
        requests.exceptions.RequestException: on timeout, connection failure,
            or a non-2xx HTTP status.
    """
    url = 'https://hacker-news.firebaseio.com/v0/user/%s.json' % user_id
    resp = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors rather than decoding an error payload as
    # if it were a user profile.
    resp.raise_for_status()
    return resp.json()

In [4]:
def get_top_stories_with_user_karma(limit=40, timeout=10):
    """Fetch the current top stories, each enriched with its author's karma.

    One child process is started per story id; every child runs
    ``get_story`` and appends its result to a shared Manager list.

    Args:
        limit: Maximum number of top-story ids to fetch (previously the
            hard-coded value 40).
        timeout: Seconds to wait for the top-stories id request.

    Returns:
        A plain ``list`` of story dicts, in the order the workers appended
        them. Stories whose worker failed are absent from the list.

    Raises:
        requests.exceptions.RequestException: if the id request times out,
            fails to connect, or returns a non-2xx HTTP status.
    """
    url = 'https://hacker-news.firebaseio.com/v0/topstories.json'
    # Fetch the ids before starting the manager so that a failed request
    # does not leave a manager server process behind.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    story_ids = (resp.json() or [])[:limit]

    manager = Manager()
    try:
        stories = manager.list()
        # NOTE(review): under the 'spawn' start method the children must be
        # able to import ``get_story``; functions defined only in a notebook
        # cell may not satisfy that -- confirm on the target platform.
        processes = [Process(target=get_story, args=(sid, stories))
                     for sid in story_ids]
        for p in processes:
            p.start()
        for p in processes:
            p.join()
        # Copy out of the proxy while the manager is still alive; the proxy
        # becomes unusable once the manager has been shut down.
        return list(stories)
    finally:
        manager.shutdown()

In [5]:
def calculate_summary_karma(stories=None):
    """Run ``calculate.summary_stats`` over the scores of the top stories.

    Args:
        stories: Optional iterable of story dicts to summarise. When omitted,
            a fresh set is fetched with ``get_top_stories_with_user_karma``.
            Passing the same collection to ``pearsons_karma`` lets both
            statistics describe one snapshot instead of two separate fetches.

    Returns:
        Whatever ``calculate.summary_stats`` returns for the list of scores.
    """
    if stories is None:
        stories = get_top_stories_with_user_karma()
    # Items without a score would make Decimal(None) raise TypeError, so
    # they are left out of the summary.
    scores = [Decimal(s['score']) for s in stories
              if s.get('score') is not None]
    return calculate.summary_stats(scores)

In [6]:
def pearsons_karma(stories=None):
    """Correlate author karma with story score via ``calculate.pearson``.

    Args:
        stories: Optional iterable of story dicts carrying ``user_karma`` and
            ``score``. When omitted, a fresh set is fetched with
            ``get_top_stories_with_user_karma``. Passing the same collection
            to ``calculate_summary_karma`` keeps both statistics on one
            snapshot of the data.

    Returns:
        Whatever ``calculate.pearson`` returns for the two series.
    """
    if stories is None:
        stories = get_top_stories_with_user_karma()
    # Filter once so the two series stay index-aligned; an item without a
    # score would otherwise make Decimal(None) raise TypeError.
    scored = [s for s in stories if s.get('score') is not None]
    user_karma = [Decimal(s.get('user_karma') or 0) for s in scored]
    story_karma = [Decimal(s['score']) for s in scored]
    return calculate.pearson(user_karma, story_karma)

In [7]:
# Pearson correlation between author karma and story score for the current
# top stories. The data is fetched live on every call, so the value shown
# below will differ from run to run.
pearsons_karma()

-0.005659751973886795

In [8]:
# Summary statistics of the top stories' scores. This call performs its own
# live fetch, so it does not necessarily describe the same stories as the
# correlation computed in the previous cell.
calculate_summary_karma()

| Statistic             | Value         |
-----------------------------------------
| n                     | 40            |
| mean                  | 110.3         |
| median                | 32.0          |
| mode                  | None          |
| maximum               | 934           |
| minimum               | 1             |
| range                 | 933.0         |
| standard deviation    | 211.448244258 |
| variation coefficient | 1.91702850642 |

