# Hacker News Analysis
This notebook analyses Hacker News submissions to find out which types of posts receive the most engagement (measured by comments), and which hours of the day are best for making a post.

In [1]:
from csv import reader

# Read the whole CSV into memory as a list of rows; each row is a list of
# strings, and the first row is the header.
# The `with` block guarantees the file handle is closed once reading is done
# (the bare open() call it replaces left the handle open for the life of the
# kernel). newline='' is what the csv module asks for so that quoted fields
# containing line breaks are parsed correctly.
# NOTE(review): assumes the file is UTF-8 encoded; spelling it out avoids
# depending on the platform's default encoding -- confirm against the data.
with open('hacker_news.csv', encoding='utf-8', newline='') as opened_file:
    read_file = reader(opened_file)
    hn = list(read_file)
print(hn[:5])

[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'], ['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01']]


In [2]:
# Keep the column names (the first row of the raw CSV data) for reference.
# This has to run while `hn` still contains the header row, i.e. before the
# header is sliced off `hn`.
headers = hn[0]
print(headers)

['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']


In [3]:
# Drop the header row so that `hn` holds only post rows.
# The guard makes the cell idempotent: an unconditional `hn = hn[1:]` would
# throw away one real post every time the cell is re-run. It also copes with
# an empty `hn` without raising.
if hn and hn[0] == headers:
    hn = hn[1:]
print(hn[:5])

[['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01'], ['10301696', 'Note by Note: The Making of Steinway L1037 (2007)', 'http://www.nytimes.com/2007/11/07/movies/07stein.html?_r=0', '8', '2', 'walterbell', '9/30/2015 4:12']]


In [4]:
# Split the posts into three buckets according to how the title begins
# (compared case-insensitively): "Ask HN", "Show HN", or anything else.
ask_post, show_post, other_post = [], [], []

for post in hn:
    # Column 1 is the title; lowercase it once for both prefix checks.
    lowered_title = post[1].lower()
    if lowered_title.startswith('ask hn'):
        bucket = ask_post
    elif lowered_title.startswith('show hn'):
        bucket = show_post
    else:
        bucket = other_post
    bucket.append(post)

# Bare final expression so the notebook displays the three bucket sizes.
len(ask_post), len(show_post), len(other_post)

(1744, 1162, 17194)

In [7]:
### Average number of comments on 'ask hn' posts
# Column 4 of each row is num_comments, stored as a string in the CSV.
total_ask_comments = sum(int(post[4]) for post in ask_post)
total_ask_posts = len(ask_post)
avg_ask_comments = total_ask_comments / total_ask_posts
print(round(avg_ask_comments, 2))

14.04


In [8]:
### Average number of comments on 'show hn' posts
# Column 4 of each row is num_comments, stored as a string in the CSV.
total_show_comments = sum(int(post[4]) for post in show_post)
total_show_posts = len(show_post)
avg_show_comments = total_show_comments / total_show_posts
print(round(avg_show_comments, 2))

10.32


In [9]:
### Average number of comments on other posts
# Column 4 of each row is num_comments, stored as a string in the CSV.
total_other_comments = sum(int(post[4]) for post in other_post)
total_other_posts = len(other_post)
avg_other_comments = total_other_comments / total_other_posts
print(round(avg_other_comments, 2))

26.87


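On average, ask posts receive more comments per post (14.04) than show posts (10.32). Posts in neither category average more still (26.87), but they fall outside the ask-versus-show comparison. Since ask posts are more likely to receive comments than show posts, we'll focus our remaining analysis just on these posts. Next, we'll determine if ask posts created at a certain time are more likely to attract comments.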

In [11]:
### Count ask posts, and total their comments, by hour of creation ###
import datetime as dt

# Pair each ask post's creation timestamp (column 6) with its comment
# count (column 4, converted from string to int).
result_list = [[post[6], int(post[4])] for post in ask_post]

counts_by_hour = {}    # hour of day -> number of ask posts created then
comments_by_hour = {}  # hour of day -> total comments on those posts
for created_at, comment_count in result_list:
    # Timestamps look like "8/4/2016 11:52"; only the hour is needed.
    hour = dt.datetime.strptime(created_at, "%m/%d/%Y %H:%M").hour
    counts_by_hour[hour] = counts_by_hour.get(hour, 0) + 1
    comments_by_hour[hour] = comments_by_hour.get(hour, 0) + comment_count
print(counts_by_hour)
print(comments_by_hour)

{9: 45, 13: 85, 10: 59, 14: 107, 16: 108, 23: 68, 12: 73, 17: 100, 15: 116, 21: 109, 20: 80, 2: 58, 18: 109, 3: 54, 5: 46, 19: 110, 1: 60, 22: 71, 8: 48, 4: 47, 0: 55, 6: 44, 7: 34, 11: 58}
{9: 251, 13: 1253, 10: 793, 14: 1416, 16: 1814, 23: 543, 12: 687, 17: 1146, 15: 4477, 21: 1745, 20: 1722, 2: 1381, 18: 1439, 3: 421, 5: 464, 19: 1188, 1: 683, 22: 479, 8: 492, 4: 337, 0: 447, 6: 397, 7: 267, 11: 641}


In [14]:
### Average number of comments per Ask HN post, for each hour ###
# Builds [hour, average] pairs in the same order as the hours appear in
# counts_by_hour.
avg_by_hour = [
    [hr, comments_by_hour[hr] / n_posts]
    for hr, n_posts in counts_by_hour.items()
]
print(avg_by_hour)

[[9, 5.5777777777777775], [13, 14.741176470588234], [10, 13.440677966101696], [14, 13.233644859813085], [16, 16.796296296296298], [23, 7.985294117647059], [12, 9.41095890410959], [17, 11.46], [15, 38.5948275862069], [21, 16.009174311926607], [20, 21.525], [2, 23.810344827586206], [18, 13.20183486238532], [3, 7.796296296296297], [5, 10.08695652173913], [19, 10.8], [1, 11.383333333333333], [22, 6.746478873239437], [8, 10.25], [4, 7.170212765957447], [0, 8.127272727272727], [6, 9.022727272727273], [7, 7.852941176470588], [11, 11.051724137931034]]


In [15]:
### Rank the hours by average comments and print the top five ###
# Put the average first in each pair so a plain sort orders by it
# (ties fall back to the hour, also in descending order).
swap_avg_by_hour = [[pair[-1], pair[0]] for pair in avg_by_hour]
sorted_swap = sorted(swap_avg_by_hour, reverse=True)

print("Top 5 Hours for Ask Posts Comments")
line_template = "{hour}:00 {avg:.2f} average comments per post"
for avg, hour in sorted_swap[:5]:
    print(line_template.format(hour=hour, avg=avg))

Top 5 Hours for Ask Posts Comments
15:00 38.59 average comments per post
2:00 23.81 average comments per post
20:00 21.52 average comments per post
16:00 16.80 average comments per post
21:00 16.01 average comments per post
