# HACKER NEWS - Ask HN & Show HN

We retrieved 20,000 rows of Hacker-News posts; an extremely popular user-submitted posting board revolving around technology and startups.
We are specifically interested in posts with titles of "Ask HN" or "Show HN". 
The former is about users asking Hacker-News community specific questions.
The latter is about users showing something interesting to the community.
We will compare if "Ask HN" or "Show HN" receives more comments.
And do posts created in certain time receive more comments on average.

In [1]:
from csv import reader
hn = list(reader(open('hacker_news.csv')))
print(hn[0:5])

[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'], ['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01']]


In [2]:
headers = hn[0] #separate header
hn = hn[1:] #remove header
print(headers)
print(hn[0:2])

['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']
[['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30']]


In [3]:
ask_posts = []
show_posts = []
other_posts = []
for row in hn:
    title = row[1]
    if title.lower().startswith("ask hn"):
        ask_posts.append(row)
    if title.lower().startswith("show hn"):
        show_posts.append(row)
    else:
        other_posts.append(row)
        
print('ask hn:',len(ask_posts))
print(ask_posts[0:2])
print('show hn:',len(show_posts))
print(show_posts[0:2])
print('others:',len(other_posts))

ask hn: 1744
[['12296411', 'Ask HN: How to improve my personal website?', '', '2', '6', 'ahmedbaracat', '8/16/2016 9:55'], ['10610020', 'Ask HN: Am I the only one outraged by Twitter shutting down share counts?', '', '28', '29', 'tkfx', '11/22/2015 13:43']]
show hn: 1162
[['10627194', 'Show HN: Wio Link  ESP8266 Based Web of Things Hardware Development Platform', 'https://iot.seeed.cc', '26', '22', 'kfihihc', '11/25/2015 14:03'], ['10646440', 'Show HN: Something pointless I made', 'http://dn.ht/picklecat/', '747', '102', 'dhotson', '11/29/2015 22:46']]
others: 18938


In [4]:
total_ask_comments = 0
for row in ask_posts:
    total_ask_comments += int(row[4])
avg_ask_comments = total_ask_comments / len(ask_posts)
print(avg_ask_comments)

total_show_comments = 0
for row in show_posts:
    total_show_comments += int(row[4])
avg_show_comments = total_show_comments / len(show_posts)
print(avg_show_comments)

print('"Ask HN" has more comments')

14.038417431192661
10.31669535283993
"Ask HN" has more comments


In [5]:
import datetime as dt
result_list = []
for row in ask_posts:
    result_list.append([row[6],int(row[4])])

counts_by_hour = {}
comments_by_hour = {}
for row in result_list:
    parse = dt.datetime.strptime(row[0], '%m/%d/%Y %H:%M')
    form = parse.strftime('%H')
    if form in counts_by_hour:
        counts_by_hour[form] += 1
        comments_by_hour[form] += row[1]
    else:
        counts_by_hour[form] = 1
        comments_by_hour[form] = row[1]
    

In [11]:
avg_by_hour = []
for row_cou in counts_by_hour:
    avg = float(comments_by_hour[row_cou] / counts_by_hour[row_cou],)
    avg_by_hour.append([row_cou, avg])
sorted(avg_by_hour)

[['00', 8.127272727272727],
 ['01', 11.383333333333333],
 ['02', 23.810344827586206],
 ['03', 7.796296296296297],
 ['04', 7.170212765957447],
 ['05', 10.08695652173913],
 ['06', 9.022727272727273],
 ['07', 7.852941176470588],
 ['08', 10.25],
 ['09', 5.5777777777777775],
 ['10', 13.440677966101696],
 ['11', 11.051724137931034],
 ['12', 9.41095890410959],
 ['13', 14.741176470588234],
 ['14', 13.233644859813085],
 ['15', 38.5948275862069],
 ['16', 16.796296296296298],
 ['17', 11.46],
 ['18', 13.20183486238532],
 ['19', 10.8],
 ['20', 21.525],
 ['21', 16.009174311926607],
 ['22', 6.746478873239437],
 ['23', 7.985294117647059]]

In [24]:
swap_avg_by_hour = []
for row in avg_by_hour:
    swap_avg_by_hour.append([row[1],row[0]])
sorted_swap = sorted(swap_avg_by_hour, reverse = True)
print("Top 5 Hours for Ask Posts Comments")
for row in sorted_swap[0:5]:
    hour = dt.datetime.strptime(row[1],"%H").strftime("%H:%M")
    avgcomm = "{:.2f}".format(row[0])
    form = "{h} est: {av} average comments per post".format(h=hour, av=avgcomm)
    print(form)
    
    

Top 5 Hours for Ask Posts Comments
15:00 est: 38.59 average comments per post
02:00 est: 23.81 average comments per post
20:00 est: 21.52 average comments per post
16:00 est: 16.80 average comments per post
21:00 est: 16.01 average comments per post
