# Analyzing Hacker News Posts
### This analysis explores posts made to Hacker News. Specifically: do posts asking the HN community a question (titles starting with 'Ask HN') receive more interactions than posts showing the community something (titles starting with 'Show HN'), and at what time of day are the most popular posts shared? The data set is a random sample of about 20,000 Hacker News posts that have received any interaction.

In [3]:
from csv import reader
# Load the Hacker News CSV into a list of rows; every row is a list of strings
# and the first row holds the column names.
# The `with` block closes the file handle as soon as the rows have been read,
# and newline='' lets the csv module do its own newline handling, as the csv
# documentation recommends for files passed to `reader`.
with open('hacker_news.csv', newline='') as opened_file:
    read_file = reader(opened_file)
    hn = list(read_file)

# Preview the header row plus the first four data rows.
hn[:5]

[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'],
 ['12224879',
  'Interactive Dynamic Video',
  'http://www.interactivedynamicvideo.com/',
  '386',
  '52',
  'ne0phyte',
  '8/4/2016 11:52'],
 ['10975351',
  'How to Use Open Source and Shut the Fuck Up at the Same Time',
  'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/',
  '39',
  '10',
  'josep2',
  '1/26/2016 19:30'],
 ['11964716',
  "Florida DJs May Face Felony for April Fools' Water Joke",
  'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/',
  '2',
  '1',
  'vezycash',
  '6/23/2016 22:20'],
 ['11919867',
  'Technology ventures: From Idea to Enterprise',
  'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429',
  '3',
  '1',
  'hswarna',
  '6/17/2016 0:01']]

In [4]:
# Split the column names off from the data rows, then preview both.
# NOTE: `hn` is rebound to the data rows only, so running this cell a second
# time would strip one more row from the front.
headers, hn = hn[0], hn[1:]
print(headers)
hn[:5]

['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']


[['12224879',
  'Interactive Dynamic Video',
  'http://www.interactivedynamicvideo.com/',
  '386',
  '52',
  'ne0phyte',
  '8/4/2016 11:52'],
 ['10975351',
  'How to Use Open Source and Shut the Fuck Up at the Same Time',
  'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/',
  '39',
  '10',
  'josep2',
  '1/26/2016 19:30'],
 ['11964716',
  "Florida DJs May Face Felony for April Fools' Water Joke",
  'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/',
  '2',
  '1',
  'vezycash',
  '6/23/2016 22:20'],
 ['11919867',
  'Technology ventures: From Idea to Enterprise',
  'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429',
  '3',
  '1',
  'hswarna',
  '6/17/2016 0:01'],
 ['10301696',
  'Note by Note: The Making of Steinway L1037 (2007)',
  'http://www.nytimes.com/2007/11/07/movies/07stein.html?_r=0',
  '8',
  '2',
  'walterbell',
  '9/30/2015 4:12']]

In [5]:
# Sort every post into one of three lists according to its title prefix.
# The title (column 1) is lower-cased first so the match is case-insensitive.
ask_posts, show_posts, other_posts = [], [], []

for row in hn:
    lowered_title = row[1].lower()
    if lowered_title.startswith('ask hn'):
        bucket = ask_posts
    elif lowered_title.startswith('show hn'):
        bucket = show_posts
    else:
        bucket = other_posts
    bucket.append(row)

print(len(ask_posts), len(show_posts), len(other_posts))

1744 1162 17194


In [6]:
# Show the column names again; later cells read fields by position (e.g. post[4]).
print(headers)

['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']


In [10]:
# Average number of comments per post for the "Ask HN" and "Show HN" groups.
# Column 4 of each row is 'num_comments', stored as a string, hence int().
total_ask_comments = sum(int(post[4]) for post in ask_posts)
total_show_comments = sum(int(post[4]) for post in show_posts)

avg_ask_comments = total_ask_comments / len(ask_posts)
avg_show_comments = total_show_comments / len(show_posts)

# Report both averages rounded to two decimal places.
print('%.2f' % avg_ask_comments)
print('%.2f' % avg_show_comments)

14.04
10.32


### We can see that, on average, Hacker News posts asking the community a question receive roughly 36% more comments than posts showing the community something. The averages are about 14 and 10 comments per post, respectively.

In [23]:
import datetime as dt
# Pair each ask post's creation timestamp (column 6) with its comment
# count (column 4).
result_list = [[row[6], row[4]] for row in ask_posts]

# Per-hour tallies, keyed by the zero-padded hour string ('00'..'23'):
#   counts_by_hour   -> number of ask posts created in that hour
#   comments_by_hour -> total comments those posts received
counts_by_hour = {}
comments_by_hour = {}

for created_at, num_comments in result_list:
    # Timestamps look like '8/4/2016 11:52'; keep only the hour part.
    hour = dt.datetime.strptime(created_at, '%m/%d/%Y %H:%M').strftime('%H')
    counts_by_hour[hour] = counts_by_hour.get(hour, 0) + 1
    comments_by_hour[hour] = comments_by_hour.get(hour, 0) + int(num_comments)

print(counts_by_hour)
print(comments_by_hour)
        

{'15': 116, '22': 71, '05': 46, '09': 45, '02': 58, '06': 44, '23': 68, '10': 59, '01': 60, '21': 109, '14': 107, '18': 109, '00': 55, '16': 108, '13': 85, '19': 110, '03': 54, '17': 100, '11': 58, '07': 34, '12': 73, '20': 80, '08': 48, '04': 47}
{'15': 4477, '22': 479, '05': 464, '09': 251, '02': 1381, '06': 397, '23': 543, '10': 793, '01': 683, '21': 1745, '14': 1416, '18': 1439, '00': 447, '16': 1814, '13': 1253, '19': 1188, '03': 421, '17': 1146, '11': 641, '07': 267, '12': 687, '20': 1722, '08': 492, '04': 337}


In [24]:
# Average comments per ask post for each hour, as [hour, average] pairs.
avg_by_hour = [
    [hour, comments_by_hour[hour] / posts_in_hour]
    for hour, posts_in_hour in counts_by_hour.items()
]

print(avg_by_hour)


[['15', 38.5948275862069], ['22', 6.746478873239437], ['05', 10.08695652173913], ['09', 5.5777777777777775], ['02', 23.810344827586206], ['06', 9.022727272727273], ['23', 7.985294117647059], ['10', 13.440677966101696], ['01', 11.383333333333333], ['21', 16.009174311926607], ['14', 13.233644859813085], ['18', 13.20183486238532], ['00', 8.127272727272727], ['16', 16.796296296296298], ['13', 14.741176470588234], ['19', 10.8], ['03', 7.796296296296297], ['17', 11.46], ['11', 11.051724137931034], ['07', 7.852941176470588], ['12', 9.41095890410959], ['20', 21.525], ['08', 10.25], ['04', 7.170212765957447]]


In [29]:
# Flip each [hour, average] pair to [average, hour] so that a plain sort
# orders the rows by average comments.
swap_avg_by_hour = [[average, hour] for hour, average in avg_by_hour]

print(swap_avg_by_hour)
sorted_swap = sorted(swap_avg_by_hour, reverse=True)  # highest average first

print('Top 5 Hours for Ask Posts Comments')
line_template = '{hour}: {avg:.2f} average comments per post'
for average, hour in sorted_swap[:5]:
    # Turn the bare hour string (e.g. '15') into a clock label ('15:00').
    hour_label = dt.datetime.strptime(hour, '%H').strftime('%H:%M')
    print(line_template.format(hour=hour_label, avg=average))
    

[[38.5948275862069, '15'], [6.746478873239437, '22'], [10.08695652173913, '05'], [5.5777777777777775, '09'], [23.810344827586206, '02'], [9.022727272727273, '06'], [7.985294117647059, '23'], [13.440677966101696, '10'], [11.383333333333333, '01'], [16.009174311926607, '21'], [13.233644859813085, '14'], [13.20183486238532, '18'], [8.127272727272727, '00'], [16.796296296296298, '16'], [14.741176470588234, '13'], [10.8, '19'], [7.796296296296297, '03'], [11.46, '17'], [11.051724137931034, '11'], [7.852941176470588, '07'], [9.41095890410959, '12'], [21.525, '20'], [10.25, '08'], [7.170212765957447, '04']]
Top 5 Hours for Ask Posts Comments
15:00: 38.59 average comments per post
02:00: 23.81 average comments per post
20:00: 21.52 average comments per post
16:00: 16.80 average comments per post
21:00: 16.01 average comments per post
