In [1]:
import pandas as pd
import praw
from praw.models import MoreComments
import json 
import numpy as np 

from config import *

import nltk
# Fetch the VADER lexicon into the local nltk_data directory; the recorded
# output shows this is a no-op when the package is already up to date.
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Module-level VADER analyzer, created once after the lexicon download above.
analyzer = SentimentIntensityAnalyzer()



[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/kayhan/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Reddit API client. The my_* credential names are not defined in this
# notebook; presumably they are supplied by `from config import *`.
reddit = praw.Reddit(
    username=my_username,
    password=my_password,
    client_id=my_client_id,
    client_secret=my_client_secret,
    user_agent=my_user_agent,
)


In [3]:
# Sanity check of the client mode; the recorded run printed False.
print(reddit.read_only)


False


In [4]:
import requests 
from bs4 import BeautifulSoup

In [5]:
# Load the raw chapter-link lines, one list element per line of the file.
# Whitespace-only lines are skipped: find_chapter_id indexes into its parse
# results, so a stray blank (e.g. trailing) line would raise IndexError when
# chapter_list is parsed.
with open('chapter_ids.txt') as f:
    chapter_list = [line for line in f if line.strip()]

In [6]:
def find_chapter_id(text):
    """Extract the chapter label and the submission code from one link line.

    The line is expected to contain a space-delimited ``href="/<code>"`` token
    and a ``>Chapter ...</a>`` fragment, for example
    ``<a href="/104l4a" rel="nofollow">Chapter 682</a>``
    (assumed layout of the input file -- TODO confirm against chapter_ids.txt).

    Parameters
    ----------
    text : str
        One raw line of link markup.

    Returns
    -------
    tuple of (str, str)
        ``(chapter, code)``. ``chapter`` is the first '>'-delimited fragment
        containing 'Chapter', without a trailing ``</a`` remnant. ``code`` is
        the first space-delimited token containing 'href=', with 'href=',
        double quotes and slashes removed.

    Raises
    ------
    IndexError
        If no fragment contains 'Chapter' or no token contains 'href='.
    """
    closing_tag = '</a'

    # Splitting on '>' leaves the link text as e.g. 'Chapter 682</a'.
    chapter = [piece for piece in text.split('>') if 'Chapter' in piece][0]

    # Remove only the literal closing-tag remnant. str.strip('</a') treats its
    # argument as a set of characters and would also eat a legitimate leading
    # or trailing 'a', '/' or '<' of the label ('... Sea' -> '... Se').
    if chapter.endswith(closing_tag):
        chapter = chapter[:-len(closing_tag)]

    href_token = [token for token in text.split(' ') if 'href=' in token][0]
    code = href_token.replace('href=', '').replace('"', '').replace('/', '')

    return chapter, code

# Map each submission code to its chapter label. If a code occurs on more
# than one line, the label from the last such line wins.
chapter_list_codes = {}
for raw_line in chapter_list:
    chapter, code = find_chapter_id(raw_line)
    chapter_list_codes[code] = chapter

In [7]:
# Number of distinct submission codes parsed (500 in the recorded run).
len(chapter_list_codes)

500

In [8]:
# Sorted NumPy array of the submission codes (np.unique returns sorted values).
submission_ids = np.unique(list(chapter_list_codes))


In [9]:
# Smoke test: take the first submission code and build a handle for it.
test_id = submission_ids[0]
print(test_id)
test_submission = reddit.submission(id=test_id)

104l4a


In [10]:
def getSubmissionStats(submission_id):
    """Collect post-level stats and per-comment VADER scores for one submission.

    Parameters
    ----------
    submission_id : str
        Reddit submission id. It must also be a key of the module-level
        ``chapter_list_codes`` mapping.

    Returns
    -------
    dict
        Keys, in this order: 'submission_id', 'title', 'chapter',
        'created_utc', 'total_upvotes', 'num_of_top_comments',
        'upvote_ratio', 'no_of_comments', 'comment_sentiments'.
        'comment_sentiments' is a list with one dict per comment yielded by
        iterating ``submission.comments`` (MoreComments placeholders are
        skipped); each dict has 'comment' (body text), 'upvotes' (score) and
        'sentiment' (the ``polarity_scores`` dict). 'num_of_top_comments' is
        the length of that list.

    Raises
    ------
    KeyError
        If ``submission_id`` is not in ``chapter_list_codes``. This is checked
        before any Reddit request is made.

    Notes
    -----
    Relies on the module-level ``reddit`` client, ``chapter_list_codes`` and
    ``analyzer``. Errors raised by the Reddit client propagate unchanged.
    """
    # Resolve the chapter label first so an unknown id fails without an API call.
    chapter = chapter_list_codes[submission_id]

    submission = reddit.submission(id=submission_id)

    comment_list = []
    for comment in submission.comments:
        # Skip "load more comments" placeholders; only real comments are scored.
        if isinstance(comment, MoreComments):
            continue
        comment_list.append({
            'comment': comment.body,
            'upvotes': comment.score,
            # Reuse the shared module-level analyzer instead of constructing
            # a new SentimentIntensityAnalyzer (and reloading its lexicon)
            # for every single comment.
            'sentiment': analyzer.polarity_scores(comment.body),
        })

    return {
        'submission_id': submission.id,
        'title': submission.title,
        'chapter': chapter,
        'created_utc': submission.created_utc,
        'total_upvotes': submission.score,
        'num_of_top_comments': len(comment_list),
        'upvote_ratio': submission.upvote_ratio,
        'no_of_comments': submission.num_comments,
        'comment_sentiments': comment_list,
    }



In [11]:
# Run the scraper on the single test id; wrapped in a list because
# convertToCSV takes a list of records.
test_json_list = [getSubmissionStats(test_id)]

In [12]:
# Inspect the raw record produced for the test submission.
test_json_list[0]

{'submission_id': '104l4a',
 'title': 'One Piece Chapter 682',
 'chapter': 'Chapter 682',
 'created_utc': 1348041079.0,
 'total_upvotes': 77,
 'num_of_top_comments': 51,
 'upvote_ratio': 0.95,
 'no_of_comments': 215,
 'comment_sentiments': [{'comment': "The parts with DoFlamingo were hard to make out. So he has a bunch of people working with him (some might be okamas I think, the way they referred to Law) and one of his harem tries to revolt and attack him? And while this is going on and he is casually controlling and killing her he tells Vergo to kill Law, blow up the SAD stuff, but to beware of Luffy?\n\nI'm getting the feeling that DoFlamingo might be the strongest Warlord. Or at least the most influential.",
   'upvotes': 40,
   'sentiment': {'neg': 0.111,
    'neu': 0.774,
    'pos': 0.115,
    'compound': 0.3363}},
  {'comment': 'On a somewhat unrelated note, this week really shows how much more content OP has in an average chapter, when compared to, say, Bleach.',
   'upvotes': 

In [13]:
def convertToCSV(my_json_list):
    """Flatten submission records into a DataFrame with one row per comment.

    Despite its name, this function returns a DataFrame and writes nothing
    to disk.

    Parameters
    ----------
    my_json_list : list of dict
        Records shaped like the return value of ``getSubmissionStats``: each
        needs the keys 'submission_id', 'title', 'chapter', 'total_upvotes',
        'upvote_ratio', 'no_of_comments', 'created_utc' and
        'comment_sentiments' (a list of dicts with 'comment', 'upvotes' and a
        'sentiment' dict holding 'neg', 'neu', 'pos', 'compound').

    Returns
    -------
    pandas.DataFrame
        Columns, in order: submission_id, title, chapter, total_upvotes,
        upvote_ratio, no_of_comments, created_utc, comment, comment_upvotes,
        negative_percent, neutral_percent, positive_percent, sentiment_score.
        Each comment yields one row carrying its submission's fields.
        A submission with no comments contributes no rows. An empty input
        yields an empty frame that still has all of the columns.

    Raises
    ------
    KeyError
        If a record or comment entry lacks one of the keys listed above.
    """
    submission_cols = ['submission_id', 'title', 'chapter', 'total_upvotes',
                       'upvote_ratio', 'no_of_comments', 'created_utc']
    comment_cols = ['comment', 'comment_upvotes', 'negative_percent',
                    'neutral_percent', 'positive_percent', 'sentiment_score']

    # Build the flat rows directly rather than merging a submission table
    # with a comment table on submission_id: such a merge multiplies rows
    # whenever the same id occurs more than once in the input, and it cannot
    # handle an empty input.
    rows = []
    for record in my_json_list:
        submission_part = {col: record[col] for col in submission_cols}
        for entry in record['comment_sentiments']:
            scores = entry['sentiment']
            rows.append({
                **submission_part,
                'comment': entry['comment'],
                'comment_upvotes': entry['upvotes'],
                'negative_percent': scores['neg'],
                'neutral_percent': scores['neu'],
                'positive_percent': scores['pos'],
                'sentiment_score': scores['compound'],
            })

    # Passing `columns` fixes the column order and keeps the schema when
    # there are no rows at all.
    return pd.DataFrame(rows, columns=submission_cols + comment_cols)

In [14]:
# Flatten the single-submission sample and preview its first rows.
test_csv = convertToCSV(test_json_list)
test_csv.head()

Unnamed: 0,submission_id,title,chapter,total_upvotes,upvote_ratio,no_of_comments,created_utc,comment,comment_upvotes,negative_percent,neutral_percent,positive_percent,sentiment_score
0,104l4a,One Piece Chapter 682,Chapter 682,77,0.95,215,1348041000.0,The parts with DoFlamingo were hard to make ou...,40,0.111,0.774,0.115,0.3363
1,104l4a,One Piece Chapter 682,Chapter 682,77,0.95,215,1348041000.0,"On a somewhat unrelated note, this week really...",30,0.0,1.0,0.0,0.0
2,104l4a,One Piece Chapter 682,Chapter 682,77,0.95,215,1348041000.0,Doflamingo is living Sanji's dream with that h...,29,0.173,0.701,0.126,-0.8127
3,104l4a,One Piece Chapter 682,Chapter 682,77,0.95,215,1348041000.0,So Momonosuke was turned into a dragon. Brook ...,50,0.089,0.712,0.199,0.8793
4,104l4a,One Piece Chapter 682,Chapter 682,77,0.95,215,1348041000.0,Gonna have to re read this one when mangastrea...,16,0.305,0.695,0.0,-0.7311


In [15]:
# Scrape every submission. A failure on one id is reported and the id is
# recorded in errors_id, so the remaining ids are still processed.
total_submission_stats = []
errors_id = []
for sub_id in submission_ids:
    print(sub_id)
    try:
        total_submission_stats.append(getSubmissionStats(sub_id))
    # Catch Exception rather than using a bare `except:` so that Ctrl-C or a
    # kernel interrupt (KeyboardInterrupt) can still stop this long loop.
    except Exception as exc:
        # Show why the id failed instead of discarding the error silently.
        print('  failed:', repr(exc))
        errors_id.append(sub_id)

104l4a
10jwhl
10nybw
10t6nh
10xm21
114oow
118rfy
11gog2
11mffc
11qq7n
11zud8
12bwjg
12lwqb
12se1y
13i7vm
13xcyt
14bc2h
14pxek
151ui6
15i9z5
16ohym
1742un
17k0f1
17xddr
18vr1n
19bhb2
19rqjk
1al1bs
1b3on5
1bkuzu
1c2hhf
1cizbj
1cxf9w
1dxa7p
1edgsn
1fpqrp
1g6r2x
1h3llm
1hk2he
1hx9tr
1ih0u9
1jemn2
1jvb30
1kso6p
1l90vt
1lpglw
1m3hox
1n3bjc
1nkzcf
1nyz41
1ok913
1pgf11
1q0mfq
1qj2or
1qto08
1qwvsd
1r07su
1r1pr7
1r5zvl
1r8ke7
1rcyrv
1rg0bw
1riyqx
1rkzew
1rnxxl
1rpzhx
1rvhem
1rwtrs
1s064n
1s0q5j
1s7ld0
1s9n20
1sfby1
1sl3u5
1syxp3
1t33dk
1th0d0
1to1lq
1typye
1ukpbh
1v2hcd
1v9nu8
1vu7ur
1wgem9
1x2chr
1ybuia
1yz4d3
1zm2hm
207wo8
21egd9
21zypp
22lazb
23qzjs
24xq69
25ispy
263qtw
26ofvr
28fzo8
291ih8
29mwh1
2arbzy
2bhckd
2c4gmx
2crxte
2e2a3o
2etq0e
2fg6qr
2fzvvt
2gr3i2
2i30vp
2ink46
2jet56
2k37aw
2lh13t
2m63xl
2mriwa
2nkgrj
2oyp4m
2po62y
2qfrkf
2sia5n
2t9v5e
2u2h24
2vn0um
2weubd
2x7rcg
2zk3xe
30co9y
316e6k
31zd3s
33kmhb
355dhm
35x9ew
36qaus
37kf6j
38hplb
3a9cvr
3b1ohy
3budrz
3cnlx7
3dd3rr
3f4iu0
3fyvw3

In [16]:
# Persist the raw per-submission records, comments included, as JSON.
with open('one_piece_data.json', mode='w') as json_file:
    json.dump(total_submission_stats, fp=json_file)

In [17]:
# Flatten all scraped submissions to one row per comment and save them
# without the index column.
# NOTE(review): `csv` holds a DataFrame here and would shadow the stdlib
# `csv` module if that were ever imported; the name is kept because the
# following cells read it.
csv = convertToCSV(total_submission_stats)
csv.to_csv('one_piece_data.csv',index=False)

In [18]:
# Total number of comment rows in the flattened frame.
len(csv)

56812

In [19]:
# Display the flattened frame (the recorded output is a truncated head/tail view).
csv

Unnamed: 0,submission_id,title,chapter,total_upvotes,upvote_ratio,no_of_comments,created_utc,comment,comment_upvotes,negative_percent,neutral_percent,positive_percent,sentiment_score
0,104l4a,One Piece Chapter 682,Chapter 682,79,0.95,215,1.348041e+09,The parts with DoFlamingo were hard to make ou...,37,0.111,0.774,0.115,0.3363
1,104l4a,One Piece Chapter 682,Chapter 682,79,0.95,215,1.348041e+09,"On a somewhat unrelated note, this week really...",29,0.000,1.000,0.000,0.0000
2,104l4a,One Piece Chapter 682,Chapter 682,79,0.95,215,1.348041e+09,Doflamingo is living Sanji's dream with that h...,29,0.173,0.701,0.126,-0.8127
3,104l4a,One Piece Chapter 682,Chapter 682,79,0.95,215,1.348041e+09,So Momonosuke was turned into a dragon. Brook ...,49,0.089,0.712,0.199,0.8793
4,104l4a,One Piece Chapter 682,Chapter 682,79,0.95,215,1.348041e+09,Gonna have to re read this one when mangastrea...,18,0.305,0.695,0.000,-0.7311
...,...,...,...,...,...,...,...,...,...,...,...,...,...
56807,zpb5x,One Piece Chapter 681,Chapter 681,105,0.98,264,1.347361e+09,[Zipped (MangaStream)](https://www.cubby.com/p...,1,0.000,1.000,0.000,0.0000
56808,zpb5x,One Piece Chapter 681,Chapter 681,105,0.98,264,1.347361e+09,Im just wanting to know who he and Luffy decid...,1,0.099,0.901,0.000,-0.6939
56809,zpb5x,One Piece Chapter 681,Chapter 681,105,0.98,264,1.347361e+09,"Oda is pulling a Dan Brown. Short chapters, st...",0,0.210,0.790,0.000,-0.6785
56810,zpb5x,One Piece Chapter 681,Chapter 681,105,0.98,264,1.347361e+09,"This is just a wild guess, but what if there a...",-1,0.342,0.598,0.061,-0.9587
