In [2]:
# Settings for notebook
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every expression statement in a cell, not only the
# last one.
InteractiveShell.ast_node_interactivity = "all"
# Show Python version
import platform
platform.python_version()

'3.6.3'

In [3]:
import scrapy
from scrapy.crawler import CrawlerProcess

In [4]:
import json

class JsonWriterPipeline(object):
    """Item pipeline that stores every item as one JSON object per line.

    The output file is 'quoteresult.jl' in the current working directory.
    NOTE(review): no ITEM_PIPELINES setting referencing this class is visible
    in this notebook -- confirm whether the pipeline is actually enabled.
    """

    def process_item(self, item, spider):
        """Write ``item`` as a single JSON line and hand it back unchanged."""
        serialized = json.dumps(dict(item))
        self.file.write(serialized + "\n")
        return item

    def open_spider(self, spider):
        """Open the output file for writing (truncating any previous content)."""
        self.file = open('quoteresult.jl', 'w')

    def close_spider(self, spider):
        """Close the output file opened in ``open_spider``."""
        self.file.close()

In [5]:
import logging

class AtariSpider(scrapy.Spider):
    """Crawl the AtariAge forum index and emit one item per forum post.

    Crawl flow:

    1. ``parse`` reads the forum index and requests every forum linked there.
    2. ``parse_sublevel`` reads one forum page: it requests every thread on
       the page, follows the "next" pagination link and, on page 1 only,
       requests the linked subforums (handled by ``parse_sublevel`` again).
    3. ``parse_thread`` reads one thread page, yields one dict per post and
       follows the "next" pagination link.

    Forum and thread identifiers travel between callbacks in ``Request.meta``.
    """

    name = "atari"
    start_urls = [
        'http://atariage.com/forums/'
    ]
    custom_settings = {
        'LOG_LEVEL': logging.WARNING,
        'FEED_FORMAT': 'json',      # feed export: output format
        'FEED_URI': 'atari.json'    # feed export: output file
    }

    # Meta keys identifying the forum / subforum a page belongs to.
    _FORUM_KEYS = ('forum_code', 'forum_title', 'subforum_title', 'subforum_code')
    # Forum keys plus the keys identifying the thread a post belongs to.
    _THREAD_KEYS = _FORUM_KEYS + ('thread_code', 'thread_title', 'thread_code_url')

    @staticmethod
    def _tail_after(url, marker):
        """Return the part of ``url`` that follows the first ``marker``.

        If ``marker`` is absent, ``str.find`` returns -1 and the slice starts
        at index ``len(marker) - 1`` (same arithmetic as the inline slices
        this helper replaces).
        """
        return url[url.find(marker) + len(marker):]

    @staticmethod
    def _context(meta, keys):
        """Copy only ``keys`` out of ``meta``.

        ``response.meta`` also carries bookkeeping keys written by Scrapy's
        middlewares (for example retry and redirect counters). Forwarding the
        whole dict would make the next request inherit them, so only the
        crawl context is carried over.

        Raises:
            KeyError: if one of ``keys`` is missing from ``meta``.
        """
        return {key: meta[key] for key in keys}

    def _forum_links(self, response):
        """Yield ``(title, absolute_url, code)`` for each forum link on the page.

        Links without an ``href`` are skipped; relative links are resolved
        against the page URL.
        """
        for anchor in response.xpath('//td[@class = "col_c_forum"]/h4/a'):
            href = anchor.xpath('./@href').extract_first()
            if href is None:
                continue
            url = response.urljoin(href)
            title = anchor.xpath('./@title').extract_first()
            yield title, url, self._tail_after(url, "forum/")

    def _next_page(self, response, callback, keys):
        """Build the request for the "next" pagination link, or return None.

        The request carries only the ``keys`` subset of ``response.meta``.
        """
        href = response.xpath('//li[@class = "next"]/a/@href').extract_first()
        if href is None:
            return None
        return scrapy.Request(url=response.urljoin(href),
                              callback=callback,
                              meta=self._context(response.meta, keys))

    def parse(self, response):
        """Request every forum listed on the forum index page.

        Top-level forums have no subforum yet, so both subforum fields are
        set to the placeholder ``'NA'``.
        """
        for title, url, code in self._forum_links(response):
            yield scrapy.Request(url=url,
                                 callback=self.parse_sublevel,
                                 meta={'forum_code': code,
                                       'forum_title': title,
                                       'subforum_title': 'NA',
                                       'subforum_code': 'NA'})

    def parse_sublevel(self, response):
        """Handle one page of a forum or subforum thread listing.

        Yields, in this order: one request per thread on the page, the
        request for the next listing page (if any) and, when the active page
        is page 1, one request per linked subforum.
        """
        forum = self._context(response.meta, self._FORUM_KEYS)

        # Threads listed on this page.
        for row in response.xpath('//tr[starts-with(@id, "trow")]'):
            thread_href = row.xpath('./td/h4/a/@href').extract_first()
            if thread_href is None:
                # Row without a thread link: nothing to follow.
                continue
            thread_url = response.urljoin(thread_href)

            thread_meta = dict(forum)
            thread_meta.update({
                'thread_code': row.xpath('./td/h4/a/@id').extract_first(),
                'thread_title': row.xpath('./td/h4/a/@title').extract_first(),
                'thread_code_url': self._tail_after(thread_url, "topic/"),
            })
            yield scrapy.Request(url=thread_url,
                                 callback=self.parse_thread,
                                 meta=thread_meta)

        # Progress trace: forum code, subforum title and active page number.
        active_page = response.xpath('//li[@class = "page active"]/text()').extract_first()
        if active_page is not None:
            # format() instead of '+' so a missing title cannot raise TypeError.
            print('{} {} {}'.format(forum['forum_code'],
                                    forum['subforum_title'],
                                    active_page))

        # Follow pagination of the thread listing.
        next_request = self._next_page(response, self.parse_sublevel, self._FORUM_KEYS)
        if next_request is not None:
            yield next_request

        # Subforums are only collected from page 1 of a listing.
        if active_page == '1':
            for title, url, code in self._forum_links(response):
                yield scrapy.Request(url=url,
                                     callback=self.parse_sublevel,
                                     meta={'forum_code': forum['forum_code'],
                                           'forum_title': forum['forum_title'],
                                           'subforum_title': title,
                                           'subforum_code': code})

    def parse_thread(self, response):
        """Yield one item per post on a thread page, then follow pagination.

        Each item holds the forum/thread context received through
        ``response.meta`` plus the fields extracted from the post block.
        ``quote_user`` and ``quote_post`` are lists (one entry per quoted
        block); every other extracted field is a single value or ``None``.
        """
        thread = self._context(response.meta, self._THREAD_KEYS)

        for post in response.xpath('//div[starts-with(@class, "post_block")]'):
            item = dict(thread)
            item.update({
                'post_code': post.xpath('./@id').extract_first(),
                'author': post.xpath('.//span[@itemprop="name"]/text()').extract_first(),
                'user_id': post.xpath('.//span[@itemprop="creator name"]/a/@hovercard-id').extract_first(),
                'group': post.xpath('.//span[@itemprop="creator name"]/a/@hovercard-ref').extract_first(),
                'reputation': post.xpath('.//p[@class="desc member_title"]/text()').extract_first(),
                'user_link': post.xpath('.//span[@itemprop="creator name"]/a/@href').extract_first(),
                'post_order': post.xpath('normalize-space(./div/h3/span/a/text())').extract_first(),
                'post_date': post.xpath('./div/div[@class="post_body"]/p/abbr/@title').extract_first(),
                'post_text': post.xpath('normalize-space(.//div[@class="post entry-content "])').extract_first(),
                'quote_user': post.xpath('.//blockquote/@data-author').extract(),
                'quote_post': post.xpath('.//blockquote/@data-cid').extract(),
            })
            yield item

        # Follow pagination inside the thread.
        next_request = self._next_page(response, self.parse_thread, self._THREAD_KEYS)
        if next_request is not None:
            yield next_request

        # TODO: visit the user profile page to collect per-user data.


In [6]:
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# Build the crawler process from the project settings.
process = CrawlerProcess(get_project_settings())

# Schedule the spider; the call returns a Deferred, which the notebook displays.
process.crawl(AtariSpider)
# Run the crawl. The progress lines below are printed by the spider itself.
# NOTE(review): start() runs the Twisted reactor, which cannot be restarted,
# so re-running this cell is expected to require a kernel restart -- confirm.
process.start()

2018-08-11 19:59:21 [scrapy.utils.log] INFO: Scrapy 1.4.0 started (bot: scrapybot)
2018-08-11 19:59:21 [scrapy.utils.log] INFO: Overridden settings: {}


<Deferred at 0x122a2352c88>

10-web-site-comments/ NA 1
15-rarity-guide/ NA 1
30-hacks/ NA 1
11-programming/ NA 1
26-announcements/ NA 1
29-homebrew-discussion/ NA 1
72-user-groups/ NA 1
25-events/ NA 1
61-poll-of-the-week/ NA 1
27-contests/ NA 1
85-high-score-clubs/ NA 1
6-marketplace/ NA 1
70-member-blogs/ NA 1
16-atari-2600/ NA 1
112-show-us-your-collection/ NA 1
9-international/ NA 1
23-gaming-publications-and-websites/ NA 1
8-hardware/ NA 1
5-emulation/ NA 1
72-user-groups/ New England Classic Gaming (NECG) 1
70-member-blogs/ NA 2
112-show-us-your-collection/ NA 2
70-member-blogs/ NA 3
112-show-us-your-collection/ NA 3
70-member-blogs/ NA 4
112-show-us-your-collection/ NA 4
70-member-blogs/ NA 5
112-show-us-your-collection/ NA 5
70-member-blogs/ NA 6
112-show-us-your-collection/ NA 6
70-member-blogs/ NA 7
112-show-us-your-collection/ NA 7
70-member-blogs/ NA 8
112-show-us-your-collection/ NA 8
70-member-blogs/ NA 9
112-show-us-your-collection/ NA 9
70-member-blogs/ NA 10
112-show-us-your-collection/ NA 10
70-

16-atari-2600/ NA 52
16-atari-2600/ NA 53
16-atari-2600/ NA 54
16-atari-2600/ NA 55
16-atari-2600/ NA 56
16-atari-2600/ NA 57
16-atari-2600/ NA 58
16-atari-2600/ NA 59
16-atari-2600/ NA 60
16-atari-2600/ NA 61
16-atari-2600/ NA 62
16-atari-2600/ NA 63
16-atari-2600/ NA 64
16-atari-2600/ NA 65
16-atari-2600/ NA 66
16-atari-2600/ NA 67
16-atari-2600/ NA 68
16-atari-2600/ NA 69
16-atari-2600/ NA 70
16-atari-2600/ NA 71
16-atari-2600/ NA 72
16-atari-2600/ NA 73
16-atari-2600/ NA 74
16-atari-2600/ NA 75
16-atari-2600/ NA 76
16-atari-2600/ NA 77
16-atari-2600/ NA 78
16-atari-2600/ NA 79
16-atari-2600/ NA 80
16-atari-2600/ NA 81
16-atari-2600/ NA 82
16-atari-2600/ NA 83
16-atari-2600/ NA 84
16-atari-2600/ NA 85
16-atari-2600/ NA 86
16-atari-2600/ NA 87
16-atari-2600/ NA 88
16-atari-2600/ NA 89
16-atari-2600/ NA 90
16-atari-2600/ NA 91
16-atari-2600/ NA 92
16-atari-2600/ NA 93
16-atari-2600/ NA 94
16-atari-2600/ NA 95
16-atari-2600/ NA 96
16-atari-2600/ NA 97
16-atari-2600/ NA 98
16-atari-2600

16-atari-2600/ NA 427
16-atari-2600/ NA 428
16-atari-2600/ NA 429
16-atari-2600/ NA 430
16-atari-2600/ NA 431
16-atari-2600/ NA 432
16-atari-2600/ NA 433
16-atari-2600/ NA 434
16-atari-2600/ NA 435
16-atari-2600/ NA 436
16-atari-2600/ NA 437
16-atari-2600/ NA 438
16-atari-2600/ NA 439
16-atari-2600/ NA 440
16-atari-2600/ NA 441
16-atari-2600/ NA 442
16-atari-2600/ NA 443
16-atari-2600/ NA 444
16-atari-2600/ NA 445
16-atari-2600/ NA 446
16-atari-2600/ NA 447
16-atari-2600/ NA 448
16-atari-2600/ NA 449
16-atari-2600/ NA 450
16-atari-2600/ NA 451
16-atari-2600/ NA 452
16-atari-2600/ NA 453
16-atari-2600/ NA 454
16-atari-2600/ NA 455
16-atari-2600/ NA 456
16-atari-2600/ NA 457
16-atari-2600/ NA 458
16-atari-2600/ NA 459
16-atari-2600/ NA 460
16-atari-2600/ NA 461
16-atari-2600/ NA 462
16-atari-2600/ NA 463
16-atari-2600/ NA 464
16-atari-2600/ NA 465
16-atari-2600/ NA 466
16-atari-2600/ NA 467
16-atari-2600/ NA 468
16-atari-2600/ NA 469
16-atari-2600/ NA 470
16-atari-2600/ NA 471
16-atari-2

85-high-score-clubs/ Arcade/MAME High Score Club 6
85-high-score-clubs/ Arcade/MAME High Score Club 7
85-high-score-clubs/ Arcade/MAME High Score Club 8
85-high-score-clubs/ 8-bit High Score Club 2
85-high-score-clubs/ 8-bit High Score Club 3
85-high-score-clubs/ 8-bit High Score Club 4
85-high-score-clubs/ 8-bit High Score Club 5
85-high-score-clubs/ 8-bit High Score Club 6
85-high-score-clubs/ 8-bit High Score Club 7
85-high-score-clubs/ 8-bit High Score Club 8
85-high-score-clubs/ 8-bit High Score Club 9
85-high-score-clubs/ 8-bit High Score Club 10
85-high-score-clubs/ 8-bit High Score Club 11
85-high-score-clubs/ 8-bit High Score Club 12
85-high-score-clubs/ 8-bit High Score Club 13
85-high-score-clubs/ 8-bit High Score Club 14
85-high-score-clubs/ 8-bit High Score Club 15
85-high-score-clubs/ 8-bit High Score Club 16
85-high-score-clubs/ 8-bit High Score Club 17
85-high-score-clubs/ NES High Score Club 2
85-high-score-clubs/ NES High Score Club 3
85-high-score-clubs/ NES High Sco

6-marketplace/ Auction Central 210
6-marketplace/ Auction Central 211
6-marketplace/ Auction Central 212
6-marketplace/ Auction Central 213
6-marketplace/ Auction Central 214
6-marketplace/ Auction Central 215
6-marketplace/ Auction Central 216
6-marketplace/ Auction Central 217
6-marketplace/ Auction Central 218
6-marketplace/ Auction Central 219
6-marketplace/ Auction Central 220
6-marketplace/ Auction Central 221
6-marketplace/ Auction Central 222
6-marketplace/ Auction Central 223
6-marketplace/ Auction Central 224
6-marketplace/ Auction Central 225
6-marketplace/ Auction Central 226
6-marketplace/ Auction Central 227
6-marketplace/ Auction Central 228
6-marketplace/ Auction Central 229
6-marketplace/ Auction Central 230
6-marketplace/ Auction Central 231
6-marketplace/ Auction Central 232
6-marketplace/ Auction Central 233
6-marketplace/ Auction Central 234
6-marketplace/ Auction Central 235
6-marketplace/ Auction Central 236
6-marketplace/ Auction Central 237
6-marketplace/ Aucti

6-marketplace/ Wanted 40
6-marketplace/ Wanted 41
6-marketplace/ Wanted 42
6-marketplace/ Wanted 43
6-marketplace/ Wanted 44
6-marketplace/ Wanted 45
6-marketplace/ Wanted 46
6-marketplace/ Wanted 47
6-marketplace/ Wanted 48
6-marketplace/ Wanted 49
6-marketplace/ Wanted 50
6-marketplace/ Wanted 51
6-marketplace/ Wanted 52
6-marketplace/ Wanted 53
6-marketplace/ Wanted 54
6-marketplace/ Wanted 55
6-marketplace/ Wanted 56
6-marketplace/ Wanted 57
6-marketplace/ Wanted 58
6-marketplace/ Wanted 59
6-marketplace/ Wanted 60
6-marketplace/ Wanted 61
6-marketplace/ Wanted 62
6-marketplace/ Wanted 63
6-marketplace/ Wanted 64
6-marketplace/ Wanted 65
6-marketplace/ Wanted 66
6-marketplace/ Wanted 67
6-marketplace/ Wanted 68
6-marketplace/ Wanted 69
6-marketplace/ Wanted 70
6-marketplace/ Wanted 71
6-marketplace/ Wanted 72
6-marketplace/ Wanted 73
6-marketplace/ Wanted 74
6-marketplace/ Wanted 75
6-marketplace/ Wanted 76
6-marketplace/ Wanted 77
6-marketplace/ Wanted 78
6-marketplace/ Wanted 79


6-marketplace/ NA 127
6-marketplace/ NA 128
6-marketplace/ NA 129
6-marketplace/ NA 130
6-marketplace/ NA 131
6-marketplace/ NA 132
6-marketplace/ NA 133
6-marketplace/ NA 134
6-marketplace/ NA 135
6-marketplace/ NA 136
6-marketplace/ NA 137
6-marketplace/ NA 138
6-marketplace/ NA 139
6-marketplace/ NA 140
6-marketplace/ NA 141
6-marketplace/ NA 142
6-marketplace/ NA 143
6-marketplace/ NA 144
6-marketplace/ NA 145
6-marketplace/ NA 146
6-marketplace/ NA 147
6-marketplace/ NA 148
6-marketplace/ NA 149
6-marketplace/ NA 150
6-marketplace/ NA 151
6-marketplace/ NA 152
6-marketplace/ NA 153
6-marketplace/ NA 154
6-marketplace/ NA 155
6-marketplace/ NA 156
6-marketplace/ NA 157
6-marketplace/ NA 158
6-marketplace/ NA 159
6-marketplace/ NA 160
6-marketplace/ NA 161
6-marketplace/ NA 162
6-marketplace/ NA 163
6-marketplace/ NA 164
6-marketplace/ NA 165
6-marketplace/ NA 166
6-marketplace/ NA 167
6-marketplace/ NA 168
6-marketplace/ NA 169
6-marketplace/ NA 170
6-marketplace/ NA 171
6-marketpl

6-marketplace/ NA 500
6-marketplace/ NA 501
6-marketplace/ NA 502
6-marketplace/ NA 503
6-marketplace/ NA 504
6-marketplace/ NA 505
6-marketplace/ NA 506
6-marketplace/ NA 507
6-marketplace/ NA 508
6-marketplace/ NA 509
6-marketplace/ NA 510
6-marketplace/ NA 511
6-marketplace/ NA 512
6-marketplace/ NA 513
6-marketplace/ NA 514
6-marketplace/ NA 515
6-marketplace/ NA 516
6-marketplace/ NA 517
6-marketplace/ NA 518
6-marketplace/ NA 519
6-marketplace/ NA 520
6-marketplace/ NA 521
6-marketplace/ NA 522
6-marketplace/ NA 523
6-marketplace/ NA 524
6-marketplace/ NA 525
6-marketplace/ NA 526
6-marketplace/ NA 527
6-marketplace/ NA 528
6-marketplace/ NA 529
6-marketplace/ NA 530
6-marketplace/ NA 531
6-marketplace/ NA 532
6-marketplace/ NA 533
6-marketplace/ NA 534
6-marketplace/ NA 535
6-marketplace/ NA 536
6-marketplace/ NA 537
6-marketplace/ NA 538
6-marketplace/ NA 539
6-marketplace/ NA 540
6-marketplace/ NA 541
6-marketplace/ NA 542
6-marketplace/ NA 543
6-marketplace/ NA 544
6-marketpl

11-programming/ TI-99/4A Programming 9
11-programming/ TI-99/4A Programming 10
11-programming/ TI-99/4A Programming 11
11-programming/ TI-99/4A Programming 12
11-programming/ TI-99/4A Programming 13
11-programming/ TI-99/4A Programming 14
11-programming/ TI-99/4A Programming 15
11-programming/ TI-99/4A Programming 16
11-programming/ TI-99/4A Programming 17
11-programming/ TI-99/4A Programming 18
11-programming/ TI-99/4A Programming 19
11-programming/ TI-99/4A Programming 20
11-programming/ TI-99/4A Programming 21
11-programming/ TI-99/4A Programming 22
11-programming/ TI-99/4A Programming 23
11-programming/ TI-99/4A Programming 24
11-programming/ TI-99/4A Programming 25
11-programming/ TI-99/4A Programming 26
11-programming/ TI-99/4A Programming 27
11-programming/ TI-99/4A Programming 28
11-programming/ TI-99/4A Programming 29
11-programming/ TI-99/4A Programming 30
11-programming/ TI-99/4A Programming 31
11-programming/ TI-99/4A Programming 32
11-programming/ TI-99/4A Programming 33
1

3-atari-5200/ NA 28
3-atari-5200/ NA 29
3-atari-5200/ NA 30
3-atari-5200/ NA 31
3-atari-5200/ NA 32
3-atari-5200/ NA 33
3-atari-5200/ NA 34
3-atari-5200/ NA 35
3-atari-5200/ NA 36
3-atari-5200/ NA 37
3-atari-5200/ NA 38
3-atari-5200/ NA 39
3-atari-5200/ NA 40
3-atari-5200/ NA 41
3-atari-5200/ NA 42
3-atari-5200/ NA 43
3-atari-5200/ NA 44
3-atari-5200/ NA 45
3-atari-5200/ NA 46
3-atari-5200/ NA 47
3-atari-5200/ NA 48
3-atari-5200/ NA 49
3-atari-5200/ NA 50
3-atari-5200/ NA 51
3-atari-5200/ NA 52
3-atari-5200/ NA 53
3-atari-5200/ NA 54
3-atari-5200/ NA 55
3-atari-5200/ NA 56
3-atari-5200/ NA 57
3-atari-5200/ NA 58
3-atari-5200/ NA 59
3-atari-5200/ NA 60
3-atari-5200/ NA 61
3-atari-5200/ NA 62
3-atari-5200/ NA 63
3-atari-5200/ NA 64
3-atari-5200/ NA 65
3-atari-5200/ NA 66
3-atari-5200/ NA 67
3-atari-5200/ NA 68
3-atari-5200/ NA 69
3-atari-5200/ NA 70
3-atari-5200/ NA 71
3-atari-5200/ NA 72
3-atari-5200/ NA 73
3-atari-5200/ NA 74
3-atari-5200/ NA 75
3-atari-5200/ NA 76
3-atari-5200/ NA 77


4-atari-7800/ NA 69
4-atari-7800/ NA 70
4-atari-7800/ NA 71
4-atari-7800/ NA 72
4-atari-7800/ NA 73
4-atari-7800/ NA 74
4-atari-7800/ NA 75
4-atari-7800/ NA 76
4-atari-7800/ NA 77
4-atari-7800/ NA 78
4-atari-7800/ NA 79
4-atari-7800/ NA 80
4-atari-7800/ NA 81
4-atari-7800/ NA 82
4-atari-7800/ NA 83
4-atari-7800/ NA 84
4-atari-7800/ NA 85
4-atari-7800/ NA 86
4-atari-7800/ NA 87
4-atari-7800/ NA 88
4-atari-7800/ NA 89
4-atari-7800/ NA 90
4-atari-7800/ NA 91
4-atari-7800/ NA 92
4-atari-7800/ NA 93
4-atari-7800/ NA 94
4-atari-7800/ NA 95
4-atari-7800/ NA 96
4-atari-7800/ NA 97
4-atari-7800/ NA 98
4-atari-7800/ NA 99
4-atari-7800/ NA 100
4-atari-7800/ NA 101
4-atari-7800/ NA 102
17-classic-gaming-general/ Sega Genesis 1
17-classic-gaming-general/ Odyssey 2 / Videopac 1
17-classic-gaming-general/ Bally Arcade/Astrocade  1
17-classic-gaming-general/ Intellivision / Aquarius 1
17-classic-gaming-general/ ColecoVision / Adam 1
17-classic-gaming-general/ NA 2
17-classic-gaming-general/ Odyssey 2 

17-classic-gaming-general/ NA 200
17-classic-gaming-general/ NA 201
17-classic-gaming-general/ NA 202
17-classic-gaming-general/ NA 203
17-classic-gaming-general/ NA 204
17-classic-gaming-general/ NA 205
17-classic-gaming-general/ NA 206
17-classic-gaming-general/ NA 207
17-classic-gaming-general/ NA 208
17-classic-gaming-general/ NA 209
17-classic-gaming-general/ NA 210
17-classic-gaming-general/ NA 211
17-classic-gaming-general/ NA 212
17-classic-gaming-general/ NA 213
17-classic-gaming-general/ NA 214
17-classic-gaming-general/ NA 215
17-classic-gaming-general/ NA 216
17-classic-gaming-general/ NA 217
17-classic-gaming-general/ NA 218
17-classic-gaming-general/ NA 219
17-classic-gaming-general/ NA 220
17-classic-gaming-general/ NA 221
17-classic-gaming-general/ NA 222
17-classic-gaming-general/ NA 223
17-classic-gaming-general/ NA 224
17-classic-gaming-general/ NA 225
17-classic-gaming-general/ NA 226
17-classic-gaming-general/ NA 227
17-classic-gaming-general/ NA 228
17-classic-gam

17-classic-gaming-general/ ColecoVision / Adam 58
17-classic-gaming-general/ ColecoVision / Adam 59
17-classic-gaming-general/ ColecoVision / Adam 60
17-classic-gaming-general/ ColecoVision / Adam 61
17-classic-gaming-general/ ColecoVision / Adam 62
17-classic-gaming-general/ ColecoVision / Adam 63
17-classic-gaming-general/ ColecoVision / Adam 64
17-classic-gaming-general/ ColecoVision / Adam 65
17-classic-gaming-general/ Intellivision / Aquarius 2
17-classic-gaming-general/ Intellivision / Aquarius 3
17-classic-gaming-general/ Intellivision / Aquarius 4
17-classic-gaming-general/ Intellivision / Aquarius 5
17-classic-gaming-general/ Intellivision / Aquarius 6
17-classic-gaming-general/ Intellivision / Aquarius 7
17-classic-gaming-general/ Intellivision / Aquarius 8
17-classic-gaming-general/ Intellivision / Aquarius 9
17-classic-gaming-general/ Intellivision / Aquarius 10
17-classic-gaming-general/ Intellivision / Aquarius 11
17-classic-gaming-general/ Intellivision / Aquarius 12
17-

13-atari-lynx/ NA 56
13-atari-lynx/ NA 57
13-atari-lynx/ NA 58
13-atari-lynx/ NA 59
13-atari-lynx/ NA 60
13-atari-lynx/ NA 61
13-atari-lynx/ NA 62
12-atari-8-bit-computers/ NA 2
12-atari-8-bit-computers/ NA 3
12-atari-8-bit-computers/ NA 4
12-atari-8-bit-computers/ NA 5
12-atari-8-bit-computers/ NA 6
12-atari-8-bit-computers/ NA 7
12-atari-8-bit-computers/ NA 8
12-atari-8-bit-computers/ NA 9
12-atari-8-bit-computers/ NA 10
12-atari-8-bit-computers/ NA 11
12-atari-8-bit-computers/ NA 12
12-atari-8-bit-computers/ NA 13
12-atari-8-bit-computers/ NA 14
12-atari-8-bit-computers/ NA 15
12-atari-8-bit-computers/ NA 16
12-atari-8-bit-computers/ NA 17
12-atari-8-bit-computers/ NA 18
12-atari-8-bit-computers/ NA 19
12-atari-8-bit-computers/ NA 20
12-atari-8-bit-computers/ NA 21
12-atari-8-bit-computers/ NA 22
12-atari-8-bit-computers/ NA 23
12-atari-8-bit-computers/ NA 24
12-atari-8-bit-computers/ NA 25
12-atari-8-bit-computers/ NA 26
12-atari-8-bit-computers/ NA 27
12-atari-8-bit-computers/ NA 

12-atari-8-bit-computers/ NA 250
12-atari-8-bit-computers/ NA 251
12-atari-8-bit-computers/ NA 252
12-atari-8-bit-computers/ NA 253
12-atari-8-bit-computers/ NA 254
12-atari-8-bit-computers/ NA 255
12-atari-8-bit-computers/ NA 256
12-atari-8-bit-computers/ NA 257
12-atari-8-bit-computers/ NA 258
12-atari-8-bit-computers/ NA 259
12-atari-8-bit-computers/ NA 260
12-atari-8-bit-computers/ NA 261
12-atari-8-bit-computers/ NA 262
12-atari-8-bit-computers/ NA 263
12-atari-8-bit-computers/ NA 264
12-atari-8-bit-computers/ NA 265
12-atari-8-bit-computers/ NA 266
12-atari-8-bit-computers/ NA 267
12-atari-8-bit-computers/ NA 268
12-atari-8-bit-computers/ NA 269
12-atari-8-bit-computers/ NA 270
12-atari-8-bit-computers/ NA 271
12-atari-8-bit-computers/ NA 272
12-atari-8-bit-computers/ NA 273
12-atari-8-bit-computers/ NA 274
12-atari-8-bit-computers/ NA 275
12-atari-8-bit-computers/ NA 276
12-atari-8-bit-computers/ NA 277
12-atari-8-bit-computers/ NA 278
12-atari-8-bit-computers/ NA 279
12-atari-8

116-classic-computing/ Apple II Computers 2
116-classic-computing/ NA 3
116-classic-computing/ Commodore 8-bit Computers 3
116-classic-computing/ Apple II Computers 3
116-classic-computing/ NA 4
116-classic-computing/ Apple II Computers 4
116-classic-computing/ NA 5
116-classic-computing/ Apple II Computers 5
116-classic-computing/ NA 6
116-classic-computing/ Apple II Computers 6
116-classic-computing/ NA 7
116-classic-computing/ Apple II Computers 7
116-classic-computing/ NA 8
116-classic-computing/ Apple II Computers 8
116-classic-computing/ NA 9
116-classic-computing/ Apple II Computers 9
116-classic-computing/ NA 10
116-classic-computing/ Apple II Computers 10
116-classic-computing/ NA 11
116-classic-computing/ Apple II Computers 11
116-classic-computing/ NA 12
116-classic-computing/ Apple II Computers 12
116-classic-computing/ NA 13
116-classic-computing/ Apple II Computers 13
116-classic-computing/ NA 14
116-classic-computing/ Apple II Computers 14
116-classic-computing/ NA 15
11

21-modern-gaming/ NA 89
21-modern-gaming/ NA 90
21-modern-gaming/ NA 91
21-modern-gaming/ NA 92
21-modern-gaming/ NA 93
21-modern-gaming/ NA 94
21-modern-gaming/ NA 95
21-modern-gaming/ NA 96
21-modern-gaming/ NA 97
21-modern-gaming/ NA 98
21-modern-gaming/ NA 99
21-modern-gaming/ NA 100
21-modern-gaming/ NA 101
21-modern-gaming/ NA 102
21-modern-gaming/ NA 103
21-modern-gaming/ NA 104
21-modern-gaming/ NA 105
21-modern-gaming/ NA 106
21-modern-gaming/ NA 107
21-modern-gaming/ NA 108
21-modern-gaming/ NA 109
21-modern-gaming/ NA 110
21-modern-gaming/ NA 111
21-modern-gaming/ NA 112
21-modern-gaming/ NA 113
21-modern-gaming/ NA 114
21-modern-gaming/ NA 115
21-modern-gaming/ NA 116
21-modern-gaming/ NA 117
21-modern-gaming/ NA 118
21-modern-gaming/ NA 119
21-modern-gaming/ NA 120
21-modern-gaming/ NA 121
21-modern-gaming/ NA 122
21-modern-gaming/ NA 123
21-modern-gaming/ NA 124
21-modern-gaming/ NA 125
21-modern-gaming/ NA 126
21-modern-gaming/ NA 127
21-modern-gaming/ NA 128
21-modern-g

70-member-blogs/ NA 217


In [7]:
import pandas as pd
# Load the feed file configured as FEED_URI in the spider's custom_settings.
dfjson = pd.read_json('atari.json')
# Display the loaded frame.
dfjson

Unnamed: 0,author,forum_code,forum_title,group,post_code,post_date,post_order,post_text,quote_post,quote_user,reputation,subforum_code,subforum_title,thread_code,thread_code_url,thread_title,user_id,user_link
0,nitrofurano,10-web-site-comments/,Web Site Comments,member,post_id_4084972,2018-08-04T00:01:53+00:00,#1,recently i got surprised how amazing javatari....,[],[],Moonsweeper,,,tid-link-281502,281502-what-about-adding-html5js-emulators-off...,what about adding html5+js emulators officiall...,21957.0,http://atariage.com/forums/user/21957-nitrofur...
1,cybercylon,30-hacks/,Hacks,member,post_id_4035089,2018-05-26T21:38:33+00:00,#1,I don't have an account on one of the more Nin...,[],[],River Patroller,,,tid-link-279139,279139-applying-translation-patch-help-needed/,applying translation patch help needed - start...,20719.0,http://atariage.com/forums/user/20719-cybercylon/
2,FOX2600,72-user-groups/,User Groups,member,post_id_3727887,2017-03-27T08:35:27+00:00,#1,Anyone on here who attended SoCal retro gaming...,[],[],Chopper Commander,,,tid-link-263754,263754-greater-los-angelesinland-empire/,Greater Los Angeles/Inland Empire? - started ...,53447.0,http://atariage.com/forums/user/53447-fox2600/
3,RobertB,72-user-groups/,User Groups,member,post_id_3733521,2017-04-03T22:45:10+00:00,#2,"Well, there already is the SC3 Arcade Par...",[],[],Dragonstomper,,,tid-link-263754,263754-greater-los-angelesinland-empire/,Greater Los Angeles/Inland Empire? - started ...,35862.0,http://atariage.com/forums/user/35862-robertb/
4,doctorclu,72-user-groups/,User Groups,member,post_id_1635162,2008-12-08T14:09:50+00:00,#1,The next Amiga meeting will be on December 15t...,[],[],***Moon Patrol 5200*** ***Moon Master*** ...,,,tid-link-135685,135685-dallas-area-user-groups/,"Dallas Area User Groups - started Mon Dec 8,...",4709.0,http://atariage.com/forums/user/4709-doctorclu/
5,doctorclu,72-user-groups/,User Groups,member,post_id_1638454,2008-12-13T00:48:13+00:00,#2,(From the Dallas area Amiga club) Our next mee...,[],[],***Moon Patrol 5200*** ***Moon Master*** ...,,,tid-link-135685,135685-dallas-area-user-groups/,"Dallas Area User Groups - started Mon Dec 8,...",4709.0,http://atariage.com/forums/user/4709-doctorclu/
6,doctorclu,72-user-groups/,User Groups,member,post_id_3065079,2014-09-02T02:39:49+00:00,#3,(From the Dallas Amiga group MCCC (Metroplex C...,[],[],***Moon Patrol 5200*** ***Moon Master*** ...,,,tid-link-135685,135685-dallas-area-user-groups/,"Dallas Area User Groups - started Mon Dec 8,...",4709.0,http://atariage.com/forums/user/4709-doctorclu/
7,doctorclu,72-user-groups/,User Groups,member,post_id_3147258,2015-01-06T06:10:11+00:00,#4,Tonight we had our first victory of the new ye...,[],[],***Moon Patrol 5200*** ***Moon Master*** ...,,,tid-link-135685,135685-dallas-area-user-groups/,"Dallas Area User Groups - started Mon Dec 8,...",4709.0,http://atariage.com/forums/user/4709-doctorclu/
8,doctorclu,72-user-groups/,User Groups,member,post_id_3605495,2016-10-03T17:50:09+00:00,#5,"Our next meeting will be on Monday, October 3...",[],[],***Moon Patrol 5200*** ***Moon Master*** ...,,,tid-link-135685,135685-dallas-area-user-groups/,"Dallas Area User Groups - started Mon Dec 8,...",4709.0,http://atariage.com/forums/user/4709-doctorclu/
9,doctorclu,72-user-groups/,User Groups,member,post_id_3605665,2016-10-03T22:09:55+00:00,#6,Tonight’s meeting can be found at this link:,[],[],***Moon Patrol 5200*** ***Moon Master*** ...,,,tid-link-135685,135685-dallas-area-user-groups/,"Dallas Area User Groups - started Mon Dec 8,...",4709.0,http://atariage.com/forums/user/4709-doctorclu/


In [12]:
# Posts whose 'reputation' value is exactly the string 'Deleted'.
dfjson.loc[dfjson['reputation'] == 'Deleted']

Unnamed: 0,author,forum_code,forum_title,group,post_code,post_date,post_order,post_text,quote_post,quote_user,reputation,subforum_code,subforum_title,thread_code,thread_code_url,thread_title,user_id,user_link


In [7]:
def conversion(x):
    """Normalize one scraped ``reputation`` value.

    Args:
        x: either a list/tuple of text fragments, or a scalar (a plain
            string such as 'Guest' / 'Deleted', or a missing value).

    Returns:
        For a list/tuple with at least two elements: the second element
        without its first character (``x[1][1:]``). Anything else -- plain
        strings, ``None``, NaN, shorter sequences -- is returned unchanged.
    """
    # Only a multi-fragment sequence has something to extract. Indexing a
    # plain string the same way would slice a single character down to ''
    # and wipe the value, and indexing None/NaN would raise.
    if isinstance(x, (list, tuple)) and len(x) > 1:
        return x[1][1:]
    return x

In [8]:
# Normalize every 'reputation' value with conversion().
# NOTE: this overwrites the column in place, so the cell is not idempotent.
dfjson['reputation'] = dfjson['reputation'].apply(conversion)

In [9]:
# Collapse list-valued 'post_text' entries into one space-separated string.
# Values that are already strings (or missing) are left untouched: joining a
# string would insert a space between every character, and joining None
# would raise TypeError.
dfjson['post_text'] = dfjson['post_text'].apply(
    lambda x: ' '.join(x) if isinstance(x, (list, tuple)) else x
)

In [7]:
# Export the frame to CSV without writing the row index as a column.
dfjson.to_csv('atariage.csv', index=False)

In [9]:
# Serializes the literal file-name string itself; no file is opened or read,
# hence the quoted string shown as output.
json.dumps('quoteresult.json')

'"quoteresult.json"'

In [9]:
# Number of posts whose forum title is "Emulation".
int((dfjson['forum_title'] == "Emulation").sum())

25245

In [73]:
# Scratch check: slice the topic id out of a 'viewtopic.php' query string.
topic = 'viewtopic.php?topic_id=15845&forum=1'
# 9 == len("topic_id="); the slice ends where "&forum" begins.
topic[topic.find("topic_id=")+9:topic.find("&forum")]

'15845'

In [72]:
# Scratch check: everything after "&forum=" (7 == len("&forum=")).
topic[topic.find("&forum=")+7:]

'1'

In [7]:
# Posts belonging to the forum whose code is 'board-20'.
# NOTE(review): 'board-20' does not match the forum codes sliced from
# 'forum/' URLs by the spider in this notebook -- this cell appears to have
# been run against a different crawl; confirm.
dfjson.loc[dfjson['forum_code'] == 'board-20']

Unnamed: 0,author,forum_code,forum_title,post_date,post_text,thread_code,thread_title
55,ledzep,board-20,Vectrex,1513886349000,"[YouTube genius Arcade Jason, the guy who conn...",thread-2059,A color Vectrex?
56,VECTREXER,board-20,Vectrex,1513892484000,[If you're thinking Full color control by soft...,thread-2059,A color Vectrex?
57,ledzep,board-20,Vectrex,1513905439000,[Can you explain why not? The colors seem cons...,thread-2059,A color Vectrex?
58,Guest-Computer Nerd Kev,board-20,Vectrex,1513908207000,"[From first viewing, I understand he's using t...",thread-2059,A color Vectrex?
59,VectorX,board-20,Vectrex,1513911298000,[Too bad he didn't have a better camera for it...,thread-2059,A color Vectrex?
60,gauze,board-20,Vectrex,1484939923000,"[Hi,, I'm going to get some new springs manufa...",thread-1910,replacement joystick centering springs NOW AVA...
61,TrekMD,board-20,Vectrex,1484940548000,[How easy are these to replace? I have a joyst...,thread-1910,replacement joystick centering springs NOW AVA...
62,gauze,board-20,Vectrex,1484941968000,[It's a bit involved but it's not difficult. P...,thread-1910,replacement joystick centering springs NOW AVA...
63,TrekMD,board-20,Vectrex,1484942094000,[It does sound involved but not anything overw...,thread-1910,replacement joystick centering springs NOW AVA...
64,ls650,board-20,Vectrex,1484965199000,[I've replaced the spring in mine at one point...,thread-1910,replacement joystick centering springs NOW AVA...


In [61]:
# Keep only the forum title and the thread URL of every row.
# NOTE(review): the spider in this notebook emits no 'url_thread' field, so
# this cell presumably targets a different crawl's output -- confirm.
forums = dfjson[['forum_title', 'url_thread']]

In [62]:
# Forum code = the text after the last '=' of each thread URL.
# assign() returns a new frame instead of writing a column into a frame that
# was sliced out of another one, which avoids pandas' SettingWithCopyWarning
# and keeps the cell safe to re-run.
forums = forums.assign(
    forum_code=forums['url_thread'].str.split('=').str.get(-1)
)

In [63]:
# Drop the raw URL column; keep the title/code pair only.
forums = forums[['forum_title', 'forum_code']]

In [66]:
# Remove repeated (forum_title, forum_code) rows.
forums = forums.drop_duplicates()

In [67]:
# Display the title/code lookup table.
forums

Unnamed: 0,forum_title,forum_code
0,Main Virtual Boy Discussion,1
3,Marketplace,3
17,Virtual Boy Development Board,2
60,Offtopic,4
80,PVB Coding Competition 2010,11
100,PVB Coding Competition 2008,8
120,FlashBoy,7
140,VB Dev Repository,9
145,PVB Coding Competition 2013,13
174,Feedback,6


In [68]:
# Save the title/code lookup table to CSV without the row index.
forums.to_csv('forum_codes.csv', index=False)

In [11]:
# Display the current contents of dfjson.
dfjson

Unnamed: 0,author,forum_code,forum_title,post_date,post_text,reputation,thread_code,thread_title,user_link
0,kokovec,board-4,Past/classic gaming boards,1475346084000,It looks like Nintendo is also producing a Fam...,Space War(ped),thread-1794,Nintendo Selling Super Famicom Mini,/user/83
1,VectorX,board-4,Past/classic gaming boards,1475351233000,"^That's in French, by the way. Has the origina...",Administrator,thread-1794,Nintendo Selling Super Famicom Mini,/user/1
2,kokovec,board-4,Past/classic gaming boards,1475424891000,Sorry about that. I grew up in Quebec so I'm o...,Space War(ped),thread-1794,Nintendo Selling Super Famicom Mini,/user/83
3,TrekMD,board-4,Past/classic gaming boards,1460310317000,Atari Age member DaddaRuleKonge has just poste...,Global Moderator,thread-1661,Retro Console Encyclopedias,/user/48
4,VectorX,board-4,Past/classic gaming boards,1460311502000,"Thanks, definitely interested in the Mega Driv...",Administrator,thread-1661,Retro Console Encyclopedias,/user/1
5,TrekMD,board-4,Past/classic gaming boards,1460341275000,Absolutely. I'm hoping he makes a Vectrex Ency...,Global Moderator,thread-1661,Retro Console Encyclopedias,/user/48
6,TrekMD,board-4,Past/classic gaming boards,1460431335000,Just as an update. Two more encyclopedias are ...,Global Moderator,thread-1661,Retro Console Encyclopedias,/user/48
7,50tbrd,board-4,Past/classic gaming boards,1460580941000,A user on the AtariAge Forums by the name of D...,Causing Major Havoc,thread-1661,Retro Console Encyclopedias,/user/114
8,TrekMD,board-4,Past/classic gaming boards,1460585115000,"Ah, I had posted about this. I see he has alre...",Global Moderator,thread-1661,Retro Console Encyclopedias,/user/48
9,50tbrd,board-4,Past/classic gaming boards,1460592068000,Did you post it here? The author states he ma...,Causing Major Havoc,thread-1661,Retro Console Encyclopedias,/user/114


In [7]:
# Sample thread URL used to check the 'topic/' slicing done by the spider.
a = "http://atariage.com/forums/topic/62626-top-100-2600-games-of-all-time/"

In [8]:
# Everything after "topic/" (6 == len("topic/")): the thread id plus slug.
a[a.find("topic/")+6:]

'62626-top-100-2600-games-of-all-time/'

In [11]:
# Number of distinct values in the 'user_link' column.
dfjson["user_link"].unique().size

19597