## Failed Approach (Using API)

The following blocks try to use the API approach, which failed fantastically. I suggest you go to the next section, which works well.

You can also use `PushshiftAPI()` from `pushshift_py` directly, without psaw.

```Python
from pushshift_py import PushshiftAPI
import datetime as dt
import psaw
import pandas as pd
import requests
import json
import csv
import time
# Pushshift client (psaw wrapper) used by the search calls in the blocks below.
api = psaw.PushshiftAPI()

# Lower bound for the searches: 1 Jan 2020 as whole epoch seconds.
# The datetime is naive, so timestamp() interprets it in the local timezone.
collectionStart = dt.datetime(year=2020, month=1, day=1)
startEpoch = int(collectionStart.timestamp())
```
    
The following block shows how to fetch information using Pushshift, including how to request only specific features. The result is a generator that yields "submission" objects, though we can easily convert it into a list.

```Python
# Submission attributes requested from Pushshift, and the subreddit to search.
features = ['url','author', 'title', 'subreddit', 'id', 'created', 'score']
subreddit = 'NBA'

# Search arguments: submissions after startEpoch in `subreddit`, restricted to
# `features`, with limit=10.
searchArgs = {
    'after': startEpoch,
    'subreddit': subreddit,
    'filter': features,
    'limit': 10,
}
data = api.search_submissions(**searchArgs)

# Print one space-separated line per submission.
printedFields = ('id', 'subreddit', 'title', 'author', 'url', 'created', 'score')
for datum in data:
    print(*(getattr(datum, fieldName) for fieldName in printedFields))

import os

import praw

# Reddit API credentials are read from the environment instead of being
# hard-coded, so secrets never end up in the notebook or in version control.
# Set REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET and REDDIT_PASSWORD before running
# this block; a missing variable raises KeyError. REDDIT_USERNAME is optional
# and falls back to the account named in the user agent.
reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    password=os.environ["REDDIT_PASSWORD"],
    user_agent="testscript by u/kc_the_scraper",
    username=os.environ.get("REDDIT_USERNAME", "kc_the_scraper"),
)
```

We can use praw to get the post body using the following block.
```Python
# Look up one submission by its id with praw, then show its self-text (the post body).
submission = reddit.submission(id='eiev5d')
submission.selftext
```



In the following blocks, we create tables and store the information. For some reason, though, the api often acts up and freezes when we loop through the data.
```Python
import sqlite3

# Open (or create) the SQLite file that stores the scraped posts.
conn = sqlite3.connect('redditPosts.sqlite')
cur = conn.cursor()

# Schema: one row per submission, keyed by the submission id.
createPostsTable = '''CREATE TABLE IF NOT EXISTS Posts(
                id TEXT PRIMARY KEY,
                subreddit TEXT,
                title TEXT,
                author TEXT,
                url TEXT,
                created int)
                '''
cur.execute(createPostsTable)

# Attributes requested from Pushshift for each submission.
features = ['url','author', 'title', 'subreddit', 'id', 'created']
subreddit = 'stocks'
# Collection window as epoch seconds (naive datetimes, i.e. local time).
latest = dt.datetime(2021,5,8).timestamp()
earliest = dt.datetime(2020,1,1).timestamp()

startEpoch = earliest

# Page through the window with limit=100 per request, restarting each request
# just after the last submission stored by the previous one.
while startEpoch <= latest:
    data = api.search_submissions(after=startEpoch,
                            subreddit=subreddit,
                            filter= features,
                            limit=100)

    # Reset per batch so an empty result is detected instead of silently
    # reusing the previous batch's timestamp (which would repeat the same
    # request forever) or raising NameError on the very first batch.
    currentTime = None
    for datum in data:
        print('Got here 2.')
        cur.execute('''INSERT OR IGNORE INTO Posts VALUES (?,?,?,?,?,?)'''
                    , (datum.id, datum.subreddit, datum.title, datum.author, datum.url, datum.created))

        # Remember the creation time of the last submission seen in this batch.
        currentTime = datum.created

    conn.commit()
    if currentTime is None or currentTime == startEpoch:
        # Nothing new came back, so there is nothing left to advance past.
        break
    startEpoch = currentTime + 1
    print(dt.datetime.fromtimestamp(startEpoch))

```

## Another Approach (Getting JSON)

The method above is shaky at best: the API often just freezes. I find it much easier to call the Pushshift endpoint directly with requests. The following code blocks contain everything you need to store the Reddit data.

In [1]:
import requests
import datetime as dt
import sqlite3
import json
import time
import sys

In [2]:
def getPushShiftData(after,before, sub, timeout=30):
    """Fetch one page (size=100) of submissions for a subreddit from Pushshift.

    Args:
        after: Start of the time window in epoch seconds; truncated to int.
        before: End of the time window in epoch seconds; truncated to int.
        sub: Subreddit name; converted with ``str``.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection raises instead of hanging indefinitely.

    Returns:
        The value stored under the ``'data'`` key of the JSON response.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an HTTP
            error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the decoded response has no ``'data'`` key.
    """
    url = 'https://api.pushshift.io/reddit/search/submission/'
    # Let requests build and URL-encode the query string.
    params = {
        'size': 100,
        'after': int(after),
        'before': int(before),
        'subreddit': str(sub),
    }
    r = requests.get(url, params=params, timeout=timeout)
    # Surface HTTP errors (e.g. rate limiting) as such rather than as a
    # confusing JSON parse failure on an error page.
    r.raise_for_status()
    data = json.loads(r.text)
    return data['data']

def extractInfo(datum,features):
    """Return a new dict holding only the entries of ``datum`` named in ``features``.

    Every name in ``features`` is looked up with ``datum[name]``, so a missing
    key raises ``KeyError``.
    """
    return {name: datum[name] for name in features}

def getLatestTime(data):
    """Return the ``'created_utc'`` value of the last element of ``data``."""
    finalPost = data[-1]
    return finalPost['created_utc']

def dataStoragePipeline(after, before, sub, conn):
    """Download submissions page by page and store them in the ``Posts`` table.

    Repeatedly calls ``getPushShiftData`` for the window ``after``..``before``,
    inserts each page with ``INSERT OR IGNORE`` (rows whose id is already
    stored are skipped), commits, and moves ``after`` to one second past the
    last submission of the page.

    Args:
        after: Start of the window in epoch seconds.
        before: End of the window in epoch seconds.
        sub: Subreddit name.
        conn: Open sqlite3 connection to a database with a six-column
            ``Posts`` table.

    Returns:
        1 if the API returned an empty page; None once ``after`` has reached
        ``before``.
    """
    cursor = conn.cursor()
    while after < before:
        data = getPushShiftData(after, before, sub)
        if not data:
            print("There is no data anymore.")
            return 1
        # Build the whole page first, then insert it in one batched call.
        rows = [
            (datum['id'], datum['subreddit'], datum['title'], datum['author'],
             datum['full_link'], datum['created_utc'])
            for datum in data
        ]
        cursor.executemany('INSERT OR IGNORE INTO Posts VALUES (?,?,?,?,?,?)', rows)

        # Continue one second after the last submission of this page.
        after = getLatestTime(data) + 1
        conn.commit()
        print("The latest post is submitted at", dt.datetime.fromtimestamp(after-1))
        # Short pause between requests.
        time.sleep(0.1)
    
        
        

In [3]:
conn = sqlite3.connect('redditPosts.sqlite')
cur = conn.cursor()
# Make sure the Posts table exists. It is otherwise only created in the
# "Failed Approach" section, so running this section on its own against a
# fresh database would fail with "no such table: Posts".
cur.execute('''CREATE TABLE IF NOT EXISTS Posts(
                id TEXT PRIMARY KEY,
                subreddit TEXT,
                title TEXT,
                author TEXT,
                url TEXT,
                created int)
                ''')
subreddit = 'dogecoin'
end = int(time.time()-86400) #I subtracted by one day, so that we have some buffer.
start = dt.datetime(2021,1,1).timestamp()
# Oldest and newest stored timestamps for this subreddit; both are None when
# nothing has been stored yet.
cur.execute('''SELECT MIN(created), MAX(created) FROM Posts
                WHERE subreddit = ?''', (subreddit,))
datatimes = cur.fetchone()

# Adjust the window to the data already stored. Compare against None
# explicitly so the check does not depend on the truthiness of a timestamp.
if datatimes[0] is not None:
    dataEarly, dataLate = datatimes
    if end < dataEarly:
        # Requested window ends before the stored data: extend it up to the
        # oldest stored post.
        end = dataEarly
    elif start < dataLate:
        # Otherwise resume from the newest stored post.
        start = dataLate

In [33]:
# Run the pipeline until it finishes, resuming from the newest stored post
# whenever a request fails.
while start < end:
    try:
        dataStoragePipeline(after = start, before = end, sub = subreddit, conn = conn)
        # The pipeline returned without raising: either the API ran out of
        # posts or the whole window was covered. Stop in both cases; looping
        # again would restart the download from the unchanged `start`.
        break
    except KeyboardInterrupt:
        print("Interrupted by keyboard. Stopping.")
        break

    except Exception:
        # Only ordinary errors are retried; SystemExit and similar still
        # propagate instead of being swallowed.
        print("Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.")
        time.sleep(1)
        # Recompute the window from what has been stored so far so the retry
        # does not start over from the beginning.
        cur.execute('''SELECT MIN(created), MAX(created) FROM Posts
                        WHERE subreddit = ?''', (subreddit,))
        datatimes = cur.fetchone()

        if datatimes[0] is not None:
            dataEarly, dataLate = datatimes
            if end < dataEarly:
                end = dataEarly
            elif start < dataLate:
                start = dataLate
        

The latest post is submitted at 2021-01-01 18:30:03
The latest post is submitted at 2021-01-01 22:35:02
The latest post is submitted at 2021-01-02 00:02:23
The latest post is submitted at 2021-01-02 07:48:57
The latest post is submitted at 2021-01-02 09:50:20
The latest post is submitted at 2021-01-02 11:09:15
The latest post is submitted at 2021-01-02 12:10:58
The latest post is submitted at 2021-01-02 12:57:15
The latest post is submitted at 2021-01-02 13:42:57
The latest post is submitted at 2021-01-02 14:49:28
The latest post is submitted at 2021-01-02 16:44:42
The latest post is submitted at 2021-01-02 19:55:39
The latest post is submitted at 2021-01-02 22:45:04
The latest post is submitted at 2021-01-03 00:59:35
The latest post is submitted at 2021-01-03 06:49:31
The latest post is submitted at 2021-01-03 10:19:34
The latest post is submitted at 2021-01-03 12:28:51
The latest post is submitted at 2021-01-03 16:02:46
The latest post is submitted at 2021-01-03 19:43:45
The latest p

The latest post is submitted at 2021-01-28 17:30:12
The latest post is submitted at 2021-01-28 17:36:55
The latest post is submitted at 2021-01-28 17:39:54
The latest post is submitted at 2021-01-28 17:41:50
The latest post is submitted at 2021-01-28 17:44:45
The latest post is submitted at 2021-01-28 17:47:41
The latest post is submitted at 2021-01-28 17:50:05
The latest post is submitted at 2021-01-28 17:53:31
The latest post is submitted at 2021-01-28 17:57:04
The latest post is submitted at 2021-01-28 18:00:33
The latest post is submitted at 2021-01-28 18:03:07
The latest post is submitted at 2021-01-28 18:06:04
The latest post is submitted at 2021-01-28 18:09:30
The latest post is submitted at 2021-01-28 18:13:05
The latest post is submitted at 2021-01-28 18:16:46
The latest post is submitted at 2021-01-28 18:19:24
The latest post is submitted at 2021-01-28 18:22:35
The latest post is submitted at 2021-01-28 18:24:55
The latest post is submitted at 2021-01-28 18:26:48
The latest p

The latest post is submitted at 2021-01-28 22:31:54
The latest post is submitted at 2021-01-28 22:34:05
The latest post is submitted at 2021-01-28 22:35:48
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-28 22:37:54
The latest post is submitted at 2021-01-28 22:39:52
The latest post is submitted at 2021-01-28 22:41:57
The latest post is submitted at 2021-01-28 22:44:30
The latest post is submitted at 2021-01-28 22:46:49
The latest post is submitted at 2021-01-28 22:49:16
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-28 22:51:08
The latest post is submitted at 2021-01-28 22:53:02
The latest post is submitted at 2021-01-28 22:54:56
The latest post is submitted at 2021-01-28 22:57:17
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-28 22:59:20
The latest post i

The latest post is submitted at 2021-01-29 05:44:05
The latest post is submitted at 2021-01-29 05:49:11
The latest post is submitted at 2021-01-29 05:53:48
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-29 05:58:31
The latest post is submitted at 2021-01-29 06:02:40
The latest post is submitted at 2021-01-29 06:06:54
The latest post is submitted at 2021-01-29 06:10:54
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-29 06:16:04
The latest post is submitted at 2021-01-29 06:20:29
The latest post is submitted at 2021-01-29 06:24:14
The latest post is submitted at 2021-01-29 06:28:15
The latest post is submitted at 2021-01-29 06:31:22
The latest post is submitted at 2021-01-29 06:35:05
The latest post is submitted at 2021-01-29 06:37:48
The latest post is submitted at 2021-01-29 06:40:40
The latest post is submitted at 2021-01-29 06:43:4

The latest post is submitted at 2021-01-29 12:06:20
The latest post is submitted at 2021-01-29 12:08:40
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-29 12:11:13
The latest post is submitted at 2021-01-29 12:13:57
The latest post is submitted at 2021-01-29 12:17:12
The latest post is submitted at 2021-01-29 12:19:58
The latest post is submitted at 2021-01-29 12:22:35
The latest post is submitted at 2021-01-29 12:24:55
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-29 12:27:25
The latest post is submitted at 2021-01-29 12:30:05
The latest post is submitted at 2021-01-29 12:32:30
The latest post is submitted at 2021-01-29 12:34:42
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-29 12:36:48
The latest post is submitted at 2021-01-29 12:39:16
The latest post i

The latest post is submitted at 2021-01-29 18:51:39
The latest post is submitted at 2021-01-29 18:55:46
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-29 19:01:21
The latest post is submitted at 2021-01-29 19:06:18
The latest post is submitted at 2021-01-29 19:10:27
The latest post is submitted at 2021-01-29 19:15:34
The latest post is submitted at 2021-01-29 19:19:32
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-29 19:23:53
The latest post is submitted at 2021-01-29 19:28:52
The latest post is submitted at 2021-01-29 19:32:42
The latest post is submitted at 2021-01-29 19:37:07
The latest post is submitted at 2021-01-29 19:41:15
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-29 19:45:23
The latest post is submitted at 2021-01-29 19:50:12
The latest post i

The latest post is submitted at 2021-01-30 05:55:34
The latest post is submitted at 2021-01-30 06:10:20
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-30 06:24:57
The latest post is submitted at 2021-01-30 06:38:03
The latest post is submitted at 2021-01-30 06:48:51
The latest post is submitted at 2021-01-30 07:01:02
The latest post is submitted at 2021-01-30 07:11:38
The latest post is submitted at 2021-01-30 07:22:10
The latest post is submitted at 2021-01-30 07:31:47
The latest post is submitted at 2021-01-30 07:43:56
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-30 07:53:44
The latest post is submitted at 2021-01-30 08:01:44
The latest post is submitted at 2021-01-30 08:10:35
The latest post is submitted at 2021-01-30 08:20:44
The latest post is submitted at 2021-01-30 08:30:02
The latest post is submitted at 2021-01-30 08:39:2

The latest post is submitted at 2021-01-30 19:38:09
The latest post is submitted at 2021-01-30 19:46:47
The latest post is submitted at 2021-01-30 19:56:42
The latest post is submitted at 2021-01-30 20:06:57
The latest post is submitted at 2021-01-30 20:13:43
The latest post is submitted at 2021-01-30 20:23:06
The latest post is submitted at 2021-01-30 20:31:47
The latest post is submitted at 2021-01-30 20:42:45
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-30 20:53:39
The latest post is submitted at 2021-01-30 21:03:06
The latest post is submitted at 2021-01-30 21:13:07
The latest post is submitted at 2021-01-30 21:21:51
The latest post is submitted at 2021-01-30 21:32:26
The latest post is submitted at 2021-01-30 21:41:36
The latest post is submitted at 2021-01-30 21:53:30
The latest post is submitted at 2021-01-30 22:06:39
The latest post is submitted at 2021-01-30 22:18:42
Error occurred. Probably due to

The latest post is submitted at 2021-01-31 15:17:02
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-31 15:21:13
The latest post is submitted at 2021-01-31 15:25:55
The latest post is submitted at 2021-01-31 15:31:46
The latest post is submitted at 2021-01-31 15:37:29
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-01-31 15:42:46
The latest post is submitted at 2021-01-31 15:49:19
The latest post is submitted at 2021-01-31 15:55:11
The latest post is submitted at 2021-01-31 16:00:48
The latest post is submitted at 2021-01-31 16:07:08
The latest post is submitted at 2021-01-31 16:13:36
The latest post is submitted at 2021-01-31 16:19:30
The latest post is submitted at 2021-01-31 16:27:20
The latest post is submitted at 2021-01-31 16:34:41
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post i

The latest post is submitted at 2021-02-01 13:51:05
The latest post is submitted at 2021-02-01 13:59:29
The latest post is submitted at 2021-02-01 14:08:42
The latest post is submitted at 2021-02-01 14:19:59
The latest post is submitted at 2021-02-01 14:30:20
The latest post is submitted at 2021-02-01 14:39:48
The latest post is submitted at 2021-02-01 14:49:13
The latest post is submitted at 2021-02-01 15:01:56
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-01 15:11:41
The latest post is submitted at 2021-02-01 15:20:45
The latest post is submitted at 2021-02-01 15:30:32
The latest post is submitted at 2021-02-01 15:40:07
The latest post is submitted at 2021-02-01 15:49:32
The latest post is submitted at 2021-02-01 16:00:15
The latest post is submitted at 2021-02-01 16:08:22
The latest post is submitted at 2021-02-01 16:16:27
The latest post is submitted at 2021-02-01 16:25:05
The latest post is submitted at

The latest post is submitted at 2021-02-03 04:03:39
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-03 04:37:20
The latest post is submitted at 2021-02-03 05:14:38
The latest post is submitted at 2021-02-03 05:56:53
The latest post is submitted at 2021-02-03 06:25:56
The latest post is submitted at 2021-02-03 06:52:42
The latest post is submitted at 2021-02-03 07:14:29
The latest post is submitted at 2021-02-03 07:40:00
The latest post is submitted at 2021-02-03 08:06:27
The latest post is submitted at 2021-02-03 08:26:13
The latest post is submitted at 2021-02-03 08:49:29
The latest post is submitted at 2021-02-03 09:06:41
The latest post is submitted at 2021-02-03 09:25:42
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-03 09:43:13
The latest post is submitted at 2021-02-03 09:59:42
The latest post is submitted at 2021-02-03 10:19:5

The latest post is submitted at 2021-02-04 02:06:50
The latest post is submitted at 2021-02-04 02:08:47
The latest post is submitted at 2021-02-04 02:10:30
The latest post is submitted at 2021-02-04 02:12:23
The latest post is submitted at 2021-02-04 02:14:27
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-04 02:16:33
The latest post is submitted at 2021-02-04 02:17:57
The latest post is submitted at 2021-02-04 02:19:23
The latest post is submitted at 2021-02-04 02:21:20
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-04 02:23:12
The latest post is submitted at 2021-02-04 02:25:20
The latest post is submitted at 2021-02-04 02:27:19
The latest post is submitted at 2021-02-04 02:29:20
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-04 02:31:22
The latest post i

The latest post is submitted at 2021-02-04 16:15:47
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-04 16:27:21
The latest post is submitted at 2021-02-04 16:40:19
The latest post is submitted at 2021-02-07 17:22:15
The latest post is submitted at 2021-02-07 17:26:57
The latest post is submitted at 2021-02-07 17:31:06
The latest post is submitted at 2021-02-07 17:36:59
The latest post is submitted at 2021-02-07 17:43:33
The latest post is submitted at 2021-02-07 17:49:23
The latest post is submitted at 2021-02-07 17:54:47
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-07 18:01:28
The latest post is submitted at 2021-02-07 18:05:32
The latest post is submitted at 2021-02-07 18:10:25
The latest post is submitted at 2021-02-07 18:13:17
The latest post is submitted at 2021-02-07 18:17:29
The latest post is submitted at 2021-02-07 18:22:5

The latest post is submitted at 2021-02-08 20:31:02
The latest post is submitted at 2021-02-08 20:42:43
The latest post is submitted at 2021-02-08 20:55:50
The latest post is submitted at 2021-02-08 21:05:34
The latest post is submitted at 2021-02-08 21:15:46
The latest post is submitted at 2021-02-08 21:25:37
The latest post is submitted at 2021-02-08 21:35:56
The latest post is submitted at 2021-02-08 21:49:33
The latest post is submitted at 2021-02-08 22:00:52
The latest post is submitted at 2021-02-08 22:13:10
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-08 22:23:59
The latest post is submitted at 2021-02-08 22:37:25
The latest post is submitted at 2021-02-08 22:50:25
The latest post is submitted at 2021-02-08 23:03:54
The latest post is submitted at 2021-02-08 23:16:19
The latest post is submitted at 2021-02-08 23:33:55
The latest post is submitted at 2021-02-08 23:46:10
The latest post is submitted at

The latest post is submitted at 2021-02-10 09:20:17
The latest post is submitted at 2021-02-10 09:23:43
The latest post is submitted at 2021-02-10 09:26:43
The latest post is submitted at 2021-02-10 09:31:33
The latest post is submitted at 2021-02-10 09:36:48
The latest post is submitted at 2021-02-10 09:41:46
The latest post is submitted at 2021-02-10 09:47:45
The latest post is submitted at 2021-02-10 09:53:32
The latest post is submitted at 2021-02-10 10:00:38
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-10 10:10:43
The latest post is submitted at 2021-02-10 10:19:57
The latest post is submitted at 2021-02-10 10:31:28
The latest post is submitted at 2021-02-10 10:46:09
The latest post is submitted at 2021-02-10 10:58:44
The latest post is submitted at 2021-02-10 11:11:50
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-10 11:27:1

The latest post is submitted at 2021-02-11 23:58:56
The latest post is submitted at 2021-02-12 00:23:58
The latest post is submitted at 2021-02-12 00:57:57
The latest post is submitted at 2021-02-12 01:33:54
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-12 02:09:12
The latest post is submitted at 2021-02-12 02:44:02
The latest post is submitted at 2021-02-12 03:16:55
The latest post is submitted at 2021-02-12 03:55:13
The latest post is submitted at 2021-02-12 04:43:05
The latest post is submitted at 2021-02-12 05:36:59
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-12 06:21:04
The latest post is submitted at 2021-02-12 06:58:45
The latest post is submitted at 2021-02-12 07:30:13
The latest post is submitted at 2021-02-12 07:58:01
The latest post is submitted at 2021-02-12 08:22:14
The latest post is submitted at 2021-02-12 08:50:0

The latest post is submitted at 2021-02-14 14:51:43
The latest post is submitted at 2021-02-14 15:21:55
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-02-14 15:47:19
The latest post is submitted at 2021-02-14 16:17:07
The latest post is submitted at 2021-02-14 16:40:49
The latest post is submitted at 2021-02-14 17:04:05
The latest post is submitted at 2021-02-14 17:26:29
The latest post is submitted at 2021-02-14 17:30:22
The latest post is submitted at 2021-02-14 17:38:08
The latest post is submitted at 2021-02-14 17:49:47
The latest post is submitted at 2021-02-14 18:03:50
The latest post is submitted at 2021-02-14 18:17:22
The latest post is submitted at 2021-02-14 18:32:52
The latest post is submitted at 2021-02-14 18:54:14
The latest post is submitted at 2021-02-14 19:14:32
The latest post is submitted at 2021-02-14 19:27:24
The latest post is submitted at 2021-02-14 19:40:47
The latest post is submitted at

The latest post is submitted at 2021-02-18 15:04:13
The latest post is submitted at 2021-02-18 15:34:02
The latest post is submitted at 2021-02-18 16:00:48
The latest post is submitted at 2021-02-18 16:27:18
The latest post is submitted at 2021-02-18 16:44:35
The latest post is submitted at 2021-02-18 17:03:45
The latest post is submitted at 2021-02-18 17:29:27
The latest post is submitted at 2021-02-18 17:56:11
The latest post is submitted at 2021-02-18 18:33:26
The latest post is submitted at 2021-02-18 19:08:49
The latest post is submitted at 2021-02-18 19:51:09
The latest post is submitted at 2021-02-18 20:38:40
The latest post is submitted at 2021-02-18 21:18:08
The latest post is submitted at 2021-02-18 22:06:57
The latest post is submitted at 2021-02-18 23:06:23
The latest post is submitted at 2021-02-19 00:05:09
The latest post is submitted at 2021-02-19 01:26:27
The latest post is submitted at 2021-02-19 03:13:31
The latest post is submitted at 2021-02-19 04:40:58
The latest p

The latest post is submitted at 2021-02-24 06:20:57
The latest post is submitted at 2021-02-24 07:06:28
The latest post is submitted at 2021-02-24 07:26:07
The latest post is submitted at 2021-02-24 07:47:49
The latest post is submitted at 2021-02-24 08:09:38
The latest post is submitted at 2021-02-24 08:32:18
The latest post is submitted at 2021-02-24 08:55:55
The latest post is submitted at 2021-02-24 09:34:53
The latest post is submitted at 2021-02-24 10:21:09
The latest post is submitted at 2021-02-24 11:04:36
The latest post is submitted at 2021-02-24 11:50:30
The latest post is submitted at 2021-02-24 12:39:17
The latest post is submitted at 2021-02-24 13:28:47
The latest post is submitted at 2021-02-24 14:23:40
The latest post is submitted at 2021-02-24 15:54:15
The latest post is submitted at 2021-02-24 16:44:46
The latest post is submitted at 2021-02-24 17:39:53
The latest post is submitted at 2021-02-24 18:28:25
The latest post is submitted at 2021-02-24 19:32:29
The latest p

The latest post is submitted at 2021-03-09 09:45:56
The latest post is submitted at 2021-03-09 10:45:59
The latest post is submitted at 2021-03-09 12:11:59
The latest post is submitted at 2021-03-09 13:38:29
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-03-09 15:03:16
The latest post is submitted at 2021-03-09 16:27:55
The latest post is submitted at 2021-03-09 17:34:51
The latest post is submitted at 2021-03-09 19:00:15
The latest post is submitted at 2021-03-09 20:01:28
The latest post is submitted at 2021-03-09 20:59:56
The latest post is submitted at 2021-03-09 22:33:54
The latest post is submitted at 2021-03-10 00:51:50
The latest post is submitted at 2021-03-10 03:25:22
The latest post is submitted at 2021-03-10 06:19:48
The latest post is submitted at 2021-03-10 08:26:10
The latest post is submitted at 2021-03-10 09:45:14
The latest post is submitted at 2021-03-10 11:12:06
The latest post is submitted at

The latest post is submitted at 2021-03-31 08:47:56
The latest post is submitted at 2021-03-31 11:53:38
The latest post is submitted at 2021-03-31 14:59:19
The latest post is submitted at 2021-03-31 18:07:09
The latest post is submitted at 2021-03-31 20:55:15
The latest post is submitted at 2021-03-31 23:25:01
The latest post is submitted at 2021-04-01 03:26:51
The latest post is submitted at 2021-04-01 05:41:13
The latest post is submitted at 2021-04-01 05:56:59
The latest post is submitted at 2021-04-01 06:21:39
The latest post is submitted at 2021-04-01 06:41:54
The latest post is submitted at 2021-04-01 06:58:45
The latest post is submitted at 2021-04-01 07:24:34
The latest post is submitted at 2021-04-01 08:12:02
The latest post is submitted at 2021-04-01 09:03:11
The latest post is submitted at 2021-04-01 09:52:26
The latest post is submitted at 2021-04-01 10:47:44
The latest post is submitted at 2021-04-01 11:41:07
The latest post is submitted at 2021-04-01 12:57:13
The latest p

The latest post is submitted at 2021-04-14 22:16:50
The latest post is submitted at 2021-04-14 23:05:59
The latest post is submitted at 2021-04-14 23:40:10
The latest post is submitted at 2021-04-14 23:54:15
The latest post is submitted at 2021-04-15 00:14:04
The latest post is submitted at 2021-04-15 00:37:16
The latest post is submitted at 2021-04-15 01:09:09
The latest post is submitted at 2021-04-15 02:05:36
The latest post is submitted at 2021-04-15 03:05:12
The latest post is submitted at 2021-04-15 04:36:34
The latest post is submitted at 2021-04-15 06:30:17
The latest post is submitted at 2021-04-15 07:32:33
The latest post is submitted at 2021-04-15 08:23:21
The latest post is submitted at 2021-04-15 09:09:05
The latest post is submitted at 2021-04-15 09:44:08
The latest post is submitted at 2021-04-15 10:30:12
The latest post is submitted at 2021-04-15 11:18:01
The latest post is submitted at 2021-04-15 11:42:16
The latest post is submitted at 2021-04-15 12:14:52
The latest p

The latest post is submitted at 2021-04-15 22:06:26
The latest post is submitted at 2021-04-15 22:09:53
The latest post is submitted at 2021-04-15 22:13:28
The latest post is submitted at 2021-04-15 22:16:26
The latest post is submitted at 2021-04-15 22:20:02
The latest post is submitted at 2021-04-15 22:23:28
The latest post is submitted at 2021-04-15 22:27:18
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-04-15 22:31:33
The latest post is submitted at 2021-04-15 22:35:19
The latest post is submitted at 2021-04-15 22:40:43
The latest post is submitted at 2021-04-15 22:45:46
The latest post is submitted at 2021-04-15 22:50:34
The latest post is submitted at 2021-04-15 22:56:35
The latest post is submitted at 2021-04-15 23:02:14
The latest post is submitted at 2021-04-15 23:07:25
The latest post is submitted at 2021-04-15 23:13:04
The latest post is submitted at 2021-04-15 23:18:30
The latest post is submitted at

The latest post is submitted at 2021-04-16 13:46:45
The latest post is submitted at 2021-04-16 13:54:43
The latest post is submitted at 2021-04-16 14:01:00
The latest post is submitted at 2021-04-16 14:08:49
The latest post is submitted at 2021-04-16 14:14:19
The latest post is submitted at 2021-04-16 14:20:46
The latest post is submitted at 2021-04-16 14:26:03
The latest post is submitted at 2021-04-16 14:30:49
The latest post is submitted at 2021-04-16 14:37:07
The latest post is submitted at 2021-04-16 14:43:10
The latest post is submitted at 2021-04-16 14:48:26
The latest post is submitted at 2021-04-16 14:55:30
The latest post is submitted at 2021-04-16 15:01:42
The latest post is submitted at 2021-04-16 15:09:26
The latest post is submitted at 2021-04-16 15:17:34
The latest post is submitted at 2021-04-16 15:23:05
The latest post is submitted at 2021-04-16 15:29:58
The latest post is submitted at 2021-04-16 15:37:06
The latest post is submitted at 2021-04-16 15:44:57
The latest p

The latest post is submitted at 2021-04-17 16:40:54
The latest post is submitted at 2021-04-17 16:55:25
The latest post is submitted at 2021-04-17 17:12:13
The latest post is submitted at 2021-04-17 17:30:47
The latest post is submitted at 2021-04-17 17:45:12
The latest post is submitted at 2021-04-17 18:00:01
The latest post is submitted at 2021-04-17 18:16:41
Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.
The latest post is submitted at 2021-04-17 18:36:20
The latest post is submitted at 2021-04-17 18:51:47
The latest post is submitted at 2021-04-17 19:09:49
The latest post is submitted at 2021-04-17 19:25:03
The latest post is submitted at 2021-04-17 19:41:17
The latest post is submitted at 2021-04-17 19:58:23
The latest post is submitted at 2021-04-17 20:12:51
The latest post is submitted at 2021-04-17 20:26:44
The latest post is submitted at 2021-04-17 20:42:46
The latest post is submitted at 2021-04-17 21:04:49
The latest post is submitted at

The latest post is submitted at 2021-04-19 05:53:03
The latest post is submitted at 2021-04-19 05:58:17
The latest post is submitted at 2021-04-19 06:05:28
The latest post is submitted at 2021-04-19 06:13:55
The latest post is submitted at 2021-04-19 06:22:43
The latest post is submitted at 2021-04-19 06:31:18
The latest post is submitted at 2021-04-19 06:40:48
The latest post is submitted at 2021-04-19 06:48:22
The latest post is submitted at 2021-04-19 06:57:19
The latest post is submitted at 2021-04-19 07:08:47
The latest post is submitted at 2021-04-19 07:19:16
The latest post is submitted at 2021-04-19 07:28:59
The latest post is submitted at 2021-04-19 07:38:02
The latest post is submitted at 2021-04-19 07:46:32
The latest post is submitted at 2021-04-19 07:57:11
The latest post is submitted at 2021-04-19 08:07:01
The latest post is submitted at 2021-04-19 08:17:05
The latest post is submitted at 2021-04-19 08:26:16
The latest post is submitted at 2021-04-19 08:35:29
The latest p

The latest post is submitted at 2021-04-20 00:25:49
The latest post is submitted at 2021-04-20 00:31:44
The latest post is submitted at 2021-04-20 00:36:44
The latest post is submitted at 2021-04-20 00:42:03
The latest post is submitted at 2021-04-20 00:47:31
The latest post is submitted at 2021-04-20 00:54:53
The latest post is submitted at 2021-04-20 01:02:49
The latest post is submitted at 2021-04-20 01:09:53
The latest post is submitted at 2021-04-20 01:17:58
The latest post is submitted at 2021-04-20 01:25:56
The latest post is submitted at 2021-04-20 01:33:44
The latest post is submitted at 2021-04-20 01:41:50
The latest post is submitted at 2021-04-20 01:48:32
The latest post is submitted at 2021-04-20 01:58:07
The latest post is submitted at 2021-04-20 02:06:50
The latest post is submitted at 2021-04-20 02:16:23
The latest post is submitted at 2021-04-20 02:28:06
The latest post is submitted at 2021-04-20 02:38:11
The latest post is submitted at 2021-04-20 02:48:44
The latest p

The latest post is submitted at 2021-04-20 16:09:12
The latest post is submitted at 2021-04-20 16:12:48
The latest post is submitted at 2021-04-20 16:15:43
The latest post is submitted at 2021-04-20 16:18:15
The latest post is submitted at 2021-04-20 16:21:01
The latest post is submitted at 2021-04-20 16:22:57
The latest post is submitted at 2021-04-20 16:25:34
The latest post is submitted at 2021-04-20 16:28:18
The latest post is submitted at 2021-04-20 16:30:41
The latest post is submitted at 2021-04-20 16:32:55
The latest post is submitted at 2021-04-20 16:34:39
The latest post is submitted at 2021-04-20 16:36:56
The latest post is submitted at 2021-04-20 16:39:53
The latest post is submitted at 2021-04-20 16:43:01
The latest post is submitted at 2021-04-20 16:47:23
The latest post is submitted at 2021-04-20 16:51:54
The latest post is submitted at 2021-04-20 16:56:58
The latest post is submitted at 2021-04-20 17:01:29
The latest post is submitted at 2021-04-20 17:06:55
The latest p

The latest post is submitted at 2021-04-22 04:17:21
The latest post is submitted at 2021-04-22 04:35:24
The latest post is submitted at 2021-04-22 04:53:28
The latest post is submitted at 2021-04-22 05:16:40
The latest post is submitted at 2021-04-22 05:34:42
The latest post is submitted at 2021-04-22 05:59:06
The latest post is submitted at 2021-04-22 06:18:23
The latest post is submitted at 2021-04-22 06:35:06
The latest post is submitted at 2021-04-22 06:49:49
The latest post is submitted at 2021-04-22 07:05:24
The latest post is submitted at 2021-04-22 07:20:38
The latest post is submitted at 2021-04-22 07:32:58
The latest post is submitted at 2021-04-22 07:47:32
The latest post is submitted at 2021-04-22 07:57:46
The latest post is submitted at 2021-04-22 08:02:52
The latest post is submitted at 2021-04-22 08:08:53
The latest post is submitted at 2021-04-22 08:17:37
The latest post is submitted at 2021-04-22 08:30:56
The latest post is submitted at 2021-04-22 08:44:06
The latest p

The latest post is submitted at 2021-04-23 11:43:37
The latest post is submitted at 2021-04-23 12:00:20
The latest post is submitted at 2021-04-23 12:17:28
The latest post is submitted at 2021-04-23 12:34:28
The latest post is submitted at 2021-04-23 12:53:24
The latest post is submitted at 2021-04-23 13:19:53
The latest post is submitted at 2021-04-23 13:41:04
The latest post is submitted at 2021-04-23 14:00:15
The latest post is submitted at 2021-04-23 14:20:48
The latest post is submitted at 2021-04-23 14:39:44
The latest post is submitted at 2021-04-23 14:59:32
The latest post is submitted at 2021-04-23 15:15:53
The latest post is submitted at 2021-04-23 15:32:18
The latest post is submitted at 2021-04-23 15:48:14
The latest post is submitted at 2021-04-23 16:08:54
The latest post is submitted at 2021-04-23 16:25:27
The latest post is submitted at 2021-04-23 16:44:31
The latest post is submitted at 2021-04-23 17:04:54
The latest post is submitted at 2021-04-23 17:23:58
The latest p

The latest post is submitted at 2021-04-26 13:18:36
The latest post is submitted at 2021-04-26 13:48:57
The latest post is submitted at 2021-04-26 14:24:04
The latest post is submitted at 2021-04-26 14:53:50
The latest post is submitted at 2021-04-26 15:19:12
The latest post is submitted at 2021-04-26 15:49:19
The latest post is submitted at 2021-04-26 16:25:27
The latest post is submitted at 2021-04-26 16:52:22
The latest post is submitted at 2021-04-26 17:24:04
The latest post is submitted at 2021-04-26 17:55:07
The latest post is submitted at 2021-04-26 18:20:57
The latest post is submitted at 2021-04-26 18:44:28
The latest post is submitted at 2021-04-26 19:06:37
The latest post is submitted at 2021-04-26 19:39:18
The latest post is submitted at 2021-04-26 20:14:40
The latest post is submitted at 2021-04-26 20:43:21
The latest post is submitted at 2021-04-26 21:07:57
The latest post is submitted at 2021-04-26 21:35:26
The latest post is submitted at 2021-04-26 22:13:56
The latest p

The latest post is submitted at 2021-04-30 03:36:32
The latest post is submitted at 2021-04-30 04:48:07
The latest post is submitted at 2021-04-30 05:56:28
The latest post is submitted at 2021-04-30 06:49:20
The latest post is submitted at 2021-04-30 07:44:01
The latest post is submitted at 2021-04-30 08:29:18
The latest post is submitted at 2021-04-30 09:05:28
The latest post is submitted at 2021-04-30 09:49:32
The latest post is submitted at 2021-04-30 10:25:35
The latest post is submitted at 2021-04-30 11:04:38
The latest post is submitted at 2021-04-30 11:39:26
The latest post is submitted at 2021-04-30 12:17:28
The latest post is submitted at 2021-04-30 12:45:43
The latest post is submitted at 2021-04-30 13:11:30
The latest post is submitted at 2021-04-30 13:20:18
The latest post is submitted at 2021-04-30 13:24:52
The latest post is submitted at 2021-04-30 13:30:55
The latest post is submitted at 2021-04-30 13:41:52
The latest post is submitted at 2021-04-30 13:55:09
The latest p

The latest post is submitted at 2021-05-02 18:30:22
The latest post is submitted at 2021-05-02 18:58:05
The latest post is submitted at 2021-05-02 19:25:25
The latest post is submitted at 2021-05-02 19:52:05
The latest post is submitted at 2021-05-02 20:24:22
The latest post is submitted at 2021-05-02 20:57:42
The latest post is submitted at 2021-05-02 21:30:40
The latest post is submitted at 2021-05-02 22:10:41
The latest post is submitted at 2021-05-02 22:52:09
The latest post is submitted at 2021-05-02 23:34:34
The latest post is submitted at 2021-05-03 00:30:29
The latest post is submitted at 2021-05-03 01:26:20
The latest post is submitted at 2021-05-03 02:24:01
The latest post is submitted at 2021-05-03 03:13:22
The latest post is submitted at 2021-05-03 03:56:37
The latest post is submitted at 2021-05-03 04:56:50
The latest post is submitted at 2021-05-03 05:45:30
The latest post is submitted at 2021-05-03 06:39:42
The latest post is submitted at 2021-05-03 07:17:02
The latest p

The latest post is submitted at 2021-05-04 07:09:12
The latest post is submitted at 2021-05-04 07:10:52
The latest post is submitted at 2021-05-04 07:12:54
The latest post is submitted at 2021-05-04 07:14:51
The latest post is submitted at 2021-05-04 07:16:54
The latest post is submitted at 2021-05-04 07:18:54
The latest post is submitted at 2021-05-04 07:21:21
The latest post is submitted at 2021-05-04 07:23:52
The latest post is submitted at 2021-05-04 07:26:44
The latest post is submitted at 2021-05-04 07:28:46
The latest post is submitted at 2021-05-04 07:30:32
The latest post is submitted at 2021-05-04 07:32:55
The latest post is submitted at 2021-05-04 07:35:08
The latest post is submitted at 2021-05-04 07:37:20
The latest post is submitted at 2021-05-04 07:40:12
The latest post is submitted at 2021-05-04 07:42:45
The latest post is submitted at 2021-05-04 07:45:29
The latest post is submitted at 2021-05-04 07:48:38
The latest post is submitted at 2021-05-04 07:51:57
The latest p

The latest post is submitted at 2021-05-04 22:26:51
The latest post is submitted at 2021-05-04 22:30:17
The latest post is submitted at 2021-05-04 22:33:37
The latest post is submitted at 2021-05-04 22:35:34
The latest post is submitted at 2021-05-04 22:37:51
The latest post is submitted at 2021-05-04 22:40:18
The latest post is submitted at 2021-05-04 22:42:46
The latest post is submitted at 2021-05-04 22:45:13
The latest post is submitted at 2021-05-04 22:48:28
The latest post is submitted at 2021-05-04 22:52:12
The latest post is submitted at 2021-05-04 22:55:51
The latest post is submitted at 2021-05-04 22:59:39
The latest post is submitted at 2021-05-04 23:03:30
The latest post is submitted at 2021-05-04 23:06:59
The latest post is submitted at 2021-05-04 23:09:58
The latest post is submitted at 2021-05-04 23:12:55
The latest post is submitted at 2021-05-04 23:15:09
The latest post is submitted at 2021-05-04 23:16:52
The latest post is submitted at 2021-05-04 23:17:28
The latest p

The latest post is submitted at 2021-05-05 13:53:12
The latest post is submitted at 2021-05-05 14:02:02
The latest post is submitted at 2021-05-05 14:10:39
The latest post is submitted at 2021-05-05 14:19:25
The latest post is submitted at 2021-05-05 14:29:53
The latest post is submitted at 2021-05-05 14:39:54
The latest post is submitted at 2021-05-05 14:50:03
The latest post is submitted at 2021-05-05 14:59:49
The latest post is submitted at 2021-05-05 15:09:52
The latest post is submitted at 2021-05-05 15:19:04
The latest post is submitted at 2021-05-05 15:28:30
The latest post is submitted at 2021-05-05 15:35:16
The latest post is submitted at 2021-05-05 15:43:13
The latest post is submitted at 2021-05-05 15:51:46
The latest post is submitted at 2021-05-05 16:00:43
The latest post is submitted at 2021-05-05 16:10:45
The latest post is submitted at 2021-05-05 16:20:41
The latest post is submitted at 2021-05-05 16:29:29
The latest post is submitted at 2021-05-05 16:38:59
The latest p

The latest post is submitted at 2021-05-06 21:00:44
The latest post is submitted at 2021-05-06 21:07:52
The latest post is submitted at 2021-05-06 21:12:57
The latest post is submitted at 2021-05-06 21:17:03
The latest post is submitted at 2021-05-06 21:24:05
The latest post is submitted at 2021-05-06 21:31:20
The latest post is submitted at 2021-05-06 21:37:53
The latest post is submitted at 2021-05-06 21:45:00
The latest post is submitted at 2021-05-06 21:52:04
The latest post is submitted at 2021-05-06 22:00:08
The latest post is submitted at 2021-05-06 22:07:50
The latest post is submitted at 2021-05-06 22:14:45
The latest post is submitted at 2021-05-06 22:22:38
The latest post is submitted at 2021-05-06 22:30:11
The latest post is submitted at 2021-05-06 22:37:48
The latest post is submitted at 2021-05-06 22:47:51
The latest post is submitted at 2021-05-06 22:58:38
The latest post is submitted at 2021-05-06 23:11:12
The latest post is submitted at 2021-05-06 23:25:45
The latest p

The latest post is submitted at 2021-05-08 00:21:51
The latest post is submitted at 2021-05-08 00:33:43
The latest post is submitted at 2021-05-08 00:45:32
The latest post is submitted at 2021-05-08 00:55:51
The latest post is submitted at 2021-05-08 01:07:29
The latest post is submitted at 2021-05-08 01:23:15
The latest post is submitted at 2021-05-08 01:36:28
The latest post is submitted at 2021-05-08 01:51:47
The latest post is submitted at 2021-05-08 02:08:08
The latest post is submitted at 2021-05-08 02:22:37
The latest post is submitted at 2021-05-08 02:40:13
The latest post is submitted at 2021-05-08 02:59:33
The latest post is submitted at 2021-05-08 03:20:02
The latest post is submitted at 2021-05-08 03:41:10
The latest post is submitted at 2021-05-08 04:05:26
The latest post is submitted at 2021-05-08 04:30:27
The latest post is submitted at 2021-05-08 04:52:13
The latest post is submitted at 2021-05-08 05:14:07
The latest post is submitted at 2021-05-08 05:29:52
The latest p

The latest post is submitted at 2021-05-08 22:29:21
The latest post is submitted at 2021-05-08 22:31:35
The latest post is submitted at 2021-05-08 22:33:39
The latest post is submitted at 2021-05-08 22:35:28
The latest post is submitted at 2021-05-08 22:36:48
The latest post is submitted at 2021-05-08 22:38:29
The latest post is submitted at 2021-05-08 22:40:22
The latest post is submitted at 2021-05-08 22:43:51
The latest post is submitted at 2021-05-08 22:44:57
The latest post is submitted at 2021-05-08 22:45:37
The latest post is submitted at 2021-05-08 22:46:34
The latest post is submitted at 2021-05-08 22:47:27
The latest post is submitted at 2021-05-08 22:48:59
The latest post is submitted at 2021-05-08 22:51:04
The latest post is submitted at 2021-05-08 22:52:46
The latest post is submitted at 2021-05-08 22:53:51
The latest post is submitted at 2021-05-08 22:54:59
The latest post is submitted at 2021-05-08 22:55:50
The latest post is submitted at 2021-05-08 22:56:42
The latest p

The latest post is submitted at 2021-05-09 06:36:20
The latest post is submitted at 2021-05-09 06:42:20
The latest post is submitted at 2021-05-09 06:48:27
The latest post is submitted at 2021-05-09 06:55:07
The latest post is submitted at 2021-05-09 06:59:50
The latest post is submitted at 2021-05-09 07:05:08
The latest post is submitted at 2021-05-09 07:08:12
The latest post is submitted at 2021-05-09 07:11:02
The latest post is submitted at 2021-05-09 07:14:35
The latest post is submitted at 2021-05-09 07:17:56
The latest post is submitted at 2021-05-09 07:21:22
The latest post is submitted at 2021-05-09 07:24:30
The latest post is submitted at 2021-05-09 07:28:03
The latest post is submitted at 2021-05-09 07:30:55
The latest post is submitted at 2021-05-09 07:34:37
The latest post is submitted at 2021-05-09 07:38:19
The latest post is submitted at 2021-05-09 07:42:03
The latest post is submitted at 2021-05-09 07:45:22
The latest post is submitted at 2021-05-09 07:48:36
The latest p

In [34]:
# Per-subreddit summary: number of stored posts and number of distinct authors.
print(cur.execute('''SELECT subreddit,COUNT(*), COUNT(DISTINCT author) FROM Posts
                GROUP BY subreddit''').fetchall())

[('CryptoCurrency', 172461, 84948), ('GME', 145611, 40526), ('Superstonks', 146, 64), ('dogecoin', 576871, 169071), ('finance', 2941, 1267), ('options', 16589, 10251), ('pennystocks', 48514, 24938), ('stock', 100, 74), ('stocks', 23200, 13268), ('wallstreetbets', 954311, 499428)]


## Update the database by adding links.

In [4]:
# Add the ext_link column only when it is missing, so this cell can be re-run:
# SQLite raises "duplicate column name" if ADD COLUMN is repeated for a column
# that already exists. PRAGMA table_info returns one row per column, with the
# column name in position 1.
existing_columns = [row[1] for row in cur.execute('PRAGMA table_info(Posts)').fetchall()]
if 'ext_link' not in existing_columns:
    cur.execute('''ALTER TABLE Posts
                ADD COLUMN ext_link TEXT''')


<sqlite3.Cursor at 0x2116f30cea0>

In [45]:
def dataUpdatePipeline(after, before, sub, conn):
    """Backfill ``Posts.ext_link`` for one subreddit, batch by batch.

    Each iteration fetches a batch with ``getPushShiftData(after, before, sub)``,
    writes every item's ``url`` into ``ext_link`` of the ``Posts`` row with the
    same ``id`` and subreddit, commits, and then moves ``after`` to one past
    the value ``getLatestTime`` reports for that batch.

    Args:
        after: Lower time bound handed to ``getPushShiftData``; advanced after
            every batch.
        before: Upper time bound; the loop ends once ``after`` reaches it.
        sub: Subreddit name, used for the fetch and in the UPDATE filter.
        conn: Database connection supplying ``cursor()`` and ``commit()``.

    Returns:
        1 when a fetch returns nothing; None when the loop ends because
        ``after`` reached ``before``.
    """
    cursor = conn.cursor()
    while after < before:
        data = getPushShiftData(after, before, sub)
        if not data:
            print("There are no data anymore.")
            return 1

        # One batched statement instead of a Python-level loop of execute() calls.
        cursor.executemany('''UPDATE Posts
                                SET ext_link = ?
                                WHERE id = ? AND subreddit = ?''',
                           ((datum['url'], datum['id'], sub) for datum in data))

        # +1 so the next request starts strictly after the newest item seen.
        after = getLatestTime(data) + 1
        conn.commit()
        print("The latest post is submitted at", dt.datetime.fromtimestamp(after-1))
        time.sleep(0.1)  # short pause between consecutive requests

def findStartingTime(cursor, subreddit):
    """Return the newest ``created`` value among posts that already have a link.

    Only rows of ``subreddit`` whose ``ext_link`` is not NULL are considered.
    The result is None when no such row exists.
    """
    cursor.execute('''SELECT MAX(created) FROM Posts
                    WHERE subreddit = ? AND ext_link IS NOT NULL''', (subreddit,))
    # An aggregate query always yields exactly one single-column row.
    (latest_created,) = cursor.fetchone()
    return latest_created

def getYourExistingSubs(cursor):
    """Return the subreddit names present in ``Posts``, fewest posts first.

    Args:
        cursor: Database cursor used to run the query. The query runs on this
            cursor rather than on a module-level one, so the function works
            with whichever connection the caller passes in.

    Returns:
        A list of subreddit names ordered by ascending post count.
    """
    cursor.execute('''SELECT subreddit FROM Posts GROUP BY subreddit ORDER BY COUNT(*) ASC''')

    return [row[0] for row in cursor.fetchall()]


In [46]:
# Collect every subreddit that already has rows in the Posts table, so the
# cells below can loop over them without listing the names by hand.
subreddits = getYourExistingSubs(cur)


In [40]:
# For every known subreddit, backfill ext_link from 2021-01-01 (or from the
# newest post that already has a link) up to one day ago, retrying on errors.
for subreddit in subreddits:
    flag = 0  # set to 1 on Ctrl-C so the outer loop stops as well
    print("Start working on {}".format(subreddit))
    end = int(time.time()-86400)  # stop one day before now, as a buffer
    start = dt.datetime(2021,1,1).timestamp()

    # Resume from the newest post that already has ext_link, if there is one.
    dataTime = findStartingTime(cur,subreddit)
    if dataTime:
        start = max(start, dataTime)
    while True:
        try:
            dataUpdatePipeline(start, end, subreddit, conn)
        except KeyboardInterrupt:
            print("Interrupted by keyboard. Stopping.")
            flag = 1
            break
        except Exception as err:
            # Catch Exception rather than everything: a bare except would also
            # swallow SystemExit. Print the cause so persistent failures
            # (not only rate limiting) are visible.
            print("Error occurred. Probably due to frequent requests. Will resume working in 1 seconds.")
            print("Cause: {!r}".format(err))
            # Batches committed before the failure moved the resume point.
            dataTime = findStartingTime(cur,subreddit)
            if dataTime:
                start = max(start, dataTime)
            time.sleep(1)
        else:
            # Pipeline finished without raising: move on to the next subreddit.
            break

    if flag:
        break
            
            

Start working on Superstonks
The latest post is submitted at 2021-04-25 20:14:12
The latest post is submitted at 2021-05-09 10:09:15
There are no data anymore.
Start working on finance
The latest post is submitted at 2021-05-10 08:35:06
There are no data anymore.
Start working on options
The latest post is submitted at 2021-05-10 09:56:31
There are no data anymore.
Start working on pennystocks
The latest post is submitted at 2021-05-10 10:01:37
There are no data anymore.
Start working on GME
The latest post is submitted at 2021-05-09 19:10:47
The latest post is submitted at 2021-05-10 00:54:21
The latest post is submitted at 2021-05-10 07:37:07
The latest post is submitted at 2021-05-10 09:46:56
The latest post is submitted at 2021-05-10 10:01:04
There are no data anymore.
Start working on CryptoCurrency
The latest post is submitted at 2021-04-14 15:45:13
The latest post is submitted at 2021-04-14 16:57:51
The latest post is submitted at 2021-04-14 18:25:35
The latest post is submitted

In [37]:
# Subreddit names, ordered from fewest to most stored posts.
subreddits = [
    row[0]
    for row in cur.execute('''SELECT subreddit FROM Posts GROUP BY subreddit ORDER BY COUNT(*) ASC''').fetchall()
]

subreddits

['Superstonks',
 'finance',
 'options',
 'pennystocks',
 'GME',
 'CryptoCurrency',
 'dogecoin',
 'wallstreetbets']

# Using PRAW to Download Additional Features

In this section, we will use praw to scrape the post body, score, and upvote_ratio.

**Please use your own client_id, secret, etc. since we might be scraping at the same time. I don't want to get this account locked up.**


In [41]:
import os

import praw

# Credentials are read from the environment instead of being hard-coded, so
# that no secret is stored in (or shared through) this notebook. Set
# REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USERNAME and REDDIT_PASSWORD
# to your own values before running this cell; a missing variable raises
# KeyError.
_reddit_username = os.environ["REDDIT_USERNAME"]

reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    password=os.environ["REDDIT_PASSWORD"],
    user_agent="testscript by u/{}".format(_reddit_username),
    username=_reddit_username,
)

In [42]:
# Look up one submission by its id and show its title, score and upvote ratio.
Sub = reddit.submission(id='eiev5d')
print(Sub.title, Sub.score, Sub.upvote_ratio)

# Uncomment to dump the attributes stored on the submission object:
#print(vars(Sub))

ben simmons needs a new team. 196 0.75


In [43]:
subreddit = 'options'

# Take ten post ids from this subreddit as a small sample.
ids = cur.execute('''SELECT id FROM Posts
                WHERE subreddit == ?
                LIMIT 10''', (subreddit,)).fetchall()


In [44]:
# Time how long it takes to look up the sampled submissions one by one.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
tStart = time.perf_counter()
for postId in ids:
    # Each row from fetchall() is a 1-tuple holding the post id.
    Sub = reddit.submission(id=postId[0])
    print(Sub.selftext, Sub.score, Sub.upvote_ratio, Sub.url)


tEnd = time.perf_counter()

print(tEnd-tStart)

This morning I created a new scanner that alerts me when MACD gives a trend reversal signal and $STX was one of those that came up. I looked at chart, looks good. I go to buy call contracts to see what that looks like. Then I see every strike selling for .50+ but then there is one selling for .01 that went down -.50 that day even though the stock was having a good day. I'm just curious as to what happened here. I of course bought ASAP because I can get 50 contracts instead of 1 for the same price  and almost same OTM strike. (Expiration is Jan 08). Also, happy new years! 36 0.83 https://www.reddit.com/r/options/comments/ko6pbd/stx_69_call_dropped_from_60_premium_to_01_premium/
[removed] 1 1.0 https://www.reddit.com/r/options/comments/ko70kp/my_small_options_account/
[deleted] 5 0.74 https://www.reddit.com/r/options/comments/ko70sb/tax_considerations_for_assignment_on_ccs/
[removed] 1 1.0 https://www.reddit.com/r/options/comments/ko93x0/hk/
[removed] 1 1.0 https://www.reddit.com/r/optio

In [46]:
# Side table for the extra per-post fields, keyed by the same post id that
# Posts uses. IF NOT EXISTS makes the cell safe to re-run.
cur.execute('''CREATE TABLE IF NOT EXISTS PostBodyAndScore (
                    id TEXT PRIMARY KEY,
                    body TEXT,
                    score INT,
                    upvote_ratio FLOAT)''')

<sqlite3.Cursor at 0x13c979e88f0>

In [65]:
def timeLookupPRAW(cursor, subreddit):
    """Return ``(earliest, latest)`` creation times of already-enriched posts.

    Considers only posts of ``subreddit`` that have a matching row in
    ``PostBodyAndScore``. Both values are None when there is no such post.
    """
    query = '''SELECT MIN(P.created), MAX(P.created)
                        FROM Posts P JOIN PostBodyAndScore B
                        ON P.id = B.id
                        WHERE subreddit = ?'''
    params = (subreddit,)
    cursor.execute(query, params)
    created_bounds = cursor.fetchone()
    return created_bounds

def retrieveID(cursor, num, subreddit, start, end):
    """Return up to ``num`` ids of posts that still lack a stored score.

    Looks at posts of ``subreddit`` with ``start <= created <= end``, keeps
    those with no ``PostBodyAndScore`` row (or a NULL score there), and
    returns them oldest first as a list of 1-tuples.
    """
    query = '''SELECT P.id
                        FROM (SELECT id, created FROM Posts
                                WHERE created >= ? AND created <= ? AND subreddit = ?) P LEFT JOIN PostBodyAndScore B
                        ON P.id = B.id
                        WHERE B.score IS NULL
                        ORDER BY P.created ASC
                        LIMIT ?'''
    # Placeholder order: window start, window end, subreddit, row limit.
    params = (start, end, subreddit, num)
    cursor.execute(query, params)

    pending_ids = cursor.fetchall()
    return pending_ids

def storePostBodyAndScore(cursor, reddit, ids):
    """Look up each post through ``reddit`` and store its body and score.

    Args:
        cursor: sqlite3 cursor used for the inserts. The commit goes through
            ``cursor.connection``, so the function does not depend on a
            module-level connection object.
        reddit: Client exposing ``submission(id=...)``; the returned object
            must provide ``selftext``, ``score`` and ``upvote_ratio``.
        ids: Iterable of 1-tuples holding post ids, as returned by
            ``fetchall()``.

    Rows whose id is already present in ``PostBodyAndScore`` are left
    untouched (INSERT OR IGNORE).
    """
    for postId in ids:
        Sub = reddit.submission(id=postId[0])
        cursor.execute('''INSERT OR IGNORE INTO PostBodyAndScore
                            VALUES (?,?,?,?)''', (postId[0],Sub.selftext,Sub.score,Sub.upvote_ratio))

    print("Finished adding post info for this batch.")
    # Commit on the connection that owns this cursor.
    cursor.connection.commit()

def fillOutTable(cursor, reddit, subreddit, start, end, num = 500):
    """Fill ``PostBodyAndScore`` for one subreddit, one day-long window at a time.

    Starting at ``start`` (or at the newest already-enriched post, whichever
    is later), repeatedly asks ``retrieveID`` for up to ``num`` pending post
    ids in the current window and stores them with ``storePostBodyAndScore``.
    The window moves forward only once it has no pending ids left.

    Args:
        cursor: Database cursor passed through to the helper functions.
        reddit: Client passed through to ``storePostBodyAndScore``.
        subreddit: Subreddit name to process.
        start: Lower bound on ``created`` for the posts to process.
        end: Upper bound on ``created``; no window extends past it.
        num: Maximum number of post ids handled per batch.
    """
    delta = 86400  # window width in seconds (one day)

    _, latest = timeLookupPRAW(cursor,subreddit)

    # latest is None when no post of this subreddit has a PostBodyAndScore row
    # yet; max(start, None) would raise TypeError, so keep start in that case.
    if latest is not None:
        start = max(start,latest)

    while start < end:
        # Clamp the window so posts newer than `end` are never requested.
        window_end = min(start + delta, end)
        ids = retrieveID(cursor, num, subreddit, start, window_end)
        print(ids)
        if not ids:
            start += delta
            print("==Finished all posts up to {}==".format(dt.datetime.fromtimestamp(start)))
            continue

        # NOTE(review): retrieveID selects rows whose stored score is NULL and
        # the insert is INSERT OR IGNORE, so a post stored with a NULL score
        # would be returned again on every pass - confirm that cannot happen.
        storePostBodyAndScore(cursor, reddit, ids)

    

In [66]:
# Enrich every known subreddit with post body, score and upvote ratio.
subreddits = getYourExistingSubs(cur)
# Process posts created between 2021-01-01 and one day before now.
end = int(time.time()-86400)
start = dt.datetime(2021,1,1).timestamp()

for subreddit in subreddits:
    fillOutTable(cur, reddit, subreddit, start, end)


[]
==Finished all posts up to 2021-05-10 11:13:30==
[]
==Finished all posts up to 2021-05-10 09:31:19==
[('lg7dbe',), ('lg7dme',), ('lg7do5',), ('lg7emf',), ('lg7er1',), ('lg7er4',), ('lg7f35',), ('lg7glc',), ('lg7guh',), ('lg7h7d',), ('lg7hkm',), ('lg7i28',), ('lg7ii0',), ('lg7int',), ('lg7iw0',), ('lg7jev',), ('lg7k37',), ('lg7kdx',), ('lg7kwj',), ('lg7lv6',), ('lg7lw6',), ('lg7mng',), ('lg7mwp',), ('lg7myp',), ('lg7n31',), ('lg7nrc',), ('lg7nrt',), ('lg7o4m',), ('lg7o8y',), ('lg7obh',), ('lg7owb',), ('lg7owc',), ('lg7p6y',), ('lg7pau',), ('lg7pd7',), ('lg7ptl',), ('lg7pu8',), ('lg7qv7',), ('lg7qx9',), ('lg7r5q',), ('lg7rnb',), ('lg7ro3',), ('lg7rq6',), ('lg7rsv',), ('lg7s4t',), ('lg7sx4',), ('lg7t9x',), ('lg7ted',), ('lg7tks',), ('lg7unx',), ('lg7wf0',), ('lg7wqj',), ('lg7ww0',), ('lg7wwt',), ('lg7wx0',), ('lg7xif',), ('lg7xjo',), ('lg7xjp',), ('lg7y0t',), ('lg7ycw',), ('lg7yhy',), ('lg7yn9',), ('lg7yol',), ('lg7z32',), ('lg7zml',), ('lg80k8',), ('lg80w9',), ('lg8134',), ('lg81ij',)

KeyboardInterrupt: 