In [None]:
#!pip install py2neo
#!pip install nlp_rake
#!pip install pytextrank
#!python3 -m pip install -U pip
#!python3 -m pip install -r requirements.txt
#!python3 -m spacy download en_core_web_sm

In [6]:
import pandas as pd
from py2neo import Graph,Node,Relationship
from py2neo.bulk import create_nodes
import re
from py2neo.bulk import create_relationships
import nltk
import re
import collections 
import datetime

In [22]:
# Read Reddit post on wallstreetbets subreddit
df_p = pd.read_csv('data/wsb-aug-2021-posts.csv', usecols=None,names=['type', 'p_id', 'subid',
                    'name', 'nsfw','p_created','p_permalink','domain','url','selftext','p_title','p_score'],skiprows=1)

In [23]:
# Retain relevant columns
df_p = df_p[['p_id','p_created','p_permalink','p_title','p_score']]
df_p['p_created'] = df_p['p_created'].apply(lambda x: datetime.datetime.fromtimestamp(x).strftime('%d'))
df_p.shape

(25751, 5)

In [25]:
# Read Comments to reddit posts file
df_c = pd.read_csv('data/wsb-aug-2021-comments.csv', usecols=None, names=['type', 'c_id', 'subid',
                    'name', 'nsfw','c_created','c_permalink','c_body','c_sentiment','c_score'],skiprows=1)

In [26]:
# Retain relevant columns
df_c=df_c[['c_id','c_created','c_permalink','c_body','c_score']]
df_c['parentid']=df_c.c_permalink.str.slice(49,55) # Extract original post id from permalink
df_c['c_created'] = df_c['c_created'].apply(lambda x: datetime.datetime.fromtimestamp(x).strftime('%d'))
df_c.shape

(1001160, 6)

In [27]:
df_p.head()

Unnamed: 0,p_id,p_created,p_permalink,p_title,p_score
0,pfi0x7,31,https://old.reddit.com/r/wallstreetbets/commen...,Is BABA the next?,1
1,pfhz92,31,https://old.reddit.com/r/wallstreetbets/commen...,$TELL- According to Wall Street Journal its a ...,1
2,pfhxzc,31,https://old.reddit.com/r/wallstreetbets/commen...,IS BABA next?,1
3,pfhw6s,31,https://old.reddit.com/r/wallstreetbets/commen...,1.4K to 7.K overnight on FIVN puts. Thanks ZM!,79
4,pfhtyf,31,https://old.reddit.com/r/wallstreetbets/commen...,1.4K to 7.1K overnight on FIVN puts. Thanks ZM!,2


In [28]:
df_c.head()

Unnamed: 0,c_id,c_created,c_permalink,c_body,c_score,parentid
0,hb4hdm3,31,https://old.reddit.com/r/wallstreetbets/commen...,What's updog,3,pfdkjw
1,hb4hdm8,31,https://old.reddit.com/r/wallstreetbets/commen...,Don’t tell em,1,pfdkjw
2,hb4hdjc,31,https://old.reddit.com/r/wallstreetbets/commen...,"I realize this, ive been losing thousands shoo...",2,pfdkjw
3,hb4hdgo,31,https://old.reddit.com/r/wallstreetbets/commen...,then it tanks after earnings,4,pfgr1h
4,hb4hdeh,31,https://old.reddit.com/r/wallstreetbets/commen...,Are you saying I should or shouldn’t yolo my l...,1,pf3xee


### Data Cleanup and merging

In [29]:
df_p.shape, df_c.shape

((25751, 5), (1001160, 6))

In [30]:
#Dropping null values
df_c.dropna(inplace=True)

In [31]:
df_p.shape, df_c.shape

((25751, 5), (1001158, 6))

In [32]:
# Dropping rows with body and title as [removed]
df_p=df_p[~df_p.p_title.str.match(pat='\[?removed\]')]
df_c=df_c[~df_c.c_body.str.match(pat='\[?removed\]')]
df_p.shape,df_c.shape

((25751, 5), (857417, 6))

In [33]:
# Dropping rows with body and title as [deleted]
df_p=df_p[~df_p.p_title.str.match(pat='\[?deleted\]')]
df_c=df_c[~df_c.c_body.str.match(pat='\[?deleted\]')]
df_p.shape,df_c.shape

((25751, 5), (813950, 6))

In [34]:
# Dropping rows with body and title as [deleted]
df_p=df_p[~df_p.p_title.str.contains(pat='Your submission was removed')]
df_c=df_c[~df_c.c_body.str.contains(pat='Your submission was removed')]
df_p.shape,df_c.shape

((25751, 5), (798002, 6))

In [36]:
# Dropping rows with body and title as [deleted]
df_p=df_p[~df_p.p_title.str.contains('I am a bot from')]
df_c=df_c[~df_c.c_body.str.match('I am a bot from')]
df_p.shape,df_c.shape

((25751, 5), (796535, 6))

In [37]:
df_p.reset_index(inplace=True,drop=True)
df_c.reset_index(inplace=True,drop=True)

In [38]:
#Merge the dataframes
df_m=df_p.merge(df_c, left_on='p_id', right_on='parentid')

In [39]:
df_m.shape

(782307, 11)

In [40]:
df_m.reset_index(inplace=True,drop=True)
df_m.head()

Unnamed: 0,p_id,p_created,p_permalink,p_title,p_score,c_id,c_created,c_permalink,c_body,c_score,parentid
0,pfhw6s,31,https://old.reddit.com/r/wallstreetbets/commen...,1.4K to 7.K overnight on FIVN puts. Thanks ZM!,79,hb4grlq,31,https://old.reddit.com/r/wallstreetbets/commen...,How do people do this?,4,pfhw6s
1,pfhw6s,31,https://old.reddit.com/r/wallstreetbets/commen...,1.4K to 7.K overnight on FIVN puts. Thanks ZM!,79,hb4ga59,31,https://old.reddit.com/r/wallstreetbets/commen...,\n**User Report**| | | |\n:--|:--|:--|:--\n**T...,1,pfhw6s
2,pfhtyf,31,https://old.reddit.com/r/wallstreetbets/commen...,1.4K to 7.1K overnight on FIVN puts. Thanks ZM!,2,hb4ft1v,31,https://old.reddit.com/r/wallstreetbets/commen...,\n**User Report**| | | |\n:--|:--|:--|:--\n**T...,1,pfhtyf
3,pfhq3j,31,https://old.reddit.com/r/wallstreetbets/commen...,Does anyone know what the first stock symbol i...,4,hb4gzf2,31,https://old.reddit.com/r/wallstreetbets/commen...,"That company was delisted, the value is prob a...",3,pfhq3j
4,pfhq3j,31,https://old.reddit.com/r/wallstreetbets/commen...,Does anyone know what the first stock symbol i...,4,hb4g8h8,31,https://old.reddit.com/r/wallstreetbets/commen...,definitely means you’re gay bro.,6,pfhq3j


### Most popular Post in Dataset

In [41]:
print(f'''Most popular posts is {df_p[df_p.p_score == df_p.p_score.max()].p_title.values} 
with the score of {df_p.p_score.max()}''')

Most popular posts is ['My portfolio after discovering wsb'] 
with the score of 45414


### Most popular Comment in Dataset

In [42]:
print(f'''Most popular posts is {df_c[df_c.c_score == df_c.c_score.max()].c_body.values} 
      with the score of {df_c.c_score.max()}''')

Most popular posts is ['Ok so I\'d just remove the letter "k" from the post because if you do that then you\'ll only be down $15 instead of $15k, and that\'s a really manageable loss.'] 
      with the score of 21129


### Most commented post in Dataset

In [43]:
a=df_m['parentid'].mode()
title=df_m.p_title[df_m.p_id == 'p3sv76']
print(f'Most commented post is: {title[title.index[0]]}')

Most commented post is: Weekend Discussion Thread for the Weekend of August 13, 2021


## Keyword Extraction

In [44]:
# We had a problem of scale. Running keyword extraction, validation
# was not possible for 1M+ records so we decided to curtail our dataset

v = df_m.p_id.value_counts()
df_m=df_m[df_m.p_id.isin(v.index[v.gt(12000)])]
df_m.reset_index(drop=True, inplace=True)

In [45]:
print('Number of Posts:', len(df_m.p_id.unique()))
print('Number of Comments:', len(df_m.c_id.unique()))

Number of Posts: 8
Number of Comments: 122471


### Using RAKE (We decided to use rack over spacy for better output and performance)

In [46]:
#Download stopwords
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/asachan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [47]:
from nlp_rake import Rake
r = Rake(
    min_chars=2,
    max_words=5,
    min_freq=1
)

In [48]:
posts = df_m.p_title.unique()
titles = df_m.c_body.values
print('Number of unique posts: ',len(posts))
print('Number of unique titles: ',len(titles))

Number of unique posts:  8
Number of unique titles:  122471


In [49]:
%%time
# Get all stock symbol from title of the post.
title_ent=[]
for i in range(len(posts)):   
    keywords = r.apply(posts[i])
    if len(keywords) > 0:
        result = re.search("\$(\w+)", keywords[0][0])
        if result:
            title_ent.append(result.group(1))
print('Number of keywords extracted:', len(title_ent))

Number of keywords extracted: 0
CPU times: user 390 ms, sys: 127 ms, total: 518 ms
Wall time: 553 ms


In [50]:
%%time
# Get all stock symbol from the body of the comments.
body_ent=[]
for i in range(len(titles)):    
    keywords = r.apply(titles[i])
    if len(keywords) > 0:
        for i in range(len(keywords)):
            result = re.search("\$(\w+)", keywords[i][0])
            if result and result.group(1).isalpha():
                body_ent.append(result.group(1))
print('Number of keywords extracted:', len(body_ent))

Number of keywords extracted: 1456
CPU times: user 10min 53s, sys: 4.43 s, total: 10min 58s
Wall time: 11min 3s


In [154]:
title_ent=set(title_ent)
body_ent=set(body_ent)
m_com=title_ent.union(body_ent)
m_com = list(body_ent)

print('Number of companies mentioned in comments',len(body_ent))
print('Sample of companies stock symbol extracted', m_com[100:150])

Number of companies mentioned in comments 363
Sample of companies stock symbol extracted ['nvida', 'play', 'luv', 'bili', 'nflx', 'fvrr', 'maxr', 'ejh', 'geni', 'mdlz', 'fsm', 'amc', 'khc', 'ater', 'b', 'baba', 'cat', 'ge', 'jmia', 'hit', 'jack', 'su', 'vzio', 'deez', 'amzn', 'xl', 'ed', 'azz', 'uwmc', 'webr', 'spy', 'tsla', 'qs', 'zbra', 'afrm', 'pbya', 'pton', 'stla', 'manu', 'clne', 'root', 'gaymf', 'gld', 'mmnd', 'asml', 'tali', 'mu', 'et', 'atvi', 'ej']


### Using pytextrank and spacy (Performance and accuracy is not as good as RACK)

In [None]:
#https://towardsdatascience.com/keyword-extraction-a-benchmark-of-7-algorithms-in-python-8a905326d93f - Keyword extraction comparison
# import pytextrank
# import spacy

In [None]:
# nlp = spacy.load("en_core_web_sm")
# nlp.add_pipe("textrank")

In [None]:
# %%time
# ## Extract keywords from title of the post
# p_ent=[]
# for i in range(len(df_p)):
#     doc=nlp(df_p.p_title[i])
#     for ent in doc.ents:
#         p_ent.append(ent.text) # named entity and noun chunk yields same result

In [None]:
# %%time
# ## Extract keywords from body of the comments
# c_ent=[]
# for i in range(len(df_c)):
#     doc=nlp(df_c.c_body[i])
#     for ent in doc.ents:
#         c_ent.append(ent.text) # named entity and noun chunk yields same result

### Merging output of LDA with the dataframe

In [69]:
topics=pd.read_csv('Topics.csv')
topics['c_id']=topics['id']
topics = topics[['c_id','topic_label']]
temp1 = df_m
temp2= temp1.merge(topics,left_on='c_id', right_on='c_id')
temp2.shape,df_m.shape, topics.shape

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


((246881, 12), (122471, 11), (322607, 2))

In [71]:
temp2.dropna(inplace=True)

In [72]:
temp2.shape

(122467, 12)

In [73]:
df_m=temp2
df_m.reset_index(drop=True,inplace=True)

### Validating and getting stock price from finance.yahoo.com

In [74]:
%%time
import yfinance as yf
comp_prices = pd.DataFrame()
for comp in m_com:
    tick = yf.Ticker(comp)
    if tick:
        dat = tick.history(start='2021-08-01', end='2021-08-31')
        dat['Delta'] = dat['Close']-dat['Open']
        dat['Ticker'] = comp
        comp_prices = comp_prices.append(dat)
comp_prices = comp_prices.reset_index(drop=False)
df_prices=comp_prices[['Ticker','Date','Open','Close','Delta']]

- HOODRATS: No data found, symbol may be delisted
- CHOWN: No data found, symbol may be delisted
- MARGIN: No data found, symbol may be delisted
- FGAN: No data found, symbol may be delisted
- JUMP: No data found, symbol may be delisted
- SMOL: No data found for this date range, symbol may be delisted
- BOLDNESS: No data found, symbol may be delisted
- APPL: No data found for this date range, symbol may be delisted
- CLOVPOSTING: No data found, symbol may be delisted
- ATE: No data found for this date range, symbol may be delisted
- MOUTH: No data found, symbol may be delisted
- JIQQ: No data found, symbol may be delisted
- XX: No data found for this date range, symbol may be delisted
- AFGAN: No data found, symbol may be delisted
- POOP: No data found, symbol may be delisted
- NVIDA: No data found, symbol may be delisted
- HIT: No data found for this date range, symbol may be delisted
- DEEZ: No data found, symbol may be delisted
- TALI: No data found, symbol may be delisted
- EJ: No 

In [75]:
df_prices.head()

Unnamed: 0,Ticker,Date,Open,Close,Delta
0,sdc,2021-08-02,7.05,7.23,0.18
1,sdc,2021-08-03,7.21,6.98,-0.23
2,sdc,2021-08-04,6.87,6.77,-0.1
3,sdc,2021-08-05,6.81,6.95,0.14
4,sdc,2021-08-06,6.91,6.83,-0.08


In [76]:
print('Number of companies after validation: ', len(df_prices.Ticker.unique()))
comps=df_prices.Ticker.unique()

Number of companies after validation:  294


In [158]:
comps

array(['sdc', 'leg', 'adm', 'crm', 'grwg', 'd', 'tt', 'coty', 'cpop',
       'open', 'gdrx', 'abnb', 'amd', 'cvna', 'rgbp', 'pcg', 'fubo',
       'jblu', 'dq', 'nio', 'mj', 'u', 'rrgb', 'alb', 'wmt', 'crsr',
       'low', 'zim', 'psfe', 'dfen', 'roku', 'tup', 'panw', 'gme', 'dnut',
       'clov', 'cvs', 'paya', 'wix', 'dash', 'zm', 'spce', 'sndl', 'bark',
       'aso', 'unh', 'bud', 'ms', 'coin', 'base', 'mrna', 'lmt', 'ecvt',
       'spot', 'nkla', 'tal', 'lc', 'nvda', 'vxx', 'tr', 'corn', 'sony',
       'csco', 's', 'astr', 'dkng', 'uvxy', 'save', 'mnst', 'x', 'eat',
       'dis', 'pypl', 'snps', 'sq', 'igt', 'crnt', 'carr', 'coke', 'rblx',
       'me', 'clf', 'hood', 'azn', 'grom', 'play', 'luv', 'bili', 'nflx',
       'fvrr', 'maxr', 'ejh', 'geni', 'mdlz', 'fsm', 'amc', 'khc', 'ater',
       'b', 'baba', 'cat', 'ge', 'jmia', 'jack', 'su', 'vzio', 'amzn',
       'xl', 'ed', 'azz', 'uwmc', 'webr', 'spy', 'tsla', 'qs', 'zbra',
       'afrm', 'pbya', 'pton', 'stla', 'manu', 'clne', 'ro

## Graph Section

#### Project and dabase was manually created

In [77]:
#g = Graph("bolt://localhost:11006", auth=('neo4j', 'admin'))
g = Graph("bolt://localhost:11005", auth=('new', 'admin'))

In [78]:
# # Create Post nodes
# data=[]
# posts=df_p
# for i in range(len(posts)):
#     x=posts.id[i]
#     y=posts.title[i]
#     z=str(posts.score[i])
#     dict1 = {'id':x, 'title':y, 'score':z}
#     data.append(dict1)
# create_nodes(g.auto(), data, labels={"Post"})

# #Create comments nodes
# data=[]
# comments=df_c
# for i in range(len(comments)):
#     x=comments.id[i]
#     y=comments.body[i]
#     z=str(comments.score[i])
#     zz=comments.parentid[i]
#     dict1 = {'id':x, 'title':y, 'score':z,'parentid':zz}
#     data.append(dict1)

# from itertools import islice
# stream = iter(data)
# batch_size = 10000
# while True:
#     batch = islice(stream, batch_size)
#     if batch:
#         create_nodes(g.auto(), batch, labels={"Comment"})
#     else:
#         break


In [79]:
df_m.shape

(122467, 12)

In [80]:
# more cleanup to avoid special character issues.

df_m=df_m[~df_m.c_body.str.contains('User Report')]
df_m.reset_index(drop=True,inplace=True)
df_m.shape

(122467, 12)

In [93]:
df_m.head()

Unnamed: 0,p_id,p_created,p_permalink,p_title,p_score,c_id,c_created,c_permalink,c_body,c_score,parentid,topic_label
0,pcuv2j,27,https://old.reddit.com/r/wallstreetbets/commen...,Weekend Discussion Thread for the Weekend of A...,404,hb423kx,31,https://old.reddit.com/r/wallstreetbets/commen...,Sauce?,1,pcuv2j,Administrative Removal of Content
1,pcuv2j,27,https://old.reddit.com/r/wallstreetbets/commen...,Weekend Discussion Thread for the Weekend of A...,404,hb2eohe,31,https://old.reddit.com/r/wallstreetbets/commen...,Yep enjoying the hurrication 🌀,2,pcuv2j,Gamestop & AMC
2,pcuv2j,27,https://old.reddit.com/r/wallstreetbets/commen...,Weekend Discussion Thread for the Weekend of A...,404,hb08nna,30,https://old.reddit.com/r/wallstreetbets/commen...,"&amp;#x200B;\n\nSettle down, Nancy.",1,pcuv2j,Todays Occurrences
3,pcuv2j,27,https://old.reddit.com/r/wallstreetbets/commen...,Weekend Discussion Thread for the Weekend of A...,404,hb08dl8,30,https://old.reddit.com/r/wallstreetbets/commen...,So is Vandelay Industries,1,pcuv2j,Administrative Removal of Content
4,pcuv2j,27,https://old.reddit.com/r/wallstreetbets/commen...,Weekend Discussion Thread for the Weekend of A...,404,hazb0e6,30,https://old.reddit.com/r/wallstreetbets/commen...,I just want you to know that having not bought...,1,pcuv2j,Timing


In [127]:
%%time
# This code creates nodes and relationship between nodes
dict1={x:None for x in comps}
for i in range(len(df_m)):
    p_id = df_m.p_id[i]
    pnode = g.nodes.match("POST", p_id=p_id).first()
    a #node variable
    if not pnode:
        a = Node("POST", p_id=df_m.p_id[i], 
                 title=df_m.p_title[i], 
                 score=str(df_m.p_score[i]), 
                 created=str(df_m.p_created[i]))
        a.__primarylabel__ = "POST"
        a.__primarykey__ = "p_id"
    else:
        a=pnode
    
    b = Node("COMMENT", c_id=df_m.c_id[i], 
             body=df_m.c_body[i], 
             score=str(df_m.c_score[i]), 
             parentid=df_m.parentid[i], 
             created=str(df_m.c_created[i]),
             topic=df_m.topic_label[i])
    b.__primarylabel__ = "COMMENT"
    b.__primarykey__ = "c_id"
    HAS = Relationship.type("HAS")
    g.merge(HAS(a, b)) # Relations between post and comment
    
    
# Post has no company mentioned hence removing that block of code:

# Matching company mentioned in comment's body with extracted and validated company list
    
    keywords = r.apply(df_m.c_body[i])
    m_com=[]
     
    if len(keywords) > 0:
        for i in range(len(keywords)):
            result = re.search("\$(\w+)", keywords[i][0])
            if result and result.group(1).isalpha():
                m_com.append(result.group(1))

        for i in m_com:
            tt=[]
            for x in i.split():
                if x in comps:  
                    compnode = g.nodes.match("COMPANY", name=x).first()
                    c=None # company node variable
                    if not compnode:
                        c = Node("COMPANY", name=x)
                        c.__primarylabel__ = "COMPANY"
                        c.__primarykey__ = "name"
                    else:
                        c=compnode
                    
                    MENTIONS = Relationship.type("MENTIONS")
                    g.merge(MENTIONS(b, c)) # Relations between comment and companies mentioned
                    
                    sp=df_prices[df_prices.Ticker==x]
                    sp.reset_index(drop=True,inplace=True)
                    if not dict1[x]:
                        
                        dict1[x]=True
                        for j in range(len(sp)):
                            d = Node("STOCKPRICE", created=str(sp.Date[j]), 
                                     name=x+str(sp.Date[j]),
                                     sopen=str(sp.Open[j]), 
                                     sclose=str(sp.Close[j]), 
                                     diff=str(sp.Delta[j]))
                            d.__primarylabel__ = "STOCKPRICE"
                            d.__primarykey__ = "name"
                            ACTIVITY = Relationship.type("ACTIVITY")
                            g.merge(ACTIVITY(c, d)) # Relations between company and price movement

CPU times: user 26min 59s, sys: 50.7 s, total: 27min 50s
Wall time: 2h 1min 56s


In [125]:
# g.delete_all()

In [129]:
g.nodes.match("COMMENT").count()

122467

## Graph Queries

### Most popular Post

In [130]:
g.run("match (p:POST)  return p.p_id, p.title,p.score order by p.score desc limit 1")

p.p_id,p.title,p.score
pak91h,"Daily Discussion Thread for August 24, 2021",502


### Most popular Comment

In [131]:
g.run("match (p:COMMENT) return p.c_id,p.body, p.score order by p.score desc limit 1")

p.c_id,p.body,p.score
halme9b,"Hit half mil net worth today, thank you JPOW and WSB.",98


### Most Commented Post

In [132]:
g.run("match (p:POST) -[e:HAS] -> (c:COMMENT) return p.p_id,p.title, count(e) as num order by num desc limit 1")

p.p_id,p.title,num
p3sv76,"Weekend Discussion Thread for the Weekend of August 13, 2021",19259


### Most mentioned company

In [133]:
g.run("match (c:COMMENT) -[e:MENTIONS] -> (com:COMPANY) return com.name as Name, count(e) as num order by num desc limit 1")

Name,num
hood,199


### Lifetime of posts

In [134]:

g.run("match (p:POST) -[e:HAS] ->(c:COMMENT) return p.title,toInteger(c.created)-toInteger(p.created), count(c) as num order by num desc")


p.title,toInteger(c.created)-toInteger(p.created),num
"Daily Discussion Thread for August 19, 2021",0,13272
"Daily Discussion Thread for August 24, 2021",0,12550
"Daily Discussion Thread for August 04, 2021",0,12277


### Stock price - social activity corelation

In [152]:
g.run("match (po:POST) -[e:HAS] ->(c:COMMENT),\
(com:COMPANY) -[e1:ACTIVITY]-(s:STOCKPRICE),\
(c) - [e2:MENTIONS] -> (com)\
where toFloat(s.sopen) > 0  and po.created = substring(s.created,8,2)\
return po.title,count(com) as numComments, \
      com.name,po.created, \
      substring(s.created,8,2), \
      (toFloat(s.diff) / toFloat(s.sopen)) * 100 as val \
      order by val desc ")

po.title,numComments,com.name,po.created,"substring(s.created,8,2)",val
"Weekend Discussion Thread for the Weekend of August 27, 2021",5,bbig,27,27,50.14164967962419
"Weekend Discussion Thread for the Weekend of August 27, 2021",1,ater,27,27,47.14423931778589
"Daily Discussion Thread for August 04, 2021",164,hood,4,4,29.27456088771682


In [150]:
g.run("match (po:POST) -[e:HAS] ->(c:COMMENT),\
(com:COMPANY) -[e1:ACTIVITY]-(s:STOCKPRICE),\
(c) - [e2:MENTIONS] -> (com)\
where toFloat(s.sopen) > 0  and po.created = substring(s.created,8,2)\
return po.title,count(com) as numComments, \
      com.name,po.created, \
      substring(s.created,8,2), \
      (toFloat(s.diff) / toFloat(s.sopen)) * 100 as val \
      order by val asc ")

po.title,numComments,com.name,po.created,"substring(s.created,8,2)",val
"Weekend Discussion Thread for the Weekend of August 13, 2021",1,grwg,13,13,-12.308549415414372
"Daily Discussion Thread for August 24, 2021",1,dats,24,24,-12.011576585155805
"Daily Discussion Thread for August 04, 2021",1,ejh,4,4,-10.319094991031694
