In [123]:
from config import db_config
from collections import defaultdict
import pyodbc
from beem.account import Account

In [124]:
# Open the ODBC connection using the settings held in db_config, then take a
# cursor from it; the query cells below all run through this cursor.
connection = pyodbc.connect(**{
    setting: db_config[setting]
    for setting in ('driver', 'server', 'database', 'uid', 'pwd')
})
cursor = connection.cursor()

In [125]:
def to_sql_list(x):
    """Render an iterable of strings as a parenthesised SQL string list.

    Each value is wrapped in single quotes and the values are separated by
    ", ", e.g. ['a', 'b'] -> "('a', 'b')". The result is meant to be appended
    after an ``IN`` keyword when a query is assembled by string concatenation.

    Single quotes inside a value are doubled (the SQL escape for a quote in a
    string literal), so a value containing "'" can neither break the
    statement nor terminate the literal early.

    Parameters:
        x: iterable of str values.

    Returns:
        str: the quoted, comma-separated list wrapped in parentheses. An
        empty iterable yields "('')".
    """
    escaped = (value.replace("'", "''") for value in x)
    return "('" + "', '".join(escaped) + "')"

In [126]:
# Pick up to 100 authors of depth-0 comments in the listed categories,
# ordered by NEWID() so the selection is shuffled on every run.
query = """\
SELECT TOP 100 author
FROM Comments (NOLOCK)
WHERE depth = 0 AND
      category in ('utopian-io', 'dtube', 'dlive')
ORDER BY NEWID()"""

content_creators = []
for record in cursor.execute(query):
    content_creators.append(record[0])

# Save the sample, one author per line (no trailing newline).
with open('content_creators.txt', 'w') as out_file:
    out_file.write('\n'.join(content_creators))

In [127]:
# From replies (depth > 0) written by 'guard' whose body contains 'phishing',
# cut out the text between the first '@' and ' leads' as the account name,
# then keep the 100 names that occur most often.
query = """\
SELECT TOP 100 account
FROM (
       SELECT SUBSTRING(body, CHARINDEX('@', body) + 1, CHARINDEX(' leads', body) - CHARINDEX('@', body) - 1) as account, *
       FROM Comments (NOLOCK)
       WHERE depth > 0 AND
             author = 'guard' AND CONTAINS(body, 'phishing')) C
GROUP BY account
ORDER BY COUNT(*) DESC"""

scammer_rows = cursor.execute(query).fetchall()
scammers = [record[0] for record in scammer_rows]

# Save the names, one per line (no trailing newline).
with open('scammers.txt', 'w') as out_file:
    out_file.write('\n'.join(scammers))

In [128]:
# Among depth-1 comments created in the last 90 days, keep those whose body
# matches one of the bare words below (the LIKE patterns carry no wildcards),
# and take the 100 authors with the most such comments.
query = """\
SELECT TOP 100 author
FROM Comments (NOLOCK)
WHERE depth = 1 AND
      created BETWEEN GETUTCDATE() - 90 AND GETUTCDATE() AND
      ((CONTAINS (body, 'nice') AND body LIKE 'nice') OR
       (CONTAINS (body, 'beautiful') AND body LIKE 'beautiful') OR
       (CONTAINS (body, 'upvoted') AND body LIKE 'upvoted'))
GROUP BY author
ORDER BY COUNT(*) DESC"""

comment_spammers = list(map(lambda record: record[0], cursor.execute(query)))

# Save the names, one per line (no trailing newline).
with open('comment_spammers.txt', 'w') as out_file:
    out_file.write('\n'.join(comment_spammers))

In [129]:
# Load the bid-bot account names, one per line. The context manager closes
# the file handle promptly, and blank lines are skipped so a trailing newline
# cannot produce an empty account name.
with open('bid_bots.txt', 'r') as bid_bots_file:
    bid_bots = [line.strip() for line in bid_bots_file if line.strip()]

# Combine the four groups into the list of accounts to profile.
# dict.fromkeys drops repeated names while keeping first-seen order, so an
# account that was sampled twice (or that sits in more than one group) is
# looked up once and written to the dataset once.
full_list = list(dict.fromkeys(content_creators + scammers + comment_spammers + bid_bots))

# Feature store indexed as d[account_name][feature]; a feature that is never
# assigned reads back as 0 because the inner mapping is a defaultdict(int).
d = defaultdict(lambda: defaultdict(int))

In [130]:
# Collect follow, mute, reputation and steem-power features for every account
# through the beem Account API and store them in d[name].
for name in full_list:
    account = Account(name)
    foll = account.get_follow_count()
    followers = foll['follower_count']
    followings = foll['following_count']

    features = d[name]
    features['name'] = name
    features['followers'] = followers
    features['followings'] = followings
    # An account with no followers would divide by zero; fall back to 0, the
    # same convention used for 'sp ratio' below.
    features['follow ratio'] = followings / followers if followers > 0 else 0
    features['muters'] = len(account.get_muters())
    features['reputation'] = account.get_reputation()

    # Fetch the effective steem power once and reuse it for the ratio instead
    # of issuing a second identical call.
    effective_sp = account.get_steem_power()
    features['effective sp'] = effective_sp
    own_sp = account.get_steem_power(onlyOwnSP=True)
    features['own sp'] = own_sp
    features['sp ratio'] = effective_sp / own_sp if own_sp > 0 else 0

In [131]:
# Read the reward columns and witness-vote count from the Accounts table for
# every account in full_list.
query = """\
SELECT
  name,
  curation_rewards,
  posting_rewards,
  witnesses_voted_for
FROM Accounts (NOLOCK) a
WHERE name in """ + to_sql_list(full_list)

for record in cursor.execute(query):
    # Both reward columns are divided by 1000.0 before being stored
    # (presumably they are held in thousandths -- TODO confirm).
    reward_features = {
        'curation_rewards': record[1] / 1000.0,
        'posting_rewards': record[2] / 1000.0,
        'witnesses_voted_for': record[3],
    }
    d[record[0]].update(reward_features)

In [132]:
# Build the per-author query over depth-0 comments from the last 90 days:
# how many there are and the average LEN(body), limited to full_list.
author_filter = to_sql_list(full_list)
query = """\
SELECT
  author,
  COUNT(*),
  AVG(LEN(body))
FROM Comments (NOLOCK) a
WHERE depth = 0 AND
      created BETWEEN GETUTCDATE() - 90 AND GETUTCDATE()
      AND author in """ + author_filter + """
GROUP BY author"""

In [133]:
# Run the current query and store each author's row count and average body
# length as the 'posts' and 'average_post_len' features.
for record in cursor.execute(query):
    author_features = d[record[0]]
    author_features['posts'] = record[1]
    author_features['average_post_len'] = record[2]

In [134]:
# Per-author statistics over replies (depth > 0) from the last 90 days:
# count, average LEN(body), and the fraction whose body contains 'http'.
query = """\
SELECT
  author,
  COUNT(*),
  AVG(LEN(body)),
  CAST(SUM(CASE WHEN body LIKE '%http%' THEN 1 ELSE 0 END) as DECIMAL(10, 3)) / COUNT(*)
FROM Comments (NOLOCK) a
WHERE depth > 0 AND
      created BETWEEN GETUTCDATE() - 90 AND GETUTCDATE()
      AND author in """ + to_sql_list(full_list) + """
GROUP BY author
"""

for record in cursor.execute(query):
    comment_count = record[1]
    author_features = d[record[0]]
    author_features['comments'] = comment_count
    author_features['average_comment_len'] = record[2]
    author_features['comments_with_link_ratio'] = record[3]
    # 'posts' is only read when there is at least one comment; if it was
    # never set for this author the defaultdict supplies 0.
    if comment_count > 0:
        author_features['posts_to_comments_ratio'] = author_features['posts'] / comment_count
    else:
        author_features['posts_to_comments_ratio'] = 0

In [135]:
def to_class(name):
    """Return the numeric class label for an account name.

    The groups are checked in order and the first one containing the name
    wins: 0 for content_creators, 1 for scammers, 2 for comment_spammers.
    A name found in none of them is labelled 3.
    """
    labelled_groups = (
        (0, content_creators),
        (1, scammers),
        (2, comment_spammers),
    )
    for label, members in labelled_groups:
        if name in members:
            return label
    return 3
    
# Store the class label of every collected account as its 'class' feature.
for account_name in full_list:
    label = to_class(account_name)
    d[account_name]['class'] = label

In [136]:
# Column order of the exported dataset; each entry is a key into d[name].
columns = [
    'name', 'followers', 'followings', 'follow ratio', 'muters',
    'reputation', 'effective sp', 'own sp', 'sp ratio',
    'curation_rewards', 'posting_rewards', 'witnesses_voted_for',
    'posts', 'average_post_len',
    'comments', 'average_comment_len', 'comments_with_link_ratio',
    'posts_to_comments_ratio', 'class',
]

# Write a header line followed by one comma-separated line per account.
# A feature that was never set for an account is written as 0 (the
# defaultdict default).
with open('data.csv', 'w') as csv_file:
    header = ','.join(columns)
    csv_file.write(header + '\n')
    for account_name in full_list:
        values = [str(d[account_name][field]) for field in columns]
        csv_file.write(','.join(values) + '\n')

In [137]:
import pandas as pd
# Load the exported dataset back in; no column is used as the index, so
# pandas assigns a default integer index.
df = pd.read_csv(
    'data.csv',
    sep=",",
    index_col=None,
)

In [138]:
# Show the DataFrame read from data.csv as this cell's output.
df

Unnamed: 0,name,followers,followings,follow ratio,muters,reputation,effective sp,own sp,sp ratio,curation_rewards,posting_rewards,witnesses_voted_for,posts,average_post_len,comments,average_comment_len,comments_with_link_ratio,posts_to_comments_ratio,class
0,alucare,556,119,0.214029,1,58.640845,4.417161e+02,441.716135,1.000000,21.275,433.544,4,111,630,387,97,0.074935,0.286822,0
1,ayufitri,644,318,0.493789,4,46.679335,1.503135e+01,8.926123,1.683973,0.037,17.479,30,301,1016,717,125,0.054393,0.419805,0
2,imh3ll,87,26,0.298851,0,53.589527,5.795137e-01,67.930278,0.008531,0.545,132.534,0,0,0,2,24,0.000000,0.000000,0
3,andeladenaro,205,75,0.365854,0,56.414916,9.938178e+01,99.381785,1.000000,0.158,195.882,1,0,0,0,0,0.000000,0.000000,0
4,shenoy,1225,1576,1.286531,3,65.416593,2.577685e+03,2063.983517,1.248888,46.552,2581.548,0,88,568,41,34,0.048780,2.146341,0
5,tidylive,542,70,0.129151,2,64.583380,8.252583e+02,825.258289,1.000000,18.131,1891.259,1,88,496,61,99,0.245902,1.442623,0
6,arpita182,26,0,0.000000,0,27.253055,1.505696e+01,0.173166,86.950855,0.000,0.143,0,0,0,0,0,0.000000,0.000000,0
7,world-of-music,45,1,0.022222,5,15.326558,5.008056e+00,0.100673,49.745673,0.000,0.000,0,0,0,0,0,0.000000,0.000000,0
8,mirnasahara,1145,2038,1.779913,2,47.750057,1.563195e+01,15.631949,1.000000,0.128,29.303,0,46,74,12,29,0.000000,3.833333,0
9,haveaniceday,57,2,0.035088,0,42.550673,8.587180e-01,0.858718,1.000000,0.000,2.137,0,0,0,0,0,0.000000,0.000000,0
