In [1]:
import json
import re
import operator
import string


# Class to append value in a dictionary
class MyDictionary(dict):

    # __init__ function
    def __init__(self):
        self = dict()

    # Function to add key:value
    def add(self, key, value):
        self[key] = value


class Relevancy(object):
    def __init__(self, file_name, tag):
        self.tag = tag
        self.file_name = file_name
        with open(self.file_name, "r") as f:
            self.data = json.load(f)

    # Python3 code to pre-processing the string
    # Intending to remove all punctuation and common words
    def pre_process(self, str):
        # removing punctuation ---> using string module
        translator = str.maketrans('', '', string.punctuation)
        str = str.translate(translator)

        # converting all into lower cases
        str = str.lower()
        
        # removing numbers and digits
        # str = ''.join([i for i in str if not i.isdigit()])
        regex = re.compile('[^a-zA-Z]')
        #First parameter is the replacement, second parameter is your input string
        str = regex.sub(' ', str)

        # removing prep, conj, articles ---> using re module
        str = re.sub('(\s+)(a|an|and|the|this|that|these|those|i|would|could|should|m|ve)(\s+)', ' ', str)
        str = re.sub('(\s+)(to|for|from|in|into|under|with|within|below|up|down|of|on|s|t)(\s+)', ' ', str)
        str = re.sub('(\s+)(are|may|by|as|we|or|it|be|which|the|when|make|no|set|your|its|it\'s)(\s+)', ' ', str)
        str = re.sub('(\s+)(if|any|used|all|has|have|new|data|at|code|node|state|-|they|our)(\s+)', ' ', str)
        str = re.sub('(\s+)(you|must|every|each|not|what|one|then|way|so|will|also|is|can|"|")(\s+)', ' ', str)
        str = re.sub('(\s+)(their|was|more|other|use|do|need|my|some|get|out|many|had|here|over)(\s+)', ' ', str)
        return str

    # Python3 code to find frequency of each word
    # function for calculating the frequency
    def freq(self, str):
        str = self.pre_process(str)

        # break the string into list of words
        str_list = str.split()

        # gives set of unique words
        unique_words = set(str_list)
        frequency = MyDictionary()
        for word in unique_words:
            frequency.add(word, str_list.count(word))

        # sort by value (downwards) ---> using operator module
        sorted_freq = sorted(frequency.items(), key=operator.itemgetter(1), reverse=True)

        # collect first 10 items in dictionary
        # first_few_items = {k: sorted_freq[k] for k in list(sorted_freq)[:10]}
        # btw, sorted_freq is a list
        return sorted_freq[0:20]  # , len(sorted_freq)

    # merge two lists toether without appending same word
    # if same word found, increases count
    def merge_two_lists(self, a, b):
        new_list = {}
        for pair in a + b:
            key, value = pair
            new_list[key] = new_list.get(key, 0) + value
        new_list = [[key, value] for key, value in new_list.items()]
        return new_list

    # convert a tuple into a list
    def tuple_to_list(self, listname):
        a = []
        for i in range(0, len(listname)):
            a.append(list(listname[i]))
        return a

    # For sorting a list of lists using 2nd item
    def second_item(self, item):
        return item[1]

    # for getting a list of words for a particular tag
    def list_of_most_used_words(self):
        freq_list = []
        for key in self.data:
            if self.tag in key['title'].lower():
                str = key['content']
                freq_list = self.merge_two_lists(freq_list, self.tuple_to_list(self.freq(str)))

        freq_list.sort(key=self.second_item, reverse=True)
        reduced_list = []
        for i in freq_list:
            if i[1] > 1000:
                reduced_list.append(i)
        return reduced_list

    # Return number of Relevant posts
    def relevant_post(self):
        relevant = 0
        for key in self.data:
            if self.tag in key['title'].lower():
                relevant += 1
        return relevant

In [4]:
# application code
if __name__ == "__main__":
    file_name = "../data/individual_tag_data/ethereum.json"
    tag = 'ethereum'
    keyword = Relevancy(file_name, tag)
    print("Number of Relevant post is:" + str(keyword.relevant_post()))
    print(keyword.list_of_most_used_words())

Number of Relevant post is:882
[['ethereum', 10881], ['blockchain', 3915], ['contract', 3501], ['network', 2199], ['but', 2152], ['smart', 2118], ['bitcoin', 1917], ['transaction', 1607], ['like', 1390], ['block', 1254], ['token', 1168], ['transactions', 1126], ['tokens', 1126], ['contracts', 1116], ['c', 1100], ['x', 1026], ['there', 1015]]


## Relevancy Search in percentage

In [5]:
import json
from math import floor
count = 0
file_name = "../data/individual_tag_data/ethereum.json"
with open(file_name, "r") as f:
    data = json.load(f)
relevant = 0
for key in data:
    if 'ethereum' in key['title'].lower():
        relevant += 1
for key in data:
    count += 1      
p = floor(relevant/count * 100)
print(relevant)
print(count)

882
901


## Better relevancy search using finding tags

In [6]:
import json
from math import floor
count = 0
file_name = "../data/individual_tag_data/ethereum.json"
with open(file_name, "r") as f:
    data = json.load(f)
relevant = 0
for key in data:
    count += 1
    tags = key['tags']
    for item in tags: 
        if 'ethereum' in item.lower():
            relevant += 1
print(relevant)
print(count)

882
901


In [None]:
# input all relevant posts into a single file

In [17]:
import json
import os

final_count = 0
final_json_data = []

file_name_list = ["ethereum.json", "blockchain.json", "smart-contract.json",
                      "solidity.json", "vyper.json",
                      "metamask.json", "bitcoin.json"]
tag_list = ["ethereum", "blockchain", "smart contract", "solidity", "vyper",
                "metamask", "bitcoin"]

for file_name, tag in zip(file_name_list, tag_list):

    json_data  = json.load(open(os.path.join("../data/individual_tag_data/", file_name)))

    for post in json_data:
        try:
            tags = post['tags']
        except:
            pass
        for item in tags: 
            if tag in item.lower():
                final_json_data.append(key)
                count += 1
    print("Number of total post for ", tag, "is =", count)

    # Output the updated file with pretty JSON                                      
open("final_all_post_data.json", "w").write(
        json.dumps(final_json_data, sort_keys=True, indent=4, separators=(',', ': '))
    )
print("The number of total post is: ", count)

Number of total post for  ethereum is = 13166
Number of total post for  blockchain is = 13517
Number of total post for  smart contract is = 13866
Number of total post for  solidity is = 14650
Number of total post for  vyper is = 14678
Number of total post for  metamask is = 14974
Number of total post for  bitcoin is = 15255
The number of total post is:  15255


In [18]:
### just some testing, deleted file later

# import json
# data  = json.load(open("_test_.json"))
# new = []

# for key in data:
#     count += 1
#     tags = key['ename']
#     for item in tags: 
#         if 'egg' in item.lower():
#             new.append(key)

# new = json.dumps(new)
# new

In [None]:
# file_tag = {"ethereum-390.json":"ethereum", "vyper-272.json":"vyper",
#                 "bitcoin-281.json":"bitcoin", "smart-contract-369":"smart contract",
#                 "blockchain-351.json":"blockchain", "solidity-374":"solidity"}
# for x,y in file_tag.items():
#     print(x, y)

## Posts over time

In [156]:
#year_list = ['2019', '2018', '2017', '2016', '2015', '2014']
#month_list = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
            # 'August', 'September', 'October', 'November', 'December']
freq = {"2019":0, "2018":0, "2017":0, "2016":0, "2015":0, "2014":0, "missing":0}
date_list = ['27 April, 2018', '17 December', '12 May, 2017', '17 January',
            '16 June, 2018', '16 April', '', '16 April', '11 May, 2016', '', '17 June, 2018',
             '11 May, 2016', '27 April, 2015', '16 April']
for date in date_list:
    if ',' in date:
        y = date.split(',')[1]
        y = y.lstrip()
        freq[y] += 1
    elif date=='':
        freq["missing"] += 1
    else:
        freq["2019"] += 1
        
freq

{'2019': 5,
 '2018': 3,
 '2017': 1,
 '2016': 2,
 '2015': 1,
 '2014': 0,
 'missing': 2}

In [23]:
a = '2016-11-27'
a = a.split("-")
a

['2016', '11', '27']

In [None]:
# Panda DataFrame for Medium Posts
file_name = "metamask-350.json"
dates = []
with open(file_name, "r") as f:
    data = json.load(f)
import pandas as pd
df = pd.DataFrame.from_dict(data, orient='columns')
#df[['post_date', 'upvotes']]
for key in data:
    dates.append(key['post_date'])
if dates[26]=='':
    print(True)

In [29]:
te = [
      {
        "Name": "Bala",
        "phone": "None"
      },
      {
        "Name": "Bala",
        "phone": "None"
      },
      {
        "Name": "Bala",
        "phone": "None"
      },
      {
        "Name": "Bala",
        "phone": "None"
      },
      {
          "Name": "Bala1",
          "phone": "None"
      }      
    ]

unique = { each['Name'] : each for each in te }
a = list(unique.values())
with open("test_3.json","w") as fp:
    json.dump(a, fp)
print(a)

[{'Name': 'Bala', 'phone': 'None'}, {'Name': 'Bala1', 'phone': 'None'}]


In [19]:
### Removing Duplicates

In [20]:
with open("../data/final_all_post_data.json","r") as f:
    data = json.load(f)
unique_post = {each['title'] : each for each in data}
listed_dictionary_items = list(unique_post.values())
with open("../data/final_data_removing_duplicacy.json","w") as f:
    json.dump(listed_dictionary_items, f)
print("# of post after removing duplicay = ", len(listed_dictionary_items))

# of post after removing duplicay =  2156


## Associative Tag analysis

In [21]:
import json
with open("../data/related_data_rm_duplicacy.json","r") as f:
    data = json.load(f)
count = 0
tag_list = []
for key in data:
    tag_list.append([i for i in key['tags']])
    count += 1

# 2 ways to remove nested lists
from itertools import chain
break_nested_list = list(chain(*tag_list))
print(len(break_nested_list))
# flattened  = [val for sublist in list_of_lists for val in sublist]
unique_list_of_tags = list(set(break_nested_list))
print(len(unique_list_of_tags))

from collections import Counter
associated_with_tag = list(set([item for items in tag_list if "Solidity" in items for item in items]))
top_tags_for_tag = Counter([item for items in tag_list if "Solidity" in items for item in items])

top_tags_only = Counter([item for items in tag_list for item in items])

associated_with_smart_contract = list(set([item for items in tag_list if "Smart Contracts"\
                                           in items for item in items or "Smart Contract" in items for item in items]))
top_tags_for_smart_contracts = Counter(([item for items in tag_list if "Smart Contracts"\
                                           in items for item in items or "Smart Contract" in items for item in items]))
#print(associated_with_tag)
print(top_tags_for_smart_contracts)

18866
2590
Counter({'Smart Contracts': 5818, 'Ethereum': 4168, 'Blockchain': 3880, 'Solidity': 1871, 'Security': 1442, 'Cryptocurrency': 675, 'Audit': 420, 'Bitcoin': 392, 'ICO': 315, 'Tutorial': 309, 'Dapps': 294, 'Truffle': 232, 'Web3': 227, 'Erc20': 220, 'Programming': 203, 'Technology': 194, 'Development': 183, 'Smart Contract Security': 153, 'Crypto': 147, 'Token Sale': 139, 'Blockchain Technology': 132, 'Blockchain Development': 123, 'Token': 104, 'Decentralization': 90, 'Fintech': 90, 'JavaScript': 86, 'Insurance': 80, 'Open Source': 75, 'Ethereum Blockchain': 71, 'Cybersecurity': 64, 'Neo': 59, 'Startup': 55, 'Hacking': 54, 'Vulnerability': 50, 'Cryptography': 49, 'Smart Contracts Tutorial': 45, 'Real Estate': 45, 'Solidity Tutorial': 45, 'Tech Blog': 45, 'Tokenization': 45, 'Software Development': 44, 'Legaltech': 44, 'Eos': 44, 'Vyper': 44, 'Remix': 42, 'Security Token': 40, 'Law': 39, 'Openzeppelin': 38, 'Gochain': 35, 'Tech': 35, 'Security Audit': 35, 'Finance': 34, 'Coding

### Tag frequency count

In [22]:
import json
with open("../data/related_data_rm_duplicacy.json","r") as f:
    data = json.load(f)
    
tag_list = ["ethereum", "blockchain", "smart contract", "smart contracts", "solidity", "erc20",\
            "web3", "security", "cryptocurrency", "metamask", "dapp", "dapps", "truffle",\
            "tokens",  "vyper",  "ethereum blockchain",\
            "bitcoin", "ico", "programming", "audit", "decentralization", \
             ]

for i in tag_list:
    count = 0
    for key in data:
        if i in [item.lower() for item in key['tags']]:
            count += 1
            # print(key['title'])
    print(i, "=", count)

ethereum = 2643
blockchain = 2585
smart contract = 46
smart contracts = 1228
solidity = 907
erc20 = 467
web3 = 401
security = 476
cryptocurrency = 659
metamask = 292
dapp = 15
dapps = 235
truffle = 185
tokens = 4
vyper = 27
ethereum blockchain = 76
bitcoin = 369
ico = 265
programming = 132
audit = 119
decentralization = 117


In [23]:
import json
with open("../data/related_data_rm_duplicacy.json","r") as f:
    data = json.load(f)
response_list = []
for key in data:
    response_list.append(key['responses'])
    count += 1
response_freq = Counter([item for item in response_list])
print(len(response_list))
print("Average= ", sum(response_list)/len(response_list))
print(response_freq.most_common())
# print(sorted(response_list, reverse=True))

4398
Average=  1.7710322874033653
[(0, 2900), (1, 634), (2, 269), (3, 156), (4, 107), (5, 72), (7, 43), (8, 39), (6, 38), (9, 25), (10, 17), (11, 15), (12, 11), (14, 9), (17, 5), (13, 5), (21, 4), (16, 4), (23, 3), (15, 3), (18, 3), (33, 2), (20, 2), (39, 2), (26, 2), (152, 1), (43, 1), (79, 1), (25, 1), (73, 1), (32, 1), (19, 1), (601, 1), (68, 1), (24, 1), (22, 1), (31, 1), (55, 1), (51, 1), (53, 1), (72, 1), (29, 1), (40, 1), (165, 1), (106, 1), (62, 1), (27, 1), (28, 1), (130, 1), (682, 1), (57, 1), (135, 1), (139, 1)]


### Avg claps and voters for individual tag

In [24]:
import json
from collections import Counter
with open("../data/related_data_rm_duplicacy.json","r") as f:
    data = json.load(f)

clap_list = []
voter_list = []
data_dict = dict()
for key in data:
    clap_list.append(key['claps'])
    voter_list.append(key['voters'])
for i,j in zip(clap_list,voter_list):
    data_dict[i] = j

def resp_clap_voter_1(data, tag):
    new_clap = []
    new_voter = []
    new_response = []
    for key in data:
        for i in key['tags']:
            if tag in i:
                new_clap.append(key['claps'])
                new_voter.append(key['voters'])
                new_response.append(key['responses'])
    
    max_response = max(new_response)
    total_response = sum(new_response)
    avg_response = sum(new_response)/len(new_response)
    
    max_clap = max(new_clap)
    total_clap = sum(new_clap)
    avg_clap = sum(new_clap)/len(new_clap)
    
    max_voter = max(new_voter)
    total_voter = sum(new_voter)
    avg_voter = sum(new_voter)/len(new_voter)
    
    return total_response, round(avg_response,2), total_clap, round(avg_clap,2),\
                         total_voter, round(avg_voter,2)

tags_list = ["Solidity", "Web3", "Ethereum", "Truffle", "Security", "Metamask", "Remix",\
             "Ethereum Blockchain", "Blockchain", "Myetherwallet", "Erc20", "Token Sale",\
             "Cryptocurrency", "Bitcoin", "Ico", "ICO", "Programming", "Audit", "Decentralization", "Crypto"]
for tag in tags_list:
    a,b,c,d,e,f = resp_clap_voter_1(data, tag)
    print(tag, "-->", a, "&", b, "&", c, "&", d, "&", e, "&", f)

Solidity --> 1544 & 1.62 & 277219 & 290.28 & 27946 & 29.26
Web3 --> 793 & 1.75 & 194062 & 429.34 & 18545 & 41.03
Ethereum --> 6609 & 2.37 & 1079876 & 388.03 & 103255 & 37.1
Truffle --> 270 & 1.13 & 24904 & 104.64 & 2832 & 11.9
Security --> 533 & 0.81 & 126802 & 193.59 & 10808 & 16.5
Metamask --> 223 & 0.76 & 63727 & 216.02 & 5005 & 16.97
Remix --> 20 & 1.18 & 2696 & 158.59 & 260 & 15.29
Ethereum Blockchain --> 170 & 2.24 & 54727 & 720.09 & 3781 & 49.75
Blockchain --> 6048 & 2.06 & 1240826 & 423.2 & 105287 & 35.91
Myetherwallet --> 19 & 0.76 & 3476 & 139.04 & 198 & 7.92
Erc20 --> 1805 & 3.63 & 415720 & 836.46 & 27067 & 54.46
Token Sale --> 108 & 0.84 & 49746 & 385.63 & 3340 & 25.89
Cryptocurrency --> 1728 & 2.48 & 401889 & 577.43 & 28758 & 41.32
Bitcoin --> 1917 & 5.04 & 277454 & 730.14 & 28147 & 74.07
Ico --> 5 & 1.0 & 2018 & 403.6 & 149 & 29.8
ICO --> 635 & 2.4 & 173873 & 656.12 & 12223 & 46.12
Programming --> 250 & 1.71 & 60490 & 414.32 & 6205 & 42.5
Audit --> 52 & 0.31 & 23434 & 141

In [25]:
def resp_clap_voter_1(data, tag1, tag2):
    new_clap = []
    new_voter = []
    new_response = []
    for key in data:
        for i in key['tags']:
            if tag1 in i or tag2 in i:
                new_clap.append(key['claps'])
                new_voter.append(key['voters'])
                new_response.append(key['responses'])
    
    max_response = max(new_response)
    total_response = sum(new_response)
    avg_response = sum(new_response)/len(new_response)
    
    max_clap = max(new_clap)
    total_clap = sum(new_clap)
    avg_clap = sum(new_clap)/len(new_clap)
    
    max_voter = max(new_voter)
    total_voter = sum(new_voter)
    avg_voter = sum(new_voter)/len(new_voter)
    
    return total_response, round(avg_response), total_clap, round(avg_clap),\
                            total_voter, round(avg_voter)

tags_list = [["Smart Contracts", "Smart Contract"], ["Dapps", "Dapp"], ["Token", "Tokens"]]
for tags in tags_list:
    a,b,c,d,e,f = resp_clap_voter_1(data, tags[0], tags[1])
    print(tags[0], "-->", a, "&", b, "&", c, "&", d, "&", e, "&", f)

Smart Contracts --> 2287 & 2 & 436707 & 302 & 44678 & 31
Dapps --> 450 & 2 & 105216 & 384 & 8973 & 33
Token --> 612 & 2 & 198150 & 503 & 13264 & 34


## Vulnerability Search

In [26]:
import json
with open("../data/related_data_rm_duplicacy.json") as f:
    post_data = json.load(f)

word_list = ["security", "vulnerability", "vulnerable", "reentrancy", "re entrancy", "re-entrancy", "race condition",\
             "denial of service", "DoS", "transaction order", "transactions order", "trasaction order depend",\
             "transaction-ordering dependence", "timestamp dependence", "integer overflow", "integer underflow",\
             "data validation", "access control", "authentication", "api inconsistency", "food", "life", "television"]
# word_list = ["transaction order"]
for i in word_list:
    count = 0
    for key in post_data:
        if i in key['content'] or i in key['title'].lower() or i in key['tags']:
            count += 1
            # print(key['title'])
    print(i, "=", count)

security = 1429
vulnerability = 266
vulnerable = 204
reentrancy = 73
re entrancy = 0
re-entrancy = 28
race condition = 29
denial of service = 25
DoS = 73
transaction order = 18
transactions order = 0
trasaction order depend = 0
transaction-ordering dependence = 6
timestamp dependence = 13
integer overflow = 54
integer underflow = 10
data validation = 0
access control = 42
authentication = 104
api inconsistency = 0
food = 125
life = 452
television = 9


In [3]:
word_list = ["security", "vulnerability"]
# word_list = ["transaction order"]

count = 0
for key in post_data:
    if (word_list[0] in key['content'] or i in key['title'].lower() or i in key['tags'])\
                and (word_list[1] in key['content'] or i in key['title'].lower() or i in key['tags']):
        count += 1
        # print(key['title'])
print("Sec and Vul", "=", count)

Sec and Vul = 226


In [28]:
import json
with open("../data/related_data_rm_duplicacy.json") as f:
    post_data = json.load(f)

word_list = ["chocolate"]
# word_list = ["transaction order"]
for i in word_list:
    count = 0
    for key in post_data:
        if i in key['content'] or i in key['title'].lower() or i in key['tags']:
            count += 1
            print(key['title'])
    print(i, "=", count)

Blockchain Oracles Will Make Smart Contracts Fly
The smart contracts powering blockimmo
Building Blocks — Chocolatey
How Truffle Teams Will Change Enterprise Blockchain Development
Red Elephant Eggnog Truffle Review
The Truffle: Both Chocolate and Other
Looking for a new favorite ‘go to’ snack? Give herbal truffles a try
Water Truffles Go Boozy!
Pastry Chef School: Painting Truffles, Making Lollipops & Caramels
Pastry Chef School: Chocolate Truffles
Green Tea Dark Chocolate Truffles
These PROTEIN Chocolate Truffles Basically Make Themselves
50 States & 50 Foods ~ Luscious Oregon Hazelnut Truffles
Ethereum Development On Windows
Rum truffles
Truffle Deep Dive: Ethereum’s Swiss Army Knife
Using Truffle with Quorum on SAP Cloud Platform
3 ingredient Oreo Truffle Pops — Little pieces of heaven
Chocolate truffles that are made with real truffles!!
Almond chocolate truffle Easter eggs
Truffle
Healthy Festive Truffle Recipe
Make Like It’s Your Birthday With These No-Bake Oreo Truffles
SkinnyM

In [29]:
import json
with open("../data/related_data_rm_duplicacy.json") as f:
    post_data = json.load(f)

word_list = ["x86"]
# word_list = ["transaction order"]
for i in word_list:
    count = 0
    for key in post_data:
        if i in key['content'] or i in key['title'].lower() or i in key['tags']:
            count += 1
            print(key['title'])
    print(i, "=", count)

Cirrus Sidechain & Smart Contracts in C# Released
Qtum’s One-Year Anniversary — Looking Back and Ahead
ContractPedia: An Encyclopedia of 40+ Smart Contract Platforms
Smart Contract and Virtual Machine
Upgradable Solidity Contract Design
HackPedia: 16 Solidity Hacks/Vulnerabilities, their Fixes and Real World Examples
Making a Birthday Contract.
Parity Wallet Security Alert — Vulnerability in the Parity Wallet library contract
The Concept of Progress, Farming Truffles, and Saving Tuna: Lux Recommends #59
Ganache — Truffle Framework
Building Wholly Graal with Truffle!
【Ethereum 智能合約開發筆記】不用自己跑節點，使用 Infura 和 web3.js 呼叫合約
What’s New in the Web3.py v4 Beta
Build An Etheruem Bridge -Part 1
How to Compile and Deploy an ERC20 Contract on Ewasm
ChangeNOW’s speaking: SONM is onboard!
Introducing the New Swace Token (ERC20)
A Practical Guide to Smart Contract Security Tools. Part 3: Mythril
Analysis on KickICO hack: part 1
Analysis on KickICO hack: part 2
Parity Wallet Hack 2: Electric Boogaloo
x8

In [30]:
import json
with open("../data/related_data_rm_duplicacy.json") as f:
    post_data = json.load(f)

word_dict = {"food": "truffle"}
# word_list = ["transaction order"]
for i in word_dict:
    count = 0
    for key in post_data:
        if i in key['content'] and word_dict[i] in key['content']:
            count += 1
            print(key['title'])
    print(i, "=", count)

GoNetwork Tutorial — Welcome to the first of a series of technical blog posts from GoNetwork.
Istria: The Land of Wine, Truffles, and Olive Oil
A Beginner’s Guide to Truffles (and ‘Truffle Oil’)
The Journey of Truffle Salt
Black Truffles Imperiled by Climate Change
My Last Hundred Bucks: Drinks, Truffle Oil, and Sushi
Hunting for Truffles in France
The Truffle: Both Chocolate and Other
Looking for a new favorite ‘go to’ snack? Give herbal truffles a try
Water Truffles Go Boozy!
Green Tea Dark Chocolate Truffles
What Do Tulips, Truffles, Diamonds, and Speakeasies Have in Common?
50 States & 50 Foods ~ Luscious Oregon Hazelnut Truffles
How Kung Fu Fighting, Truffles, And A Piece Of Ham Can Teach Us All We Need To Know About Making Vast Amounts Of Money
The Importance of sharing your ideas
Rum truffles
Debugging Smart Contracts with Truffle Debugger: A Practical Approach
A Black Truffle Tale from France
Behind the scenes of that favourite “truffle beef bowl” — a UX case study
Praise the I

### Mention of Individual tools

In [31]:
sec_tool_list = ["mythril", "mythx", "mythos", "oyente", "solhint", "solium", "ethlint",\
                 "securify", "teether", "smartcheck", "manticore", "sonarsolidity", "ethir",\
                 "maian", "solcheck", "solgraph", "solint", "vandal", "contractfuzzer",\
                 "rattle", "sasc", "zeus", "contractlarva", "echinda", "ethertrust", "fsolidm",\
                 "octopus", "osiris", "reguard", "scompile", "slither", "surya", "sūrya", "verisolid",\
                 "verx", "vultron", "checks-effects-interactions", "ethertrust", "smartanvil"]
def tool_mention(data, tool):
    count = 0
    for key in post_data:
        if tool in key['content'].lower() or i in key['title'].lower() or i in key['tags']:
            count += 1
            # print(key['title'])
    print(tool, "=", count)
    
for i in sec_tool_list:
    tool_mention(post_data, i)

mythril = 58
mythx = 35
mythos = 5
oyente = 64
solhint = 39
solium = 31
ethlint = 5
securify = 46
teether = 2
smartcheck = 57
manticore = 16
sonarsolidity = 0
ethir = 2
maian = 3
solcheck = 5
solgraph = 7
solint = 6
vandal = 2
contractfuzzer = 1
rattle = 1
sasc = 2
zeus = 1
contractlarva = 1
echinda = 0
ethertrust = 0
fsolidm = 0
octopus = 3
osiris = 0
reguard = 0
scompile = 33
slither = 10
surya = 5
sūrya = 1
verisolid = 2
verx = 0
vultron = 0
checks-effects-interactions = 17
ethertrust = 0
smartanvil = 0


In [42]:
import json
with open("../data/related_data_rm_duplicacy.json") as f:
    post_data = json.load(f)

sec_tool_list = ["mythril", "mythx", "mythos", "oyente", "solhint", "solium", "ethlint",\
                 "securify", "teether", "smartcheck", "manticore", "sonarsolidity", "ethir",\
                 "maian", "solcheck", "solgraph", "solint", "vandal", "contractfuzzer",\
                 "rattle", "sasc", "zeus", "contractlarva", "echinda", "ethertrust", "fsolidm",\
                 "octopus", "osiris", "reguard", "scompile", "slither", "surya", "sūrya", "verisolid",\
                 "verx", "vultron", "ethertrust", "smartanvil"]
# sec_tool_list = ["sūrya"]
for i in sec_tool_list:
    count = 0
    for key in post_data:
        if (i in key['content'].lower() or i in key['title'].lower() or i in key['tags']) \
                            and (("transaction ordering dependency" in key['content'].lower() or i in key['title'].lower())\
                                or ("transaction-ordering dependency" in key['content'].lower() or i in key['title'].lower())\
                                or ("transaction ordering dependent" in key['content'].lower() or i in key['title'].lower())):
                                 #or ("timestamp-depend" in key['content'].lower() or i in key['title'].lower())):
            count += 1
            
    print(i, "=", count)

mythril = 3
mythx = 14
mythos = 0
oyente = 3
solhint = 1
solium = 3
ethlint = 0
securify = 3
teether = 0
smartcheck = 2
manticore = 0
sonarsolidity = 0
ethir = 0
maian = 1
solcheck = 0
solgraph = 0
solint = 0
vandal = 0
contractfuzzer = 0
rattle = 0
sasc = 1
zeus = 0
contractlarva = 0
echinda = 0
ethertrust = 0
fsolidm = 0
octopus = 0
osiris = 0
reguard = 0
scompile = 0
slither = 2
surya = 0
sūrya = 0
verisolid = 0
verx = 0
vultron = 0
ethertrust = 0
smartanvil = 0


In [2]:
for i in sec_tool_list:
    count = 0
    for key in post_data:
        if (i in key['content'].lower() or i in key['title'].lower() or i in key['tags']) \
                            and (("timestamp dependency" in key['content'].lower() or i in key['title'].lower())\
                                or ("timestamp dependence" in key['content'].lower() or i in key['title'].lower())\
                                or ("timestamp-dependency" in key['content'].lower() or i in key['title'].lower())\
                                or ("timestamp-dependence" in key['content'].lower() or i in key['title'].lower())):
                                 #or ("timestamp-depend" in key['content'].lower() or i in key['title'].lower())):
            count += 1
            
    print(i, "=", count)

mythril = 5
mythx = 14
mythos = 0
oyente = 26
solhint = 26
solium = 6
ethlint = 0
securify = 20
teether = 0
smartcheck = 34
manticore = 1
sonarsolidity = 0
ethir = 0
maian = 1
solcheck = 1
solgraph = 1
solint = 0
vandal = 0
contractfuzzer = 1
rattle = 0
sasc = 1
zeus = 0
contractlarva = 0
echinda = 0
ethertrust = 0
fsolidm = 0
octopus = 0
osiris = 0
reguard = 0
scompile = 29
slither = 3
surya = 1
sūrya = 0
verisolid = 0
verx = 0
vultron = 0
ethertrust = 0
smartanvil = 0


In [33]:
for i in sec_tool_list:
    count = 0
    for key in post_data:
        if (i in key['content'].lower() or i in key['title'].lower() or i in key['tags']) \
                            and (("reentrancy" in key['content'].lower() or i in key['title'].lower())\
                                or ("re-entrancy" in key['content'].lower() or i in key['title'].lower())\
                                or ("reentrance" in key['content'].lower() or i in key['title'].lower())\
                                or ("re-entrance" in key['content'].lower() or i in key['title'].lower())):
                                 #or ("timestamp-depend" in key['content'].lower() or i in key['title'].lower())):
            count += 1
            
    print(i, "=", count)

mythril = 18
mythx = 18
mythos = 0
oyente = 37
solhint = 34
solium = 10
ethlint = 0
securify = 26
teether = 0
smartcheck = 45
manticore = 5
sonarsolidity = 0
ethir = 1
maian = 2
solcheck = 2
solgraph = 4
solint = 1
vandal = 1
contractfuzzer = 1
rattle = 0
sasc = 1
zeus = 0
contractlarva = 1
echinda = 0
ethertrust = 0
fsolidm = 0
octopus = 0
osiris = 0
reguard = 0
scompile = 29
slither = 7
surya = 2
sūrya = 0
verisolid = 1
verx = 0
vultron = 0
checks-effects-interactions = 15
ethertrust = 0
smartanvil = 0


In [8]:
for i in sec_tool_list:
    count = 0
    for key in post_data:
        if (i in key['content'].lower() or i in key['title'].lower() or i in key['tags']) \
                            and (("security" in key['content'].lower() or i in key['title'].lower())\
                                or ("vulnerability" in key['content'].lower() or i in key['title'].lower())\
                                or ("secure" in key['content'].lower() or i in key['title'].lower())\
                                or ("vulnerable" in key['content'].lower() or i in key['title'].lower())):
                                 #or ("timestamp-depend" in key['content'].lower() or i in key['title'].lower())):
            count += 1
            
    print(i, "=", count)

mythril = 56
mythx = 34
mythos = 5
oyente = 63
solhint = 38
solium = 23
ethlint = 5
securify = 45
teether = 1
smartcheck = 57
manticore = 16
sonarsolidity = 0
ethir = 1
maian = 3
solcheck = 4
solgraph = 7
solint = 5
vandal = 2
contractfuzzer = 1
rattle = 1
sasc = 2
zeus = 1
contractlarva = 1
echinda = 0
ethertrust = 0
fsolidm = 0
octopus = 3
osiris = 0
reguard = 0
scompile = 30
slither = 10
surya = 5
sūrya = 1
verisolid = 2
verx = 0
vultron = 0


## Number of mentions of popular tags

In [34]:
tags = ["solidity", "goethereum", "go-ethereum", "web3", "web3js", "contract development", "blockchain", "ethereum", "truffle",\
        "transaction", "transactions", "remix", "contract design", "token", "tokens", "ether", "erc20", "erc-20",\
        "metamask", "mining", "mine", "javascript", "private blockchain", "wallet", "wallets", "gas", "parity", "parities"]

for i in tags:
    count = 0
    for key in post_data:
        counter = Counter(key['content'].lower().split(" "))
        count += counter[i]
            # print(key['title'])
    print(i, "=", count)

solidity = 5033
goethereum = 0
go-ethereum = 21
web3 = 2123
web3js = 57
contract development = 0
blockchain = 8546
ethereum = 9081
truffle = 3809
transaction = 4321
transactions = 2161
remix = 512
contract design = 0
token = 8696
tokens = 6451
ether = 2405
erc20 = 3141
erc-20 = 565
metamask = 5167
mining = 341
mine = 112
javascript = 832
private blockchain = 0
wallet = 3296
wallets = 571
gas = 2830
parity = 358
parities = 0


### Reading time

In [36]:
import json
with open("../data/related_data_rm_duplicacy.json") as f:
    post_data = json.load(f)
    
read_list = []
for key in post_data:
    read_list.append(key['readtime'])

_0_2 = 0
_2_5 = 0
_5_10 = 0
_10_15 = 0
_15 = 0
for i in read_list:
    i = float(i)
    if i<2:
        _0_2 += 1
    elif i>=2 and i<5:
        _2_5 += 1
    elif i>=5 and i<10:
        _5_10 += 1
    elif i>=10 and i<15:
        _10_15 += 1
    else:
        _15 += 1

read_count = [_0_2, _2_5, _5_10, _10_15, _15]
read_count

[1352, 1947, 868, 165, 66]

In [37]:
import pandas as pd
df = pd.read_json('../data/related_data_rm_duplicacy.json')
QATags = df.content

In [38]:
QATags = list(QATags)
QATags

['The Truth about Smart ContractsJimmy SongFollowJun 11, 2018 · 9 min readMuch like the words “blockchain”, “AI” and “cloud”, “smart contract” is one of those phrases that get a lot of hype.After all, what can be better than being able to trust what will happen instead of using the judicial system? The promises of smart contracts include:Enforcing contracts automatically, trustlessly and impartiallyTaking out the middle men in contract construction, contract execution and contract enforcement(By implication) Removing lawyersI sympathize with the hype. After all, how much more efficient could things be if we could just remove the need for trusting the other party to execute?What the heck is a smart contract, anyway? And isn’t that the domain of Ethereum? Isn’t this the way of the future? Why would you stand in the way of progress?In this article, I’m going to examine what smart contracts are and the engineering reality that goes with it (spoiler: it’s not so simple and very hard to secu

In [39]:
len(QATags)

4398

In [40]:
# with open("../medium_post_and_author_analysis_conf/related_data_rm_duplicacy.json", "r") as read_file:
#     data1 = json.load(read_file)
# data2 = []
# for item in data1[1:3]:
#     data2.append(item)
# with open("../medium_post_and_author_analysis_conf/test_data_file.json", "w") as write_file:
#     json.dump(data2, write_file)


In [41]:
# with open("../medium_post_and_author_analysis_conf/test_data_file.json", "r") as f:
#     test = json.load(f)
# for item in test:
#     print(item['title'])