In [17]:
import json
import re
import operator
import string


# Class to append value in a dictionary
class MyDictionary(dict):

    # __init__ function
    def __init__(self):
        self = dict()

    # Function to add key:value
    def add(self, key, value):
        self[key] = value


class Relevancy(object):
    def __init__(self, file_name, tag):
        self.tag = tag
        self.file_name = file_name
        with open(self.file_name, "r") as f:
            self.data = json.load(f)

    # Python3 code to pre-processing the string
    # Intending to remove all punctuation and common words
    def pre_process(self, str):
        # removing punctuation ---> using string module
        translator = str.maketrans('', '', string.punctuation)
        str = str.translate(translator)

        # converting all into lower cases
        str = str.lower()
        
        # removing numbers and digits
        # str = ''.join([i for i in str if not i.isdigit()])
        regex = re.compile('[^a-zA-Z]')
        #First parameter is the replacement, second parameter is your input string
        str = regex.sub(' ', str)

        # removing prep, conj, articles ---> using re module
        str = re.sub('(\s+)(a|an|and|the|this|that|these|those|i|would|could|should|m|ve)(\s+)', ' ', str)
        str = re.sub('(\s+)(to|for|from|in|into|under|with|within|below|up|down|of|on|s|t)(\s+)', ' ', str)
        str = re.sub('(\s+)(are|may|by|as|we|or|it|be|which|the|when|make|no|set|your|its|it\'s)(\s+)', ' ', str)
        str = re.sub('(\s+)(if|any|used|all|has|have|new|data|at|code|node|state|-|they|our)(\s+)', ' ', str)
        str = re.sub('(\s+)(you|must|every|each|not|what|one|then|way|so|will|also|is|can|"|")(\s+)', ' ', str)
        str = re.sub('(\s+)(their|was|more|other|use|do|need|my|some|get|out|many|had|here|over)(\s+)', ' ', str)
        return str

    # Python3 code to find frequency of each word
    # function for calculating the frequency
    def freq(self, str):
        str = self.pre_process(str)

        # break the string into list of words
        str_list = str.split()

        # gives set of unique words
        unique_words = set(str_list)
        frequency = MyDictionary()
        for word in unique_words:
            frequency.add(word, str_list.count(word))

        # sort by value (downwards) ---> using operator module
        sorted_freq = sorted(frequency.items(), key=operator.itemgetter(1), reverse=True)

        # collect first 10 items in dictionary
        # first_few_items = {k: sorted_freq[k] for k in list(sorted_freq)[:10]}
        # btw, sorted_freq is a list
        return sorted_freq[0:20]  # , len(sorted_freq)

    # merge two lists toether without appending same word
    # if same word found, increases count
    def merge_two_lists(self, a, b):
        new_list = {}
        for pair in a + b:
            key, value = pair
            new_list[key] = new_list.get(key, 0) + value
        new_list = [[key, value] for key, value in new_list.items()]
        return new_list

    # convert a tuple into a list
    def tuple_to_list(self, listname):
        a = []
        for i in range(0, len(listname)):
            a.append(list(listname[i]))
        return a

    # For sorting a list of lists using 2nd item
    def second_item(self, item):
        return item[1]

    # for getting a list of words for a particular tag
    def list_of_most_used_words(self):
        freq_list = []
        for key in self.data:
            if self.tag in key['title'].lower():
                str = key['content']
                freq_list = self.merge_two_lists(freq_list, self.tuple_to_list(self.freq(str)))

        freq_list.sort(key=self.second_item, reverse=True)
        reduced_list = []
        for i in freq_list:
            if i[1] > 1000:
                reduced_list.append(i)
        return reduced_list

    # Return number of Relevant posts
    def relevant_post(self):
        relevant = 0
        for key in self.data:
            if self.tag in key['title'].lower():
                relevant += 1
        return relevant

In [25]:
# application code
if __name__ == "__main__":
    file_name = "ethereum.json"
    tag = 'ethereum'
    keyword = Relevancy(file_name, tag)
    print("Number of Relevant post is:" + str(keyword.relevant_post()))
    print(keyword.list_of_most_used_words())

Number of Relevant post is:373
[['ethereum', 4558], ['blockchain', 1641], ['contract', 1259], ['but', 1057]]


## Relevancy Search in percentage

In [14]:
import json
from math import floor
count = 0
file_name = "data/ethereum.json"
with open(file_name, "r") as f:
    data = json.load(f)
relevant = 0
for key in data:
    if 'ethereum' in key['title'].lower():
        relevant += 1
for key in data:
    count += 1      
p = floor(relevant/count * 100)
print(relevant)
print(count)

475
485


## Better relevancy search using finding tags

In [13]:
import json
from math import floor
count = 0
file_name = "data/ethereum.json"
with open(file_name, "r") as f:
    data = json.load(f)
relevant = 0
for key in data:
    count += 1
    tags = key['tags']
    for item in tags: 
        if 'ethereum' in item.lower():
            relevant += 1
print(relevant)
print(count)

482
485


In [None]:
# input all relevant posts into a single file

In [44]:
import json
final_count = 0
final_json_data = []

file_name_list = ["ethereum.json", "blockchain.json", "smart-contract.json",
                      "solidity.json", "vyper.json", "ripple.json",
                      "remix.json", "metamask.json", "bitcoin.json"]
tag_list = ["ethereum", "blockchain", "smart contract", "solidity", "vyper", "ripple",
                "remix", "metamask", "bitcoin"]

for file_name, tag in zip(file_name_list, tag_list):

    json_data  = json.load(open(filename))

    for key in json_data:
        tags = key['tags']
        for item in tags: 
            if tag in item.lower():
                final_json_data.append(key)
                count += 1
    print("Number of total post for ", tag, "is =", count)

    # Output the updated file with pretty JSON                                      
open("final_all_post_data.json", "w").write(
        json.dumps(final_json_data, sort_keys=True, indent=4, separators=(',', ': '))
    )
print("The number of total post is: ", count)

In [20]:
import json
data  = json.load(open("_test_.json"))
new = []

for key in data:
    count += 1
    tags = key['ename']
    for item in tags: 
        if 'egg' in item.lower():
            new.append(key)

new = json.dumps(new)
new

'[{"ename": ["mark", "egg"], "url": "Lennon.com"}, {"ename": ["egg"], "url": "Lennon.com"}]'

In [None]:
file_tag = {"ethereum-390.json":"ethereum", "vyper-272.json":"vyper",
                "bitcoin-281.json":"bitcoin", "smart-contract-369":"smart contract",
                "blockchain-351.json":"blockchain", "solidity-374":"solidity"}
for x,y in file_tag.items():
    print(x, y)

## Posts over time

In [156]:
#year_list = ['2019', '2018', '2017', '2016', '2015', '2014']
#month_list = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
            # 'August', 'September', 'October', 'November', 'December']
freq = {"2019":0, "2018":0, "2017":0, "2016":0, "2015":0, "2014":0, "missing":0}
date_list = ['27 April, 2018', '17 December', '12 May, 2017', '17 January',
            '16 June, 2018', '16 April', '', '16 April', '11 May, 2016', '', '17 June, 2018',
             '11 May, 2016', '27 April, 2015', '16 April']
for date in date_list:
    if ',' in date:
        y = date.split(',')[1]
        y = y.lstrip()
        freq[y] += 1
    elif date=='':
        freq["missing"] += 1
    else:
        freq["2019"] += 1
        
freq

{'2019': 5,
 '2018': 3,
 '2017': 1,
 '2016': 2,
 '2015': 1,
 '2014': 0,
 'missing': 2}

In [23]:
a = '2016-11-27'
a = a.split("-")
a

['2016', '11', '27']

In [None]:
# Panda DataFrame for Medium Posts
file_name = "metamask-350.json"
dates = []
with open(file_name, "r") as f:
    data = json.load(f)
import pandas as pd
df = pd.DataFrame.from_dict(data, orient='columns')
#df[['post_date', 'upvotes']]
for key in data:
    dates.append(key['post_date'])
if dates[26]=='':
    print(True)

In [29]:
te = [
      {
        "Name": "Bala",
        "phone": "None"
      },
      {
        "Name": "Bala",
        "phone": "None"
      },
      {
        "Name": "Bala",
        "phone": "None"
      },
      {
        "Name": "Bala",
        "phone": "None"
      },
      {
          "Name": "Bala1",
          "phone": "None"
      }      
    ]

unique = { each['Name'] : each for each in te }
a = list(unique.values())
with open("test_3.json","w") as fp:
    json.dump(a, fp)
print(a)

[{'Name': 'Bala', 'phone': 'None'}, {'Name': 'Bala1', 'phone': 'None'}]


In [32]:
with open("final_all_post_data.json","r") as f:
    data = json.load(f)
unique_post = {each['title'] : each for each in data}
listed_dictionary_items = list(unique_post.values())
with open("final_data_removing_duplicacy.json","w") as f:
    json.dump(listed_dictionary_items, f)
print("# of post after removing duplicay = ", len(listed_dictionary_items))

# of post after removing duplicay =  2156


## Associative Tag analysis

In [127]:
import json
with open("../conference_medium_data/new_data/related_data_rm_duplicacy_conf_final_2.json","r") as f:
    data = json.load(f)
count = 0
tag_list = []
for key in data:
    tag_list.append([i for i in key['tags']])
    count += 1

# 2 ways to remove nested lists
from itertools import chain
break_nested_list = list(chain(*tag_list))
print(len(break_nested_list))
# flattened  = [val for sublist in list_of_lists for val in sublist]
unique_list_of_tags = list(set(break_nested_list))
print(len(unique_list_of_tags))

from collections import Counter
associated_with_tag = list(set([item for items in tag_list if "Solidity" in items for item in items]))
top_tags_for_tag = Counter([item for items in tag_list if "Solidity" in items for item in items])

top_tags_only = Counter([item for items in tag_list for item in items])

associated_with_smart_contract = list(set([item for items in tag_list if "Smart Contracts"\
                                           in items for item in items or "Smart Contract" in items for item in items]))
top_tags_for_smart_contracts = Counter(([item for items in tag_list if "Smart Contracts"\
                                           in items for item in items or "Smart Contract" in items for item in items]))
#print(associated_with_tag)
print(top_tags_for_smart_contracts)

12840
1732
Counter({'Smart Contracts': 5424, 'Ethereum': 3907, 'Blockchain': 3635, 'Solidity': 1840, 'Security': 1399, 'Cryptocurrency': 610, 'Audit': 449, 'Bitcoin': 381, 'ICO': 322, 'Tutorial': 309, 'Erc20': 230, 'Dapps': 229, 'Truffle': 226, 'Programming': 223, 'Technology': 194, 'Web3': 173, 'Development': 163, 'Token Sale': 133, 'Blockchain Technology': 124, 'Crypto': 123, 'Smart Contract Security': 112, 'Token': 94, 'Blockchain Development': 94, 'Decentralization': 85, 'Fintech': 80, 'Ethereum Blockchain': 73, 'Tech': 65, 'Insurance': 60, 'Tech Blog': 60, 'Cryptography': 59, 'Startup': 55, 'Hacking': 54, 'Neo': 54, 'Eos': 54, 'Cybersecurity': 54, 'JavaScript': 53, 'Open Source': 50, 'Vulnerability': 50, 'Vyper': 44, 'Remix': 42, 'Software Development': 40, 'Real Estate': 40, 'Quantstamp': 40, 'Security Audit': 40, 'Tokenization': 40, 'Legaltech': 39, 'Finance': 39, 'Smart Contract Auditing': 38, 'Openzeppelin': 35, 'Smart Contracts Tutorial': 35, 'Solidity Tutorial': 35, 'Coding'

### Tag frequency count

In [147]:
import json
with open("../conference_medium_data/new_data/related_data_rm_duplicacy_conf_final_2.json","r") as f:
    data = json.load(f)
    
tag_list = ["solidity", "ethereum", "blockchain", "web3", "metamask", "truffle", "remix", "token",\
            "tokens", "erc20", "vyper", "smart contract", "smart contracts", "ethereum blockchain",\
            "myetherwallet", "wallet", "dapp", "dapps", "security", "decentralization", "bitcoin",\
            "cryptocurrency", "audit", "ico", "token sale", "blockchain technology", "decentralization",\
            "crypto", "ethereum blockchain", "blockchain development", "programming"]

for i in tag_list:
    count = 0
    for key in data:
        if i in [item.lower() for item in key['tags']]:
            count += 1
            # print(key['title'])
    print(i, "=", count)

solidity = 854
ethereum = 1860
blockchain = 1657
web3 = 377
metamask = 296
truffle = 181
remix = 14
token = 70
tokens = 3
erc20 = 425
vyper = 26
smart contract = 40
smart contracts = 1145
ethereum blockchain = 57
myetherwallet = 22
wallet = 28
dapp = 9
dapps = 174
security = 352
decentralization = 94
bitcoin = 172
cryptocurrency = 345
audit = 101
ico = 158
token sale = 60
blockchain technology = 65
decentralization = 94
crypto = 63
ethereum blockchain = 57
blockchain development = 52
programming = 113


In [124]:
import json
with open("../conference_medium_data/new_data/related_data_rm_duplicacy_conf_final_2.json","r") as f:
    data = json.load(f)
response_list = []
for key in data:
    response_list.append(key['responses'])
    count += 1
response_freq = Counter([item for item in response_list])
print(len(response_list))
print("Average= ", sum(response_list)/len(response_list))
print(response_freq.most_common())
# print(sorted(response_list, reverse=True))

2760
Average=  1.857246376811594
[(0, 1726), (1, 417), (2, 194), (3, 119), (4, 63), (5, 55), (7, 28), (8, 28), (6, 26), (9, 20), (11, 14), (10, 12), (12, 9), (14, 5), (16, 5), (21, 4), (17, 3), (23, 3), (13, 3), (39, 2), (18, 2), (136, 2), (146, 2), (153, 1), (68, 1), (24, 1), (19, 1), (22, 1), (79, 1), (20, 1), (29, 1), (40, 1), (72, 1), (165, 1), (105, 1), (63, 1), (131, 1), (57, 1), (26, 1), (15, 1), (140, 1)]


### Avg claps and voters for individual tag

In [153]:
import json
from collections import Counter
with open("../conference_medium_data/new_data/related_data_rm_duplicacy_conf_final_2.json","r") as f:
    data = json.load(f)

clap_list = []
voter_list = []
data_dict = dict()
for key in data:
    clap_list.append(key['claps'])
    voter_list.append(key['voters'])
for i,j in zip(clap_list,voter_list):
    data_dict[i] = j

def resp_clap_voter_1(data, tag):
    new_clap = []
    new_voter = []
    new_response = []
    for key in data:
        for i in key['tags']:
            if tag in i:
                new_clap.append(key['claps'])
                new_voter.append(key['voters'])
                new_response.append(key['responses'])
    
    max_response = max(new_response)
    total_response = sum(new_response)
    avg_response = sum(new_response)/len(new_response)
    
    max_clap = max(new_clap)
    total_clap = sum(new_clap)
    avg_clap = sum(new_clap)/len(new_clap)
    
    max_voter = max(new_voter)
    total_voter = sum(new_voter)
    avg_voter = sum(new_voter)/len(new_voter)
    
    return total_response, round(avg_response,2), total_clap, round(avg_clap,2),\
                         total_voter, round(avg_voter,2)

tags_list = ["Solidity", "Web3", "Ethereum", "Truffle", "Security", "Metamask", "Remix",\
             "Ethereum Blockchain", "Blockchain", "Myetherwallet", "Erc20", "Token Sale",\
             "Cryptocurrency", "Bitcoin", "Ico", "ICO", "Programming", "Audit", "Decentralization", "Crypto"]
for tag in tags_list:
    a,b,c,d,e,f = resp_clap_voter_1(data, tag)
    print(tag, "-->", a, "&", b, "&", c, "&", d, "&", e, "&", f)

Solidity --> 1491 & 1.65 & 258005 & 286.35 & 26794 & 29.74
Web3 --> 765 & 1.8 & 194111 & 457.81 & 18066 & 42.61
Ethereum --> 4358 & 2.21 & 800054 & 406.33 & 78924 & 40.08
Truffle --> 259 & 1.11 & 24371 & 104.15 & 2772 & 11.85
Security --> 469 & 0.98 & 99980 & 209.16 & 8666 & 18.13
Metamask --> 237 & 0.79 & 67243 & 224.89 & 5298 & 17.72
Remix --> 15 & 0.94 & 2178 & 136.12 & 212 & 13.25
Ethereum Blockchain --> 113 & 1.98 & 47661 & 836.16 & 2878 & 50.49
Blockchain --> 3415 & 1.8 & 833901 & 440.05 & 72145 & 38.07
Myetherwallet --> 12 & 0.55 & 2976 & 135.27 & 172 & 7.82
Erc20 --> 1511 & 3.39 & 378248 & 848.09 & 23398 & 52.46
Token Sale --> 191 & 3.18 & 30373 & 506.22 & 1628 & 27.13
Cryptocurrency --> 732 & 2.02 & 240544 & 664.49 & 16842 & 46.52
Bitcoin --> 815 & 4.58 & 165487 & 929.7 & 18009 & 101.17
Ico --> 0 & 0.0 & 253 & 84.33 & 33 & 11.0
ICO --> 920 & 5.82 & 147425 & 933.07 & 8803 & 55.72
Programming --> 172 & 1.38 & 41225 & 329.8 & 4005 & 32.04
Audit --> 52 & 0.36 & 21217 & 146.32 & 15

In [111]:
def resp_clap_voter_1(data, tag1, tag2):
    new_clap = []
    new_voter = []
    new_response = []
    for key in data:
        for i in key['tags']:
            if tag1 in i or tag2 in i:
                new_clap.append(key['claps'])
                new_voter.append(key['voters'])
                new_response.append(key['responses'])
    
    max_response = max(new_response)
    total_response = sum(new_response)
    avg_response = sum(new_response)/len(new_response)
    
    max_clap = max(new_clap)
    total_clap = sum(new_clap)
    avg_clap = sum(new_clap)/len(new_clap)
    
    max_voter = max(new_voter)
    total_voter = sum(new_voter)
    avg_voter = sum(new_voter)/len(new_voter)
    
    return total_response, round(avg_response), total_clap, round(avg_clap),\
                            total_voter, round(avg_voter)

tags_list = [["Smart Contracts", "Smart Contract"], ["Dapps", "Dapp"], ["Token", "Tokens"]]
for tags in tags_list:
    a,b,c,d,e,f = resp_clap_voter_1(data, tags[0], tags[1])
    print(tags[0], "-->", a, "&", b, "&", c, "&", d, "&", e, "&", f)

Smart Contracts --> 2032 & 2 & 385351 & 294 & 39706 & 30
Dapps --> 364 & 2 & 87432 & 424 & 7419 & 36
Token --> 492 & 2 & 149770 & 643 & 9303 & 40


## Vulnerability Search

In [158]:
import json
with open("../conference_medium_data/new_data/related_data_rm_duplicacy_conf_final_2.json") as f:
    post_data = json.load(f)

word_list = ["security", "vulnerability", "vulnerab", "reentrancy", "re entrancy", "re-entrancy", "race condition",\
             "denial of service", "DoS", "transaction order", "transactions order", "trasaction order depend",\
             "transaction-ordering dependence", "timestamp dependence", "integer overflow", "integer underflow"]
# word_list = ["transaction order"]
for i in word_list:
    count = 0
    for key in post_data:
        if i in key['content'] or i in key['title'].lower() or i in key['tags']:
            count += 1
            # print(key['title'])
    print(i, "=", count)

security = 1013
vulnerability = 201
vulnerab = 466
reentrancy = 56
re entrancy = 0
re-entrancy = 26
race condition = 26
denial of service = 18
DoS = 54
transaction order = 13
transactions order = 0
trasaction order depend = 0
transaction-ordering dependence = 7
timestamp dependence = 14
integer overflow = 50
integer underflow = 8


### Mention of Individual tools

In [118]:
sec_tool_list = ["mythril", "mythx", "mythos", "oyente", "solhint", "solium", "ethlint",\
                 "securify", "teether", "smartcheck", "manticore", "sonarsolidity", "ethir",\
                 "maian", "solcheck", "solgraph", "solint", "vandal", "contractfuzzer",\
                 "rattle", "sasc", "zeus", "contractlarva", "echinda", "ethertrust", "fsolidm",\
                 "octopus", "osiris", "reguard", "scompile", "slither", "surya", "sūrya", "verisolid",\
                 "verx", "vultron", "checks-effects-interactions"]
def tool_mention(data, tool):
    count = 0
    for key in post_data:
        if tool in key['content'].lower() or i in key['title'].lower() or i in key['tags']:
            count += 1
            # print(key['title'])
    print(tool, "=", count)
    
for i in sec_tool_list:
    tool_mention(post_data, i)

mythril = 46
mythx = 22
mythos = 4
oyente = 52
solhint = 29
solium = 24
ethlint = 5
securify = 36
teether = 2
smartcheck = 46
manticore = 15
sonarsolidity = 0
ethir = 2
maian = 2
solcheck = 4
solgraph = 6
solint = 3
vandal = 1
contractfuzzer = 1
rattle = 2
sasc = 2
zeus = 1
contractlarva = 1
echinda = 0
ethertrust = 0
fsolidm = 0
octopus = 3
osiris = 0
reguard = 0
scompile = 31
slither = 6
surya = 5
sūrya = 1
verisolid = 1
verx = 0
vultron = 0
checks-effects-interactions = 15


In [140]:
sec_tool_list = ["mythril", "mythx", "mythos", "oyente", "solhint", "solium", "ethlint",\
                 "securify", "teether", "smartcheck", "manticore", "sonarsolidity", "ethir",\
                 "maian", "solcheck", "solgraph", "solint", "vandal", "contractfuzzer",\
                 "rattle", "sasc", "zeus", "contractlarva", "echinda", "ethertrust", "fsolidm",\
                 "octopus", "osiris", "reguard", "scompile", "slither", "surya", "sūrya", "verisolid",\
                 "verx", "vultron"]
# sec_tool_list = ["sūrya"]
for i in sec_tool_list:
    count = 0
    for key in post_data:
        if (i in key['content'].lower() or i in key['title'].lower() or i in key['tags']) \
                            and (("transaction ordering dependency" in key['content'].lower() or i in key['title'].lower())\
                                or ("transaction-ordering dependency" in key['content'].lower() or i in key['title'].lower())\
                                or ("transaction ordering dependent" in key['content'].lower() or i in key['title'].lower())):
                                 #or ("timestamp-depend" in key['content'].lower() or i in key['title'].lower())):
            count += 1
            # print(key['title'])
    print(i, "=", count)

mythril = 4
mythx = 12
mythos = 0
oyente = 2
solhint = 1
solium = 3
ethlint = 0
securify = 1
teether = 0
smartcheck = 1
manticore = 0
sonarsolidity = 0
ethir = 0
maian = 1
solcheck = 0
solgraph = 0
solint = 0
vandal = 0
contractfuzzer = 0
rattle = 0
sasc = 1
zeus = 0
contractlarva = 0
echinda = 0
ethertrust = 0
fsolidm = 0
octopus = 0
osiris = 0
reguard = 0
scompile = 0
slither = 1
surya = 0
sūrya = 0
verisolid = 0
verx = 0
vultron = 0


## Number of mentions of popular tags

In [100]:
tags = ["solidity", "goethereum", "go-ethereum", "web3", "web3js", "contract development", "blockchain", "ethereum", "truffle",\
        "transaction", "transactions", "remix", "contract design", "token", "tokens", "ether", "erc20", "erc-20",\
        "metamask", "mining", "mine", "javascript", "private blockchain", "wallet", "wallets", "gas", "parity", "parities"]

for i in tags:
    count = 0
    for key in post_data:
        counter = Counter(key['content'].lower().split(" "))
        count += counter[i]
            # print(key['title'])
    print(i, "=", count)

solidity = 3424
goethereum = 0
go-ethereum = 14
web3 = 1399
web3js = 46
contract development = 0
blockchain = 4190
ethereum = 5382
truffle = 2244
transaction = 2336
transactions = 1182
remix = 356
contract design = 0
token = 4361
tokens = 3435
ether = 1315
erc20 = 1628
erc-20 = 364
metamask = 2088
mining = 179
mine = 72
javascript = 545
private blockchain = 0
wallet = 1787
wallets = 325
gas = 1774
parity = 221
parities = 0


### Reading time

In [125]:
import json
with open("../conference_medium_data/new_data/related_data_rm_duplicacy_conf_final_2.json") as f:
    post_data = json.load(f)
    
read_list = []
for key in post_data:
    read_list.append(key['readtime'])

_0_2 = 0
_2_5 = 0
_5_10 = 0
_10_15 = 0
_15 = 0
for i in read_list:
    i = float(i)
    if i<2:
        _0_2 += 1
    elif i>=2 and i<5:
        _2_5 += 1
    elif i>=5 and i<10:
        _5_10 += 1
    elif i>=10 and i<15:
        _10_15 += 1
    else:
        _15 += 1

read_count = [_0_2, _2_5, _5_10, _10_15, _15]
read_count

[758, 1245, 598, 113, 46]