In [96]:
import os
import sys
import time
import math
import json
import subprocess
import pandas as pd

In [22]:
#-------------------------------------------------
# using the csv file of CRITICALITY_SCORE and searching for
# repos from 200_repos; this excludes new/current projects
# download: https://storage.cloud.google.com/ossf-criticality-score
# UPDATE: looks like a monthly run is published here (wish I had known!)
#-------------------------------------------------

# Read '200_repos.csv' into DataFrame df
#
# NaN is assigned to empty cells
df = pd.read_csv('200_repos.csv')

In [None]:
# small leading slices of df for quick test runs;
# each slice is an explicit .copy() so it can be assigned to on its own
df10 = df.head(10).copy()
df33 = df.head(33).copy()

In [23]:
dfs = df[['CMC_id', 'source_code', 'forge']].copy()
dfc = pd.read_csv('project_criticality_all.csv')

# Build a url -> criticality_score lookup once instead of rescanning the
# whole criticality table for every repo. Keeping the first row per url
# gives the same answer as breaking out of a scan on the first match.
score_by_url = (
    dfc.drop_duplicates(subset='url', keep='first')
       .set_index('url')['criticality_score']
)

# only search if github, and only for strings; floats (NaN) are skipped
searchable = (dfs['forge'] == 'github') & dfs['source_code'].map(
    lambda value: isinstance(value, str))
found = searchable & dfs['source_code'].isin(score_by_url.index)

# The assignment aligns on the index, so rows without a match get NaN.
# The 'criticality' column is created even when nothing matches, which keeps
# the later merge/export cells producing the same columns on every run.
dfs['criticality'] = dfs.loc[found, 'source_code'].map(score_by_url)
num = int(found.sum())
print(str(num), 'criticality scores found and updated')

........................................................................................................................................................................66 criticality scores found and updated


In [26]:
# update MERGED sheet with new data
# 'CMC_id' is the key; drop 'source_code' and 'forge' before the merge
# to prevent duplicate columns.
# errors='ignore' makes the cell safe to re-run: on a second run the columns
# are already gone and the drop is a no-op instead of raising KeyError.
dfs = dfs.drop(columns=['source_code', 'forge'], errors='ignore')
dfm = pd.merge(df, dfs, on=['CMC_id'], how='outer')

In [27]:
# display the merged frame (a bare last expression is rendered as the cell output)
dfm

Unnamed: 0,CMC_id,CMC_rank,name,ticker,web_primary,web_secondary,source_code,check_source,notes,repo,forge,criticality
0,1,1.0,Bitcoin,BTC,https://bitcoin.org/,,https://github.com/bitcoin/bitcoin,y,,bitcoin/bitcoin,github,0.86864
1,1027,2.0,Ethereum,ETH,https://www.ethereum.org/,,https://github.com/ethereum/go-ethereum,y,,ethereum/go-ethereum,github,0.82297
2,1839,3.0,Binance Coin,BNB,https://www.binance.com/,,https://github.com/binance-chain/bsc,y,fork of go-ethereum,binance-chain/bsc,github,
3,52,4.0,XRP,XRP,https://xrpl.org/,,https://github.com/ripple/rippled,y,,ripple/rippled,github,0.63159
4,825,5.0,Tether,USDT,https://tether.to,,private,y,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
220,6841,,Phala Network,PHA,,,,n,,,,
221,7617,,saffron.finance,SFI,,,,n,,,,
222,5802,,Sora,XOR,,,,n,,,,
223,4189,,Ultra,UOS,,,,n,,,,


In [28]:
# persist the results without the index column:
# the criticality-only table first, then the merged sheet
dfs.to_csv(path_or_buf='200_crit.csv', index=False, encoding='utf-8')
dfm.to_csv(path_or_buf='200_merged.csv', index=False, encoding='utf-8')

In [None]:
#-------------------------------------------------
# directly call CRITICALITY_SCORE
# require github token and command line access
#-------------------------------------------------
#
# >> repo: https://github.com/ossf/criticality_score
# 0. make sure the github access token is exported as an environment variable (see methodology notes)
# 1. install: pip3 install criticality-score
# 2. check PATH: WARNING: The script criticality_score is installed in '/home/user/.local/bin' which is not on PATH.
# >> export PATH="/home/user/.local/bin:$PATH"
# 3. get a 'GITHUB_AUTH_TOKEN' and export it on the command line, or set the env variable in jupyter
# 
# Set the environment variable 'GITHUB_AUTH_TOKEN'
# (this is a short-cut; in the future look into pycrosskit)
# >key = 'GITHUB_AUTH_TOKEN'
# >os.environ[key] = 'secret'
#
# read out the value
# >value = os.getenv(key)
# >print("Value of 'GITHUB_AUTH_TOKEN' environment variable :", value) 
#
# 4. run: criticality_score --repo https://github.com/bitcoin/bitcoin
#
'''     
['name: bitcoin',
 'url: https://github.com/bitcoin/bitcoin',
 'language: C++',
 'created_since: 142',
 'updated_since: 0',
 'contributor_count: 961',
 'org_count: 4',
 'commit_frequency: 54.8',
 'recent_releases_count: 3',
 'updated_issues_count: 1920',
 'closed_issues_count: 1467',
 'comment_frequency: 2.7',
 'dependents_count: 348588',
 'criticality_score: 0.86651']
'''

In [352]:
# load the repo list from '200_repos.csv' again
df = pd.read_csv('200_repos.csv')
# only the 'source_code' location and the 'forge' are needed from here on
dfs = df.loc[:, ['source_code', 'forge']].copy()
# leading slices used as test inputs
df_in = dfs.head(10).copy()
df33 = dfs.head(33).copy()

In [344]:
# display the test input frame (a bare last expression is rendered as the cell output)
df_in

Unnamed: 0,source_code,forge
0,https://github.com/bitcoin/bitcoin,github
1,https://github.com/ethereum/go-ethereum,github


In [353]:
# ------------------------------------------------
# dfParse builds a dataFrame using bash output
# ------------------------------------------------
def dfParse(output, firstTime, dataframe):
    """Turn one criticality_score run into a row and add it to a DataFrame.

    Each output line is expected to look like ``'<metric>: <value>'``. The
    first whitespace-delimited token (trailing colon included, e.g.
    ``'name:'``) becomes the column label and the rest of the line becomes
    the value, kept as a string.

    Parameters
    ----------
    output : list of str (or a single multi-line str)
        The command line output from calling criticality_score.
    firstTime : bool
        True on the first loop call; the result is then a fresh one-row
        frame and ``dataframe`` is ignored.
    dataframe : pandas.DataFrame
        The frame accumulated so far. It is not modified in place.

    Returns
    -------
    pandas.DataFrame
        ``dataframe`` with the parsed row appended and the index renumbered
        from 0, or a one-row frame (index 0) on the first call. If
        ``output`` holds no parsable line, the accumulated frame is
        returned unchanged (an empty frame if there is none yet).
    """
    if isinstance(output, str):
        output = output.splitlines()

    names, values = [], []
    for line in output:
        # Split on the FIRST run of whitespace only, so values that contain
        # spaces (e.g. 'language: Jupyter Notebook') stay in one piece.
        parts = str(line).split(None, 1)
        if not parts:
            continue    # blank line
        names.append(parts[0])
        values.append(parts[1].rstrip() if len(parts) > 1 else None)

    have_frame = isinstance(dataframe, pd.DataFrame)
    if not names:
        # nothing to add; never hand back a non-DataFrame placeholder
        return dataframe if have_frame else pd.DataFrame()

    row = pd.DataFrame([values], columns=names)

    if firstTime or not have_frame or dataframe.empty:
        return row
    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    return pd.concat([dataframe, row], ignore_index=True)

# ------------------------------------------------
# variables
# ------------------------------------------------
start = time.time()
num = 0                     # repos successfully scored so far
firstTime = True            # stays True until the first successful parse
df_out = pd.DataFrame()     # an (empty) instance, not the DataFrame class
# ------------------------------------------------
# main loop requires dataFrame: 'df_in'
#                     returns: 'df_out'
# df_out does not have CMC_id and some will be missed;
# should be able to merge back on 'url:'='source_code'
# ------------------------------------------------
for row in df_in.itertuples():
    # proceed if github and the url is a real string; an empty cell is read
    # as NaN (a float) and cannot be passed on the command line
    if row.forge == 'github' and isinstance(row.source_code, str):
        # Argument list instead of a shell string: the url is handed over
        # verbatim and never interpreted by a shell. stderr is folded into
        # stdout so the captured text matches what `!cmd` would have given.
        # A missing executable raises FileNotFoundError here rather than
        # feeding a shell error message to the parser.
        result = subprocess.run(
            ['criticality_score', '--repo', row.source_code],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
        # only parse and count runs that succeeded and printed something
        if result.returncode == 0 and result.stdout.strip():
            output = result.stdout.splitlines()
            df_out = dfParse(output, firstTime, df_out)
            firstTime = False
            num += 1
    sys.stdout.write(".")
    sys.stdout.flush()

# log some output with a timer
print('\n', str(num), 'criticality scores updated\n',
      str(df_in.shape[0] - num), 'repos private or missing\n',
      'Total time elapsed:', time.time() - start, 'seconds')


..........
 9 criticality scores updated
 1 repos private or missing
 Total time elapsed: 65.1545512676239 seconds


In [354]:
# display the collected criticality_score rows (a bare last expression is rendered as the cell output)
df_out

Unnamed: 0,name:,url:,language:,created_since:,updated_since:,contributor_count:,org_count:,commit_frequency:,recent_releases_count:,updated_issues_count:,closed_issues_count:,comment_frequency:,dependents_count:,criticality_score:
0,bitcoin,https://github.com/bitcoin/bitcoin,C++,142,0,961,4,54.9,3,1922,1469,2.7,348595,0.86656
1,go-ethereum,https://github.com/ethereum/go-ethereum,Go,89,0,594,7,14.2,15,675,540,2.4,66944,0.82435
2,bsc,https://github.com/binance-chain/bsc,Go,89,1,511,7,1.5,12,112,66,2.8,11,0.59201
3,rippled,https://github.com/ripple/rippled,C++,116,1,107,7,3.5,2,159,79,1.9,4807,0.63892
4,cardano-node,https://github.com/input-output-hk/cardano-node,Haskell,23,0,77,6,31.6,15,450,315,2.2,412,0.64192
5,polkadot,https://github.com/paritytech/polkadot,Rust,39,0,115,3,21.3,33,688,547,2.3,357,0.64631
6,dogecoin,https://github.com/dogecoin/dogecoin,C++,142,2,552,3,1.0,1,242,159,3.0,133,0.57698
7,uniswap-v3-core,https://github.com/Uniswap/uniswap-v3-core,TypeScript,20,0,11,3,13.0,7,146,143,0.7,66,0.4601
8,litecoin,https://github.com/litecoin-project/litecoin,C++,142,20,755,4,0.0,1,37,16,1.0,185,0.4852
