In [None]:
# NOTES
# If not running in the local dev environment (perceval_venv / health-env),
# install the grimoirelab package first.
#
# Install the pip package into the current Jupyter kernel's environment:
# >import sys
# >!{sys.executable} -m pip install grimoirelab
#
# Command-line usage (shell escapes from a notebook cell):
# >!pip3 install grimoirelab
# >!perceval git 'https://github.com/bitcoin/bitcoin.git' --from-date '2021-07-14'
#
# web tutorial: https://chaoss.github.io/grimoirelab-tutorial/perceval/git.html
# perceval docs: https://perceval.readthedocs.io/en/latest/

In [37]:
from perceval.backends.core.git import Git
from datetime import datetime
import dateutil.rrule
import dateutil.tz
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter
import seaborn as sns
import time
import csv

# file I/O for running a colab notebook
#from google.colab import files

In [38]:
def fetchTimeZonesArray(repoURL, repoName, from_Date, to_Date):
    """Count a repository's commits per UTC-offset hour.

    Perceval clones the repository into '/tmp/<repoName>.git'. That directory
    is reused between runs, so repoName must be unique per repository.

    Parameters
    ----------
    repoURL : str
        URI of the git repository, handed to Perceval's Git backend.
    repoName : str
        Label used for the local clone directory and in progress messages.
    from_Date, to_Date
        Passed unchanged to Git.fetch() as from_date / to_date (the calling
        cell supplies datetime objects).

    Returns
    -------
    list of int
        24 counters. Index i counts the commits whose CommitDate carries a
        UTC offset of (i - 12) hours, so index 0 is UTC-12 and index 23 is
        UTC+11. Offsets above +11 wrap around by -24 for polar plotting:
            +12 -> -12: e.g. NZ
            +13 -> -11: Tokelau, Samoa, Tonga (Midway, Niue are already -11)
            +14 -> -10: Christmas Island (Kiritimati), Kiribati
        Fractional offsets are floored, e.g. India (+5.5) -> +5 and
        Newfoundland (-3.5) -> -4.

    Commits whose CommitDate cannot be parsed (for example because the
    timezone is missing) are skipped and reported instead of aborting the
    whole repository.
    """
    # directory for letting Perceval clone the git repo
    repo_dir = '/tmp/' + repoName + '.git'

    # create a Git object, pointing to the remote repo and the local clone path
    repo_object = Git(uri=repoURL, gitpath=repo_dir)

    count = 0   # every commit seen
    noTZ = 0    # commits skipped because the date/timezone was unusable
    # timeData has 1 position for each whole-hour timezone bin
    timeData = [0] * 24

    for commit in repo_object.fetch(from_date=from_Date, to_date=to_Date):
        count += 1
        # one progress dot per 1000 commits
        if count % 1000 == 0:
            print('.', end='')

        # commit date is a string such as 'Tue Aug 14 14:30:13 2012 -0700'
        commitDate = commit['data']['CommitDate']
        try:
            # %z makes this an aware datetime; it raises ValueError when the
            # offset is absent, and None.strip() raises AttributeError
            dateTimeObject = datetime.strptime(
                commitDate.strip(), '%a %b %d %H:%M:%S %Y %z'
            )
        except (AttributeError, ValueError):
            noTZ += 1
            continue

        # floor to whole hours (total_seconds() keeps the sign, unlike
        # timedelta.seconds), then shift by 12 and wrap so that
        # -12..+11 -> 0..23 and +12..+14 -> 0..2
        offsetHours = int(dateTimeObject.utcoffset().total_seconds() // 3600)
        timeData[(offsetHours + 12) % 24] += 1

    # stats
    percentage = (noTZ / count) * 100 if count > 0 else 0
    print('finished fetching data from:', repoName)
    if noTZ != 0:
        print('number with null timezone:', noTZ, '(', round(percentage, 1), '%)')

    # return the list of per-offset commit counts
    return timeData

In [39]:
# Repository list; the collection loop reads its 'name' and 'url' columns.
repos = pd.read_csv(filepath_or_buffer='merged.csv')

In [40]:
# Independent copy of the first 15 rows, for quick trial runs
# (note: the name says 5, but the slice takes 15).
repos5 = repos.iloc[:15].copy()

In [45]:
# Collect 6 months of commits up to the date of the final GHTorrent download
# (March 26, 2022).
fromDate = datetime(2021, 9, 25)  # from is inclusive
toDate = datetime(2022, 3, 26)    # to is exclusive

# Cumulative main loop.
# Per-repo frames are collected in a list and concatenated once at the end;
# calling pd.concat inside the loop copies the whole accumulated frame on
# every iteration. If the cell is interrupted, partial results are still
# available in repoFrames.
repoFrames = []    # one 24-row, single-column frame per fetched repo
failedRepos = []   # (repoURL, error message) for every fetch that raised
count = 0

for count, (index, row) in enumerate(repos.iterrows(), start=1):
    # screen logging
    repoName = row['name']
    print('>> Repo #',count, repoName)
    repoURL = row['url']

    # rows without a usable URL are marked 'private' / 'exclude' or left empty
    if pd.isna(repoURL) or repoURL in ('private', 'exclude'):
        continue

    try:
        data = fetchTimeZonesArray(repoURL, repoName, fromDate, toDate)

        # one column per repository, labelled with its URL
        df = pd.DataFrame(data)
        df.columns = [repoURL]
        repoFrames.append(df)

    except Exception as e:
        # best effort: report the failure, remember it, and move on
        print(f"An error occurred while fetching the repository {repoURL}. Error: {e}")
        failedRepos.append((repoURL, str(e)))
        data = None

# 24 rows (UTC-offset bins) x one column per successfully fetched repository
allData = pd.concat(repoFrames, axis=1) if repoFrames else pd.DataFrame()

>> Repo # 1 Bitcoin
..finished fetching data from: Bitcoin
>> Repo # 2 Ethereum
finished fetching data from: Ethereum
>> Repo # 3 Tether
>> Repo # 4 BNB
finished fetching data from: BNB
>> Repo # 5 USD Coin
finished fetching data from: USD Coin
>> Repo # 6 XRP
finished fetching data from: XRP
>> Repo # 7 Terra
finished fetching data from: Terra
>> Repo # 8 Cardano
finished fetching data from: Cardano
>> Repo # 9 Solana
...finished fetching data from: Solana
>> Repo # 10 Avalanche
.finished fetching data from: Avalanche
>> Repo # 11 Polkadot
.finished fetching data from: Polkadot
>> Repo # 12 Binance USD
finished fetching data from: Binance USD
>> Repo # 13 Dogecoin
finished fetching data from: Dogecoin
>> Repo # 14 TerraUSD
finished fetching data from: TerraUSD
>> Repo # 15 Shiba Inu
>> Repo # 16 Polygon
finished fetching data from: Polygon
>> Repo # 17 Wrapped Bitcoin
finished fetching data from: Wrapped Bitcoin
>> Repo # 18 Cronos
>> Repo # 19 Dai
finished fetching data from: Dai
>> 

Git https://github.com/YOUengine/ReadMe repository is empty; unable to get the log


finished fetching data from: yOUcash
>> Repo # 214 Venus BNB
finished fetching data from: Venus BNB
>> Repo # 215 Liquity USD
finished fetching data from: Liquity USD
>> Repo # 216 BitDAO
finished fetching data from: BitDAO
>> Repo # 217 NXM
finished fetching data from: NXM
>> Repo # 218 Counos X
finished fetching data from: Counos X
>> Repo # 219 WEMIX
finished fetching data from: WEMIX
>> Repo # 220 KOK
finished fetching data from: KOK
>> Repo # 221 ECOMI
>> Repo # 222 HUSD
>> Repo # 223 MetisDAO
finished fetching data from: MetisDAO
>> Repo # 224 The Transfer Token
finished fetching data from: The Transfer Token
>> Repo # 225 WhiteCoin
finished fetching data from: WhiteCoin
>> Repo # 226 MobileCoin
finished fetching data from: MobileCoin
>> Repo # 227 Lido DAO Token
finished fetching data from: Lido DAO Token
>> Repo # 228 Rocket Pool
finished fetching data from: Rocket Pool
>> Repo # 229 Pirate Chain
finished fetching data from: Pirate Chain
>> Repo # 230 Sapphire
finished fetching

Git https://github.com/hyperdao/hdd repository is empty; unable to get the log


finished fetching data from: HyperDAO
>> Repo # 533 TokenPocket
finished fetching data from: TokenPocket
>> Repo # 534 BoringDAO
finished fetching data from: BoringDAO
>> Repo # 535 KILT Protocol
finished fetching data from: KILT Protocol
>> Repo # 536 Morpheus.Network
>> Repo # 537 BarnBridge
finished fetching data from: BarnBridge
>> Repo # 538 RChain
finished fetching data from: RChain
>> Repo # 539 Maro
finished fetching data from: Maro
>> Repo # 540 XeniosCoin
finished fetching data from: XeniosCoin
>> Repo # 541 Refereum
>> Repo # 542 CargoX
>> Repo # 543 Wrapped NXM
>> Repo # 544 Bluzelle
>> Repo # 545 Tornado Cash
finished fetching data from: Tornado Cash
>> Repo # 546 Groestlcoin
..finished fetching data from: Groestlcoin
>> Repo # 547 Kava Lend
>> Repo # 548 Cortex
finished fetching data from: Cortex
>> Repo # 549 Gas
>> Repo # 550 Frontier
finished fetching data from: Frontier
>> Repo # 551 Manchester City Fan Token
>> Repo # 552 DODO
finished fetching data from: DODO
>> Rep

In [48]:
# Sanity check: expect 24 rows (offset bins) and one int64 column per repo.
allData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Columns: 413 entries, https://github.com/bitcoin/bitcoin to https://github.com/Bytom/bytom
dtypes: int64(413)
memory usage: 77.6 KB


In [49]:
# Persist the per-repo counts; the row index is not written to the file.
allData.to_csv(path_or_buf="allData600.csv", index=False)