In [1]:
import pandas as pd
import json
import requests
import time

In [2]:
# Download the bulk Homebrew formula listing; later cells index and iterate
# over the decoded result as a sequence of per-formula entries.
# A timeout (seconds) keeps a stalled connection from hanging the kernel.
r = requests.get('https://formulae.brew.sh/api/formula.json', timeout=30)
# Surface HTTP failures here as a clear HTTPError instead of a JSON decode
# error on an error page.
r.raise_for_status()
packages_json = r.json()

In [5]:
# Use the name of the first entry in the listing as a sample package
p_name = packages_json[0]["name"]

In [6]:
# Fetch the per-formula document for the sample package; this is the
# endpoint the analytics fields are read from.
# A timeout (seconds) keeps a stalled connection from hanging the kernel.
r_package = requests.get(
    f'https://formulae.brew.sh/api/formula/{p_name}.json',
    timeout=30,
)
# Fail with a clear HTTPError rather than a JSON decode error on an error page.
r_package.raise_for_status()
package_json = r_package.json()

In [None]:
# Crawl the per-formula endpoint for every entry in `packages_json` and build
# one flat record (metadata + analytics counts) per package in `results`.


def _analytics_count(analytics, category, window, formula_name, default=None):
    """Return analytics[category][window][formula_name].

    Args:
        analytics: the 'analytics' mapping of a per-formula API response.
        category: top-level analytics key, e.g. 'install' or 'build_error'.
        window: reporting-window key, e.g. '30d'.
        formula_name: name of the formula whose count is wanted.
        default: value returned when any key along the path is missing.
            When left as None, the KeyError propagates instead.

    Returns:
        The stored count, or `default` when one was given and the entry
        is absent.

    Raises:
        KeyError: the entry is absent and no default was supplied.
    """
    try:
        return analytics[category][window][formula_name]
    except KeyError:
        if default is None:
            raise
        return default


# (output column prefix, analytics category, fallback when the entry is absent).
# Install counts have no fallback, so a missing entry still raises KeyError;
# build-error counts fall back to 0.
ANALYTICS_COLUMNS = (
    ('install', 'install', None),
    ('on_request', 'install_on_request', None),
    ('build_error', 'build_error', 0),
)
# Reporting windows in days; the API keys are these values suffixed with 'd'.
WINDOWS = ('30', '90', '365')

results = []
start_time = time.perf_counter()
# The loop variable gets its own name so the per-formula response below
# (`package_json`) no longer overwrites the item being iterated.
for package_summary in packages_json:

    # package metadata, taken from the bulk listing entry
    package_name = package_summary['name']
    data = {
        'name': package_name,
        'desc': package_summary['desc'],
        'url': package_summary['homepage'],
        'version': package_summary['versions']['stable'],
        'license': package_summary['license'],
        'dependencies': ", ".join(package_summary['dependencies']),
    }

    # Each package's analytics are read from its per-formula endpoint.
    # A timeout (seconds) keeps one stalled request from blocking the crawl.
    r_package = requests.get(
        f'https://formulae.brew.sh/api/formula/{package_name}.json',
        timeout=30,
    )
    # Stop with a clear HTTPError rather than a JSON decode error on an error page.
    r_package.raise_for_status()
    package_json = r_package.json()
    analytics = package_json['analytics']

    # Category-major order keeps the record's keys in the order
    # install_*, on_request_*, build_error_*.
    for column, category, fallback in ANALYTICS_COLUMNS:
        for days in WINDOWS:
            data[f'{column}_{days}'] = _analytics_count(
                analytics, category, f'{days}d', package_name, default=fallback
            )

    # add data to result
    results.append(data)

    # Throttle: wait as long as THIS package's request took before sending
    # the next one (previously this read the bulk listing response `r`).
    time.sleep(r_package.elapsed.total_seconds())

    # how long it took to get this particular package
    print(f'Got {package_name} in {r_package.elapsed.total_seconds()} seconds')
end_time = time.perf_counter()
# Elapsed wall time is end minus start; the reverse order gave a negative value.
total = end_time - start_time

print(f'Collected {len(packages_json)} informations in {total} seconds')

In [10]:
# Keep a pretty-printed JSON snapshot of the collected records on disk
with open('package_info.json', mode='w') as snapshot:
    json.dump(results, fp=snapshot, indent=2)

In [11]:
# Build a DataFrame from the collected records (one row per package)
df = pd.DataFrame(results)

In [12]:
# Preview the first rows, transposed so each package reads as a column
df.head().T

Unnamed: 0,0,1,2,3,4
name,a2ps,a52dec,aacgain,aalib,aamath
desc,Any-to-PostScript filter,Library for decoding ATSC A/52 streams (AKA 'A...,AAC-supporting version of mp3gain,Portable ASCII art graphics library,Renders mathematical expressions as ASCII art
url,https://www.gnu.org/software/a2ps/,https://liba52.sourceforge.io/,https://aacgain.altosdesign.com/,https://aa-project.sourceforge.io/aalib/,http://fuse.superglue.se/aamath/
version,4.15,0.7.4,1.8,1.4rc5,0.3
license,GPL-3.0-or-later,GPL-2.0-or-later,GPL-2.0-or-later,GPL-2.0-or-later,GPL-2.0-only
dependencies,"bdw-gc, libpaper",,,,
install_30,43,42,20,60,9
install_90,204,155,133,292,46
install_365,1188,845,629,1499,156
on_request_30,44,14,20,51,9


In [13]:
# Write the Homebrew package table out as CSV
df.to_csv(path_or_buf='homebrew_packages.csv')