# This script is used to fetch package analytics data from https://formulae.brew.sh/

In [9]:
import requests
import json

# Endpoint that lists every formula (package) published on formulae.brew.sh.
get_package_url = 'https://formulae.brew.sh/api/formula.json'

# Fetch the formula list with the requests API.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of letting it surface
# later as a confusing JSON decoding failure.
package_res = requests.get(get_package_url, timeout=30)
package_res.raise_for_status()

# Print one sample package; its keys show which fields a package structure has.
package = package_res.json()[0]
print(f"{package['name']} description - {package['desc']}")

a2ps description - Any-to-PostScript filter


## Get analytics data of one package

In [23]:
# `package` holds the structure of one package (the sample fetched earlier).
pkg_name = package['name']
pkg_desc = package['desc']

# Construct the per-package URL that serves the analytics data.
pkg_json_url = f'https://formulae.brew.sh/api/formula/{pkg_name}.json'

# Bound the wait with a timeout and fail on an HTTP error status right away,
# rather than on the JSON parsing or key lookups below.
pkg_res = requests.get(pkg_json_url, timeout=30)
pkg_res.raise_for_status()
pkg_res_data = pkg_res.json()

# Retrieve the install-on-request counts from the JSON returned for the package.
# Each period ('30d', '90d', '365d') is looked up by the package's own name.
install_on_request = pkg_res_data['analytics']['install_on_request']
pkg_analytics_30d = install_on_request['30d'][pkg_name]
pkg_analytics_90d = install_on_request['90d'][pkg_name]
pkg_analytics_365d = install_on_request['365d'][pkg_name]

print(f'{pkg_name} analytics data for 30d-{pkg_analytics_30d}, 90d-{pkg_analytics_90d}, 365d-{pkg_analytics_365d}')

a2ps analytics data for 30d-82, 90d-324, 365d-1145


## Get the analytics data for all packages

In [26]:
import time

# One dict per package: name, description and the three analytics counts.
pkg_data = []

# Use a start time to calculate the time taken to retrieve all analytics data.
start_time = time.perf_counter()

# A single Session reuses the underlying connection across the per-package
# requests instead of opening a new one for every package.
with requests.Session() as session:
    for pkg in package_res.json():
        formula_name = pkg['name']

        pkg_url = f'https://formulae.brew.sh/api/formula/{formula_name}.json'
        # The timeout stops one stalled request from blocking the whole run;
        # raise_for_status() reports an HTTP error for the offending package
        # instead of failing later while parsing the response body.
        pkg_res = session.get(pkg_url, timeout=30)
        pkg_res.raise_for_status()
        pkg_res_data = pkg_res.json()

        # Retrieve the analytics data from the JSON returned by the package URL.
        install_on_request = pkg_res_data['analytics']['install_on_request']
        pkg_dict = {
            'pkg_name': formula_name,
            'description': pkg['desc'],
            'analytics_30d': install_on_request['30d'][formula_name],
            'analytics_90d': install_on_request['90d'][formula_name],
            'analytics_365d': install_on_request['365d'][formula_name],
        }
        pkg_data.append(pkg_dict)

end_time = time.perf_counter()
print(f'Total time took for fetching analytics data of all packages - {end_time - start_time} seconds')

    

Total time took for fetching analytics data of all packages - 890.3446401000001 seconds


In [28]:
# Pretty-print every collected package record as JSON with a two-space indent.
pkg_data_json = json.dumps(pkg_data, indent=2)
print(pkg_data_json)

[
  {
    "pkg_name": "a2ps",
    "description": "Any-to-PostScript filter",
    "analytics_30d": 82,
    "analytics_90d": 324,
    "analytics_365d": 1145
  },
  {
    "pkg_name": "a52dec",
    "description": "Library for decoding ATSC A/52 streams (AKA 'AC-3')",
    "analytics_30d": 20,
    "analytics_90d": 85,
    "analytics_365d": 346
  },
  {
    "pkg_name": "aacgain",
    "description": "AAC-supporting version of mp3gain",
    "analytics_30d": 54,
    "analytics_90d": 149,
    "analytics_365d": 557
  },
  {
    "pkg_name": "aalib",
    "description": "Portable ASCII art graphics library",
    "analytics_30d": 60,
    "analytics_90d": 197,
    "analytics_365d": 867
  },
  {
    "pkg_name": "aamath",
    "description": "Renders mathematical expressions as ASCII art",
    "analytics_30d": 17,
    "analytics_90d": 59,
    "analytics_365d": 221
  },
  {
    "pkg_name": "aardvark_shell_utils",
    "description": "Utilities to aid shell scripts or command-line users",
    "analytics_30d"

## Write the package data to a CSV file

In [58]:
import csv

# Column order of the CSV; these are the keys of each dict in pkg_data.
header = ['pkg_name', 'description', 'analytics_30d', 'analytics_90d', 'analytics_365d']

# Create a new file for writing the details in CSV format.
# NOTE: encoding='utf-8' is required; without it some entries fail to be
#       written, and opening the file later fails with the message
#       'file not properly utf-8 encoded'.
# NOTE: newline='' is what the csv module expects for files it writes to;
#       without it, platforms that translate '\n' on write (e.g. Windows)
#       end up with an extra blank line after every row.
with open('package_analytics.csv', 'w', encoding='utf-8', newline='') as csv_file:
    pkg_writer = csv.DictWriter(csv_file, fieldnames=header)
    pkg_writer.writeheader()
    for pkg in pkg_data:
        try:
            pkg_writer.writerow(pkg)
        except UnicodeEncodeError as err:
            # Best effort: report the offending entry and keep writing the rest.
            print(f'{err} observed during {pkg}')
            continue
            
# Specifying the encoding when opening the file resolved the errors that previously prevented some entries from being added to the CSV file.

## Now we need to parse the csv using pandas and determine the needed statistics

In [59]:
import pandas as pd

# Load the CSV written earlier into a DataFrame and preview the first rows.
csv_path = 'package_analytics.csv'
pkg_df = pd.read_csv(csv_path)
pkg_df.head()

Unnamed: 0,pkg_name,description,analytics_30d,analytics_90d,analytics_365d
0,a2ps,Any-to-PostScript filter,82,324,1145
1,a52dec,Library for decoding ATSC A/52 streams (AKA 'A...,20,85,346
2,aacgain,AAC-supporting version of mp3gain,54,149,557
3,aalib,Portable ASCII art graphics library,60,197,867
4,aamath,Renders mathematical expressions as ASCII art,17,59,221
