In [1]:
import requests
import pandas as pd
import altair as alt
import os

## Setup API

In [2]:
class Libraries:
    """Small client for the libraries.io project endpoint.

    The API key is loaded at construction time, either from a text file or
    from the ``API_KEY`` environment variable, and is sent as the ``api_key``
    query parameter on every request.

    Attributes set by the request methods:
        url:        URL of the most recent request.
        r:          ``requests.Response`` of the most recent request.
        json:       decoded JSON body of the most recent request.
        json_flat:  copy of ``json`` without the keys in ``NESTED_KEYS``.
    """

    # Keys removed from the response to obtain a flat record.
    NESTED_KEYS = ('versions', 'normalized_licenses', 'keywords', 'latest_stable_release')

    # Seconds to wait for the server before giving up instead of hanging forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_file_path='./local/api.txt', api_from_env=False):
        """Store the configuration and load the API key.

        Args:
            api_file_path: path of a text file containing the API key; only
                read when ``api_from_env`` is false.
            api_from_env: when true, read the key from ``os.environ['API_KEY']``.

        Raises:
            OSError: the key file cannot be opened.
            KeyError: ``api_from_env`` is true and ``API_KEY`` is not set.
        """
        self.api_file_path = api_file_path
        self.api_from_env = api_from_env

        self.api_key = None
        self.payload = None
        self.url = None
        self.r = None
        self.json = None
        self.json_flat = None

        self.load_api_key()

    def load_api_key(self):
        """Populate ``self.api_key`` from the environment or the key file."""
        if self.api_from_env:
            raw_key = os.environ['API_KEY']
        else:
            with open(self.api_file_path, 'r') as file:
                raw_key = file.read()
        # Key files usually end with a newline; sending it would corrupt the key.
        self.api_key = raw_key.strip()

    def create_payload(self, paramenters=None):
        """Build the query-string parameters for the next request.

        Args:
            paramenters: optional mapping of extra query parameters. The
                (misspelled) name is kept for backward compatibility.
        """
        self.payload = dict(paramenters) if paramenters else dict()
        # Set last so a caller-supplied mapping cannot override the real key.
        self.payload['api_key'] = self.api_key

    def get_response(self):
        """GET ``self.url`` and store the response in ``self.r``.

        Raises:
            requests.HTTPError: the server answered with a 4xx/5xx status.
        """
        # Build the payload on demand so the API key is always sent, even
        # when the caller never invoked create_payload() explicitly.
        if self.payload is None:
            self.create_payload()
        self.r = requests.get(self.url, params=self.payload, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly here rather than on a confusing missing key further down.
        self.r.raise_for_status()

    @classmethod
    def _flatten(cls, record):
        """Return a shallow copy of ``record`` without the ``NESTED_KEYS`` entries."""
        return {key: value for key, value in record.items() if key not in cls.NESTED_KEYS}

    def get_package(self, repository='Pypi', package='requests'):
        """Fetch one project's metadata and return it as a flat dict.

        Args:
            repository: platform segment of the URL (e.g. ``'Pypi'``).
            package: project name segment of the URL.

        Returns:
            The decoded JSON body minus the ``NESTED_KEYS`` entries. Keys that
            are absent from the response are simply skipped. The full body
            stays available as ``self.json``.
        """
        self.url = 'https://libraries.io/api/{}/{}'.format(repository, package)
        self.get_response()
        self.json = self.r.json()
        self.json_flat = self._flatten(self.json)
        return self.json_flat

    def get_repository(self, repository='Pypi', package='requests'):
        """Alias of :meth:`get_package`.

        The original implementation was a line-for-line copy that queried the
        same URL, so it now delegates instead of duplicating the logic.
        """
        return self.get_package(repository=repository, package=package)
    
# Create a client whose key comes from the API_KEY environment variable,
# then fetch the flattened metadata record for seaborn.
lib = Libraries(api_from_env=True)
data = lib.get_package(package='seaborn', repository='Pypi')


In [3]:
# Show the complete JSON body of the most recent request (nested keys included).
lib.json

{'dependent_repos_count': 6393,
 'dependents_count': 69,
 'deprecation_reason': None,
 'description': 'seaborn: statistical data visualization',
 'forks': 0,
 'homepage': 'https://seaborn.pydata.org',
 'keywords': [],
 'language': None,
 'latest_download_url': None,
 'latest_release_number': '0.10.1',
 'latest_release_published_at': '2020-04-26T21:21:58.000Z',
 'latest_stable_release': {'id': 26438997,
  'project_id': 77061,
  'number': '0.10.1',
  'published_at': '2020-04-26T21:21:58.000Z',
  'created_at': '2020-04-26T21:24:13.878Z',
  'updated_at': '2020-04-26T21:24:13.878Z',
  'runtime_dependencies_count': None,
  'spdx_expression': None,
  'original_license': 'BSD (3-clause)',
  'researched_at': None},
 'latest_stable_release_number': '0.10.1',
 'latest_stable_release_published_at': '2020-04-26T21:21:58.000Z',
 'license_normalized': True,
 'licenses': 'BSD (3-clause)',
 'name': 'seaborn',
 'normalized_licenses': ['BSD-1-Clause'],
 'package_manager_url': 'https://pypi.org/project/se

# Create a CSV of data from the packages

In [5]:
# Each entry is a [platform, package name] pair; make_dataframe reads them by position.
packages = [
    ['Pypi', package_name]
    for package_name in ('seaborn', 'matplotlib', 'plotly', 'bokeh', 'altair')
]
lib = Libraries(api_from_env=True)

def make_dataframe(packages, lib):
    """Fetch metadata for each package and assemble one row per package.

    Args:
        packages: iterable of sequences whose first two items are the
            platform and the package name, e.g. ``['Pypi', 'seaborn']``.
        lib: object exposing ``get_package(repository=..., package=...)``
            that returns a flat dict of metadata.

    Returns:
        A ``pandas.DataFrame`` indexed 0..n-1 in the order of ``packages``,
        with one column per metadata key. Empty when ``packages`` is empty.
    """
    # Keyed by row number so that orient='index' yields a 0..n-1 index directly;
    # the former argument-less .reindex() was a no-op and has been dropped.
    records = {
        row: lib.get_package(repository=entry[0], package=entry[1])
        for row, entry in enumerate(packages)
    }
    return pd.DataFrame.from_dict(records, orient='index')
        
# Collect the metadata for every listed package and persist it for the charts.
# to_csv keeps its default index=True, so the row number is written as an
# unnamed first column.
df = make_dataframe(packages=packages, lib=lib)
df.to_csv('package-data.csv')

In [6]:
# Display the assembled per-package metadata table.
df

Unnamed: 0,dependent_repos_count,dependents_count,deprecation_reason,description,forks,homepage,language,latest_download_url,latest_release_number,latest_release_published_at,...,latest_stable_release_published_at,license_normalized,licenses,name,package_manager_url,platform,rank,repository_url,stars,status
0,6393,69,,seaborn: statistical data visualization,0,https://seaborn.pydata.org,,,0.10.1,2020-04-26T21:21:58.000Z,...,2020-04-26T21:21:58.000Z,True,BSD (3-clause),seaborn,https://pypi.org/project/seaborn/,Pypi,14,,0,
1,39552,850,,Python plotting package,5266,https://matplotlib.org,Python,,3.3.0,2020-07-16T22:15:35.000Z,...,2020-07-16T22:15:35.000Z,True,PSF,matplotlib,https://pypi.org/project/matplotlib/,Pypi,26,https://github.com/matplotlib/matplotlib,11973,
2,2491,89,,"An open-source, interactive data visualization...",0,https://plotly.com/python/,,,4.9.0,2020-07-16T12:46:37.000Z,...,2020-07-16T12:46:37.000Z,False,MIT,plotly,https://pypi.org/project/plotly/,Pypi,14,,0,
3,2936,50,,Interactive plots and applications in the brow...,3479,http://github.com/bokeh/bokeh,Python,,2.2.0.dev7,2020-08-03T19:20:14.000Z,...,2020-06-22T21:56:40.000Z,False,BSD-3-Clause,bokeh,https://pypi.org/project/bokeh/,Pypi,22,https://github.com/bokeh/bokeh,13782,
4,185,13,,Altair: A declarative statistical visualizatio...,19,http://altair-viz.github.io,Python,,4.1.0,2020-04-01T13:23:10.000Z,...,2020-04-01T13:23:10.000Z,True,BSD 3-clause,altair,https://pypi.org/project/altair/,Pypi,14,https://github.com/ellisonbg/altair,233,


# Create Vega-Lite visualizations using the CSV

In [7]:
# Interactive scatter plot of `rank` against `stars`, one point per package.
# The chart references the CSV by URL rather than embedding the rows.
url = 'https://raw.githubusercontent.com/library-usage/library-sync/master/package-data.csv'

chart = (
    alt.Chart(url, width=400, height=400)
    .mark_point()
    .encode(
        x=alt.X('rank:Q'),
        y=alt.Y('stars:Q'),
        color=alt.Color('name:N'),
        tooltip=alt.Tooltip('name:N'),
    )
    .interactive()
)
chart

In [8]:
# Save the scatter chart to stars.json (Altair infers the format from the extension).
chart.save('stars.json')

In [9]:
import altair as alt
from vega_datasets import data
from altair import datum

# Parallel-coordinates style chart: the four count columns are folded into
# key/value pairs, each value is min-max scaled within its own key, and one
# line is drawn per package name.
url = 'https://raw.githubusercontent.com/library-usage/library-sync/master/package-data.csv'

parallel = (
    alt.Chart(url)
    .transform_window(index='count()')
    .transform_fold(['dependent_repos_count', 'dependents_count', 'stars', 'forks'])
    .transform_joinaggregate(min='min(value)', max='max(value)', groupby=['key'])
    .transform_calculate(
        minmax_value=(datum.value - datum.min) / (datum.max - datum.min),
        mid=(datum.min + datum.max) / 2,
    )
    .mark_line()
    .encode(
        x=alt.X('key:N'),
        y=alt.Y('minmax_value:Q'),
        color=alt.Color('name:N'),
        # NOTE(review): a `detail='index:N'` encoding was commented out here, so
        # the `index` field from transform_window is not used by any channel.
        opacity=alt.value(0.5),
    )
    .properties(width=500)
    .interactive()
)
parallel

In [10]:
# Save the parallel-coordinates chart to parallel.json.
parallel.save('parallel.json')

In [11]:
# Rebuild and display the rank-vs-stars scatter plot.
# The result is bound to `chart`, the name that is displayed below. It was
# previously bound to `parallel`, which overwrote the parallel-coordinates
# chart with a scatter plot and left the cell showing a stale `chart`.
url = 'https://raw.githubusercontent.com/library-usage/library-sync/master/package-data.csv'

chart = alt.Chart(url, width=400, height=400).mark_point().encode(
    x='rank:Q',
    y='stars:Q',
    color='name:N',
    tooltip='name:N',
).interactive()
chart

In [12]:
# Export pull-library-data.ipynb as a plain Python script via nbconvert.
!jupyter nbconvert --to script pull-library-data.ipynb

[NbConvertApp] Converting notebook pull-library-data.ipynb to script
[NbConvertApp] Writing 4254 bytes to pull-library-data.py
