In [22]:
import numpy as np
import pandas as pd
import requests
import json
import keyring
import time
import datetime
from requests.structures import CaseInsensitiveDict
import matplotlib.pyplot as plt
import plotly.express as px
import time

* [Docs for the API](https://certificationapi.oshwa.org/documentation#section/Introduction)
* [The website version is here](https://certification.oshwa.org/list.html)
* Step one, get a token!
    * Fill out the form [here](https://certificationapi.oshwa.org/)
    * There is a pop-up that will show a long string of letters and numbers; that string is your API token

* Step two, let's add that token to our keyring
    * On Linux install keyring with, `sudo apt install keyring`
    * Copy the api key and then add it to keyring by running  `keyring set oshwa_api NoName` and pasting the api key
    * To check that the key was stored, run `keyring get oshwa_api NoName`
    * Add it to your password manager if you use one. 
* Next we'll load the key from the keyring into our notebook, so the token itself never appears in the notebook

In [23]:
# Read the OSHWA API token from the system keyring
# (service name "oshwa_api", username "NoName") and report whether it was found.
oshwa_key = keyring.get_password("oshwa_api", "NoName")
print("Got API Key" if oshwa_key is not None else "Failed to get API key")

Got API Key


# Now we're going to make our first request. We'll need our endpoint and a token.
# [This tool is really helpful for figuring out request formats](https://reqbin.com/req/python/5k564bhv/get-request-with-bearer-token-authorization-header) 

In [24]:
# Download the full list of certified projects from the OSHWA API, one page
# at a time, accumulating every returned item in `all_items`.
all_items = []
# Offsets of pages that could not be fetched, so gaps in the data are visible.
failed_offsets = []
url = "https://certificationapi.oshwa.org/api/projects/"
headers = CaseInsensitiveDict()
headers["Accept"] = "application/json"
headers["Authorization"] = "Bearer {0}".format(oshwa_key)
params = {"offset":0,"limit":100}
# Do the first request so we know how many items there are and can page through.
# A timeout keeps the cell from hanging forever if the server stops responding.
resp = requests.get(url, headers=headers, params=params, timeout=30)
print(resp.status_code)
if resp.status_code == 200:
    cert_list = resp.json()
    all_items.extend(cert_list["items"])
    total = cert_list["total"]
    limit = cert_list["limit"]
    for i in range(limit,total,limit):
        print("Getting batch {0} to {1}".format(i,i+limit))
        # Request the same page size the loop steps by; a mismatch between the
        # two would make consecutive pages overlap or leave gaps.
        params = {"offset":i,"limit":limit}
        try:
            resp = requests.get(url, headers=headers, params=params, timeout=30)
            batch_ok = resp.status_code == 200
        except requests.exceptions.RequestException:
            # Network error or timeout: treat it like any other failed batch
            # and keep going so the pages already fetched are not lost.
            batch_ok = False
        if batch_ok:
            all_items.extend(resp.json()["items"])
        else:
            print("Failed to get batch.")
            failed_offsets.append(i)
        # Pause between requests to be polite to the API.
        time.sleep(1)
    if failed_offsets:
        print("{0} batch(es) failed; missing offsets: {1}".format(len(failed_offsets), failed_offsets))

200
Getting batch 100 to 200
Getting batch 200 to 300
Getting batch 300 to 400
Getting batch 400 to 500
Getting batch 500 to 600
Getting batch 600 to 700
Getting batch 700 to 800
Getting batch 800 to 900
Getting batch 900 to 1000
Getting batch 1000 to 1100
Getting batch 1100 to 1200
Getting batch 1200 to 1300
Getting batch 1300 to 1400
Getting batch 1400 to 1500
Getting batch 1500 to 1600


In [26]:
# Convert the downloaded items to a pandas DataFrame and clean up the time column.
df = pd.DataFrame(data=all_items)
# Save a snapshot of the data exactly as downloaded, before any parsing.
df.to_csv("RawData.csv")
df['certificationDate'] = pd.to_datetime(df['certificationDate'], format="%Y-%m-%d %H:%M:%S")
print("Got {0} projects".format(len(df)))
# Preview the first rows. This must be the last expression in the cell:
# a bare `df.head()` in the middle of a cell is evaluated but never displayed.
df.head()

Got 1558 projects


In [27]:
# Keep only the certifications dated within calendar year 2020 (bounds in UTC).
start = datetime.datetime(2020,1,1,tzinfo=datetime.timezone.utc)
# Exclusive upper bound: midnight on 1 Jan 2021. Using 31 Dec 2020 here would
# silently drop every certification dated on the last day of the year.
stop = datetime.datetime(2021,1,1,tzinfo=datetime.timezone.utc)
in_2020 = (df['certificationDate'] >= start) & (df['certificationDate'] < stop)
# .copy() gives df2020 its own data, so columns can be added to it later
# without touching `df` or triggering chained-assignment warnings.
df2020 = df.loc[in_2020].copy()
print("{0} projects from 2020.".format(len(df2020)))

949 projects from 2020.


In [39]:
# List the distinct countries and responsible parties found in the 2020 data,
# together with how many of each there are.
countries = df2020["country"].unique()
parties = df2020["responsibleParty"].unique()
print("Unique countries {0} -- {1}".format(len(countries), countries))
print("Unique Entities {0} -- {1}".format(len(parties), parties))

Unique countries 37 -- ['United States of America' 'India' 'Switzerland' 'Australia' 'Ecuador'
 'Sweden' 'Turkey' 'Japan' 'Bulgaria' 'Belgium' 'Poland' 'France'
 'Germany' 'Spain' 'Croatia' 'Azerbaijan' 'United Kingdom' 'Indonesia'
 'Sri Lanka' 'Portugal' 'Russia' 'Greece' 'Finland' 'Czech Republic'
 'Mauritius' 'Taiwan' 'Mexico' 'Thailand' 'Ireland' 'El Salvador' 'Canada'
 'Costa Rica' 'Guatemala' 'Lithuania' 'China' 'Netherlands' 'Brazil']
Unique Entities 128 -- ['Field Ready' 'Amal Mathew' 'Radomir Dopieralski' 'Whatnick INC'
 'Jedidiah Hodson' 'OSHWA' 'Eric Olinger' 'Jonathan Cagua'
 'Jonathan Cagua Ordoñez' 'arturo182' 'Ali Tekin'
 'CHIRIMEN OPEN HARDWARE COMMUNITY' 'Gregory Davill' 'Aziz Wadi'
 'ANAVI TECHNOLOGY' 'Vadim Shlyonskiy' '3mdeb embedded systems consulting'
 'TurtleForGaming' 'Rush Robbins' 'OLIMEX Ltd' 'Staudt Technologies GmbH'
 'Meister Whiteboxes GmbH' 'BeagleBoard.org Foundation'
 'Adafruit Industries, LLC' 'Miguel Madrid' 'SparkFun Electronics'
 'Digital Sqrt' 'Pr

In [40]:
# Count the non-null entries of every column per country, then order the
# countries by their "oshwaUid" count, largest first.
counts_by_country = df2020.groupby("country").count()
country_data = counts_by_country.sort_values(by="oshwaUid", ascending=False)

In [41]:
# Bar chart of the per-country "oshwaUid" counts.
fig = px.bar(
    country_data,
    y="oshwaUid",
    title="2020 Certifications by Origin",
)
fig.show()

In [42]:
# Per-party column counts, then a histogram showing how the "oshwaUid"
# counts are distributed across responsible parties.
rp_data = df2020.groupby("responsibleParty").count()
fig = px.histogram(
    rp_data,
    x="oshwaUid",
    nbins=10,
    title="2020 Certifications Histogram of Certs by Party",
)
fig.show()

In [43]:
# Keep only the parties whose "oshwaUid" count is above one, then chart them.
has_multiple_certs = rp_data["oshwaUid"] > 1
rp_data = rp_data.loc[has_multiple_certs]
fig = px.bar(
    rp_data,
    y="oshwaUid",
    title="2020 Certifications by Person/Org",
)
fig.show()

In [44]:
# Order the 2020 rows by certification date and number them 0, 1, 2, ...
# in that order; plotting the running number against the date gives an
# area chart of certifications over the year.
df2020 = (
    df2020
    .sort_values(by="certificationDate")
    .assign(certification_number=lambda ordered: np.arange(len(ordered)))
)
fig = px.area(
    df2020,
    x="certificationDate",
    y="certification_number",
    title="Certification by Day 2020",
)
fig.show()

In [45]:
# Column counts per primary project type, ordered by "oshwaUid" count
# (largest first), shown as a bar chart.
counts_by_type = df2020.groupby("primaryType").count()
pt_data = counts_by_type.sort_values(by="oshwaUid", ascending=False)
fig = px.bar(
    pt_data,
    y="oshwaUid",
    title="2020 Certifications by Primary Type",
)
fig.show()

In [46]:
# Column counts per hardware license, ordered by "oshwaUid" count
# (largest first), shown as a bar chart.
counts_by_hw_license = df2020.groupby("hardwareLicense").count()
hl_data = counts_by_hw_license.sort_values(by="oshwaUid", ascending=False)
fig = px.bar(
    hl_data,
    y="oshwaUid",
    title="2020 Certifications by Hardware License",
)
fig.show()

In [47]:
# NOTE(review): this cell repeats the hardware-license chart from the cell
# directly before it and draws the same figure a second time; consider
# removing one of the two.
license_groups = df2020.groupby("hardwareLicense")
hl_data = license_groups.count().sort_values(by="oshwaUid", ascending=False)
fig = px.bar(
    hl_data,
    y="oshwaUid",
    title="2020 Certifications by Hardware License",
)
fig.show()

In [48]:
# Column counts per documentation license, ordered by "oshwaUid" count
# (largest first), shown as a bar chart.
counts_by_doc_license = df2020.groupby("documentationLicense").count()
dl_data = counts_by_doc_license.sort_values(by="oshwaUid", ascending=False)
fig = px.bar(
    dl_data,
    y="oshwaUid",
    title="2020 Certifications by Documentation License",
)
fig.show()