# Nobel Prize analysis with the Nobel Prize API

The objective is to retrieve the Nobel Prize data from the Nobel Prize API and analyse it, first with plain Python dictionaries and then by converting the data into pandas DataFrames.

In [1]:
# lab_black (shipped with the nb_black package) auto-formats code cells with Black;
# it is a formatting aid only and none of the cells below call into it
%load_ext lab_black

## 1. Import packages

In [3]:
# loading packages as per other notebook in repository
import json
from collections import Counter

import numpy as np
import pandas as pd
import requests
from pandas import json_normalize

## 2. Load data

In [4]:
# endpoint of the Nobel Prize API (v1) that is queried for the prize data;
# HTTPS is used so the response cannot be read or altered in transit
url = "https://api.nobelprize.org/v1/prize.json"

In [9]:
# fetch the prize data and deserialize the JSON body into a dictionary
# - timeout (seconds): without it requests waits forever on a stalled connection
# - raise_for_status(): fail loudly on HTTP 4xx/5xx instead of trying to parse
#   an error page as JSON
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()
type(data)

dict

In [11]:
# persist the downloaded data to nobel_prize.json (serialized as JSON text)
with open("nobel_prize.json", mode="w") as json_file:
    json_file.write(json.dumps(data))

In [12]:
# load data from nobel_prize.json if needed (e.g. to skip the API request)
# with open("nobel_prize.json") as in_file:
#     data = json.load(in_file)

## 3. Explore data

In [17]:
# top-level keys of the deserialized API response
data.keys()

dict_keys(['prizes'])

In [19]:
# preview the first two entries stored under the 'prizes' key
data["prizes"][:2]

[{'year': '2022',
  'category': 'chemistry',
  'laureates': [{'id': '1015',
    'firstname': 'Carolyn',
    'surname': 'Bertozzi',
    'motivation': '"for the development of click chemistry and bioorthogonal chemistry"',
    'share': '3'},
   {'id': '1016',
    'firstname': 'Morten',
    'surname': 'Meldal',
    'motivation': '"for the development of click chemistry and bioorthogonal chemistry"',
    'share': '3'},
   {'id': '743',
    'firstname': 'Barry',
    'surname': 'Sharpless',
    'motivation': '"for the development of click chemistry and bioorthogonal chemistry"',
    'share': '3'}]},
 {'year': '2022',
  'category': 'economics',
  'laureates': [{'id': '1021',
    'firstname': 'Ben',
    'surname': 'Bernanke',
    'motivation': '"for research on banks and financial crises"',
    'share': '3'},
   {'id': '1022',
    'firstname': 'Douglas',
    'surname': 'Diamond',
    'motivation': '"for research on banks and financial crises"',
    'share': '3'},
   {'id': '1023',
    'firstna

In [22]:
# keys of the first entry under 'prizes', to see which fields a prize record carries
data["prizes"][0].keys()

dict_keys(['year', 'category', 'laureates'])

## 4. Number of Nobel prizes given per category

Solution with dictionaries:

In [25]:
# list of prize records stored under the 'prizes' key
data_prizes = data["prizes"]

# pull the year and the category out of every prize record
years = [prize["year"] for prize in data_prizes]
categories = [prize["category"] for prize in data_prizes]

# show the first five values of each list
print(years[:5])
print(categories[:5])

['2022', '2022', '2022', '2022', '2022']
['chemistry', 'economics', 'literature', 'peace', 'physics']


In [33]:
# sorted unique category names; np.unique returns a NumPy array, so its
# elements are NumPy string scalars rather than plain Python str objects
unique_categories = np.unique(categories)

In [34]:
# count Nobel prizes per category
# - Counter tallies `categories` in one pass instead of rescanning the list with
#   list.count() once per category
# - map(str, ...) turns the NumPy string scalars from `unique_categories` into plain
#   Python strings, so the dict displays as {'chemistry': ...} rather than
#   {np.str_('chemistry'): ...} under NumPy 2
# - iterating `unique_categories` keeps the keys in sorted order
prize_tally = Counter(categories)
category_counts = {name: prize_tally[name] for name in map(str, unique_categories)}

category_counts

{'chemistry': 122,
 'economics': 54,
 'literature': 122,
 'medicine': 122,
 'peace': 122,
 'physics': 122}

Solution with dataframes:

In [43]:
# build a DataFrame with one row per entry in data["prizes"]; the top-level fields
# become columns, while the nested 'laureates' list is kept as a single column of
# lists (it is not expanded into one row per laureate)
df = json_normalize(data["prizes"])
df.head()

Unnamed: 0,year,category,laureates,overallMotivation
0,2022,chemistry,"[{'id': '1015', 'firstname': 'Carolyn', 'surna...",
1,2022,economics,"[{'id': '1021', 'firstname': 'Ben', 'surname':...",
2,2022,literature,"[{'id': '1017', 'firstname': 'Annie', 'surname...",
3,2022,peace,"[{'id': '1018', 'firstname': 'Ales', 'surname'...",
4,2022,physics,"[{'id': '1012', 'firstname': 'Alain', 'surname...",


In [49]:
# number of prize rows per category as a two-column table (category, count),
# ordered from the most to the least awarded category
(
    df.groupby("category")
    .size()
    .rename("count")
    .sort_values(ascending=False)
    .reset_index()
)

Unnamed: 0,category,count
0,chemistry,122
1,literature,122
2,medicine,122
3,peace,122
4,physics,122
5,economics,54
