## Zenodo API

In [1]:
#import libraries
import os
import requests
import pandas as pd
import pickle
import pprint as pp
from flatten_json import flatten

In [2]:
# Load the Zenodo API token from a local pickle file.
# The unpickled object is indexed with 'ZENODO_TOKEN', so it is presumably a
# dict of credentials -- TODO confirm against whatever writes credentials.pkl.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load a credentials.pkl that you created yourself.
with open('credentials.pkl', 'rb') as credentials:
        ZENODO_TOKEN = pickle.load(credentials)['ZENODO_TOKEN']

In [3]:
#General Zenodo requests format
#response = requests.get('https://zenodo.org/api/records',
#                        params={'q': 'my title',
#                                'access_token': ACCESS_TOKEN})
#print(response.json())

In [4]:
# Free-text query to search Zenodo for
SEARCH_TERM = "machine learning"

In [5]:
# Query-string parameters for the search request: the search term goes in
# 'q' and the API token in 'access_token'.
PARAMS = dict(q=SEARCH_TERM, access_token=ZENODO_TOKEN)

In [6]:
## Search published records (GET /api/records)
# requests applies no timeout by default, so without one this cell could hang
# indefinitely if the server never answers.
REQUEST_TIMEOUT_SECONDS = 30
response = requests.get('https://zenodo.org/api/records',
                        params=PARAMS,
                        timeout=REQUEST_TIMEOUT_SECONDS)
# Fail here on an HTTP error status (e.g. a rejected token or rate limiting)
# instead of letting an error payload flow into the parsing cells below.
response.raise_for_status()

In [7]:
## Decode the JSON response body into Python objects
output = response.json()

In [8]:
# Pretty-print the whole response to inspect its structure.
pp.pprint(output)

# "aggregations": summary counts of the results, bucketed by access right,
# file type, etc. Useful info -- TODO extract into a separate summary table.

# "hits": the per-record details. We want to extract these and collapse the
# nested metadata into tabular form.

# For later consideration:
# - do we want every record, or only those with access_right "open"?
# - do we want only the most recent version of each record? (probably)

{'aggregations': {'access_right': {'buckets': [{'doc_count': 34927,
                                                'key': 'open'},
                                               {'doc_count': 443,
                                                'key': 'closed'},
                                               {'doc_count': 275,
                                                'key': 'restricted'},
                                               {'doc_count': 67,
                                                'key': 'embargoed'}],
                                   'doc_count_error_upper_bound': 0,
                                   'sum_other_doc_count': 0},
                  'file_type': {'buckets': [{'doc_count': 27541, 'key': 'pdf'},
                                            {'doc_count': 3130, 'key': 'zip'},
                                            {'doc_count': 858, 'key': 'docx'},
                                            {'doc_count': 651, 'key': 'txt'},
                  

                                             'links': {'self': 'https://zenodo.org/api/grants/10.13039/501100000780::612944'},
                                             'program': 'FP7',
                                             'title': 'Learning from Massive, '
                                                      'Incompletely annotated, '
                                                      'and Structured Data'},
                                            {'acronym': 'LANDMARK',
                                             'code': '635201',
                                             'funder': {'acronyms': [],
                                                        'doi': '10.13039/501100000780',
                                                        'links': {'self': 'https://zenodo.org/api/funders/10.13039/501100000780'},
                                                        'name': 'European '
                                                                'Commissi

                    'metadata': {'access_right': 'open',
                                 'access_right_category': 'success',
                                 'communities': [{'id': 'ai_ml'},
                                                 {'id': 'covid-19'}],
                                 'creators': [{'affiliation': 'Machine '
                                                              'Learning Lab, '
                                                              'School of '
                                                              'Medicine and '
                                                              'Health Science, '
                                                              'University of '
                                                              'Oldenburg, '
                                                              'Germany',
                                               'name': 'Drefs, Jakob'},
                                              {'affi

In [None]:
#Multiple levels here - will need to decide how best to unnest

In [9]:
## Convert output to a pandas DataFrame and look at the table format
# json_normalize flattens the nested dicts into dotted column names but leaves
# lists (the aggregation buckets, hits.hits) as single cells, so the result is
# one wide row.
pd_output1 = pd.json_normalize(output)

pd_output1
# Potential next step: split the aggregations and the hits into two
# DataFrames, then keep collapsing each one into columns.

Unnamed: 0,aggregations.access_right.buckets,aggregations.access_right.doc_count_error_upper_bound,aggregations.access_right.sum_other_doc_count,aggregations.file_type.buckets,aggregations.file_type.doc_count_error_upper_bound,aggregations.file_type.sum_other_doc_count,aggregations.keywords.buckets,aggregations.keywords.doc_count_error_upper_bound,aggregations.keywords.sum_other_doc_count,aggregations.type.buckets,aggregations.type.doc_count_error_upper_bound,aggregations.type.sum_other_doc_count,hits.hits,hits.total,links.next,links.self
0,"[{'doc_count': 34927, 'key': 'open'}, {'doc_co...",0,0,"[{'doc_count': 27541, 'key': 'pdf'}, {'doc_cou...",0,3710,"[{'doc_count': 901, 'key': 'machine learning'}...",0,93629,"[{'doc_count': 27757, 'key': 'publication', 's...",0,0,"[{'conceptdoi': '10.5281/zenodo.3662112', 'con...",35712,https://zenodo.org/api/records/?sort=bestmatch...,https://zenodo.org/api/records/?sort=bestmatch...


In [None]:
#alternate flattening method

#output_flattened = [flatten(x) for x in output]
#df = pd.DataFrame(output_flattened)

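#AssertionError: flatten requires a dictionary input
#output IS a dictionary, but `for x in output` iterates over its keys (strings such as
#'aggregations' and 'hits'), so flatten() was being called on a str each time.
#flatten(output) flattens the whole response; to get one row per record, iterate over
#output['hits']['hits'] instead.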

In [None]:
# Task 1: extract just hits.hits as list

In [10]:
# Pull the 'hits.hits' column out of the normalized frame as a plain Python list
pd_output2 = pd_output1['hits.hits'].tolist()

# Intermediate results get sequentially numbered names for now so each step
# can be inspected for errors; these steps should be condensed downstream.

In [11]:
pd_output2
# This is a list inside a list, which wasn't intended: pd_output1 has a single
# row, so the 'hits.hits' column holds one cell (the list of hits) and
# converting the column to a list wraps that cell in an outer list.
# Goal: extract the contents of hits.hits as a single flat list, then unnest.

[[{'conceptdoi': '10.5281/zenodo.3662112',
   'conceptrecid': '3662112',
   'created': '2020-02-11T09:00:48.461959+00:00',
   'doi': '10.5281/zenodo.3662113',
   'files': [{'bucket': '54dc6794-9135-4217-af9e-cc1649960b90',
     'checksum': 'md5:c4da02730a59a3dea2f1cbad807ac198',
     'key': 'opium-sh/prl-v0.1.0.zip',
     'links': {'self': 'https://zenodo.org/api/files/54dc6794-9135-4217-af9e-cc1649960b90/opium-sh/prl-v0.1.0.zip'},
     'size': 54220,
     'type': 'zip'}],
   'id': 3662113,
   'links': {'badge': 'https://zenodo.org/badge/doi/10.5281/zenodo.3662113.svg',
    'bucket': 'https://zenodo.org/api/files/54dc6794-9135-4217-af9e-cc1649960b90',
    'conceptbadge': 'https://zenodo.org/badge/doi/10.5281/zenodo.3662112.svg',
    'conceptdoi': 'https://doi.org/10.5281/zenodo.3662112',
    'doi': 'https://doi.org/10.5281/zenodo.3662113',
    'html': 'https://zenodo.org/record/3662113',
    'latest': 'https://zenodo.org/api/records/3662113',
    'latest_html': 'https://zenodo.org/reco

In [12]:
# Build a DataFrame from the nested list: the single inner list becomes one
# row, with one column per search hit.
pd_output3 = pd.DataFrame(pd_output2)

In [13]:
pd_output3
#not quite what we want, but not a terrible start: each column is a search result

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,"{'conceptdoi': '10.5281/zenodo.3662112', 'conc...","{'conceptdoi': '10.5281/zenodo.4738769', 'conc...","{'conceptdoi': '10.5281/zenodo.3530883', 'conc...","{'conceptrecid': '4768051', 'created': '2021-0...","{'conceptrecid': '3715017', 'created': '2020-0...","{'conceptrecid': '3461067', 'created': '2019-0...","{'conceptrecid': '2559571', 'created': '2019-0...","{'conceptrecid': '637569', 'created': '2016-06...","{'conceptdoi': '10.5281/zenodo.591889', 'conce...","{'conceptdoi': '10.5281/zenodo.4559516', 'conc..."


In [14]:
# Transpose so that each search result becomes a row instead of a column
pd_output4 = pd_output3.T

In [15]:
pd_output4

Unnamed: 0,0
0,"{'conceptdoi': '10.5281/zenodo.3662112', 'conc..."
1,"{'conceptdoi': '10.5281/zenodo.4738769', 'conc..."
2,"{'conceptdoi': '10.5281/zenodo.3530883', 'conc..."
3,"{'conceptrecid': '4768051', 'created': '2021-0..."
4,"{'conceptrecid': '3715017', 'created': '2020-0..."
5,"{'conceptrecid': '3461067', 'created': '2019-0..."
6,"{'conceptrecid': '2559571', 'created': '2019-0..."
7,"{'conceptrecid': '637569', 'created': '2016-06..."
8,"{'conceptdoi': '10.5281/zenodo.591889', 'conce..."
9,"{'conceptdoi': '10.5281/zenodo.4559516', 'conc..."


In [16]:
# Give the single column (labelled 0 after the transpose) the name 'Main'
pd_output4 = pd_output4.rename(columns={0: 'Main'})

In [17]:
pd_output4

Unnamed: 0,Main
0,"{'conceptdoi': '10.5281/zenodo.3662112', 'conc..."
1,"{'conceptdoi': '10.5281/zenodo.4738769', 'conc..."
2,"{'conceptdoi': '10.5281/zenodo.3530883', 'conc..."
3,"{'conceptrecid': '4768051', 'created': '2021-0..."
4,"{'conceptrecid': '3715017', 'created': '2020-0..."
5,"{'conceptrecid': '3461067', 'created': '2019-0..."
6,"{'conceptrecid': '2559571', 'created': '2019-0..."
7,"{'conceptrecid': '637569', 'created': '2016-06..."
8,"{'conceptdoi': '10.5281/zenodo.591889', 'conce..."
9,"{'conceptdoi': '10.5281/zenodo.4559516', 'conc..."


In [None]:
#Task 2: expand each item in dictionary in column(s) into own column

In [18]:
# Expand each dict in the 'Main' column into its own set of columns:
# pd.Series(dict) turns the keys into labels, and apply() combines the
# resulting Series into a DataFrame (keys missing from a record show up as NaN).
pd_output5 = pd_output4['Main'].apply(pd.Series)

In [37]:
pd_output5

#yes!!

Unnamed: 0,conceptdoi,conceptrecid,created,doi,files,id,links,metadata,owners,revision,stats,updated
0,10.5281/zenodo.3662112,3662112,2020-02-11T09:00:48.461959+00:00,10.5281/zenodo.3662113,[{'bucket': '54dc6794-9135-4217-af9e-cc1649960...,3662113,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[90744],4,"{'downloads': 11.0, 'unique_downloads': 8.0, '...",2020-02-12T07:20:58.741012+00:00
1,10.5281/zenodo.4738769,4738769,2021-05-05T10:21:43.604973+00:00,10.5281/zenodo.4738770,[{'bucket': 'fdefeabc-7897-4130-9628-438795c87...,4738770,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[37667],3,"{'downloads': 2.0, 'unique_downloads': 2.0, 'u...",2021-05-05T13:48:11.586654+00:00
2,10.5281/zenodo.3530883,3530883,2019-11-06T21:34:30.631777+00:00,10.5281/zenodo.3530884,[{'bucket': '0b2a69fa-3fcf-46d6-b8e5-fbffe4118...,3530884,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[37667],3,"{'downloads': 42.0, 'unique_downloads': 36.0, ...",2020-01-25T07:26:46.366487+00:00
3,,4768051,2021-05-17T17:53:16.165204+00:00,10.1007/s10994-021-05968-x,[{'bucket': 'a43e8b77-a43a-488c-8e02-489f02047...,4768052,{'badge': 'https://zenodo.org/badge/doi/10.100...,"{'access_right': 'open', 'access_right_categor...",[71235],2,"{'downloads': 9.0, 'unique_downloads': 9.0, 'u...",2021-05-18T01:48:13.633614+00:00
4,,3715017,2020-03-18T13:39:33.176909+00:00,10.1007/s10994-018-5744-y,,3715018,{'badge': 'https://zenodo.org/badge/doi/10.100...,"{'access_right': 'closed', 'access_right_categ...",[94796],4,"{'downloads': 9.0, 'unique_downloads': 4.0, 'u...",2020-03-19T08:20:10.704817+00:00
5,,3461067,2019-09-26T00:12:55.671236+00:00,10.1007/s10994-019-05800-7,[{'bucket': '9503c5c7-366f-42f6-8400-136e06048...,3461068,{'badge': 'https://zenodo.org/badge/doi/10.100...,"{'access_right': 'open', 'access_right_categor...",[78227],4,"{'downloads': 5.0, 'unique_downloads': 3.0, 'u...",2020-01-25T07:26:48.545118+00:00
6,,2559571,2019-02-07T21:44:19.241776+00:00,10.1109/ICMLC.2016.7872990,,2559572,{'badge': 'https://zenodo.org/badge/doi/10.110...,"{'access_right': 'closed', 'access_right_categ...",[59932],3,"{'downloads': 6.0, 'unique_downloads': 2.0, 'u...",2019-02-08T12:18:35.011395+00:00
7,,637569,2016-06-29T11:14:32+00:00,10.5281/zenodo.56379,[{'bucket': 'a1be4284-1b73-43e1-873e-46e101045...,56379,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[20166],10,"{'downloads': 34.0, 'unique_downloads': 31.0, ...",2020-01-20T17:36:19.201388+00:00
8,10.5281/zenodo.591889,591889,2017-07-09T01:32:11.013300+00:00,10.5281/zenodo.824572,[{'bucket': 'dcdb6092-c0a9-4ca8-93d6-359d0d68b...,824572,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[27182],6,"{'downloads': 5.0, 'unique_downloads': 5.0, 'u...",2020-01-25T07:26:17.138411+00:00
9,10.5281/zenodo.4559516,4559516,2021-02-25T13:59:22.292039+00:00,10.5281/zenodo.4559517,[{'bucket': 'a3d35e2c-f833-4d73-a6c9-0ec34f1c4...,4559517,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[199333],4,"{'downloads': 41.0, 'unique_downloads': 24.0, ...",2021-03-02T09:53:06.245350+00:00


In [None]:
#needs a BUNCH of clean up, but this workflow will work

In [55]:
# Column names available after expanding the hits
pd_output5.columns.tolist()

['conceptdoi',
 'conceptrecid',
 'created',
 'doi',
 'files',
 'id',
 'links',
 'metadata',
 'owners',
 'revision',
 'stats',
 'updated']

In [None]:
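#columns whose cells are still nested and need to be expanded:
#files (a list of dicts, one per file - needs different handling from the plain dict columns)
#links
#metadata
#stats

#this could be a loop...for now, testing each column singly in case any errors pop up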

In [42]:
# Peek at the first few entries of the 'stats' column (each one is a dict)
print(pd_output5['stats'].head())

0    {'downloads': 11.0, 'unique_downloads': 8.0, '...
1    {'downloads': 2.0, 'unique_downloads': 2.0, 'u...
2    {'downloads': 42.0, 'unique_downloads': 36.0, ...
3    {'downloads': 9.0, 'unique_downloads': 9.0, 'u...
4    {'downloads': 9.0, 'unique_downloads': 4.0, 'u...
Name: stats, dtype: object


In [47]:
#expand 'stats' column to see what it looks like
pd_output5['stats'].apply(pd.Series)

Unnamed: 0,downloads,unique_downloads,unique_views,version_downloads,version_unique_downloads,version_unique_views,version_views,version_volume,views,volume
0,11.0,8.0,160.0,11.0,8.0,160.0,176.0,596420.0,176.0,596420.0
1,2.0,2.0,12.0,2.0,2.0,12.0,13.0,266634.0,13.0,266634.0
2,42.0,36.0,230.0,42.0,36.0,230.0,245.0,582858234.0,245.0,582858234.0
3,9.0,9.0,9.0,9.0,9.0,9.0,9.0,27002502.0,9.0,27002502.0
4,9.0,4.0,31.0,9.0,4.0,31.0,37.0,9460674.0,37.0,9460674.0
5,5.0,3.0,47.0,5.0,3.0,47.0,53.0,76766869.0,53.0,76766869.0
6,6.0,2.0,43.0,6.0,2.0,43.0,49.0,5368386.0,49.0,5368386.0
7,34.0,31.0,80.0,34.0,31.0,80.0,90.0,9252692.0,90.0,9252692.0
8,5.0,5.0,106.0,7.0,7.0,148.0,158.0,57373822.0,108.0,45867490.0
9,41.0,24.0,394.0,41.0,24.0,394.0,414.0,77792175.0,414.0,77792175.0


In [48]:
# Expand the 'stats' dicts into columns and append them to the main df.
# Building the frame from a list of dicts (instead of .apply(pd.Series)) lets a
# record with a missing/NaN 'stats' cell be treated as an empty dict, so it
# becomes a row of NaN rather than being expanded as a scalar.
stats_records = [
    stats if isinstance(stats, dict) else {}
    for stats in pd_output5['stats']
]
# Reuse pd_output5's index so join() lines the rows up one-to-one.
stats_expanded = pd.DataFrame(stats_records, index=pd_output5.index)
pd_output6 = pd_output5.join(stats_expanded)

In [49]:
pd_output6
# No explicit unique ID is needed for this join: DataFrame.join aligns rows on
# the index, and the expanded stats columns were derived from pd_output5 itself.
# Edge case to keep an eye on: records whose 'stats' cell is empty/missing.

Unnamed: 0,conceptdoi,conceptrecid,created,doi,files,id,links,metadata,owners,revision,...,downloads,unique_downloads,unique_views,version_downloads,version_unique_downloads,version_unique_views,version_views,version_volume,views,volume
0,10.5281/zenodo.3662112,3662112,2020-02-11T09:00:48.461959+00:00,10.5281/zenodo.3662113,[{'bucket': '54dc6794-9135-4217-af9e-cc1649960...,3662113,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[90744],4,...,11.0,8.0,160.0,11.0,8.0,160.0,176.0,596420.0,176.0,596420.0
1,10.5281/zenodo.4738769,4738769,2021-05-05T10:21:43.604973+00:00,10.5281/zenodo.4738770,[{'bucket': 'fdefeabc-7897-4130-9628-438795c87...,4738770,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[37667],3,...,2.0,2.0,12.0,2.0,2.0,12.0,13.0,266634.0,13.0,266634.0
2,10.5281/zenodo.3530883,3530883,2019-11-06T21:34:30.631777+00:00,10.5281/zenodo.3530884,[{'bucket': '0b2a69fa-3fcf-46d6-b8e5-fbffe4118...,3530884,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[37667],3,...,42.0,36.0,230.0,42.0,36.0,230.0,245.0,582858234.0,245.0,582858234.0
3,,4768051,2021-05-17T17:53:16.165204+00:00,10.1007/s10994-021-05968-x,[{'bucket': 'a43e8b77-a43a-488c-8e02-489f02047...,4768052,{'badge': 'https://zenodo.org/badge/doi/10.100...,"{'access_right': 'open', 'access_right_categor...",[71235],2,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,27002502.0,9.0,27002502.0
4,,3715017,2020-03-18T13:39:33.176909+00:00,10.1007/s10994-018-5744-y,,3715018,{'badge': 'https://zenodo.org/badge/doi/10.100...,"{'access_right': 'closed', 'access_right_categ...",[94796],4,...,9.0,4.0,31.0,9.0,4.0,31.0,37.0,9460674.0,37.0,9460674.0
5,,3461067,2019-09-26T00:12:55.671236+00:00,10.1007/s10994-019-05800-7,[{'bucket': '9503c5c7-366f-42f6-8400-136e06048...,3461068,{'badge': 'https://zenodo.org/badge/doi/10.100...,"{'access_right': 'open', 'access_right_categor...",[78227],4,...,5.0,3.0,47.0,5.0,3.0,47.0,53.0,76766869.0,53.0,76766869.0
6,,2559571,2019-02-07T21:44:19.241776+00:00,10.1109/ICMLC.2016.7872990,,2559572,{'badge': 'https://zenodo.org/badge/doi/10.110...,"{'access_right': 'closed', 'access_right_categ...",[59932],3,...,6.0,2.0,43.0,6.0,2.0,43.0,49.0,5368386.0,49.0,5368386.0
7,,637569,2016-06-29T11:14:32+00:00,10.5281/zenodo.56379,[{'bucket': 'a1be4284-1b73-43e1-873e-46e101045...,56379,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[20166],10,...,34.0,31.0,80.0,34.0,31.0,80.0,90.0,9252692.0,90.0,9252692.0
8,10.5281/zenodo.591889,591889,2017-07-09T01:32:11.013300+00:00,10.5281/zenodo.824572,[{'bucket': 'dcdb6092-c0a9-4ca8-93d6-359d0d68b...,824572,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[27182],6,...,5.0,5.0,106.0,7.0,7.0,148.0,158.0,57373822.0,108.0,45867490.0
9,10.5281/zenodo.4559516,4559516,2021-02-25T13:59:22.292039+00:00,10.5281/zenodo.4559517,[{'bucket': 'a3d35e2c-f833-4d73-a6c9-0ec34f1c4...,4559517,{'badge': 'https://zenodo.org/badge/doi/10.528...,"{'access_right': 'open', 'access_right_categor...",[199333],4,...,41.0,24.0,394.0,41.0,24.0,394.0,414.0,77792175.0,414.0,77792175.0


In [56]:
# Expand the 'links' column into one column per link type
links_expand = pd_output6['links'].apply(pd.Series)

links_expand
# The expanded frame repeats column names already in pd_output6: conceptdoi, doi.
# A merge on those columns does NOT work -- the expanded values are full
# https://doi.org/... URLs, so they are not identical to the bare DOIs.

Unnamed: 0,badge,bucket,conceptbadge,conceptdoi,doi,html,latest,latest_html,self,thumb250,thumbs
0,https://zenodo.org/badge/doi/10.5281/zenodo.36...,https://zenodo.org/api/files/54dc6794-9135-421...,https://zenodo.org/badge/doi/10.5281/zenodo.36...,https://doi.org/10.5281/zenodo.3662112,https://doi.org/10.5281/zenodo.3662113,https://zenodo.org/record/3662113,https://zenodo.org/api/records/3662113,https://zenodo.org/record/3662113,https://zenodo.org/api/records/3662113,,
1,https://zenodo.org/badge/doi/10.5281/zenodo.47...,https://zenodo.org/api/files/fdefeabc-7897-413...,https://zenodo.org/badge/doi/10.5281/zenodo.47...,https://doi.org/10.5281/zenodo.4738769,https://doi.org/10.5281/zenodo.4738770,https://zenodo.org/record/4738770,https://zenodo.org/api/records/4738770,https://zenodo.org/record/4738770,https://zenodo.org/api/records/4738770,,
2,https://zenodo.org/badge/doi/10.5281/zenodo.35...,https://zenodo.org/api/files/0b2a69fa-3fcf-46d...,https://zenodo.org/badge/doi/10.5281/zenodo.35...,https://doi.org/10.5281/zenodo.3530883,https://doi.org/10.5281/zenodo.3530884,https://zenodo.org/record/3530884,https://zenodo.org/api/records/3530884,https://zenodo.org/record/3530884,https://zenodo.org/api/records/3530884,,
3,https://zenodo.org/badge/doi/10.1007/s10994-02...,https://zenodo.org/api/files/a43e8b77-a43a-488...,,,https://doi.org/10.1007/s10994-021-05968-x,https://zenodo.org/record/4768052,https://zenodo.org/api/records/4768052,https://zenodo.org/record/4768052,https://zenodo.org/api/records/4768052,,
4,https://zenodo.org/badge/doi/10.1007/s10994-01...,,,,https://doi.org/10.1007/s10994-018-5744-y,https://zenodo.org/record/3715018,https://zenodo.org/api/records/3715018,https://zenodo.org/record/3715018,https://zenodo.org/api/records/3715018,,
5,https://zenodo.org/badge/doi/10.1007/s10994-01...,https://zenodo.org/api/files/9503c5c7-366f-42f...,,,https://doi.org/10.1007/s10994-019-05800-7,https://zenodo.org/record/3461068,https://zenodo.org/api/records/3461068,https://zenodo.org/record/3461068,https://zenodo.org/api/records/3461068,,
6,https://zenodo.org/badge/doi/10.1109/ICMLC.201...,,,,https://doi.org/10.1109/ICMLC.2016.7872990,https://zenodo.org/record/2559572,https://zenodo.org/api/records/2559572,https://zenodo.org/record/2559572,https://zenodo.org/api/records/2559572,,
7,https://zenodo.org/badge/doi/10.5281/zenodo.56...,https://zenodo.org/api/files/a1be4284-1b73-43e...,,,https://doi.org/10.5281/zenodo.56379,https://zenodo.org/record/56379,https://zenodo.org/api/records/56379,https://zenodo.org/record/56379,https://zenodo.org/api/records/56379,,
8,https://zenodo.org/badge/doi/10.5281/zenodo.82...,https://zenodo.org/api/files/dcdb6092-c0a9-4ca...,https://zenodo.org/badge/doi/10.5281/zenodo.59...,https://doi.org/10.5281/zenodo.591889,https://doi.org/10.5281/zenodo.824572,https://zenodo.org/record/824572,https://zenodo.org/api/records/824572,https://zenodo.org/record/824572,https://zenodo.org/api/records/824572,,
9,https://zenodo.org/badge/doi/10.5281/zenodo.45...,https://zenodo.org/api/files/a3d35e2c-f833-4d7...,https://zenodo.org/badge/doi/10.5281/zenodo.45...,https://doi.org/10.5281/zenodo.4559516,https://doi.org/10.5281/zenodo.4559517,https://zenodo.org/record/4559517,https://zenodo.org/api/records/4559517,https://zenodo.org/record/4559517,https://zenodo.org/api/records/4559517,https://zenodo.org/api/iiif/v2/a3d35e2c-f833-4...,{'10': 'https://zenodo.org/record/4559517/thum...


In [None]:
#expand 'metadata' column and add columns

In [None]:
#expand 'stats' column and add columns

## OLD NOTES

In [None]:
#COLLAPSE METHOD 1

# Collapse the JSON using flatten_json (https://github.com/amirziai/flatten)
zenodo_search = flatten(response.json())
print(zenodo_search)

# This is a FULL collapse of the response -- a good start, but not quite what
# we want.

In [None]:
type(zenodo_search)

In [None]:
#COLLAPSE METHOD 2

# Alternate flattening method using pandas.
# pd.json_normalize is the public entry point (and what the cell near the top
# of this notebook uses); the old pd.io.json.json_normalize path is deprecated.
zenodo_search_pd = pd.json_normalize(response.json())
zenodo_search_pd

# Potential for splitting out aggregations vs hits:
# could split into two dfs and then continue to collapse each into columns.

In [None]:
#COLLAPSE METHOD 3

# We want something like this (maybe?): https://github.com/amirziai/flatten

# Toy records with different, partially nested keys
dic = [
    dict(a=1, b=2, c=dict(d=3, e=4)),
    dict(a=0.5, c=dict(d=3.2)),
    dict(a=0.8, b=1.8),
]

# Flatten every record, then build one DataFrame row per record
dic_flattened = list(map(flatten, dic))

df = pd.DataFrame(dic_flattened)

In [None]:
dic

In [None]:
dic_flattened

In [None]:
df