# Various Routines to Harvest CRIM Metadata from Production Server

### Just the basics here: using the "requests" library to retrieve individual Observations and Relationships

In [2]:
import requests
import pandas as pd

# Variables
Now we can set a variable, in this case the URL of a single Observation in CRIM 

In [7]:
Obs_url = "https://crimproject.org/data/observations/2/"

And if we call for that variable, it will tell us what it is:

In [8]:
Obs_url

'https://crimproject.org/data/observations/2/'

# Requests
Now we define a new variable that holds the response to a "get request" for the URL stored in our first variable:

In [9]:
response = requests.get(Obs_url)

In [10]:
type(response)

requests.models.Response

And now the json representation of that variable:

In [12]:
Obs_json = response.json()

In [13]:
Obs_json

{'url': 'https://crimproject.org/data/observations/2/',
 'id': 2,
 'observer': {'url': 'https://crimproject.org/data/people/CRIM_Person_1012/',
  'name': 'Ian Lorenz'},
 'piece': {'url': 'https://crimproject.org/data/pieces/CRIM_Mass_0009_3/',
  'piece_id': 'CRIM_Mass_0009_3',
  'full_title': 'Missa Tota pulchra es: Credo',
  'roles': [],
  'mei_links': ['https://crimproject.org/mei/CRIM_Mass_0009_3.mei'],
  'pdf_links': ['https://crimproject.org/pdf/CRIM_Mass_0009_3.pdf']},
 'ema': '1-6/1,1,1+3,1+3,3,3/@1-4,@1-3,@1-3+@1-4,@1-3+@1-3,@1-3,@1',
 'musical_type': 'Fuga',
 'relationships_as_model': [],
 'relationships_as_derivative': [{'url': 'https://crimproject.org/data/relationships/1/',
   'id': 1,
   'observer': {'url': 'https://crimproject.org/data/people/CRIM_Person_1012/',
    'name': 'Ian Lorenz'},
   'model_observation': {'url': 'https://crimproject.org/data/observations/1/',
    'id': 1,
    'piece': {'url': 'https://crimproject.org/data/pieces/CRIM_Model_0011/',
     'piece_id':

# Json, Dictionaries, Keys and Values
Once parsed, JSON is in fact an elaborate dictionary, with items nested inside one another.

In [14]:
type(Obs_json)

dict

We can list the fixed "keys" for that JSON, which are in turn paired with "values".

In [15]:
Obs_json.keys()

dict_keys(['url', 'id', 'observer', 'piece', 'ema', 'musical_type', 'relationships_as_model', 'relationships_as_derivative', 'mt_cf', 'mt_cf_voices', 'mt_cf_dur', 'mt_cf_mel', 'mt_sog', 'mt_sog_voices', 'mt_sog_dur', 'mt_sog_mel', 'mt_sog_ostinato', 'mt_sog_periodic', 'mt_csog', 'mt_csog_voices', 'mt_csog_dur', 'mt_csog_mel', 'mt_cd', 'mt_cd_voices', 'mt_fg', 'mt_fg_voices', 'mt_fg_int', 'mt_fg_tint', 'mt_fg_periodic', 'mt_fg_strict', 'mt_fg_flexed', 'mt_fg_sequential', 'mt_fg_inverted', 'mt_fg_retrograde', 'mt_pe', 'mt_pe_voices', 'mt_pe_int', 'mt_pe_tint', 'mt_pe_strict', 'mt_pe_flexed', 'mt_pe_flt', 'mt_pe_sequential', 'mt_pe_added', 'mt_pe_invertible', 'mt_id', 'mt_id_voices', 'mt_id_int', 'mt_id_tint', 'mt_id_strict', 'mt_id_flexed', 'mt_id_flt', 'mt_id_invertible', 'mt_nid', 'mt_nid_voices', 'mt_nid_int', 'mt_nid_tint', 'mt_nid_strict', 'mt_nid_flexed', 'mt_nid_flt', 'mt_nid_sequential', 'mt_nid_invertible', 'mt_hr', 'mt_hr_voices', 'mt_hr_simple', 'mt_hr_staggered', 'mt_hr_seque

And here we are after the value of just ONE key

In [16]:
Obs_ema = Obs_json["ema"]

In [17]:
Obs_ema

'1-6/1,1,1+3,1+3,3,3/@1-4,@1-3,@1-3+@1-4,@1-3+@1-3,@1-3,@1'

It has a data type:  string

In [18]:
type(Obs_ema)

str

Now calling for various other values for other keys:

In [19]:
Obs_json["musical_type"]

'Fuga'

In [20]:
Obs_mt = Obs_json["musical_type"]

In [21]:
Obs_mt

'Fuga'

The value of the piece key is actually a dictionary within a dictionary, so it has LOTS of keys and values within it.

In [22]:
Obs_piece = Obs_json["piece"]

In [24]:
Obs_piece

{'url': 'https://crimproject.org/data/pieces/CRIM_Mass_0009_3/',
 'piece_id': 'CRIM_Mass_0009_3',
 'full_title': 'Missa Tota pulchra es: Credo',
 'roles': [],
 'mei_links': ['https://crimproject.org/mei/CRIM_Mass_0009_3.mei'],
 'pdf_links': ['https://crimproject.org/pdf/CRIM_Mass_0009_3.pdf']}

And to interact with the items there, we need to call for a key *within* that key.

In [25]:
Obs_mei = Obs_piece["mei_links"]

In [26]:
Obs_mei

['https://crimproject.org/mei/CRIM_Mass_0009_3.mei']

Various ways of calling for items according to their position.  Note:  Zero-based indexing!

In [27]:
len(Obs_mei)

1

In [28]:
Obs_mei[0]

'https://crimproject.org/mei/CRIM_Mass_0009_3.mei'

In [29]:
Obs_json["piece"]["mei_links"][0]

'https://crimproject.org/mei/CRIM_Mass_0009_3.mei'

In [31]:
Obs_json["ema"]

'1-6/1,1,1+3,1+3,3,3/@1-4,@1-3,@1-3+@1-4,@1-3+@1-3,@1-3,@1'

In [32]:
def get_ema_for_observation_id(obs_id):
    """Return the CRIM URL of the Observation identified by ``obs_id``.

    Despite its name, this first version only builds the URL string; it
    does not contact the server.

    NOTE(review): a later cell defines a function with this same name,
    which replaces this one once that cell has run.
    """
    return f"https://crimproject.org/data/observations/{obs_id}/"

In [33]:
def get_ema_for_observation_id(obs_id, timeout=30):
    """Fetch one CRIM Observation and return a flat summary of it.

    Args:
        obs_id: Identifier of the Observation. It is interpolated into
            ``https://crimproject.org/data/observations/{obs_id}/``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        A dict with the keys ``"id"``, ``"musical type"``, ``"int"``,
        ``"tint"``, ``"ema"``, ``"mei"`` and ``"pdf"``.
        ``"int"`` and ``"tint"`` are copied from the ``mt_fg_int`` and
        ``mt_fg_tint`` fields of the Observation. ``"mei"`` and ``"pdf"``
        are the first entries of the piece's ``mei_links`` and
        ``pdf_links`` lists, or ``None`` when that list is empty.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if an expected field is missing from the returned JSON.
    """
    url = f"https://crimproject.org/data/observations/{obs_id}/"
    reply = requests.get(url, timeout=timeout)
    # Fail loudly on a 4xx/5xx answer instead of trying to read an error
    # body as if it were an Observation.
    reply.raise_for_status()
    observation = reply.json()

    piece = observation["piece"]
    mei_links = piece["mei_links"]
    pdf_links = piece["pdf_links"]

    summary = {
        "id": observation["id"],
        "musical type": observation["musical_type"],
        "int": observation["mt_fg_int"],
        "tint": observation["mt_fg_tint"],
        "ema": observation["ema"],
        # A piece may carry no links at all; report None rather than
        # raising IndexError on an empty list.
        "mei": mei_links[0] if mei_links else None,
        "pdf": pdf_links[0] if pdf_links else None,
    }

    # Progress message, one line per fetched Observation.
    print(f'Got: {obs_id}')

    return summary


Now we get a _particular_ observation.

In [34]:
get_ema_for_observation_id(20)

Got: 20


{'id': 20,
 'musical type': 'Fuga',
 'int': '1+',
 'tint': 'M1',
 'ema': '7-8/2-3,2-3/@3-4.5+@2-4,@1+@1',
 'mei': 'https://crimproject.org/mei/CRIM_Mass_0015_2.mei',
 'pdf': 'https://crimproject.org/pdf/CRIM_Mass_0015_2.pdf'}

A new variable that holds the result of calling the "get_ema" routine.  Next we will pass a series of numbers to that routine.

In [35]:
output = get_ema_for_observation_id(20)

Got: 20


In [36]:
# this holds the output as a LIST of DICTS
obs_data_list = []

In [37]:
# this is the list of Observation IDs to call

obs_call_list = [1,3,5,17,21]

In [38]:
# This LOOP fetches Observations 1 through 10, one request per pass.
# To fetch only the hand-picked ids instead, loop over the call list:
# for observ in obs_call_list:

# Start from an empty list every time, so that re-running this cell
# does not append the same Observations a second time.
obs_data_list = []

for observ in range(1,11):
    call_list_output = get_ema_for_observation_id(observ)

    # append adds one item (here, one dict) to the end of the list on
    # each pass through the loop, giving us a LIST of DICTS.
    obs_data_list.append(call_list_output)
    

Got: 1
Got: 2
Got: 3
Got: 4
Got: 5
Got: 6
Got: 7
Got: 8
Got: 9
Got: 10


In [39]:
# A LIST has an append() method that adds one item to its end.
#  EX blank_list = [1,5,6]  (note that these are in square brackets as LIST)
# blank_list.append(89)
# A range is written with parentheses, as in:  range(1,11)
# A range does not store its numbers one by one (it could be HUGE), and it
# has no append(): we can ONLY append to a LIST.
# Here we make a LIST object that contains the numbers of the range, so it
# can be iterated over and appended to like any other list.

Obs_range = list(range(1,11))


Now we call up the list of observations we created above, after appending one at a time to the "[]"

In [40]:
obs_data_list

[{'id': 1,
  'musical type': 'Fuga',
  'int': '4-',
  'tint': 'B2',
  'ema': '1-6/1,1,1-2,1-2,2,2/@1,@1-3,@1-3+@1,@1-3+@1-3,@1-3,@1',
  'mei': 'https://crimproject.org/mei/CRIM_Model_0011.mei',
  'pdf': 'https://crimproject.org/pdf/CRIM_Model_0011.pdf'},
 {'id': 2,
  'musical type': 'Fuga',
  'int': '4-',
  'tint': 'B2',
  'ema': '1-6/1,1,1+3,1+3,3,3/@1-4,@1-3,@1-3+@1-4,@1-3+@1-3,@1-3,@1',
  'mei': 'https://crimproject.org/mei/CRIM_Mass_0009_3.mei',
  'pdf': 'https://crimproject.org/pdf/CRIM_Mass_0009_3.pdf'},
 {'id': 3,
  'musical type': 'Fuga',
  'int': '8-',
  'tint': 'B2',
  'ema': '5-9/1,1,1-2,2,2/@3,@1-4,@1-2+@4,@1-4,@1-2',
  'mei': 'https://crimproject.org/mei/CRIM_Model_0011.mei',
  'pdf': 'https://crimproject.org/pdf/CRIM_Model_0011.pdf'},
 {'id': 4,
  'musical type': 'Fuga',
  'int': '5-',
  'tint': 'B1',
  'ema': '5-8/1,1+4,1+4,4/@3,@1-4+@4,@1+@1-3,@1',
  'mei': 'https://crimproject.org/mei/CRIM_Mass_0009_3.mei',
  'pdf': 'https://crimproject.org/pdf/CRIM_Mass_0009_3.pdf'},


# Pandas as Data Frame or CSV

In [41]:
# Wrap the list of dicts in a pandas Series and write it out as CSV.
# NOTE(review): a later cell writes DF_output to this same file name,
# which replaces the file produced here.
pd.Series(obs_data_list).to_csv("obs_data_list.csv")

  """Entry point for launching an IPython kernel.


In [42]:
# A pandas DataFrame interprets the list of dicts as a table:
# each dict becomes one row, and each key becomes one column of 'cells'.
DF_output = pd.DataFrame(obs_data_list)

In [43]:
DF_output

Unnamed: 0,id,musical type,int,tint,ema,mei,pdf
0,1,Fuga,4-,B2,"1-6/1,1,1-2,1-2,2,2/@1,@1-3,@1-3+@1,@1-3+@1-3,...",https://crimproject.org/mei/CRIM_Model_0011.mei,https://crimproject.org/pdf/CRIM_Model_0011.pdf
1,2,Fuga,4-,B2,"1-6/1,1,1+3,1+3,3,3/@1-4,@1-3,@1-3+@1-4,@1-3+@...",https://crimproject.org/mei/CRIM_Mass_0009_3.mei,https://crimproject.org/pdf/CRIM_Mass_0009_3.pdf
2,3,Fuga,8-,B2,"5-9/1,1,1-2,2,2/@3,@1-4,@1-2+@4,@1-4,@1-2",https://crimproject.org/mei/CRIM_Model_0011.mei,https://crimproject.org/pdf/CRIM_Model_0011.pdf
3,4,Fuga,5-,B1,"5-8/1,1+4,1+4,4/@3,@1-4+@4,@1+@1-3,@1",https://crimproject.org/mei/CRIM_Mass_0009_3.mei,https://crimproject.org/pdf/CRIM_Mass_0009_3.pdf
4,5,Fuga,8-,B2,"5-9,14-18/1,1,1-2,1-2,1-2,3,3,3-4,3-4,3-4/@3,@...",https://crimproject.org/mei/CRIM_Model_0011.mei,https://crimproject.org/pdf/CRIM_Model_0011.pdf
5,6,Periodic entry,,,"9-12/3-4,2-4,2,2/@4+@1-4,@4+@1-4+@1-3,@1-3,@1",https://crimproject.org/mei/CRIM_Mass_0009_5.mei,https://crimproject.org/pdf/CRIM_Mass_0009_5.pdf
6,7,Fuga,8-,M9,"5-9/1,1,1-2,2,2/@3,@1-4,@1-2+@4,@1-4,@1-2",https://crimproject.org/mei/CRIM_Model_0011.mei,https://crimproject.org/pdf/CRIM_Model_0011.pdf
7,8,Fuga,5-,M5,"5-10/1,1+4,1+4,1+4,1+4,4/@3,@1-4+@4,@1-4.5+@1-...",https://crimproject.org/mei/CRIM_Mass_0009_3.mei,https://crimproject.org/pdf/CRIM_Mass_0009_3.pdf
8,9,Soggetto,,,"1-38/6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6...",https://crimproject.org/mei/CRIM_Model_0018_a4...,https://crimproject.org/pdf/CRIM_Model_0018.pdf
9,10,Cantus firmus,,,"1-41/5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5...",https://crimproject.org/mei/CRIM_Mass_0017_4.mei,https://crimproject.org/pdf/CRIM_Mass_0017_4.pdf


In [44]:
# Save the table as CSV. By default to_csv also writes the row index as a
# first column with an empty header.
DF_output.to_csv("obs_data_list.csv")

In [45]:
# Build a True/False mask that picks rows by their musical type.
#  two "==" means check for exact equality; tests can be combined with "&":
#    (DF_output["musical type"]=="Fuga") & (DF_output["id"]==8)
# for 'contains' use str.contains("letter")
# the pattern is read as a regex by default (handy later for EMA ranges)
# na=False counts a missing musical type as "no match", so the mask holds
# only True/False and can always be used to select rows.
Filter_by_Type = DF_output["musical type"].str.contains("Fuga", na=False)

# 

In [46]:
DF_output[Filter_by_Type]

Unnamed: 0,id,musical type,int,tint,ema,mei,pdf
0,1,Fuga,4-,B2,"1-6/1,1,1-2,1-2,2,2/@1,@1-3,@1-3+@1,@1-3+@1-3,...",https://crimproject.org/mei/CRIM_Model_0011.mei,https://crimproject.org/pdf/CRIM_Model_0011.pdf
1,2,Fuga,4-,B2,"1-6/1,1,1+3,1+3,3,3/@1-4,@1-3,@1-3+@1-4,@1-3+@...",https://crimproject.org/mei/CRIM_Mass_0009_3.mei,https://crimproject.org/pdf/CRIM_Mass_0009_3.pdf
2,3,Fuga,8-,B2,"5-9/1,1,1-2,2,2/@3,@1-4,@1-2+@4,@1-4,@1-2",https://crimproject.org/mei/CRIM_Model_0011.mei,https://crimproject.org/pdf/CRIM_Model_0011.pdf
3,4,Fuga,5-,B1,"5-8/1,1+4,1+4,4/@3,@1-4+@4,@1+@1-3,@1",https://crimproject.org/mei/CRIM_Mass_0009_3.mei,https://crimproject.org/pdf/CRIM_Mass_0009_3.pdf
4,5,Fuga,8-,B2,"5-9,14-18/1,1,1-2,1-2,1-2,3,3,3-4,3-4,3-4/@3,@...",https://crimproject.org/mei/CRIM_Model_0011.mei,https://crimproject.org/pdf/CRIM_Model_0011.pdf
6,7,Fuga,8-,M9,"5-9/1,1,1-2,2,2/@3,@1-4,@1-2+@4,@1-4,@1-2",https://crimproject.org/mei/CRIM_Model_0011.mei,https://crimproject.org/pdf/CRIM_Model_0011.pdf
7,8,Fuga,5-,M5,"5-10/1,1+4,1+4,1+4,1+4,4/@3,@1-4+@4,@1-4.5+@1-...",https://crimproject.org/mei/CRIM_Mass_0009_3.mei,https://crimproject.org/pdf/CRIM_Mass_0009_3.pdf
