In [5]:
import json
import pandas as pd

# User story: Getting data easly from Dyned web api
As a researcher I would like to be able to build a dataframe sample from the 1D dined database given the available studies and measurements so that I can easily perform different analysis with data analysis tools.

## Acceptance criteria
- Given a list of measurements and studies associated with this measurements.
- When calling dined method or command, 
- Then a new dataFrame with all the measurements is generated
(The api documentation could have simple images explaining the measurements....)

The package could provide a kind of ORM that allows me to build a data frame from selecting a study.

In [6]:
import dined.data_1D as data

We get a list of measures to pick, ideally in a dataframe

In [7]:
measures = json.loads(data.issue_request('GET', 'measures/').content)
measures

[{'id': 1,
  'name': 'Reach height, standing',
  'description': '',
  'measure_id': 1,
  'locale_id': 1,
  'created_at': '2020-04-15T12:19:11+00:00',
  'updated_at': '2020-04-15T12:19:11+00:00',
  'measure_group_id': 1,
  'correlation_type': 'height',
  'unit': 'mm',
  'distribution': 'normal',
  'decimals': 0,
  'video_url': None,
  'is_locked': False},
 {'id': 2,
  'name': 'Stature',
  'description': 'The vertical distance from the floor to the top of the head (vertex).',
  'measure_id': 2,
  'locale_id': 1,
  'created_at': '2020-04-15T12:19:11+00:00',
  'updated_at': '2020-04-15T12:19:11+00:00',
  'measure_group_id': 1,
  'correlation_type': 'height',
  'unit': 'mm',
  'distribution': 'normal',
  'decimals': 0,
  'video_url': 'https://www.youtube.com/watch?v=SvSEX6rFHZc',
  'is_locked': False},
 {'id': 3,
  'name': 'Eye height, standing',
  'description': 'Vertical distance between the standing surface and the inner corner of the right eye.',
  'measure_id': 3,
  'locale_id': 1,
  '

In [8]:
ms_df = pd.DataFrame(measures)[['id','name','unit']]
ms_df.head()

Unnamed: 0,id,name,unit
0,1,"Reach height, standing",mm
1,2,Stature,mm
2,3,"Eye height, standing",mm
3,4,Shoulder height,mm
4,5,"Elbow height, standing",mm


In [9]:
def get_by_kw(measures: pd.DataFrame, keyword: str) -> list:
    '''Returns a list of measure names that contain the keyword'''
    names = measures['name']
    # Gets name by keyword
    # First upper, then lower

    results = [name for name in names if keyword in name or keyword.capitalize() in name]
    if results: return results 
    else: print('No results found for this keyword')


In [10]:
## TODO: Implement this function with different keywords
# get_by_kw(measures, '')

In [11]:
# Simple ecept for the keyword
get_by_kw(ms_df, 'xx')

No results found for this keyword


In [12]:
# Works with capitalized keyword and non-capitalized keyword
stature = get_by_kw(ms_df, 'stature')[0]
stature_capitals = get_by_kw(ms_df, 'Stature')[0]
print(stature)
print(stature_capitals)

Stature
Stature


In [13]:
query = get_by_kw(ms_df, 'standing')
query

['Reach height, standing',
 'Eye height, standing',
 'Elbow height, standing',
 'Fist height, standing',
 'Palm height, standing']

In [14]:
elbow_height = get_by_kw(ms_df, 'elbow height')[0]
elbow_height

'Elbow height, standing'

Now we know which dimensions we need to get the actual data

In [15]:
# Get a dataframe of measures and their ids
# API call example data.get_measures()
# !We dont have to return a dataframe, we can just return a list of ids
def pick_measures(measures_pick: list, measures:pd.DataFrame)-> pd.DataFrame:
    '''
    Returns a dataframe of measures with the ids in the list
    '''
    # measures_data = pd.DataFrame(['id','name','unit'])
    measures_data = pd.DataFrame()
    assert isinstance(measures, pd.DataFrame) 
    assert isinstance(measures_pick, list)
    for measure_name in measures_pick:
        if measure_name in measures.values:
            row = measures.loc[measures['name'] == measure_name]
            measures_data = pd.concat([row, measures_data], axis=0)
    return measures_data

In [16]:
ms_df.loc[ms_df['name'] == 'Stature']

Unnamed: 0,id,name,unit
1,2,Stature,mm


In [17]:
picked = pick_measures([stature, elbow_height], ms_df)
picked

Unnamed: 0,id,name,unit
4,5,"Elbow height, standing",mm
1,2,Stature,mm


## Now we have our selection of measures we want to get data from
- We still need to specify the study, this is another function we will define later

In [18]:
# get caesar study
studies = json.loads(data.issue_request('GET', 'studies/').content)

In [19]:
for study in studies:
    print(study['name'], study['id'])

Dutch elderly 1
Dutch students 2
Dutch elderly 3
Dutch adults 4
Dutch children 5
Dutch adults 6
Dutch adults 7
International 8
Dutch Growth 9
Chilean children 10
Dutch students 11
Chilean workers 12
CAESAR (USA) 13
CAESAR (IT) 14
CAESAR (NL) 15
Dutch children, 0.5-7.0 years (NL) 16


## Watch this to properly do the request
https://www.youtube.com/watch?v=fmf_y8zpOgA&t=637s

In [27]:
import requests 
import re

URL = 'https://dined.io.tudelft.nl/en/auth/login?return-to=/en'

session = requests.session()

front = session.get(URL)
csrf_token = re.findall(r'<input type="hidden" name="_token" value="(.*)"', front.text)[0]

cookies = session.cookies

payload = {
    'email': 'j.c.urrallanusa@tudelft.nl',
    'password': 'munari86',
    '_token': csrf_token,
}

r = requests.post(URL + '/auth/login', data=payload, cookies=cookies)

print(r.status_code)

200


In [38]:
token = r.headers['Set-Cookie']
token

'XSRF-TOKEN=eyJpdiI6IkdiQmtGQU5nSUJHUG1kZWNUZlpFelE9PSIsInZhbHVlIjoieVJCb3IySFNPUThqVDRqdnV5WHNPcUJvUVdJTVdTZVdPeGV0TTZYNGNpRW5jck1UMHZBYlVKZFwvd3p4RDN6XC9GbWdpbnNrSzkyNkhxblE1dFNmb1ZIdz09IiwibWFjIjoiNWE2OTlhOTk2YjdkNjg0NjczNWY3NGRjNDdmZWIwZmM4ZTg3MTgzOWNkMmEzZGU5MDA1YjIzZjI2YzhjMGI3NyJ9; expires=Wed, 23-Mar-2022 18:22:09 GMT; Max-Age=7200; path=/, laravel_session=eyJpdiI6Ik1UMlcwVzlKcTIxU0xYeEoyZnZlclE9PSIsInZhbHVlIjoia3NvUEVrMTdqcmxJTTltOE9idGZubm0xOWRDOWlZYXVHVUE3dkVSVjc1RG01Z25cL0VQMlc1czJkaHprTjRIZTEzMWF0SVBvOCt3dWZ2RTlsY0xcL0phQT09IiwibWFjIjoiNzk2MzA4MmMxZjBhZmMxNmE1YmVkNjFiNTUwYzBmODVkZTU5ZTZkY2FkNWM1N2ExZDU4ZWVlNGI2Nzk0Yjk1MiJ9; expires=Wed, 23-Mar-2022 18:22:09 GMT; Max-Age=7200; path=/; httponly'

In [None]:
ceasar = data.issue_request('GET', 'measurements?study=15&measure=5')

In [None]:
## Given that I got the measurements from the Dined website, 
# I want to get the data into a format that can be used for analysis.
def get_oned_data(study_id, measure_id)-> pd.DataFrame:
    '''
    Returns a data frame with a sample of antrhopometric data
    '''
    # issue request to get data
    res = data.issue_request('GET', f'measurements?study={study_id}&measure={measure_id}').content
    res = json.loads(res)
    if res is [] or res is None:
        raise ValueError('No data returned')
    else:
        return res

In [None]:
res = get_oned_data(2,5)
res

[]