# API Functions to import/export/update eLab

Here we will provide some examples of API functionalities with case examples

## Configuration of eLab API and Google Drive API
To install google-api to be able to query the table, see https://medium.com/swlh/google-drive-api-with-python-part-i-set-up-credentials-1f729cb0372b.

Then to use it, you may be interested in that help: https://billydharmawan.medium.com/?p=e8c7b4b79f39

Note that the actions to create the credentials.json file below do not work from jupyter. Just open a python shell and copy-paste them! Once this done, the code below works.



In [1]:
import os
import json
import requests
import csv
import pandas
import numpy
from apiclient import discovery, errors
from httplib2 import Http
from oauth2client import client, file, tools
import os.path

token = format(open("credentials/tokenELAB","r").readline().strip())
url = "https://elab-dev.pasteur.fr/api/v1/"
headers1 = {'Authorization': token, 'Accept': 'application/json','Content-Type':'application/json'}
headers2 = {'Authorization': token, 'Accept': 'application/json'}
params={}


credentials_file_path = './credentials/credentials.json'
clientsecret_file_path = './credentials/client_secret.json'
#print(os.path.isfile(clientsecret_file_path ))
#print(os.path.isfile(credentials_file_path ))
SCOPE = 'https://www.googleapis.com/auth/drive'

store = file.Storage(credentials_file_path)
credentials=store.get()
if not credentials or credentials.invalid:
    flow =  client.flow_from_clientsecrets(clientsecret_file_path, SCOPE)
    print(flow.client_id)
    credentials =  tools.run_flow(flow, store)

http = credentials.authorize(Http())
drive = discovery.build('drive','v3',http=http)
sheets = discovery.build('sheets', 'v4', credentials=credentials)



## Class Definition
Here we define classes for experiments, sample types etc

### Experiments

In [2]:
r = requests.get(url + "experiments", headers = headers2,params = params)
data = r.json()
experiments = {}
for exp in data.get("data"):
    experiments[format(exp.get("name"))] = format(exp.get("experimentID"))


### Projects

In [3]:
r = requests.get(url + "projects", headers = headers2,params = params)
data = r.json()
projects = {}
for pro in data.get("data"):
    projects[format(pro.get("name"))] = format(pro.get("projectID"))

### Storage

In [4]:
r = requests.get(url + "storage", headers = headers2)
data = r.json()
storage = {}
for sto in data.get("data"):
    storage[format(sto.get("name"))] = format(sto.get("storageID"))
    if sto.get("name") == "Freezer n9":
        print(sto)
    

{'storageLayerID': 774661, 'storageType': {'storageTypeID': 2, 'groupID': 0, 'userID': 0, 'name': '-20 Freezer', 'deviceType': 'STORAGE'}, 'deviceType': 'STORAGE', 'deviceTypeID': 2, 'deviceTypeName': '-20 Freezer', 'barcode': '008000000774661', 'status': 'Available', 'storageID': 50002, 'instituteID': 631, 'groupID': 10684, 'userID': 40629, 'storageTypeID': 2, 'name': 'Freezer n9', 'department': 'The GLOBE Institute, Faculty of Health and Medical Sciences, University of Copenhagen', 'address': '', 'building': '', 'floor': '', 'room': '', 'notes': ''}


In [5]:
print(sto)
print(storage)


{'storageLayerID': 776238, 'storageType': {'storageTypeID': 4086, 'groupID': 10684, 'userID': 40629, 'name': 'Lugage', 'deviceType': 'STORAGE'}, 'deviceType': 'STORAGE', 'deviceTypeID': 4086, 'deviceTypeName': 'Lugage', 'barcode': '008000000776238', 'status': 'Available', 'storageID': 50157, 'instituteID': 631, 'groupID': 10684, 'userID': 40629, 'storageTypeID': 4086, 'name': 'With Lumila', 'department': '', 'address': '', 'building': '', 'floor': '', 'room': '', 'notes': ''}
{'Nico office': '49999', 'In Copenhagen': '50000', 'Tom Gilbert Freezer': '50001', 'Freezer n9': '50002', 'Freezer 4': '50027', 'Unknown': '50028', 'Individual': '50056', 'Site': '50057', 'Sequencing': '50099', 'Hannes Freezer': '50122', 'In Tartu': '50135', 'With Lumila': '50157'}


## Obtain classes for each all samples types 

In [6]:
r = requests.get(url + "sampleTypes", headers = headers2)
data = r.json()
#types = []
#for typ in data.get("data"):
#    types.append({format(typ.get("name")):format(typ.get("sampleTypeID"))})
types = {}
for typ in data.get("data"):
    types[format(typ.get("name"))] = format(typ.get("sampleTypeID"))

print(types)

{'Individual': '39466', 'Site': '39468', 'Skeleton Element': '39469', 'Extract': '39470', 'Indexed Library': '39494', 'Library pool': '39495', 'Non Indexed Library': '39556'}


## Obtain the list of samples for each sample type


In [7]:
registered = {}
for it in types.items():
    name = it[0]
    ID = it[1]
    #print(name + " --> " + ID)
    r = requests.get(url + "samples" , headers = headers2, params = {'sampleTypeID': ID})
    data = r.json()
    myList = {}
    for sam in data.get("data"):
        if format(sam.get("name")) in myList.keys():
            print(name + ": " + sam.get("name") + " duplicated")
            break
        myList[format(sam.get("name"))]=format(sam.get("sampleID"))
    registered[name] = myList
    
print(registered["Individual"])

{'AR0003': '9477522', 'AR0004': '9477523', 'AR0005': '9477524', 'AR0006': '9477525', 'AR0007': '9477526', 'AR0008': '9477527', 'AR0009': '9477528', 'AR0010': '9477529', 'AR0011': '9477530', 'AR0012': '9477531', 'AR0013': '9477532', 'AR0014': '9477533', 'AR0015': '9477534', 'AR0016': '9477535', 'AR0017': '9477536', 'AR0018': '9477537', 'AR0019': '9477538', 'AR0020': '9477539', 'AR0021': '9477540', 'AR0022': '9477541', 'AR0023': '9477542', 'AR0024': '9477543', 'AR0025': '9477544', 'AR0026': '9477545', 'AR0027': '9477546', 'AR0028': '9477547', 'AR0029': '9477548', 'AR0030': '9477549', 'AR0031': '9477550', 'AR0032': '9477551', 'AR0033': '9477552', 'AR0034': '9477553', 'AR0035': '9477554', 'AR0037': '9477555', 'AR0038': '9477556', 'AR0039': '9477557', 'AR0040': '9477558', 'AR0041': '9477559', 'AR0042': '9477560', 'AR0043': '9477561', 'AR0044': '9477562', 'AR0045': '9477563', 'AR0046': '9477564', 'AR0047': '9477565', 'AR0048': '9477566', 'AR0049': '9477567', 'AR0050': '9477568', 'AR0051': '9

## Upload samples to eLab

### Define a dictionnary for feature names in eLab and in our tables
One dictionary for each sample type.
Note that "parent sample" is not a pre-set feature so it does not appear.

#### Get the columns corresponding to eLab features

In [8]:
SiteDict={
    "Name":"Site",
    "Description":"None",
    "Note":"None",
    "Amount":"fixed_1",
    #"Unit":"fixed_Unit | pcs",
    "Unit":"fixed_unit",
    "Main geographic region":"Geographic Zone",
    "Country":"Country",
    "Province / Region":"Province / Region",
    "Locality":"Locality",
    "Latitude":"LatChanged",
    "Longitude":"LongChanged",
    "Site type":"Site type",
    "Pictures":"None"
}

IndDict={
    "Name":"RascovanLabID",
    "Description":"None",
    "Note":"None",
    "Amount":"fixed_1",
    #"Unit":"fixed_Unit | pcs",
    "Unit":"fixed_unit",
    "parentSampleID":"Site",
    "Archaeologist ID":"Individual ID",
    "Archaeologist group":"Archaeologists Group",
    "Site Name":"Site",
    "Date":"Date",
    "Datation method":"Datation Method",
    #"Subsistence Strategy": "Subsistence.Strategy",
    "Age":"Age",
    "Gender":"Gender",
    "Pictures":"None",
    "Linked individuals":"None"
}

SkeDict={"Name":"RascovanLabID",
         "From Individual":"RascovanLabID",
         "Description":"Observations",
         "Note":"None",
         "Amount":"fixed_1",
         #"Unit":"fixed_Unit | pcs",
         "Unit":"fixed_unit",
         "parentSampleID":"TobeextractFromRascovanLabID",
         "Archaeologist sample ID":"Sample ID",
         "Pictures Labelling":"PicturePath",
         "Bone type":"Bone Type",
         "Skeleton element":"Skeletal Element",
         "Exportation Permit Number":"Expediente",
         "Observation Labelling":"Observation Pierre / Maria",
         "Observation Drilling":"GeneralSampleComment",
         "Pictures Drilling":"DrillingPictures"
}

ExeDict={"Name":"ExtractID",
         "From Skeleton Element":"RascovanLabID",
         "Description":"None",
         "Note":"None",
         "Amount":"Weight",
         "Unit":"fixed_gram",
         "parentSampleID":"RascovanLabID",
         "Date of drilling":"Date",
         "Pictures":"None",
         "Person in charge":"fixed_Maria Lopopolo",
         "Laboratory where processed":"fixed_Hannes Schroeder",
         "Extract Type":"ExtractType",
         "Conservation":"Observation",
         "Pathology":"Pathologie",
         "Pathology description":"None",
         "Taken for extraction":"TakenForExtraction",
         "Extracted":"Extraction",
         "Extraction Comment":"extractionComment",
         "density UDG treatment (ng/uL)":"densityUDGtreated",
         "Volume UDG treatment (uL)":"volumeUDGtreated",
         "mass UDG in Tube (ng)":"massInTube"
}


### For "macro" sample types (Site, Individual, Skeleton Element)

We start from a tsv file (downloaded from there: https://docs.google.com/spreadsheets/d/1bnu9oZV5fXOaPY_KDvBEBSIIzbydPxyWdLg-A83cJnc/edit#gid=159434896 and then formatted through a Rscript...). (I will figure out how to download it automatically later), and then we register the site one by one. If a site exists in eLab, we change (patch) the values, if not we register it from scratch.

To install google-api to be able to query the table, see https://medium.com/swlh/google-drive-api-with-python-part-i-set-up-credentials-1f729cb0372b.



In [9]:
page_token = None
driveFiles=[]
while True:
    try:
        param = {}
        if page_token:
            param['pageToken'] = page_token
        files = drive.files().list(**param).execute()
        # append the files from the current result page to our list
        driveFiles.extend(files.get('files'))
        # Google Drive API shows our files in multiple pages when the number of files exceed 100
        page_token = files.get('nextPageToken')
        if not page_token:
            break
    except errors.HttpError as error:
        print(f'An error has occurred: {error}')
        break    # output the file metadata to console


# define a function to export sheet to csv
def download_sheet_to_csv(spreadsheet_id, sheet_name):
    result = sheets.spreadsheets().values().get(spreadsheetId=spreadsheet_id, range=sheet_name).execute()
    output_file = f'{sheet_name}.tsv'

    with open(output_file, 'w') as f:
        writer = csv.writer(f,delimiter="\t")
        writer.writerows(result.get('values'))

    f.close()

    print(f'Successfully downloaded {sheet_name}.tsv')        
        
        
        
for file in driveFiles:
    if file.get('name') == "Conjuntos_Muestras_aDNA":
        id=file.get('id')
        sheet='WholeDataSet'
        download_sheet_to_csv(id,sheet)
        break

Successfully downloaded WholeDataSet.tsv


Now read the table and format it!

WATCHOUT! There are many things that will need some tuning as we add entries in the google spreadsheet

In [10]:
rawTab=pandas.read_csv("WholeDataSet.tsv",delimiter="\t")
rawTab=rawTab.dropna(subset=['RascovanLabID'])
rawTab.loc[rawTab['Latitude'].isnull(),"Latitude"]=None
rawTab.loc[rawTab['Longitud'].isnull(),"Longitud"]=None
rawTab.loc[rawTab['Latitude'].isin([ "Undefined","desconocido","nan"]),"Latitude"]=None
rawTab.loc[rawTab['Longitud'].isin([ "Undefined","desconocido","nan"]),"Longitud"]=None

Change the coordinates so they are numerical (case by case here...)

In [11]:
###change coordinates
def changeCOORfunction(coord):
    stuf="º"
    stuf2="°"
    if coord is None:
        #return([None,None,None,None])
        return(None)

    coordFed=coord
    #print(coord)
    #####HERE CHANGE ALL WEIRD CHARACTERS THAT CAN COME ON THE WAY
    coord=coord.replace("39° 06 ́","39°06'")
    coord=coord.replace("63° 47 ́","63°47'") 
    ##change weird degrees characters
    coord=coord.replace(stuf2,stuf)
    ##change weird minutes characters
    coord=coord.replace("``","\"")
    coord=coord.replace("´´","\"")
    coord=coord.replace("”","\"")
    coord=coord.replace("''","\"")
    coord=coord.replace("’’","\"")
    coord=coord.replace("“","\"")
    ##change weird seconds characters
    coord=coord.replace(" ´","'")
    coord=coord.replace("`","'")
    coord=coord.replace("´","'")
    coord=coord.replace("’","'")
    ###we now that it is all South and West
    coord=coord.replace(" ","")
    coord=coord.replace("S","")
    coord=coord.replace("O","")
    coord=coord.replace("W","")
    ###change decimal character
    coord=coord.replace(",",".")

    ##read degrees
    if len(coord.split(stuf)) ==2 :
        if coord.split(stuf)[1] == "":
            if stuf not in coord:
                coord=coord + stuf + "0'"
            else:
                coord=coord+"0'"
    elif len(coord.split(stuf)) == 1 :
        if stuf not in coord:
            coord=coord + stuf + "0'"
        else:
            coord=coord+"0'"
    else:
        print("splitting minute/second " + coordFed + "-->" + coord)
            
    deg=coord.split(stuf)[0]
    ###read minutes and seconds
    tmp=coord.split(stuf)[1]
    minute=tmp.split("'")[0]
    if len(tmp.split("'")) != 2:
        print( coord.split(stuf))
        print("splitting minute/second " + coordFed + "-->" + coord)
        raise()
    else:
        if tmp.split("'")[1] == "":
            sec=0
        else:
            sec=tmp.split("'")[1]
            sec=sec.replace("\"","")

    #print([coordFed,coord,deg,minute,sec])
    ####verify all read ok!
    if numpy.isnan(float(deg)):
        print("pb numerical degree" + coordFed + "-->" +coord + " (" + deg + ")")
        raise()
    if numpy.isnan(float(minute)):
        print(minute)
        print("pb numerical minute" + coordFed + "-->" +coord + " (" + minute + ")")
        raise()
    if numpy.isnan(float(sec)):
        print(sec)
        print("pb numerical sec" + coordFed + "-->" + coord + " (" + sec + ")")
        raise()
  
    deg=float(deg)
    minute=float(minute)
    sec=float(sec)
    new=deg+minute/60+sec/3600
    #return([-new,deg,minute,sec])
    return(-new)



#print(rawTab.loc[rawTab['Latitude']==""])
rawTab['LatChanged']=rawTab['Latitude'].apply(changeCOORfunction)
rawTab['LongChanged']=rawTab['Longitud'].apply(changeCOORfunction)

print(rawTab[["LatChanged","LongChanged"]])

     LatChanged  LongChanged
0    -31.255803   -60.456464
1    -31.255803   -60.456464
2    -31.255803   -60.456464
3    -31.255803   -60.456464
4    -31.255803   -60.456464
..          ...          ...
867  -35.280000   -69.520000
868  -35.150000   -69.650000
869  -35.150000   -69.650000
870  -35.150000   -69.650000
871  -33.710000   -68.980000

[871 rows x 2 columns]



Make Bone Type variable according to Skeletal Element

In [12]:
import itertools

l1=list(range(1,9)) * 8
l2=list(itertools.chain.from_iterable(itertools.repeat(x, 8) for x in list(range(1,9))))
l3=[]
for i in list(range(0,len(l1))):
    l3.append(str(l1[i])+"."+str(l2[i]))

    
toothElements=["tooth",
               "diente",
               "molar",
               "mx",
               "42 inf. 1° derecho",
               " m ",
               " md",
               "incisive",
               "incisivo",
               "incisor",
               "canino",
               "canine",
               l3,
               "1.1 o 2.1",
               "1.5 o 2.5"]

calculusElements=["calculus"]
petrousElements=["petrous",
                 "petroso",
                 "petrozo"]

undefinedElements=["??"]
otherElements=["rib",
               "fibula",
               "matoideo",
               "metatarsal",
               "humer",
               "humerus",
               "lumbar",
               "craneo",
               "vertebra",
               "tibia",
               "radio",
               "phalanx",
               "phalange",
               "longbone",
               "femur",
               "metacarpo",
               "metatarsus",
               "tarseano",
               "hueso",
               "femur",
              "tarso"]

noHumanElements=["valva"]    
    
    
rawTab["Bone Type"]=None
for index, ele in rawTab["Skeletal Element"].items():
    lowerEle=ele.lower()
    if bool([ttt for ttt in toothElements if(lowerEle in ttt or str(ttt) in lowerEle)]):
        rawTab.at[index,"Bone Type"]="Tooth"        
    elif bool([ttt for ttt in calculusElements if(lowerEle in ttt or str(ttt) in lowerEle)]):
        rawTab.at[index,"Bone Type"]="Dental Calculus"
    elif bool([ttt for ttt in petrousElements if(lowerEle in ttt or str(ttt) in lowerEle)]):
        rawTab.at[index,"Bone Type"]="Petrous"
    elif bool([ttt for ttt in otherElements if(lowerEle in ttt or str(ttt) in lowerEle)]):
        rawTab.at[index,"Bone Type"]="Other Bone"
    elif bool([ttt for ttt in undefinedElements if(lowerEle in ttt or str(ttt) in lowerEle)]):
        rawTab.at[index,"Bone Type"]="Undefined"
    elif bool([ttt for ttt in noHumanElements if(lowerEle in ttt or str(ttt) in lowerEle)]):
        rawTab.at[index,"Bone Type"]="non Human"
    else:
        print(ele+" issue...")
        
        break
    

###check it is fine...
for i in list(set(rawTab["Bone Type"])):
    print(i)
    print(list(set(rawTab.loc[rawTab["Bone Type"]==i]["Skeletal Element"])))

if any(rawTab["Bone Type"].isnull()):
    print("HHAAAAAAA")
    print(rawTab.loc[rawTab["Bone Type"].isnull()]["Skeletal Element"])
    print(rawTab.loc[rawTab["Bone Type"].isnull()])
                              

Dental Calculus
['Dental Calculus']
Tooth
['Tooth (lower left second premolar)', 'Tooth (lower right first premolar)', 'Tooth (upper left first incisor)', 'Diente', 'M2i Md', 'Incisor', 'Tooth (lower right second incisor)', '1o premolar inferior derecho', 'I1dMx', 'Ci Mx', 'Tooth (second upper left premolar)', '2 molar superior izquierdo', 'first left superior molar', '3er molar inf izq', 'Tooth (second upper left incisor)', '3o molar superior izquierdo', '2° molar inferior izquierdo', 'I2d Md', 'PM1d Md', 'canino superior derecho', '2o molar inferior izquierdo', 'Premolar', 'Tooth', 'PM1d Mx', 'M2d Md', '2do molar inf der', 'I2 Md', 'PM2i Mx', 'Ci Md', 'Tooth (second upper right premolar, in pieces)', 'I1d Mx', '1 molar superior derecho', 'Tooth (upper left canine)', 'Tooth fragment', 'Tooth (lower left first premolar)', 'M3i Mx', 'Tooth (upper left second molar )', '3 molar inferior izquierdo', '3o molar superior derecho', 'molar', 'inferior molar', '1 molar inferior izquierdo', 'I1i

In [13]:
###back up
table=rawTab


#### Prepare json for uploading and updating Sites 

Get features for Site 

In [14]:
r = requests.get(url + "sampleTypes/" + types["Site"] + "/meta", headers = headers2)
data = r.json()
FeateLabSites = {}
for feat in ['Name','Description','Note','Amount','Unit']:
    FeateLabSites[feat] = {"ID": "notMeta"}
for feat in data.get("data"):
    FeateLabSites[format(feat.get("key"))] = { "ID":format(feat.get("sampleTypeMetaID")),
                                              "TYPE":format(feat.get("sampleDataType"))}
#print(FeateLabSites)

And check that they have been declared SiteDict

In [15]:
for feat in FeateLabSites.keys():
    if feat not in SiteDict.keys():
        print(feat + "--> NOT IN DICTIONARY")
        
for feat in SiteDict.keys():
    if feat not in FeateLabSites.keys():
        print(feat + "--> NOT IN eLAB")

Now, we make a table with unique entries for the relevant Columns for Sites

In [16]:
tableSite=pandas.DataFrame()
for col in list(SiteDict.keys()):
#        if SiteDict[col] == "None":
        if SiteDict[col] == "None" or SiteDict[col].startswith("fixed"):
            continue
#        elif SiteDict[col].startswith("fixed"):
#            tableSite[col]=SiteDict[col].split("_")[1]
        else:
            tableSite[col]=table[ SiteDict[col]]
tableSite=tableSite.drop_duplicates()
print(tableSite)

                           Name   Main geographic region    Country  \
0            Isla Barranquita I  Paraná medio / Santa Fe  Argentina   
6            Isla Cementerio R3  Paraná medio / Santa Fe  Argentina   
8              Puesto Rolancito  Paraná medio / Santa Fe  Argentina   
16                          NaN             Buenos Aires  Argentina   
18     Tamberías de Bella Vista                 San Juan  Argentina   
..                          ...                      ...        ...   
836                Finca Flores         Southern Mendoza  Argentina   
857  Camping Familiar Cristiano         Southern Mendoza  Argentina   
861                   La Cabeza         Southern Mendoza  Argentina   
863        El Perdido-El Mallín         Southern Mendoza  Argentina   
867              Puesto El Alto         Southern Mendoza  Argentina   

    Province / Region       Locality   Latitude  Longitude Site type  
0            Santa Fe  Arroyo Aguiar -31.255803 -60.456464       NaN  
6    

Now, remove entries for which no Site is reported

In [17]:
print(table[ table[SiteDict['Name']].isnull()][[IndDict['Name'],IndDict['Archaeologist ID']]])
tableSite=tableSite.drop(tableSite[tableSite['Name'].isnull()].index)

    RascovanLabID  Individual ID
16       AR0015.1            435
17       AR0015.2            435
135      AR0120.1           5616
156      AR0141.1           7766
157      AR0142.1  claromecó-S/N
159      AR0144.1           Alum


Now, we check if there no Site name duplicates in that table (dropping Latitude and longitude because sometimes we have the exact location for each sample, in that case we will make a rough average location of the site).

In [18]:
#print(tableSite.drop(['Latitude','Longitude'],axis=1))
duplicatedSites=tableSite.drop(['Latitude','Longitude'],axis=1)
duplicatedSites=duplicatedSites.drop_duplicates()
duplicatedSites=duplicatedSites[duplicatedSites['Name'].duplicated(keep=False)]
if len(duplicatedSites.index) > 0 :
    print("DUPLICATED SITES... GO BACK TO THE TABLE AND FIX THOSE")
    print(duplicatedSites.sort_values('Name'))

Now, we average Latitude and Longitude for each Site and check no further duplicated Site appears

In [19]:
for si in list(tableSite['Name']):
    if(len(tableSite.loc[tableSite['Name']==si,].index))>1:
        for coor in ['Latitude','Longitude']:
            tableSite.loc[tableSite['Name']==si,[coor]]=tableSite.loc[tableSite['Name']==si][coor].mean()
            
tableSite=tableSite.drop_duplicates()
duplicatedSites=tableSite[tableSite['Name'].duplicated(keep=False)]
if len(duplicatedSites.index) > 0 :
    print("DUPLICATED SITES... GO BACK TO THE TABLE AND FIX THOSE")
    duplicatedSites.sort_values('Name')

Now match the number of digits handled by eLab

In [20]:
tableSite['Latitude']=tableSite['Latitude'].round(12)
tableSite['Longitude']=tableSite['Longitude'].round(12)



We get all the possible values for checkboxes and dropdown features of Sites and check our Site table is fine.
When an entry is null in google spreadsheet, we change it to a NA string

In [21]:
r = requests.get(url + "sampleTypes/" + types["Site"] + "/meta", headers = headers2)
data = r.json()
for feat in data.get("data"):
    if feat.get("sampleDataType") == "CHECKBOX" or feat.get("sampleDataType") == "COMBO":
        OptionELAB=feat.get("optionValues")
        key=feat.get("key")
        tableSite.loc[tableSite[key].isnull(),key] = 'NA'
        for tabVal in tableSite[key].unique():
            if tabVal not in OptionELAB:
                print(tabVal + " not mapped in eLab for " + key)



Now, we make the json for each Site and we upload or update in eLab!

In [22]:
###iterate over Sites
for index,name in tableSite['Name'].items():     
    Data={}
    for fea in FeateLabSites.keys():
        if FeateLabSites[fea]['ID'] == "notMeta":
            ###fixed value (from dico)
            if SiteDict[fea].startswith("fixed"):
                    element=SiteDict[fea].split("_")[1]
            elif SiteDict[fea]=="None":
                    element="Nothing entered"
            else:
                element=tableSite[fea][index]
            Data[fea]=element
    ###case of updating
    if name in registered['Site'].keys():
        #print(name + " updating")
        patch=True
        id=registered['Site'][name]     

        Data["Note"]="Updated from API"
        DR=requests.patch(url + "samples/"+id, headers = headers2,data = Data)
    else:
        ###case of uploading
        #print(name + " uploading")
        patch=False
        Data["Note"]="Uploaded from API"
        Data["sampleTypeID"]=types["Site"]
        Data["Name"]=name
        DR=requests.post(url + "samples/", headers = headers2,data = Data)             
    ####check the Data loading was correct
    if DR.status_code not in [200,204]:
        print("error for " + name)
        print(DR.status_code)
        print(DR.raise_for_status())
    ###actualize the registered["Site"] list (checking we did not duplicated anything here)
    r=requests.get(url + "samples/forNames?names="+name.replace(" ","%20"), headers = headers2)
    data=r.json()
    

    sam=data.get("data")
    if len(sam)!=1:
        print("different Site entries (" + str(len(sam)) + ") for name "+name)
        break
    else:
        sam=sam[0]
        id=str(sam.get("sampleID"))
        #print("Data OK for "+ name + " (" + id + ")")
        registered["Site"][name]=id

    print("data already loaded")
    print(data)
    ###patch the metaData
    ###get loaded values
    if patch:
        #print("patching meta so need to heck if differences")
        MDR=requests.get(url + "samples/"+id+"/meta", headers = headers2)
        if MDR.status_code!=200:
            print("error querrying meta for " + name)
            break
        data=MDR.json().get("data")
        metaLoaded={}
        for i in data:
            metaLoaded[i["key"]]=str(i["value"])
    print("metadata already loaded")
    print(metaLoaded)
    for fea in FeateLabSites.keys():
        needToPatch=False
        ###get new element to be loaded
        if FeateLabSites[fea]['ID'] != "notMeta" and FeateLabSites[fea]['TYPE'] != "FILE":
            ###fixed value (from dico)
            if SiteDict[fea].startswith("fixed"):
                element=SiteDict[fea].split("_")[1]
            elif SiteDict[fea]=="None":
                element="Nothing entered"
            else:
                element=tableSite[fea][index]
            
            ###check if this is a new entry or not
            if patch:
                ###check if new element is similar to what already loaded
                if metaLoaded[fea] != str(element):
                    print("difference for " + name + "(feature: " + fea + ") " + format(element) + " vs loaded : " + format(metaLoaded[fea]))
                    prompt="?"
                    #prompt="y"
                    while prompt not in ["y","n"]:
                        prompt = input("replace y/n??")
                    if prompt == "y":
                        needToPatch=True
            else:
                needToPatch=True

            ###if difference ==> we load
            if needToPatch:
                MetaData={"key": fea,
                          "sampleTypeMetaID": int(FeateLabSites[fea]['ID']),
                          "value": element,
                          "sampleDataType": FeateLabSites[fea]['TYPE']}
                print(MetaData)
                MDR=requests.put(url + "samples/"+id+"/meta", headers = headers2,data = MetaData)
                ####check the MetaData loading was correct
                if MDR.status_code not in [200,204]:
                    print("error for " + name + " for feature " + fea)
                    print(MDR.status_code)
                    print(MDR.raise_for_status())
                    break
    #print("metadata OK for "+ name + " (" + id + ")")
    
    Quant={}
    for fea in ['Amount','Unit']:
        if SiteDict[fea].startswith("fixed"):
            element=SiteDict[fea].split("_")[1]
        elif SiteDict[fea]=="None":
            element="Nothing entered"
        else:
            element=tableSite[fea][index]
        Quant[fea]=element
    Quant["displayUnit"]=Quant["Unit"].capitalize()
    Quant["fullAmount"]=Quant["Amount"]
    QR=requests.put(url + "samples/" + id + "/quantity", headers = headers2, data = Quant)
    if QR.status_code not in [200,204]:
        print("error for " + name + " for quantity")
        print(QR.status_code)
        print(QR.raise_for_status())
    #else:
        #print("quantity OK for "+ name + " (" + id + ")")
    break


data already loaded
{'recordCount': 1, 'currentPage': 0, 'maxRecords': 1000, 'totalRecords': 1, 'data': [{'owner': 'Pierre LUISI', 'archived': False, 'sampleID': 9482459, 'created': '2021-06-03T13:03:42Z', 'userID': 40629, 'creatorID': 40629, 'storageLayerID': 774999, 'position': 0, 'barcode': '005000009482459', 'sampleType': {'sampleTypeID': 39468, 'userID': 40629, 'groupID': 10684, 'name': 'Site', 'backgroundColor': '900', 'foregroundColor': 'FFF'}, 'sampleTypeID': 39468, 'checkedOut': False, 'parentSampleID': 0, 'name': 'Isla Barranquita I', 'description': 'Nothing entered', 'note': 'Updated from API'}]}
metadata already loaded
{'Pictures': '', 'Main geographic region': 'Paraná medio / Santa Fe', 'Country': 'Argentina', 'Province / Region': 'Santa Fe', 'Locality': 'Arroyo Aguiar', 'Latitude': '-31.255802777778', 'Longitude': '-60.456463888889', 'Site type': 'nan'}


In [23]:
def BadRequest(myReq,code=200):
    return(myReq.status_code !=code)


id=registered["Individual"]["AR0001"]
getReq=requests.get(url + "samples/"+id, headers = headers2)
if BadRequest(getReq,200):
        print("error")
        print(getReq.status_code)
        print(getReq.raise_for_status())


print(getReq.json())


{'owner': 'Pierre LUISI', 'archived': False, 'sampleID': 9507924, 'created': '2021-07-21T12:11:14Z', 'userID': 40629, 'creatorID': 40629, 'storageLayerID': 774998, 'position': 0, 'barcode': '005000009507924', 'sampleType': {'sampleTypeID': 39466, 'userID': 40629, 'groupID': 10684, 'name': 'Individual', 'backgroundColor': 'F00', 'foregroundColor': 'FFF'}, 'sampleTypeID': 39466, 'checkedOut': False, 'parentSampleID': 9482459, 'name': 'AR0001', 'description': 'Nothing entered', 'note': 'Updated from API'}


In [24]:

Data={'description': getReq.json()["description"]+"\n TEST HOHEHEINBON"}
patchReq=requests.patch(url + "samples/"+id, headers = headers2,data=Data)
if BadRequest(patchReq,204):
        print("error")
        print(patchReq.status_code)
        print(patchReq.raise_for_status())

#### Prepare json for uploading and updating Individuals
Get features for Individual 

In [25]:
r = requests.get(url + "sampleTypes/" + types["Individual"] + "/meta", headers = headers2)
data = r.json()
FeateLabInds = {}
for feat in ['Name','Description','Note','Amount','Unit',"parentSampleID"]:
    FeateLabInds[feat] = {"ID": "notMeta"}
for feat in data.get("data"):
    FeateLabInds[format(feat.get("key"))] = { "ID":format(feat.get("sampleTypeMetaID")),
                                              "TYPE":format(feat.get("sampleDataType"))}


And check that they have been declared IndDict

In [26]:
for feat in FeateLabInds.keys():
    if feat not in IndDict.keys():
        print(feat + "--> NOT IN DICTIONARY")
        
for feat in IndDict.keys():
    if feat not in FeateLabInds.keys():
        print(feat + "--> NOT IN eLAB")

Now we make a table of unique entries for relevant columns for individuals

In [27]:
tableInd=pandas.DataFrame()
for col in list(IndDict.keys()):
#        if SiteDict[col] == "None":
        if IndDict[col] == "None" or IndDict[col].startswith("fixed"):
            continue
        else:
            tableInd[col]=table[ IndDict[col]]
#print(tableInd)
duplicatedInd=tableInd[tableInd['Name'].duplicated(keep=False)]
if len(duplicatedInd.index) > 0 :
    print("DUPLICATED Inds... GO BACK TO THE TABLE AND FIX THOSE")
    print(duplicatedInd.sort_values('Name'))
    
tableInd['Name']=tableInd['Name'].str.split(".",expand=True)[0]
tableInd=tableInd.drop_duplicates()

print(tableInd)


       Name      parentSampleID            Archaeologist ID  \
0    AR0001  Isla Barranquita I                         252   
1    AR0002  Isla Barranquita I                         253   
2    AR0003  Isla Barranquita I                         255   
3    AR0004  Isla Barranquita I                         256   
4    AR0005  Isla Barranquita I                         154   
..      ...                 ...                         ...   
866  AR0692    Villa 25 de Mayo                       V25-1   
867  AR0693      Puesto El Alto              Puesto El Alto   
868  AR0694         Ojo de Agua                       OA1-9   
870  AR0695         Ojo de Agua                     OA1-4-3   
871  AR0696          Capiz Alto  Capiz Alto 1 (prospección)   

                       Archaeologist group           Site Name  \
0                         Mariano del Papa  Isla Barranquita I   
1                         Mariano del Papa  Isla Barranquita I   
2                         Mariano del Papa  I

Change gender To Male / Female / NA

In [28]:
Male={"M",
     "male",
     "Male",
     "Male (Det.)",
     "Masculino",
     "masculino",
     "Male (Estimated)",
     "Male (Est.)"}
Female={"F",
        "female",
        "Female",
        "Female (Det.)",
        "femenino",
        "Femenino",
        "Female (Estimated)",
        "Female (Est.)"}
NA={"I","F?","M?","Unknown","ND","IN","-","Female?","NA","Undefined","indet","Indeterminado","?","No determinado"}

tableInd.loc[tableInd["Gender"].isnull(),"Gender"]='NA'
for index, ele in tableInd["Gender"].items():
    if ele in Male:
        tableInd.at[index,"Gender"]="Male"
    elif ele in Female:
        tableInd.at[index,"Gender"]="Female"
    elif ele in NA:
        tableInd.at[index,"Gender"]="NA"
    else:
        print(ele + " not defined")


We get all the possible values for checkboxes and dropdown features of Individuals and check our Individual table is fine. When an entry is null in google spreadsheet, we change it to a NA string.


In [29]:
r = requests.get(url + "sampleTypes/" + types["Individual"] + "/meta", headers = headers2)
data = r.json()
for feat in data.get("data"):
    if feat.get("sampleDataType") == "CHECKBOX" or feat.get("sampleDataType") == "COMBO":
        OptionELAB=feat.get("optionValues")
        key=feat.get("key")
        tableInd.loc[tableInd[key].isnull(),key]='NA'
        for tabVal in tableInd[key].unique():
            if tabVal not in OptionELAB:
                print("--" + tabVal + "-- not mapped in eLab for " + key)



Check if duplicated entries (meaning that some fields are inconsistent across different lines for same individual)

In [30]:
duplicatedInd=tableInd.drop_duplicates()
duplicatedInd=duplicatedInd[duplicatedInd['Name'].duplicated(keep=False)]
if len(duplicatedInd.index) > 0 :
    print("DUPLICATED Individuals... GO BACK TO THE TABLE AND FIX THOSE")
    print(duplicatedInd.sort_values('Name'))

Now, we make the json for each Individual and we upload or update in eLab!

In [32]:
###iterate over Individuals
for index,name in tableInd['Name'].items():
    ####load the Data!
    Data={}
    for fea in FeateLabInds.keys():
        if FeateLabInds[fea]['ID'] == "notMeta":
            ###fixed value (from dico)
            if IndDict[fea].startswith("fixed"):
                element=IndDict[fea].split("_")[1]
            elif IndDict[fea]=="None":
                element="Nothing entered"
            elif fea == "parentSampleID":
                #element=tableInd[fea][index]+"|"+registered[IndDict[fea]][tableInd[fea][index]]
                if format(tableInd[fea][index])=="nan":
                    element=0
                else:
                    element=registered[IndDict[fea]][tableInd[fea][index]]
            else:
                element=tableInd[fea][index]
            Data[fea]=element
    ###case of updating
    if name in registered['Individual'].keys():
        #print(name + "updating")
        patch=True
        id=registered['Individual'][name]
        Data["Note"]="Updated from API"
        ###QUERY CAMPO
        ##SI CAMPO ELAB <> CAMPO TABLA:
        ##        proimpt: update???
            
        DR=requests.patch(url + "samples/"+id, headers = headers2,data = Data)
    else:
        ###case of uploading
        #print(name + "uploading")
        patch=False
        Data["Note"]="Uploaded from API"
        Data["sampleTypeID"]=types["Individual"]
        Data["Name"]=name
        DR=requests.post(url + "samples/", headers = headers2,data = Data)             
    ####check the Data loading was correct
    if DR.status_code not in [200,204]:
        print("error for " + name)
        print(DR.status_code)
        print(DR.raise_for_status())
    ###actualize the registered["Site"] list (checking we did not duplicated anything here)
    r=requests.get(url + "samples/forNames?names="+name, headers = headers2)
    data=r.json()
    sam=data.get("data")
    if len(sam)!=1:
        print("different Individual entries (" + str(len(sam)) + ") for name "+name)
        break
    else:
        sam=sam[0]
        id=str(sam.get("sampleID"))
        #print("Data OK for "+ name + " (" + id + ")")
        registered["Individual"][name]=id

    ###patch the metaData
    if patch:
        #print("patching meta so need to heck if differences for "+name)
        MDR=requests.get(url + "samples/"+id+"/meta", headers = headers2)
        if MDR.status_code!=200:
            print("error querrying meta for " + name)
            break
        data=MDR.json().get("data")
        metaLoaded={}
        for i in data:
            metaLoaded[i["key"]]=str(i["value"])

    for fea in FeateLabInds.keys():
        needToPatch=False
        ###get new element to be loaded
        if FeateLabInds[fea]['ID'] != "notMeta" and FeateLabInds[fea]['TYPE'] != "FILE":
            ###fixed value (from dico)
            if IndDict[fea].startswith("fixed"):
                element=IndDict[fea].split("_")[1]
                MetaData={"key": fea,
                          "sampleTypeMetaID": int(FeateLabInds[fea]['ID']),
                          "value": element,
                          "sampleDataType": FeateLabInds[fea]['TYPE']}
            elif IndDict[fea]=="None":
                element="Nothing entered"
                MetaData={"key": fea,
                          "sampleTypeMetaID": int(FeateLabInds[fea]['ID']),
                          "value": element,
                          "sampleDataType": FeateLabInds[fea]['TYPE']}
            elif FeateLabInds[fea]['TYPE'] == "SAMPLELINK" and format(tableInd[fea][index])!="nan":
                samples=[]
                splitted=tableInd[fea][index].split(",")
                splitted=list(dict.fromkeys(splitted))
                for sisi in splitted:
                    IDsisi=registered[IndDict[fea]][sisi]
                    samples.append({"sampleID": IDsisi,"name": sisi})
                    if sisi != splitted[0]:
                        element=element+","+sisi+"|"+IDsisi
                    else:
                        element=sisi+"|"+IDsisi
                MetaData={
                    "sampleTypeMetaID": int(FeateLabInds[fea]['ID']),
                    "sampleDataType": FeateLabInds[fea]['TYPE'],
                    "samples": samples,
                    "key": fea,
                    "value": element
                }
            else:
                element=tableInd[fea][index]
                if format(element)=="nan" :
                    element="NA"
                MetaData={"key": fea,
                          "sampleTypeMetaID": int(FeateLabInds[fea]['ID']),
                          "value": element,
                          "sampleDataType": FeateLabInds[fea]['TYPE']}
            
            ###check if this is a new entry or not
            if patch:
                ###check if new element is similar to what already loaded
                if metaLoaded[fea] != str(element):
                    print("difference for " + name + "(feature: " + fea + ") " + element + " vs loaded : " + metaLoaded[fea])
                    #prompt="y"
                    prompt="?"
                    while prompt not in ["y","n"]:
                        prompt = input("replace y/n??")
                    if prompt == "y":
                        needToPatch=True
            else:
                needToPatch=True
    
            if needToPatch:
                #print(MetaData)      
                MDR=requests.put(url + "samples/"+id+"/meta", headers = headers2,data = MetaData)
                ####check the MetaData loading was correct
                if MDR.status_code not in [200,204]:
                    print("error for " + name + " for feature " + fea)
                    print(MDR.status_code)
                    print(MDR.raise_for_status())
                    break
    #print("metadata OK for "+ name + " (" + id + ")")
    ###patch the quantity
    Quant={}
    for fea in ['Amount','Unit']:
        if IndDict[fea].startswith("fixed"):
            element=IndDict[fea].split("_")[1]
        elif IndDict[fea]=="None":
            element="Nothing entered"
        else:
            element=tableInd[fea][index]
        Quant[fea]=element
    Quant["displayUnit"]=Quant["Unit"].capitalize()
    Quant["fullAmount"]=Quant["Amount"]
    QR=requests.put(url + "samples/" + id + "/quantity", headers = headers2, data = Quant)
    if QR.status_code not in [200,204]:
        print("error for " + name + " for quantity")
        print(QR.status_code)
        print(QR.raise_for_status())
    #else:
    #    print("quantity OK for "+ name + " (" + id + ")")

print("finished") 

difference for AR0001(feature: Date) 1200 vs loaded : 1200 BP
replace y/n??n
finished


#### Prepare json for uploading and updating Skeleton elements
Get features for Skeleton elements

In [33]:
r = requests.get(url + "sampleTypes/" + types["Skeleton Element"] + "/meta", headers = headers2)
data = r.json()
FeateLabSkel = {}
for feat in ['Name','Description','Note','Amount','Unit',"parentSampleID"]:
    FeateLabSkel[feat] = {"ID": "notMeta"}
for feat in data.get("data"):
    FeateLabSkel[format(feat.get("key"))] = { "ID":format(feat.get("sampleTypeMetaID")),
                                              "TYPE":format(feat.get("sampleDataType"))}

And check that they have been declared SkeDict

In [34]:
for feat in FeateLabSkel.keys():
    if feat not in SkeDict.keys():
        print(feat + "--> NOT IN DICTIONARY")
        
for feat in SkeDict.keys():
    if feat not in FeateLabSkel.keys():
        print(feat + "--> NOT IN eLAB")

We get all the possible values for checkboxes and dropdown features of Skeleton Elements and check our Skeleton Elements table is fine. When an entry is null in google spreadsheet, we change it to a NA string.



In [35]:
r = requests.get(url + "sampleTypes/" + types["Skeleton Element"] + "/meta", headers = headers2)
data = r.json()
for feat in data.get("data"):
    if feat.get("sampleDataType") == "CHECKBOX" or feat.get("sampleDataType") == "COMBO":
        OptionELAB=feat.get("optionValues")
        key=feat.get("key")
        #table.loc[tableInd[key].isnull(),key]='NA'
        for tabVal in table[SkeDict[key]].unique():
            if tabVal not in OptionELAB:
                print("--" + tabVal + "-- not mapped in eLab for " + key)




Download extract file from metapaleo

In [36]:
import os
import paramiko 

user = open("credentials/sftpUser","r").readline().strip().split("\t")
psw=user[1]
user=user[0]

ssh = paramiko.SSHClient()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

ssh.connect('sftpcampus.pasteur.fr', username=user, password=psw)
sftp = ssh.open_sftp()
localpath = './DDBB_extracts.csv'
remotepath = '/pasteur/entites/metapaleo/Research/ERC-project/Samples/LabellingExtracts/DDBB_extracts.csv'
sftp.get(remotepath,localpath)
sftp.close()
ssh.close()

extractTable=pandas.read_csv("DDBB_extracts.csv",delimiter=";")
extractTable["RascovanLabID"]=None
for index,name in extractTable["ExtractID"].items():
    extractTable.loc[index,"RascovanLabID"]=".".join(name.split(".")[0:2])



and compile it to the table for "Pictures" and "General Sample Comment"

In [37]:
skelTablefromEx=pandas.DataFrame()
for key in ["RascovanLabID","Pictures","GeneralSampleComment"]:
    skelTablefromEx[key]=extractTable[key]

    
skelTablefromEx=skelTablefromEx.drop_duplicates()
###duplicated (UNEXPECTED!)
dupSkefromEx=skelTablefromEx.loc[skelTablefromEx["RascovanLabID"].duplicated(keep=False)]
if len(dupSkefromEx):
    print("DUPLICATED rascoIDs in extract file")
    print(dupSkefromEx)

    
table["DrillingPictures"]=None
table["GeneralSampleComment"]=None

for index,rascoID in table["RascovanLabID"].items():
    #print(str(index)+" "+rascoID)
    pic="NA"
    com="NA"
    if rascoID in list(skelTablefromEx["RascovanLabID"]):
        if list(skelTablefromEx.loc[skelTablefromEx["RascovanLabID"]==rascoID,"Pictures"])[0] == "T":
            pic="/pasteur/entites/metapaleo/Research/ERC-project/Samples/pictures/Drilling/"+rascoID
        com=list(skelTablefromEx.loc[skelTablefromEx["RascovanLabID"]==rascoID,"GeneralSampleComment"])[0]
    #print(rascoID+" "+com+" "+pic)
    table.loc[index,"DrillingPictures"]=pic
    table.loc[index,"GeneralSampleComment"]=com

        

Check if duplicated entries (meaning that some fields are inconsistent across different lines for same skeleton element)
Check if no duplicated skeleton element nor archaeologist ID for skel element

In [38]:
duplicatedAR=table[table[SkeDict['Name']].duplicated(keep=False)]
if len(duplicatedAR.index):
    print("DUPLICATED rascoIDs")
    print(duplicatedAR)

duplicatedSam=table.loc[table[SkeDict['Archaeologist sample ID']].duplicated(keep=False)]
if len(duplicatedSam.index):
    print("DUPLICATED archeologist ID")
    print(duplicatedSam[[SkeDict[x] for x in ["Name","Archaeologist sample ID"]]])


Now, we make the json for each Skeleton element and we upload or update in eLab!

In [39]:
table.loc[table[SkeDict['Name']]=="AR0036.1"]

Unnamed: 0,PicturePath,1st Batch,Second Teeth batch,Expediente,Geographic Zone,Archaeologists Group,Sample ID,Individual ID,RascovanLabID,Skeletal Element,...,Pathologies (observed or suspected),Isotopes done,Isotopes needed,Observations,Observation Pierre / Maria,LatChanged,LongChanged,Bone Type,DrillingPictures,GeneralSampleComment
47,/pasteur/entites/metapaleo/Research/ERC-projec...,,,Tucuman_3aTanda,Piedemonte central de Tucumán,Gabriel Eduardo Miguez,Sitio Anta YACU 2-I1-1,Sitio Anta YACU 2-I1,AR0036.1,Tooth fragment,...,Sin identificar debido al mal estado de preser...,NO,Es de ALTA IMPORTANCIA (***) realizar análisis...,Individuo identificado en vasija-urna nº 2 de ...,sent 7 fragments. received very very fragmente...,-26.777472,-65.319472,Tooth,,


In [40]:
###iterate over Skeleton Elements
for index,name in table[SkeDict['Name']].items():
    ####load the Data!
    Data={}
    for fea in FeateLabSkel.keys():
        if FeateLabSkel[fea]['ID'] == "notMeta":
            ###fixed value (from dico)
            if SkeDict[fea].startswith("fixed"):
                element=SkeDict[fea].split("_")[1]
            elif SkeDict[fea]=="None":
                element="Nothing entered"
            elif fea == "parentSampleID":
                element=name.split(".")[0]
                if element not in registered["Individual"]:
                    print("can't not set "+element+" as parent sample")
                    break
                element=registered["Individual"][element]
            else:
                element=table[SkeDict[fea]][index]
            Data[fea]=element
    ###case of updating
    if name in registered['Skeleton Element'].keys():
        #print(name + "updating")
        patch=True
        id=registered['Skeleton Element'][name]
        Data["Note"]="Updated from API"
        ###QUERY CAMPO
        ##SI CAMPO ELAB <> CAMPO TABLA:
        ##        proimpt: update???
            
        DR=requests.patch(url + "samples/"+id, headers = headers2,data = Data)
    else:
        ###case of uploading
        #print(name + "uploading")
        patch=False
        Data["Note"]="Uploaded from API"
        Data["sampleTypeID"]=types["Skeleton Element"]
        Data["Name"]=name
        DR=requests.post(url + "samples/", headers = headers2,data = Data)             
    ####check the Data loading was correct
    if DR.status_code not in [200,204]:
        print("error for " + name)
        print(DR.status_code)
        print(DR.raise_for_status())
    ###actualize the registered["Site"] list (checking we did not duplicated anything here)
    r=requests.get(url + "samples/forNames?names="+name, headers = headers2)
    data=r.json()
    sam=data.get("data")
    if len(sam)!=1:
        print("different Skeleton Element entries (" + str(len(sam)) + ") for name "+name)
        break
    else:
        sam=sam[0]
        id=str(sam.get("sampleID"))
        #print("Data OK for "+ name + " (" + id + ")")
        registered["Skeleton Element"][name]=id

    ###patch the metaData
    if patch:
        #print("patching meta so need to heck if differences for "+name)
        MDR=requests.get(url + "samples/"+id+"/meta", headers = headers2)
        if MDR.status_code!=200:
            print("error querrying meta for " + name)
            break
        data=MDR.json().get("data")
        metaLoaded={}
        for i in data:
            metaLoaded[i["key"]]=str(i["value"])

    for fea in FeateLabSkel.keys():
        needToPatch=False
        ###get new element to be loaded
        if FeateLabSkel[fea]['ID'] != "notMeta" and FeateLabSkel[fea]['TYPE'] != "FILE":
            ###fixed value (from dico)
            if SkeDict[fea].startswith("fixed"):
                element=SkeDict[fea].split("_")[1]
                MetaData={"key": fea,
                          "sampleTypeMetaID": int(FeateLabSkel[fea]['ID']),
                          "value": element,
                          "sampleDataType": FeateLabSkel[fea]['TYPE']}
            elif SkeDict[fea]=="None":
                element="Nothing entered"
                MetaData={"key": fea,
                          "sampleTypeMetaID": int(FeateLabSkel[fea]['ID']),
                          "value": element,
                          "sampleDataType": FeateLabSkel[fea]['TYPE']}
            elif fea == "From Individual":
                sisi=name.split(".")[0]
                IDsisi=registered["Individual"][sisi]
                element=sisi+"|"+IDsisi
                MetaData={
                   "sampleTypeMetaID": int(FeateLabSkel[fea]['ID']),
                   "sampleDataType": FeateLabSkel[fea]['TYPE'],
                   "samples": sisi,
                    "key": fea,
                    "value": element
                }
            else:
                element=table[SkeDict[fea]][index]
                if format(element)=="nan" :
                    element="NA"
                MetaData={"key": fea,
                          "sampleTypeMetaID": int(FeateLabSkel[fea]['ID']),
                          "value": element,
                          "sampleDataType": FeateLabSkel[fea]['TYPE']}
            
            ###check if this is a new entry or not
            if patch:
                ###check if new element is similar to what already loaded
                if fea not in metaLoaded.keys(): 
                    needToPatch=True
                elif metaLoaded[fea] != str(element):
                    print("difference for " + name + "(feature: " + fea + ") " + element + " vs loaded : " + metaLoaded[fea])
                    #prompt="y"
                    if fea == "Pictures Labelling" or fea == "Exportation Permit Number":
                        prompt="y"
                    else:
                        prompt="?"
                    while prompt not in ["y","n"]:
                        prompt = input("replace y/n??")
                    if prompt == "y":
                        needToPatch=True
            else:
                needToPatch=True
    
            if needToPatch:
                #print(MetaData)      
                MDR=requests.put(url + "samples/"+id+"/meta", headers = headers2,data = MetaData)
                ####check the MetaData loading was correct
                if MDR.status_code not in [200,204]:
                    print("error for " + name + " for feature " + fea)
                    print(MDR.status_code)
                    print(MDR.raise_for_status())
                    break
    #print("metadata OK for "+ name + " (" + id + ")")
    ###patch the quantity
    Quant={}
    for fea in ['Amount','Unit']:
        if SkeDict[fea].startswith("fixed"):
            element=SkeDict[fea].split("_")[1]
        elif SkeDict[fea]=="None":
            element="Nothing entered"
        else:
            element=table[SkeDict[fea]][index]
        Quant[fea]=element
    Quant["displayUnit"]=Quant["Unit"].capitalize()
    Quant["fullAmount"]=Quant["Amount"]
    QR=requests.put(url + "samples/" + id + "/quantity", headers = headers2, data = Quant)
    if QR.status_code not in [200,204]:
        print("error for " + name + " for quantity")
        print(QR.status_code)
        print(QR.raise_for_status())
    #else:
    #    print("quantity OK for "+ name + " (" + id + ")")
    
    #if index > 9:
    #    print("break after 10")
    #    break 
print("finished") 

difference for AR0044.1(feature: Observation Drilling) no calculus, bad preservation with brown colour of the root and full of cracks and dirt vs loaded : NA
replace y/n??y
difference for AR0047.1(feature: Observation Drilling) no calculus observed, small incisive with longitudinal cracks and brown colour, bad preservation vs loaded : NA
replace y/n??y
difference for AR0048.1(feature: Observation Drilling) may have calculus, full of black consolidations on the root, badly preserved appears soft and chipped, root apex broke with slight pressure during decontamination vs loaded : NA
replace y/n??y
difference for AR0048.1(feature: Pictures Drilling) /pasteur/entites/metapaleo/Research/ERC-project/Samples/pictures/Drilling/AR0048.1 vs loaded : NA
replace y/n??y
difference for AR0051.1(feature: Observation Drilling) some calculus, longitudinal cracks on the crown, yellow colour, not great preservation vs loaded : NA
replace y/n??y
difference for AR0051.1(feature: Pictures Drilling) /pasteur

### For Extracts
We start from DDBB_extracts.csv file

In [41]:
r = requests.get(url + "sampleTypes/" + types["Extract"] + "/meta", headers = headers2)
data = r.json()
FeateLabExe = {}
for feat in ['Name','Description','Note','Amount','Unit',"parentSampleID"]:
    FeateLabExe[feat] = {"ID": "notMeta"}
for feat in data.get("data"):
    FeateLabExe[format(feat.get("key"))] = { "ID":format(feat.get("sampleTypeMetaID")),
                                              "TYPE":format(feat.get("sampleDataType"))}

And check that they have been declared ExeDict.




In [42]:
for feat in FeateLabExe.keys():
    if feat not in ExeDict.keys():
        print(feat + "--> NOT IN DICTIONARY")
        
for feat in ExeDict.keys():
    if feat not in FeateLabExe.keys():
        print(feat + "--> NOT IN eLAB")

We get all the possible values for checkboxes and dropdown features of Extracts and check our extractTable table is fine. 

In [43]:
r = requests.get(url + "sampleTypes/" + types["Extract"] + "/meta", headers = headers2)
data = r.json()
for feat in data.get("data"):
    if feat.get("sampleDataType") == "CHECKBOX" or feat.get("sampleDataType") == "COMBO":
        OptionELAB=feat.get("optionValues")
        key=feat.get("key")
        if ExeDict[key].startswith("fixed"):
            tabVal=ExeDict[key].split("_")[1]
            if tabVal not in OptionELAB:
                print("--" + tabVal + "-- not mapped in eLab for " + key)
        else:
            extractTable.loc[extractTable[ExeDict[key]].isnull(),ExeDict[key]]="NA"
            for tabVal in extractTable[ExeDict[key]].unique():
                if tabVal not in OptionELAB:
                    print("--" + tabVal + "-- not mapped in eLab for " + key)


Now, we make the json for each extract and we upload or update in eLab!

In [44]:
###iterate over extracts
for index,name in extractTable[ExeDict['Name']].items():
    #print(str(index)+" "+name)
    ####load the Data!
    Data={}
    for fea in FeateLabExe.keys():
        if FeateLabExe[fea]['ID'] == "notMeta":
            ###fixed value (from dico)
            if ExeDict[fea].startswith("fixed"):
                element=ExeDict[fea].split("_")[1]
            elif ExeDict[fea]=="None":
                element="Nothing entered"
            elif fea == "parentSampleID":
                if not name.startswith("Blank"):
                    element=registered["Skeleton Element"][extractTable["RascovanLabID"][index]]
                else:
                    element=None
            else:
                element=extractTable[ExeDict[fea]][index]
            Data[fea]=element
    ###case of updating
    if name in registered['Extract'].keys():
        #print(name + "updating")
        patch=True
        id=registered['Extract'][name]
        Data["Note"]="Updated from API"
        ###QUERY CAMPO
        ##SI CAMPO ELAB <> CAMPO TABLA:
        ##        proimpt: update???
            
        DR=requests.patch(url + "samples/"+id, headers = headers2,data = Data)
    else:
        ###case of uploading
        #print(name + "uploading")
        patch=False
        Data["Note"]="Uploaded from API"
        Data["sampleTypeID"]=types["Extract"]
        Data["Name"]=name
        DR=requests.post(url + "samples/", headers = headers2,data = Data)             
    ####check the Data loading was correct
    if DR.status_code not in [200,204]:
        print("error for " + name)
        print(DR.status_code)
        print(DR.raise_for_status())
    ###actualize the registered["Site"] list (checking we did not duplicated anything here)
    r=requests.get(url + "samples/forNames?names="+name, headers = headers2)
    data=r.json()
    sam=data.get("data")
    if len(sam)!=1:
        print("different Extract entries (" + str(len(sam)) + ") for name "+name)
        break
    else:
        sam=sam[0]
        id=str(sam.get("sampleID"))
        #print("Data OK for "+ name + " (" + id + ")")
        registered["Extract"][name]=id

    ###patch the metaData
    if patch:
        #print("patching meta so need to heck if differences for "+name)
        MDR=requests.get(url + "samples/"+id+"/meta", headers = headers2)
        if MDR.status_code!=200:
            print("error querrying meta for " + name)
            break
        data=MDR.json().get("data")
        metaLoaded={}
        for i in data:
            metaLoaded[i["key"]]=str(i["value"])

    for fea in FeateLabExe.keys():
        needToPatch=False
        MDR=requests.get(url + "samples/"+id+"/meta", headers = headers2)
        ###get new element to be loaded
        if FeateLabExe[fea]['ID'] != "notMeta" and FeateLabExe[fea]['TYPE'] != "FILE":
            ###fixed value (from dico)
            if ExeDict[fea].startswith("fixed"):
                element=ExeDict[fea].split("_")[1]
                MetaData={"key": fea,
                          "sampleTypeMetaID": int(FeateLabExe[fea]['ID']),
                          "value": element,
                          "sampleDataType": FeateLabExe[fea]['TYPE']}
            elif ExeDict[fea]=="None":
                element="Nothing entered"
                MetaData={"key": fea,
                          "sampleTypeMetaID": int(FeateLabExe[fea]['ID']),
                          "value": element,
                          "sampleDataType": FeateLabExe[fea]['TYPE']}
            elif fea == "From Skeleton Element":
                if not name.startswith("Blank"):
                    sisi=extractTable["RascovanLabID"][index]
                    IDsisi=registered["Skeleton Element"][sisi]
                    element=sisi+"|"+IDsisi
                    samples={"sampleID": IDsisi,"name": sisi}
                else:
                    samples=[]
                    splitted=extractTable["extractionComment"][index].split(",")
                    splitted=list(dict.fromkeys(splitted))
                    for sisi in splitted:
                        IDsisi=registered["Extract"][sisi]
                        samples.append({"sampleID": IDsisi,"name": sisi})
                        if sisi != splitted[0]:
                            element=element+"|"+sisi+"|"+IDsisi
                        else:
                            element=sisi+"|"+IDsisi
                MetaData={
                    "sampleTypeMetaID": int(FeateLabExe[fea]['ID']),
                    "sampleDataType": FeateLabExe[fea]['TYPE'],
                    "samples": samples,
                    "key": fea,
                    "value": element
                }
            else:
                element=extractTable[ExeDict[fea]][index]
                if format(element)=="nan" or format(element)=="" or format(element)==" ":
                    element="Nothing entered"
                MetaData={"key": fea,
                          "sampleTypeMetaID": int(FeateLabExe[fea]['ID']),
                          "value": element,
                          "sampleDataType": FeateLabExe[fea]['TYPE']}
            
            ###check if this is a new entry or not
            if patch:
                ###check if new element is similar to what already loaded
                if fea not in metaLoaded.keys(): 
                    needToPatch=True
                elif metaLoaded[fea] != str(element):
                    print("difference for " + name + "(feature: " + fea + ") " + element + " vs loaded : " + metaLoaded[fea])
                    #prompt="y"
                    prompt="?"
                    while prompt not in ["y","n"]:
                        prompt = input("replace y/n??")
                    if prompt == "y":
                        needToPatch=True
            else:
                needToPatch=True
    
            if needToPatch:
                #print(MetaData)      
                MDR=requests.put(url + "samples/"+id+"/meta", headers = headers2,data = MetaData)
                ####check the MetaData loading was correct
                if MDR.status_code not in [200,204]:
                    print("error for " + name + " for feature " + fea)
                    print(MDR.status_code)
                    print(MDR.raise_for_status())
                    break
    #print("metadata OK for "+ name + " (" + id + ")")
    ###patch the quantity
    Quant={}
    Note=None
    for fea in ['Amount','Unit']:
        if ExeDict[fea].startswith("fixed"):
            element=ExeDict[fea].split("_")[1]
        elif ExeDict[fea]=="None":
            element="Nothing entered"
        else:
            element=extractTable[ExeDict[fea]][index]
            if format(element)=="nan":
                element=0
            elif "<" in element:
                Note="actual weight reported: "+element
                element=0
        Quant[fea]=element
    Quant["displayUnit"]=Quant["Unit"].capitalize()
    Quant["fullAmount"]=Quant["Amount"]
    QR=requests.put(url + "samples/" + id + "/quantity", headers = headers2, data = Quant)
    if QR.status_code not in [200,204]:
        print("error for " + name + " for quantity")
        print(QR.status_code)
        print(QR.raise_for_status())
    ###put actual weight in note when there is a "<"
    if Note is not None:
            r=requests.get(url + "samples/"+id, headers = headers2)
            if r.status_code not in [200,204]:
                print("error for " + name + " for quantity 2")
            Data=r.json()
            Data["note"]=Data["note"]+" / "+ Note
            r=requests.patch(url + "samples/"+id, headers = headers2,data = Data)
            if r.status_code not in [200,204]:
                print("error for " + name + " for quantity 3")
print("finished") 


difference for AR0248.2.02(feature: density UDG treatment (ng/uL)) Nothing entered vs loaded : 0.258
replace y/n??n
difference for AR0248.2.02(feature: Volume UDG treatment (uL)) Nothing entered vs loaded : 22
replace y/n??n
difference for AR0248.2.02(feature: mass UDG in Tube (ng)) Nothing entered vs loaded : 5.676
replace y/n??n
difference for AR0248.2.02(feature: Extracted) NA vs loaded : yes
replace y/n??n
finished


### For Non indexed librairies
We have not done any external tables. I just actualize the parent sample link!

In [45]:
r = requests.get(url + "sampleTypes/" + types["Non Indexed Library"] + "/meta", headers = headers2)
data = r.json()
FeateLabNiLib = {}
for feat in ['Name','Description','Note','Amount','Unit',"parentSampleID"]:
    FeateLabNiLib[feat] = {"ID": "notMeta"}
for feat in data.get("data"):
    FeateLabNiLib[format(feat.get("key"))] = { "ID":format(feat.get("sampleTypeMetaID")),
                                              "TYPE":format(feat.get("sampleDataType"))}
print(FeateLabNiLib)

{'Name': {'ID': 'notMeta'}, 'Description': {'ID': 'notMeta'}, 'Note': {'ID': 'notMeta'}, 'Amount': {'ID': 'notMeta'}, 'Unit': {'ID': 'notMeta'}, 'parentSampleID': {'ID': 'notMeta'}, 'Person in charge': {'ID': '245169', 'TYPE': 'CHECKBOX'}, 'Date of preparation': {'ID': '245170', 'TYPE': 'DATE'}, 'Labroatory where prepared': {'ID': '245171', 'TYPE': 'CHECKBOX'}, 'UDG treatment type': {'ID': '245172', 'TYPE': 'COMBO'}, 'SSB and Adapters dilution type': {'ID': '245173', 'TYPE': 'COMBO'}, 'Temporary UDG treated extract location': {'ID': '245174', 'TYPE': 'TEXT'}, 'Initial Volume (uL)': {'ID': '245175', 'TYPE': 'NUMERIC'}, 'Remaining Volume (uL)': {'ID': '245176', 'TYPE': 'NUMERIC'}, 'From Extract': {'ID': '245177', 'TYPE': 'SAMPLELINK'}}


In [46]:

###GET extract Name from library name
def getExtract(libName):
    ret=libName[0]
    for char in libName[1:(len(libName)-2)]:
        ret=ret+char
    return(ret)


for niLib in registered["Non Indexed Library"].keys():
    #print(niLib)
    id=registered["Non Indexed Library"][niLib]
    #retrieve data
    DR=requests.get(url + "samples/"+id, headers = headers2)
    if DR.status_code not in [200]:
        print("error retrieving " + niLib)
        print(DR.status_code)
        print(DR.raise_for_status())
        break
    Data=DR.json()
    extract=None
    idExtract=None
    if niLib.startswith("BL"):
        MDR=requests.get(url + "samples/"+id+"/meta/", headers = headers2)
        ####check the MetaData loading was correct
        if MDR.status_code not in [200,204]:
            print("error retrieving from extract for " + niLib)
            print(MDR.status_code)
            print(MDR.raise_for_status())
            break
        data=MDR.json().get("data")
        for dd in data:
            if dd["key"] == "From Extract":
                extract=dd["value"].split("|")[0]
    else:
        extract=getExtract(niLib)
    if extract is None:
        print("extract not retrieve")
        break
    idExtract=registered["Extract"][extract]
    
    ###parent sample:
    Data["parentSampleID"]=idExtract
    DR=requests.patch(url + "samples/"+id, headers = headers2,data=Data)
    if DR.status_code not in [200,204]:
        print("error patching" + niLib)
        print(DR.status_code)
        print(DR.raise_for_status())
        break
        
    ###From extract
    element=extract+"|"+idExtract
    samples={"sampleID": idExtract,"name": extract}
    MetaData={
        "sampleTypeMetaID": int(FeateLabNiLib["From Extract"]['ID']),
        "sampleDataType": FeateLabNiLib["From Extract"]['TYPE'],
        "samples": samples,
        "key": "From Extract",
        "value": element
    }
    MDR=requests.put(url + "samples/"+id+"/meta", headers = headers2,data = MetaData)
    ####check the MetaData loading was correct
    if MDR.status_code not in [200,204]:
        print("error retrieving meta for " + niLib)
        print(MDR.status_code)
        print(MDR.raise_for_status())
        break
print("finished")

finished


### Indexed library
Same: we just actualize the links to Non Indexed libraries

In [47]:
r = requests.get(url + "sampleTypes/" + types["Indexed Library"] + "/meta", headers = headers2)
data = r.json()
FeateLabILib = {}
for feat in ['Name','Description','Note','Amount','Unit',"parentSampleID"]:
    FeateLabILib[feat] = {"ID": "notMeta"}
for feat in data.get("data"):
    FeateLabILib[format(feat.get("key"))] = { "ID":format(feat.get("sampleTypeMetaID")),
                                              "TYPE":format(feat.get("sampleDataType"))}
print(FeateLabILib)

{'Name': {'ID': 'notMeta'}, 'Description': {'ID': 'notMeta'}, 'Note': {'ID': 'notMeta'}, 'Amount': {'ID': 'notMeta'}, 'Unit': {'ID': 'notMeta'}, 'parentSampleID': {'ID': 'notMeta'}, 'Person in charge': {'ID': '244383', 'TYPE': 'CHECKBOX'}, 'From Non Indexed Library': {'ID': '244384', 'TYPE': 'SAMPLELINK'}, 'Laboratory where processed': {'ID': '244385', 'TYPE': 'CHECKBOX'}, 'Date of preparation': {'ID': '245107', 'TYPE': 'DATE'}, 'qPCR': {'ID': '245114', 'TYPE': 'COMBO'}, 'number of Ct': {'ID': '245115', 'TYPE': 'NUMERIC'}, 'Date of qPCR': {'ID': '245116', 'TYPE': 'DATE'}, 'qPCR Comment ': {'ID': '245117', 'TYPE': 'TEXTAREA'}, 'Dual Unique Index ID': {'ID': '245118', 'TYPE': 'TEXT'}, 'Indexing PCR (# cycles)': {'ID': '245119', 'TYPE': 'NUMERIC'}, 'Density QC-1 nmol/L': {'ID': '245121', 'TYPE': 'NUMERIC'}, 'Number of QC performed': {'ID': '245122', 'TYPE': 'COMBO'}, 'Density QC-1 ng/uL': {'ID': '245123', 'TYPE': 'NUMERIC'}, 'QC-1 Elution Volume of Library (uL)': {'ID': '245124', 'TYPE': 

In [48]:
for ILib in registered["Indexed Library"].keys():
    #print(ILib)
    id=registered["Indexed Library"][ILib]
    #retrieve data
    DR=requests.get(url + "samples/"+id, headers = headers2)
    if DR.status_code not in [200]:
        print("error retrieving " + niLib)
        print(DR.status_code)
        print(DR.raise_for_status())
        break
    Data=DR.json()
    niLib=None
    idniLib=None
    MDR=requests.get(url + "samples/"+id+"/meta/", headers = headers2)
    ####check the MetaData loading was correct
    if MDR.status_code not in [200,204]:
        print("error retrieving from extract for " + ILib)
        print(MDR.status_code)
        print(MDR.raise_for_status())
        break
    data=MDR.json().get("data")
    for dd in data:
        if dd["key"] == "From Non Indexed Library":
            niLib=dd["value"].split("|")[0]
    if niLib is None:
        print("Non indexed Library not retrieve")
        break
    idniLib=registered["Non Indexed Library"][niLib]
    
    ###parent sample:
    Data["parentSampleID"]=idniLib
    DR=requests.patch(url + "samples/"+id, headers = headers2,data=Data)
    if DR.status_code not in [200,204]:
        print("error patching" + ILib)
        print(DR.status_code)
        print(DR.raise_for_status())
        break
        
    ###From extract
    element=niLib+"|"+idniLib
    samples={"sampleID": idniLib,"name": niLib}
    MetaData={
        "sampleTypeMetaID": int(FeateLabILib["From Non Indexed Library"]['ID']),
        "sampleDataType": FeateLabILib["From Non Indexed Library"]['TYPE'],
        "samples": samples,
        "key": "From Non Indexed Library",
        "value": element
    }
    MDR=requests.put(url + "samples/"+id+"/meta", headers = headers2,data = MetaData)
    ####check the MetaData loading was correct
    if MDR.status_code not in [200,204]:
        print("error retrieving meta for " + ILib)
        print(MDR.status_code)
        print(MDR.raise_for_status())
        break
print("finished")

finished


## Sample assignation to Experiments

first retrieve the eLab ID needed to access the sampleIN and sampleOUT sections.

In [83]:
r = requests.get(url + "experiments", headers = headers2,params = params)
data = r.json()
experiments = {}
for exp in data.get("data"):
    experiments[format(exp.get("name"))] = format(exp.get("experimentID"))



for expe in list(experiments.keys()):
    #print(expe)
    idExpe=experiments[expe]
    r=requests.get("https://elab-dev.pasteur.fr/api/v1/experiments/"+idExpe+"/sections",headers=headers1)
    if r.status_code != 200:
        print(r.status_code)
        print(r.raise_for_status())
    if r.json().get("recordCount") == 0:
        print("no record")
        continue
    SampleIN={}
    SampleOUT={}
    for data in r.json().get("data"):
        if data["sectionType"] == "SAMPLESIN":
            SampleIN[data["sectionHeader"]]=data["expJournalID"]
        elif data["sectionType"] == "SAMPLESOUT":
            SampleOUT[data["sectionHeader"]]=data["expJournalID"]
    experiments[expe]={"ID":idExpe,
                      "sampleIN":SampleIN,
                      "sampleOUT":SampleOUT}
#print(experiments)

### Assign to Labelling sampleIN the individuals and to Labelling sampleOUT the skeleton elements

In [64]:
sampleOUT=experiments["Labelling process"]["sampleOUT"]["Labelled Skeleton elements "]
sampleIN=experiments["Labelling process"]["sampleIN"]["Individuals labelled"]

listOUT=[]
listIN=[]
for inName in registered["Skeleton Element"].keys():
    inID=registered["Skeleton Element"][inName]
    listOUT.append(inID)
    ###get the parent individual
    r=requests.get(url+"/samples/"+inID+"/parent",headers=headers1)
    if r.status_code !=200:
        print(r.status_code)
        r.raise_for_status()
        break
    rjson=r.json()
    outName=rjson.get("name")
    listIN.append(rjson.get("sampleID"))

In [65]:
listOUT=format(listOUT)
#print(listOUT)
r=requests.put(url+"/experiments/sections/"+format(sampleOUT)+"/samples",headers=headers1,data = listOUT)
if r.status_code !=204:
    print(r.status_code)
    r.raise_for_status()

In [66]:
listIN=format(listIN)
r=requests.put(url+"/experiments/sections/"+format(sampleIN)+"/samples",headers=headers1,data = listIN)
if r.status_code !=204:
    print(r.status_code)
    r.raise_for_status()

### Assign to Drilling Rasovan Laboratory Protocols
"pulverized pieces" (sampleIN) and  the skeleton elements they derive from (sampleOUT), all extracts that appear in "DDBB_extract.csv"


Starts with retrieving all field IDs required for that

In [67]:
#the sampleIN and sampleOUT id for the experiment
CorresExtract={"petrous":"Pulverized petrous bone",
                  "dental calculus":"Scratched Dental Calculus",
                  "pulp":"Pulverized Pulp",
                  "root":"Pulverized Root",
                   "root apex":"Pulverized Root Apex",
                  "long bone":"Pulverized long bone",
                    "other":"Pulverized other bone",
              }
CorresSkel={"Petrous":"Petrous bone processed",
            "Tooth":"Tooth processed",
            "Other Bone":"Long bone processed "}



Now we get extract and skeleton element ID and we assign them to different experiment (according to some in-house conditions:
 - name starting with NR means they were processed by Nico at Schroeder lab
 - expediente is "Guareib / Pulverized" means we already received the pulverized pieces
 - the skeleton element is Dental calculus means Mariano del Papa sent us the scratched dental calculus
 - else it it our own protocoles

In [68]:
listOUT={}
listIN={}
for lab in ["Guraeib","Del Papa","Schroeder","Rascovan"]:
    listOUT[lab]={}
    listIN[lab]={}
    for exType in CorresExtract:
        listOUT[lab][CorresExtract[exType]]=[]

    for skelType in CorresSkel:
        listIN[lab][CorresSkel[skelType]]=[]

for index, extract in extractTable["ExtractID"].items():
    if extract.startswith("Blank"):
        continue
    ###prepare sampleOUT for that extract
    idOUT=registered["Extract"][extract]
    #get meta 
    MER=requests.get(url+"/samples/"+idOUT+"/meta",headers=headers1)
    if MER.status_code !=200:
        print(MER.raise_for_status())
        break
    #get Extract Type and check it is found
    exType=None
    for meta in MER.json().get("data"):
        if meta["key"]=="Extract Type":
            exType=meta["value"]
            break
    if exType is None:
        print("Extract Type not found")
        break
    ###prepare sampleIN for that extract
    #get parentSampleID (the skeleton element)
    ER=requests.get(url+"/samples/"+idOUT,headers=headers1)
    if ER.status_code !=200:
        print(ER.raise_for_status())
        break
    idIN=format(ER.json()["parentSampleID"])

    #get meta
    SMR=requests.get(url+"/samples/"+idIN+"/meta",headers=headers1)
    if SMR.status_code !=200:
        print(SMR.raise_for_status())
        break
    ##get skeleton element type and check it is found
    archoID=None
    skelType=None
    expediente=None
    for meta in SMR.json().get("data"):
        if meta["key"]=="Bone type":
            skelType=meta["value"]
        elif meta["key"]=="Exportation Permit Number":
            expediente=meta["value"]
        elif meta["key"]=="Archaeologist sample ID":
            archoID=meta["value"]
    if skelType is None:
        print("Skeleton Ele Type not found")
        print(SMR.json().get("data"))
        break
    if expediente is None:
        print("Expediente not found")
        print(SMR.json().get("data"))
        break
    if archoID is None:
        print("archeo ID not found")
        print(SMR.json().get("data"))
        break

    if skelType == "Dental Calculus":
        #print("del Papa")
        listOUT["Del Papa"][CorresExtract[exType]].append(idOUT)
    elif expediente=="Solana Guraeib / Pulverized":
        #print("Guareib")
        listOUT["Guraeib"][CorresExtract[exType]].append(idOUT)
        listIN["Guraeib"][CorresSkel[skelType]].append(idIN)
    elif archoID.startswith("NR"):
        #print("Schroeder")
        listOUT["Schroeder"][CorresExtract[exType]].append(idOUT)
        listIN["Schroeder"][CorresSkel[skelType]].append(idIN)
    else:
        #print("Rascovan")
        listOUT["Rascovan"][CorresExtract[exType]].append(idOUT)
        listIN["Rascovan"][CorresSkel[skelType]].append(idIN)
        

print(listIN)
print(listOUT)



{'Guraeib': {'Petrous bone processed': [], 'Tooth processed': ['9519957', '9519957', '9519958', '9519959', '9519960', '9519961', '9519962', '9519962', '9519963', '9519964', '9519964'], 'Long bone processed ': ['9519955', '9519955', '9519956', '9519956']}, 'Del Papa': {'Petrous bone processed': [], 'Tooth processed': [], 'Long bone processed ': []}, 'Schroeder': {'Petrous bone processed': ['9520367', '9520372', '9520375', '9520377', '9520379', '9520380', '9520381', '9520382', '9520383', '9520384', '9520385', '9520386', '9520387', '9520388', '9520389', '9520390', '9520391'], 'Tooth processed': ['9520368', '9520369', '9520370', '9520371'], 'Long bone processed ': []}, 'Rascovan': {'Petrous bone processed': ['9519919', '9519919', '9519921', '9519921', '9519921', '9519923', '9519923', '9519924', '9519924', '9519925', '9519925', '9519928', '9519928', '9520182', '9520182', '9520243', '9520243'], 'Tooth processed': ['9519935', '9519935', '9519935', '9519935', '9519941', '9519941', '9519941', '

In [79]:
###upload sample IN
for lab in ["Guraeib","Del Papa","Schroeder","Rascovan"]:
    for type in listIN[lab].keys():
        data=listIN[lab][type]
        if len(data)==0:
            print("sample IN : nothing to upload upload for "+type+" to "+lab)
        else:
            idIN=format(experiments["Drilling. "+lab+" Laboratory Protocols"]["sampleIN"][type])
            print("sample IN : upload for "+type+" to "+lab)
            data=format(data)
            r=requests.put(url+"/experiments/sections/"+idIN+"/samples",headers=headers1,data = data)
            if r.status_code !=204:
                print(r.status_code)
                r.raise_for_status()

sample IN : nothing to upload upload for Petrous bone processed to Guraeib
sample IN : upload for Tooth processed to Guraeib
sample IN : upload for Long bone processed  to Guraeib
sample IN : nothing to upload upload for Petrous bone processed to Del Papa
sample IN : nothing to upload upload for Tooth processed to Del Papa
sample IN : nothing to upload upload for Long bone processed  to Del Papa
sample IN : upload for Petrous bone processed to Schroeder
sample IN : upload for Tooth processed to Schroeder
sample IN : nothing to upload upload for Long bone processed  to Schroeder
sample IN : upload for Petrous bone processed to Rascovan
sample IN : upload for Tooth processed to Rascovan
sample IN : nothing to upload upload for Long bone processed  to Rascovan


In [84]:
###upload sample OUT
for lab in ["Guraeib","Del Papa","Schroeder","Rascovan"]:
    for type in listOUT[lab].keys():
        data=listOUT[lab][type]
        if len(data)==0:
            print("sample OUT : nothing to upload upload for "+type+" to "+lab)
        else:
            idOUT=format(experiments["Drilling. "+lab+" Laboratory Protocols"]["sampleOUT"][type])
            print("sample OUT : upload for "+type+" to "+lab)
            data=format(data)
            r=requests.put(url+"/experiments/sections/"+idOUT+"/samples",headers=headers1,data = data)
            if r.status_code !=204:
                print(r.status_code)
                r.raise_for_status()

sample OUT : nothing to upload upload for Pulverized petrous bone to Guraeib
sample OUT : nothing to upload upload for Scratched Dental Calculus to Guraeib
sample OUT : nothing to upload upload for Pulverized Pulp to Guraeib
sample OUT : upload for Pulverized Root to Guraeib
sample OUT : nothing to upload upload for Pulverized Root Apex to Guraeib
sample OUT : upload for Pulverized long bone to Guraeib
sample OUT : nothing to upload upload for Pulverized other bone to Guraeib
sample OUT : nothing to upload upload for Pulverized petrous bone to Del Papa
sample OUT : upload for Scratched Dental Calculus to Del Papa
sample OUT : nothing to upload upload for Pulverized Pulp to Del Papa
sample OUT : nothing to upload upload for Pulverized Root to Del Papa
sample OUT : nothing to upload upload for Pulverized Root Apex to Del Papa
sample OUT : nothing to upload upload for Pulverized long bone to Del Papa
sample OUT : nothing to upload upload for Pulverized other bone to Del Papa
sample OUT : 

### Experiment: Extraction. Rascovan Laboratory Protocols
Now we add in "as sampleIN and sampleOUT the "pulverized bone" for which there is an non-indexed library


In [85]:
listIN=[]
for lib,libID in registered["Non Indexed Library"].items():
    r=requests.get(url+"/samples/"+libID,headers=headers1)
    if r.status_code !=200:
        print(r.status_code)
        print(r.raise_for_status())
        break
    extID=r.json().get("parentSampleID")
    r=requests.get(url+"/samples/"+format(extID),headers=headers1)
    if r.status_code !=200:
        print(r.status_code)
        print(r.raise_for_status())
        break
    ext=r.json().get("name")
    if ext.startswith("Blank"):
        date="".join(ext.split(".")[1].split("-")[::-1])
        if lib !="BL"+date+"00":
            print("HU? "+ext+" for "+lib)
            break
    elif ext+"00" != lib:
        print("HU? "+ext+" for "+lib)
        print(ext)
        break
    listIN.append(extID)

    

In [86]:
data=format(listIN)
###assign to sampleIN
idExp={"c":str(value) for key, value in experiments["Extraction. Rascovan Lab Protocols"]["sampleIN"].items()}["c"]
print(idExp)
r=requests.put(url+"/experiments/sections/"+idExp+"/samples",headers=headers1,data = data)
print(r)
###assign to sampleOUT
idExp={"c":str(value) for key, value in experiments["Extraction. Rascovan Lab Protocols"]["sampleOUT"].items()}["c"]
print(idExp)
r=requests.put(url+"/experiments/sections/"+idExp+"/samples",headers=headers1,data = data)
print(r)

5423444
<Response [204]>
5423445
<Response [204]>


### Experiment: Library Prep. Rascovan Lab protocols
Now we add in "as sampleIN "pulverized bone" for which there is an "indexed library" and as sampleOUT that indexed library and the corresponding non indexed library




In [87]:
extIN=[]
nonIndLibOUT=[]
indLibOUT=[]
for indLib,indLibID in registered["Indexed Library"].items():
    
    ###retrieve the non Index Librar info
    r=requests.get(url+"/samples/"+indLibID,headers=headers1)
    if r.status_code !=200:
        print(r.status_code)
        print(r.raise_for_status())
        break
    nonIndLibID=r.json().get("parentSampleID")
    r=requests.get(url+"/samples/"+format(nonIndLibID),headers=headers1)
    if r.status_code !=200:
        print(r.status_code)
        print(r.raise_for_status())
        break
    nonIndLib=r.json().get("name")
    ###check inIndLib ifinished with 0
    if nonIndLib[-1] !="0":
        print("hu "+nonIndLib)
        break
    ###check nonIndLib and IndLib corresponds
    if ''.join(nonIndLib[0:(len(nonIndLib)-2)]) != ''.join(indLib[0:(len(indLib)-2)]):
        print("HU? "+indLib+" "+nonIndLib)
        break
        
        
    ###retrieve the extract 
    extID=r.json().get("parentSampleID")
    r=requests.get(url+"/samples/"+format(extID),headers=headers1)
    if r.status_code !=200:
        print(r.status_code)
        print(r.raise_for_status())
        break
    ext=r.json().get("name")
    ##check extract corresponds to non indexed library
    if ext.startswith("Blank"):
        date="".join(ext.split(".")[1].split("-")[::-1])
        if nonIndLib !="BL"+date+"00":
            print("HU? "+ext+" for "+nonIndLib)
            break
    elif ext+"00" != nonIndLib:
        print("HU? "+ext+" for "+nonIndLib)
        print(ext)
        break
    
    extIN.append(extID)
    nonIndLibOUT.append(nonIndLibID)
    indLibOUT.append(indLibID)

print(len(extIN))
print(len(nonIndLibOUT))
print(len(indLibOUT))


72
72
72


In [88]:
###assign to sampleIN extracts
idExp={"c":str(value) for key, value in experiments["Library Prep. Rascovan Lab protocols"]["sampleIN"].items()}["c"]
print(idExp)
data=format(extIN)
r=requests.put(url+"/experiments/sections/"+idExp+"/samples",headers=headers1,data = data)
print(r)

###assign to sampleOUT non indexed Library
idExp=experiments["Library Prep. Rascovan Lab protocols"]["sampleOUT"]["UDG-treated extracts"]
print(idExp)
data=format(nonIndLibOUT)
r=requests.put(url+"/experiments/sections/"+format(idExp)+"/samples",headers=headers1,data = data)
print(r)


###assign to sampleOUT indexed Library
idExp=experiments["Library Prep. Rascovan Lab protocols"]["sampleOUT"]["Library generated"]
print(idExp)
data=format(indLibOUT)
r=requests.put(url+"/experiments/sections/"+format(idExp)+"/samples",headers=headers1,data = data)
print(r)



5423432
<Response [204]>
5423443
<Response [204]>
5423433
<Response [204]>


### Check if all samples in eLab are assigned to an experiment

In [89]:
def condition(name,listToCheck):
    return name not in listToCheck



### For Labelling

In [90]:
###check if all individuals as sampleIN for labelling process
r=requests.get(url+"/experiments/sections/"+format(experiments["Labelling process"]["sampleIN"]["Individuals labelled"])+"/samples",headers=headers2)
listInExp=[]
for i in r.json().get("data"):
    listInExp.append(i["name"])

IndnotInLabelling=[element for idx,element in enumerate(registered["Individual"]) if condition(element,listInExp)]
print(IndnotInLabelling)


[]


In [91]:
###check if all Skeleton element  as sampleOUT for labelling process
r=requests.get(url+"/experiments/sections/"+format(experiments["Labelling process"]["sampleOUT"]["Labelled Skeleton elements "])+"/samples",headers=headers1)
listInExp=[]
for i in r.json().get("data"):
    listInExp.append(i["name"])

SkelnotInLabelling=[element for idx,element in enumerate(registered["Skeleton Element"]) if condition(element,listInExp)]
print(SkelnotInLabelling)



[]


### For Drilling

In [92]:
listInExp=[]
###check if all skeleton element as sampleIN for drilling processes
for key in experiments.keys():
    if not key.startswith("Drilling"):
        continue
    for inty in experiments[key]["sampleIN"]:
        r=requests.get(url+"/experiments/sections/"+format(experiments[key]["sampleIN"][inty])+"/samples",headers=headers1)
        if r.status_code!=200:
            print(key+" "+inty+" bad request")
            break
        for i in r.json().get("data"):
            if i["name"] in listInExp:
                print(i["name"]+" assigned to different drilling processes")
            else:
                listInExp.append(i["name"])
                
notDrilled=[element for idx,element in enumerate(registered["Skeleton Element"]) if condition(element,listInExp)]
print(format(len(notDrilled))+ " skel element not drilled")
Drilled=[element for idx,element in enumerate(registered["Skeleton Element"]) if not condition(element,listInExp)]
print(format(len(Drilled))+ " skel element drilled")


        
            




779 skel element not drilled
93 skel element drilled


In [99]:
listInExp=[]
###check if all extracts as sampleOUT for drilling processes
for key in experiments.keys():
    if not key.startswith("Drilling"):
        continue
    for inty in experiments[key]["sampleOUT"]:
        r=requests.get(url+"/experiments/sections/"+format(experiments[key]["sampleOUT"][inty])+"/samples",headers=headers1)
        if r.status_code!=200:
            print(key+" "+inty+" bad request")
            break
        for i in r.json().get("data"):
            if i["name"] in listInExp:
                print(i["name"]+" assigned to different drilling processes")
            else:
                listInExp.append(i["name"])
                
notDrilled=[element for idx,element in enumerate(registered["Extract"]) if condition(element,listInExp)]
print(format(len(notDrilled))+ " extracts not assigned to drilling")
print(notDrilled)
Drilled=[element for idx,element in enumerate(registered["Extract"]) if not condition(element,listInExp)]
print(format(len(Drilled))+ " extracts assigned to drilling")

4 extracts not assigned to drilling
['Blank.2021-07-01', 'Blank.2021-07-08', 'Blank.2021-07-14', 'Blank.2021-05-25']
213 extracts assigned to drilling


### For extraction

In [100]:
listInExp=[]
###check if all extracts as sampleIN for extraction
for key in experiments.keys():
    if not key.startswith("Extraction"):
        continue
    for inty in experiments[key]["sampleIN"]:
        r=requests.get(url+"/experiments/sections/"+format(experiments[key]["sampleIN"][inty])+"/samples",headers=headers1)
        if r.status_code!=200:
            print(key+" "+inty+" bad request")
            break
        for i in r.json().get("data"):
            if i["name"] in listInExp:
                print(i["name"]+" assigned to different extraction  processes")
            else:
                listInExp.append(i["name"])
                
notExtractedIN=[element for idx,element in enumerate(registered["Extract"]) if condition(element,listInExp)]
print(format(len(notExtractedIN))+ " pulverized pieces not assigned to extraction as IN")
ExtractedIN=[element for idx,element in enumerate(registered["Extract"]) if not condition(element,listInExp)]
print(format(len(ExtractedIN))+ " pulverized pieces assigned to extraction as IN")

#check if all extracted pieces has been drilled
ExtractedNotDrilled=[element for idx,element in enumerate(ExtractedIN) if condition(element,Drilled)]
print(format(len(ExtractedNotDrilled))+ " pulverized pieces assigned to extraction but not Drilling")
print(ExtractedNotDrilled)

144 pulverized pieces not assigned to extraction as IN
73 pulverized pieces assigned to extraction as IN
4 pulverized pieces assigned to extraction but not Drilling
['Blank.2021-07-01', 'Blank.2021-07-08', 'Blank.2021-07-14', 'Blank.2021-05-25']


In [101]:
listInExp=[]
#check if all extracts (ARXXXX.Y.ZZ) assigned to Extraction as sampleOUT
for key in experiments.keys():
    if not key.startswith("Extraction"):
        continue
    for inty in experiments[key]["sampleOUT"]:
        r=requests.get(url+"/experiments/sections/"+format(experiments[key]["sampleOUT"][inty])+"/samples",headers=headers1)
        if r.status_code!=200:
            print(key+" "+inty+" bad request")
            break
        for i in r.json().get("data"):
            if i["name"] in listInExp:
                print(i["name"]+" assigned to different extraction  processes")
            else:
                listInExp.append(i["name"])
                
notExtractedOUT=[element for idx,element in enumerate(registered["Extract"]) if condition(element,listInExp)]
print(format(len(notExtractedOUT))+ " pulverized pieces not assigned to extraction as OUT")
ExtractedOUT=[element for idx,element in enumerate(registered["Extract"]) if not condition(element,listInExp)]
print(format(len(ExtractedOUT))+ " pulverized pieces assigned to extraction as OUT")

#check if all extracted as IN are as OUT
ExtractedOUTNotIN=[element for idx,element in enumerate(ExtractedOUT) if condition(element,ExtractedIN)]
print(format(len(ExtractedOUTNotIN))+ " pulverized pieces assigned to extraction as OUT but not as IN")

#check if all extracted as OUT are as IN
ExtractedINNotOUT=[element for idx,element in enumerate(ExtractedIN) if condition(element,ExtractedOUT)]
print(format(len(ExtractedINNotOUT))+ " pulverized pieces assigned to extraction as IN but not as OUT")




144 pulverized pieces not assigned to extraction as OUT
73 pulverized pieces assigned to extraction as OUT
0 pulverized pieces assigned to extraction as OUT but not as IN
0 pulverized pieces assigned to extraction as IN but not as OUT


### Library PREP!

In [102]:
listInExp=[]
###check if all extracts as sampleIN for library prep
for key in experiments.keys():
    if not key.startswith("Library Prep"):
        continue
    for inty in experiments[key]["sampleIN"]:
        r=requests.get(url+"/experiments/sections/"+format(experiments[key]["sampleIN"][inty])+"/samples",headers=headers1)
        if r.status_code!=200:
            print(key+" "+inty+" bad request")
            break
        for i in r.json().get("data"):
            if i["name"] in listInExp:
                print(i["name"]+" assigned to different Lib Prep processes")
            else:
                listInExp.append(i["name"])
                
notLibPrepIN=[element for idx,element in enumerate(registered["Extract"]) if condition(element,listInExp)]
print(format(len(notLibPrepIN))+ " pulverized pieces not assigned to Lib Prep as IN")
LibPrepIN=[element for idx,element in enumerate(registered["Extract"]) if not condition(element,listInExp)]
print(format(len(LibPrepIN))+ " pulverized pieces assigned to Lib Prep as IN")

#check if all extraction as OUT in extract are as IN in LibPrep
ExtractedOUTNotLipPrep=[element for idx,element in enumerate(ExtractedOUT) if condition(element,LibPrepIN)]
print(format(len(ExtractedOUTNotLipPrep))+ " pulverized pieces assigned to extraction as OUT but not as IN in LibPrep")



144 pulverized pieces not assigned to Lib Prep as IN
73 pulverized pieces assigned to Lib Prep as IN
0 pulverized pieces assigned to extraction as OUT but not as IN in LibPrep


In [311]:
listInExp=[]
###check if all Non Indexed libraries as sampleOUT for library prep
for key in experiments.keys():
    if not key.startswith("Library Prep"):
        continue
    for inty in experiments[key]["sampleOUT"]:
        if not inty.startswith("UDG"):
            continue
        print(inty)
        r=requests.get(url+"/experiments/sections/"+format(experiments[key]["sampleOUT"][inty])+"/samples",headers=headers1)
        if r.status_code!=200:
            print(key+" "+inty+" bad request")
            break
        for i in r.json().get("data"):
            if i["name"] in listInExp:
                print(i["name"]+" assigned to different Lib Prep processes")
            else:
                listInExp.append(i["name"])
            
notNiLibPrepOUT=[element for idx,element in enumerate(registered["Non Indexed Library"]) if condition(element,listInExp)]
print(format(len(notNiLibPrepOUT))+ " non indexed libraries not assigned to Lib Prep as OUT")
print(notNiLibPrepOUT)
niLibPrepOUT=[element for idx,element in enumerate(registered["Non Indexed Library"]) if not condition(element,listInExp)]
print(format(len(niLibPrepOUT))+ "non indexed libaries assigned to Lib Prep as OUT")

#check if all extraction as IN in extract correspond to non indexed lib as OUT in LibPrep
LibPrepINcorres=[s + "00" for s in LibPrepIN]
niLibPrepOUTNotLibPrepIN=[element for idx,element in enumerate(niLibPrepOUT) if condition(element,LibPrepINcorres)]
print(format(len(niLibPrepOUTNotLibPrepIN))+ " non indexed libraries with no correspondance in IN")
print(niLibPrepOUTNotLibPrepIN)
#check if all indexed lib as OUT correspond to non indexed libs as IN in extract in LibPrep
niLibPrepINNotLibPrepOUT=[element for idx,element in enumerate(LibPrepINcorres) if  condition(element,niLibPrepOUT)]
print(format(len(niLibPrepINNotLibPrepOUT))+ " indexed libraries with no correspondance for non indexed libraries")
print(niLibPrepINNotLibPrepOUT)




UDG-treated extracts
0 non indexed libraries not assigned to Lib Prep as OUT
[]
73non indexed libaries assigned to Lib Prep as OUT
4 non indexed libraries with no correspondance in IN
['BL2505202100', 'BL0107202100', 'BL0807202100', 'BL1407202100']
4 indexed libraries with no correspondance for non indexed libraries
['Blank.2021-07-0100', 'Blank.2021-07-0800', 'Blank.2021-07-1400', 'Blank.2021-05-2500']


In [108]:
listInExp=[]
###check if all Indexed libraries as sampleOUT for library prep
for key in experiments.keys():
    if not key.startswith("Library Prep"):
        continue
    for inty in experiments[key]["sampleOUT"]:
        if not inty.startswith("Library"):
            continue
        r=requests.get(url+"/experiments/sections/"+format(experiments[key]["sampleOUT"][inty])+"/samples",headers=headers1)
        if r.status_code!=200:
            print(key+" "+inty+" bad request")
            break
        for i in r.json().get("data"):
            if i["name"] in listInExp:
                print(i["name"]+" assigned to different Lib Prep processes")
            else:
                listInExp.append(i["name"])
            
notLibPrepOUT=[element for idx,element in enumerate(registered["Indexed Library"]) if condition(element,listInExp)]
print(format(len(notLibPrepOUT))+ " indexed libraries not assigned to Lib Prep as OUT")
print(notLibPrepOUT)
LibPrepOUT=[element for idx,element in enumerate(registered["Indexed Library"]) if not condition(element,listInExp)]
print(format(len(LibPrepOUT))+ " indexed libaries assigned to Lib Prep as OUT")

#check if all extraction as IN in extract correspond to indexed lib as OUT in LibPrep
LibPrepINcorres=[s + "01" for s in LibPrepIN]
LibPrepOUTNotLibPrepIN=[element for idx,element in enumerate(LibPrepOUT) if condition(element,LibPrepINcorres)]
print(format(len(LibPrepOUTNotLibPrepIN))+ "  indexed libraries with no correspondance in IN")
print(LibPrepOUTNotLibPrepIN)
#check if all non indexed lib as OUT correspond to extraction as IN in extract in LibPrep
LibPrepINNotLibPrepOUT=[element for idx,element in enumerate(LibPrepINcorres) if  condition(element,LibPrepOUT)]
print(format(len(LibPrepINNotLibPrepOUT))+ " indexed libraries with no correspondance in IN")
print(LibPrepINNotLibPrepOUT)






0 indexed libraries not assigned to Lib Prep as OUT
[]
72 indexed libaries assigned to Lib Prep as OUT
4  indexed libraries with no correspondance in IN
['BL2505202101', 'BL0107202101', 'BL0807202101', 'BL1407202101']
5 indexed libraries with no correspondance in IN
['Blank.2021-07-0101', 'Blank.2021-07-0801', 'Blank.2021-07-1401', 'AR0026.1.0101', 'Blank.2021-05-2501']


## Storage

In [135]:
storageByID={}
r=requests.get(url+"/storageLayers",headers=headers1)
stoData=r.json().get("data")
for sto in stoData:
    storageByID[sto["storageLayerID"]]={"name":sto["name"],"parentID":sto["parentStorageLayerID"]}
    print(sto["name"])
def getParentSto(ID,stoDict):
    if stoDict[ID]["parentID"]==0:
        return(stoDict[ID]["name"])
    else:
        return(getParentSto(stoDict[ID]["parentID"],stoDict)+", "+stoDict[ID]["name"])
    
storage={}
for stoID in storageByID.keys():
    name=getParentSto(stoID,storageByID)
    storage[name]=stoID
    
print(storage)

Nico office
In Copenhagen
Tom Gilbert Freezer
bag A1 + A2
calculus extraction
petrous back-up
petrous extraction
pulp back-up
pulp extraction
root back-up
root extraction
C group sensitive, blue box, back-up
bag Mariano Del Papa calculus
Mariano Del Papa calculus extraction
already processed
Freezer n9
Miren Drawer 2
Blue Rack 1
Freezer 4
drawer 1
samplebox 1
drawer 5
extract box 1
UDG-SCR libraries no... 2
Unknown
Individual
Site
Sequencing
Hannes Freezer
bag A1 + A2
calculus extraction
petrous back-up
petrous extraction
pulp back-up
pulp extraction
root back-up
root extraction
C group sensitive, blue box, back-up
already processed
bag Mariano Del Papa calculus
Mariano Del Papa calculus extraction
bag B1 + B2
pulp back-up
pulp extraction
calculus extraction
petrous extraction
petrous back-up
root extraction
root back-up
C group sensitive, blue box, back-up
already processed
{'Nico office': 774657, 'In Copenhagen': 774658, 'Tom Gilbert Freezer': 774659, 'Tom Gilbert Freezer, bag A1 + A

### Assign Individual to Individual artefactual Storage Location

In [114]:
for StoType in ["Site","Individual"]:
    IDsto=format(storage[StoType])
    print(IDsto)
    for key,id in registered[StoType].items():
        r=requests.post(url+"/samples/moveToLayer/"+IDsto+"?sampleIDs="+id,headers=headers1,data={})
        if r.status_code != 204:
            print("error for "+key+" "+id)

print("finished")


774999
774998
finished


### assign Skeleton Element to some locations
- those for which batch is A1, A2, B1, B2 or C are assigned to Copenhagen
- those for which batch is Sequenced are assigned to Unkown
- all the others are assigned to Nico office


In [111]:
print(table["1st Batch"].value_counts())
print(storage)
for item,name in table["RascovanLabID"].items():
    id=registered["Skeleton Element"][name]
    if table["1st Batch"][item] in ["A1","A2","B1","B2","C","Ready for DNA extraction"]:
        StoType="In Copenhagen"
    elif table["1st Batch"][item] in ["Sequenced"]:
        StoType="Unknown"
    elif format(table["1st Batch"][item]) == "nan":
        StoType="Nico office"
    else:
        print("not recognized condition "+format(table["1st Batch"][item]))
        break
    IDsto=format(storage[StoType])
    r=requests.post(url+"/samples/moveToLayer/"+IDsto+"?sampleIDs="+id,headers=headers1,data={})
    if r.status_code != 204:
            print("error for "+name+" ("+id+")")
    
print("finished")

B1                          40
A1                          35
Sequenced                   21
B2                          15
C                           11
Ready for DNA extraction    11
A2                           9
Name: 1st Batch, dtype: int64
{'Nico office': 774657, 'In Copenhagen': 774658, 'Tom Gilbert Freezer': 774659, 'Tom Gilbert Freezer, bag A1 + A2': 774671, 'Tom Gilbert Freezer, bag A1 + A2, calculus extraction': 774677, 'Tom Gilbert Freezer, bag A1 + A2, petrous back-up': 774678, 'Tom Gilbert Freezer, bag A1 + A2, petrous extraction': 774679, 'Tom Gilbert Freezer, bag A1 + A2, pulp back-up': 774680, 'Tom Gilbert Freezer, bag A1 + A2, pulp extraction': 774681, 'Tom Gilbert Freezer, bag A1 + A2, root back-up': 774682, 'Tom Gilbert Freezer, bag A1 + A2, root extraction': 774683, 'Tom Gilbert Freezer, bag A1 + A2, C group sensitive, blue box, back-up': 774684, 'Tom Gilbert Freezer, bag Mariano Del Papa calculus': 775907, 'Tom Gilbert Freezer, bag Mariano Del Papa calculus, Mari

In [138]:

for index,name in extractTable[ExeDict['Name']].items():
    idEx=registered["Extract"][name]
    r=requests.get(url+"/samples/get?sampleID="+idEx,headers=headers2)
    if r.status_code != 200:
            print("error GET for "+name+" ("+idEx+")")
            print(r.raise_for_status())
    storedIn=r.json()[0]["storageLayerID"]
    if storedIn != 0:
        #print(name+" already in some storage")
        continue
    else:
        freezer=extractTable["Freezer"][index]
        if format(freezer) in ["To be spotted","nan"] :
            freezer="Unknown"
        freezer=freezer.replace("Mariano Del Papa calculus to extract","Mariano Del Papa calculus extraction")
        freezer=freezer.replace("A1+A2","A1 + A2")
        freezer=freezer.replace("B1+B2","B1 + B2")
        freezer=freezer.replace("sub-bag B1+B2 ","")
        freezer=freezer.replace("sub-bag B1 + B2 ","")
        freezer=freezer.replace("sub-bag ","")
        freezer=freezer.replace("pulps","pulp")
        freezer=freezer.replace("roots","root")
        freezer=freezer.replace(" for back-up"," back-up")
        freezer=freezer.replace(" for extraction"," extraction")
        freezer=freezer.replace(" to extract"," extraction")
        freezer=freezer.replace("freezer","Freezer")
        freezer=freezer.replace("Thomas","Tom")
        freezer=freezer.replace("Hannes'","Hannes")
        freezer=freezer.replace("Miren drawer","Miren Drawer 2")
        freezer=freezer.replace("blue rack","Blue Rack 1")
        freezer=freezer.replace(", front extraction clean room 159","")
        freezer=freezer.replace("bag C group sensitive, blue box (back-up)","bag A1 + A2, C group sensitive, blue box, back-up")
        if freezer not in storage:
            print(freezer+" not registsred in eLab")
            break
        IDsto=format(storage[freezer])
        r=requests.post(url+"/samples/moveToLayer/"+IDsto+"?sampleIDs="+idEx,headers=headers1,data={})
        if r.status_code != 204:
            print("error POST for "+name+" ("+idEx+")")
            print(r.raise_for_status())


print("finished")


### Check samples without storage

In [139]:
for type in registered:
    print(type)
    for name in registered[type]:
        idTY=registered[type][name]
        r=requests.get(url+"/samples/get?sampleID="+idTY,headers=headers2)
        if r.status_code != 200:
            print("error GET for "+name+" ("+idTY+")")
            print(r.raise_for_status())
        storedIn=r.json()[0]["storageLayerID"]
        if storedIn == 0:
            print(name+" NO storage")


Individual
Site
Skeleton Element
Extract
Blank.2021-07-01 NO storage
Blank.2021-07-08 NO storage
Blank.2021-07-14 NO storage
Indexed Library
Library pool
Non Indexed Library


## Some Play Around with the Data Base
### get ends points

In [315]:
def recursiveChildren(name,level):
    levelSeq=["Site","Individual","Skeleton Element",
              "Extract","Non Indexed Library","Indexed Library","Library Pool"]
    levelName=levelSeq[level]
    id=registered[levelName][name]
    r=requests.get(url+"/samples/"+id+"/children",headers=headers2)
    json=r.json()
    num=json.get("recordCount")
    print(''.join([char*level for char in "\t"])+"- "+
          levelName+" \""+name+"\": "+format(num)+" "+levelSeq[level+1])
    if num > 0:
        level=level+1
        data=json.get("data")
        ch=-1
        while ch < (num-1):
            ch+=1
            nameCh=data[ch]["name"]
            recursiveChildren(nameCh,level)
    else:
        level-=level
    if level<0:
        return(None)
        


In [316]:
Site="El Alto"
id=registered["Site"][Site]
recursiveChildren(Site,0)

print("Other Test")
Indi="AR0025"
id=registered["Individual"][Indi]
recursiveChildren(Indi,1)

- Site "El Alto": 1 Individual
	- Individual "AR0455": 1 Skeleton Element
		- Skeleton Element "AR0455.1": 0 Extract
Other Test
	- Individual "AR0025": 1 Skeleton Element
		- Skeleton Element "AR0025.1": 2 Extract
			- Extract "AR0025.1.01": 1 Non Indexed Library
				- Non Indexed Library "AR0025.1.0100": 1 Indexed Library
					- Indexed Library "AR0025.1.0101": 0 Library Pool
			- Extract "AR0025.1.02": 0 Non Indexed Library


In [317]:
def recursiveParent(name,level):
    levelSeq=['Library Pool', 'Indexed Library', 'Non Indexed Library', 'Extract',
              'Skeleton Element', 'Individual', 'Site']
    levelName=levelSeq[level]
    id=registered[levelName][name]
    r=requests.get(url+"/samples/"+id+"/parent",headers=headers2)
    json=r.json()
    nameP=json.get("name")
    print(name+" ("+levelName+")\n|\nV")
    if nameP is None:
        print("Endpoint")
        return(None)
    else:
        level=level+1
        recursiveParent(nameP,level)


In [318]:
recursiveParent("AR0019.1.0101",1)


AR0019.1.0101 (Indexed Library)
|
V
AR0019.1.0100 (Non Indexed Library)
|
V
AR0019.1.01 (Extract)
|
V
AR0019.1 (Skeleton Element)
|
V
AR0019 (Individual)
|
V
Valle inferior río Chubut (Site)
|
V
Endpoint
