In [41]:
# Module imports
import datetime
import os
import numpy as np
import pandas as pd

def fix_sample_number(x):
    """Normalise a sample number to a prefix letter followed by digits.

    Purely numeric values (ints, whole floats, digit strings) become
    ``F`` plus a zero-padded four digit number, e.g. ``12 -> 'F0012'``.
    Anything else is treated as a string ID: surrounding whitespace is
    removed and the first letter is upper-cased (``' w0003' -> 'W0003'``).

    Parameters
    ----------
    x : int, float or str
        Raw sample number as read from a spreadsheet cell.

    Returns
    -------
    str or float
        The normalised ID, or ``x`` itself when ``x`` is a float NaN
        (missing values are passed through untouched).

    Raises
    ------
    AssertionError
        If the normalised ID is empty or does not start with one of
        ``A``, ``F`` or ``W``.
    """
    if isinstance(x, float) and np.isnan(x):
        return x  # leave NaN's alone
    try:
        sn = 'F{:04d}'.format(int(x))
    except (TypeError, ValueError):
        # Not numeric (or not convertible at all): treat as a string ID.
        # strip() so IDs padded with whitespace in the spreadsheet still validate.
        sn = str(x).strip().capitalize()
    # sn[:1] rather than sn[0] so an empty ID is reported as a bad ID instead
    # of an IndexError; raised explicitly so the check survives ``python -O``.
    if sn[:1] not in ('A', 'F', 'W'):
        raise AssertionError("Bad ID %s" % sn)
    return sn

def now():
    """Return the current local time as a ``DD/MM/YY HH:MM:SS`` string."""
    stamp_format = "%d/%m/%y %H:%M:%S"
    current = datetime.datetime.now()
    return current.strftime(stamp_format)

In [36]:
# Paths of the four exported form spreadsheets.
ftir_csv = 'MADE/FTIR Analysis Data Recording Form.csv'
catalog_csv = 'MADE/Sample Cataloguing Form.csv'
reagent_csv = 'MADE/Reagent Outcomes.csv'
hr_csv = 'MADE/MADE MAST Intervention Questionnaire.csv'

# Columns parsed as dates in every sheet.
date_cols = ['Timestamp']

# Read all four sheets with identical parser settings.
df_ftir, df_catalog, df_reagent, df_hr = [
    pd.read_csv(csv_path, engine="python", parse_dates=date_cols)
    for csv_path in (ftir_csv, catalog_csv, reagent_csv, hr_csv)
]

# Give the sample-number column the same name ('Sample Number') in every sheet.
df_reagent = df_reagent.rename(columns={'Sample Code': 'Sample Number'})
df_hr = df_hr.rename(columns={'Sample Number:': 'Sample Number'})

# Run every sheet's sample numbers through fix_sample_number.
for frame in (df_ftir, df_catalog, df_reagent, df_hr):
    frame['Sample Number'] = frame['Sample Number'].apply(fix_sample_number)

In [43]:
# Check there are no sample numbers in any of the other spreadsheets that
# aren't in the catalog sheet.
# Missing (NaN) sample numbers are dropped first: NaN never compares equal to
# itself, so set arithmetic on it is unreliable, and a blank cell is not an
# orphaned sample.
csn = set(df_catalog['Sample Number'].dropna().unique())

ftir_unique = set(df_ftir['Sample Number'].dropna().unique()).difference(csn)
if ftir_unique:
    print("Orphaned FTIR sample numbers: %s" % ftir_unique)

reagent_unique = set(df_reagent['Sample Number'].dropna().unique()).difference(csn)
if reagent_unique:
    print("Orphaned Reagent Test sample numbers: %s" % reagent_unique)

hr_unique = set(df_hr['Sample Number'].dropna().unique()).difference(csn)
if hr_unique:
    print("Orphaned HR sample numbers: %s" % hr_unique)

# Combine into a fresh set so ftir_unique keeps meaning "FTIR orphans only".
orphaned = ftir_unique | reagent_unique | hr_unique
if orphaned:
    print("Please fix orphaned samples")
    # quit() is a helper injected by `site` for interactive shells (absent
    # under `python -S`, and it shuts the kernel down in a notebook).
    # Raising SystemExit stops execution in both scripts and notebooks, and
    # the non-zero status marks the run as failed.
    raise SystemExit(1)

Orphaned FTIR sample numbers: set(['F0547'])
Orphaned HR sample numbers: set(['F0000'])
Please fix orphaned samples


Add FTIR, Reagent and MAST results to a single spreadsheet
From FTIR:
'Sample Sold As'
'Sample Form'
'Has the Service User or a close friend tried this batch?'

'Substance detected'
'Hit Confidence'
'Compound detected'
'Hit Confidence'
'Is anything detected after subtraction analysis?'
'Compound detected (Subtraction)'
'Hit Confidence'
'Analysis required'
'Substance(s) detected'
'"Strength" of powdered substance'
'Does the substance detected match the substance that was advertised?'
'Note for harm reduction worker'

From Reagent
'Sample Sold As'
'Sample Form'

'Froehde'
'Froehde possible substances'
'Liebermann'
'Liebermann possible substances'
'Marquis'
'Marquis possible substances'
'Mandelin'
'Mandelin possible substances'
'Ehrlich'
'Likely drug or class'
'Analysis required'
'Substance(s) detected'
'"Strength" of powdered substance'
'Pill Strength in mg (if known)'
'Does the substance detected match the substance that was advertised?'
'Note for harm reduction worker'

MAST
'You submitted a substance for analysis. What were you told it was when you got it?'
'Had you already tried this substance before getting it tested?'
