In [1]:
import pandas as pd
import streamlit as st
import numpy as np
import plotly.express as px
import re

In [2]:
df = pd.read_excel("Raw Data/Copy of NeoBANK Linked Sample.xlsx", sheet_name='Metadata')

In [3]:
df.columns

Index(['Subject ID', 'Sample Type_#', 'CGA', 'DOL ', 'Current Weight',
       'Current Height', 'Current HC', 'Scavenged/Fresh?', 'MBM/DMB?',
       'HMF Y/N?', 'TPN Y/N?', 'Iron Y/N? ', 'Iron Date & time',
       'Duration\n(min)', 'Linked? ', 'feeding time ',
       'Collection date/aliquot time for all samples ', '# Aliquots ',
       'Additional Comments', 'Milk Prep Room Expiration Date & Time '],
      dtype='object')

In [4]:
df = df.rename(columns={
    "HMF Y/N?": "HMF",
    "TPN Y/N?": "TPN",
    "Linked?": "Linked",
    "MBM/DMB?": "Type of Milk",
    "Sample Type_#": "sample_unique_id",
    'Duration\n(min)': "Feeding Duration"
})

In [5]:
df.columns = df.columns.str.strip()
df = df.rename(columns={'Iron Y/N?': 'Iron'})

In [6]:
df.columns

Index(['Subject ID', 'sample_unique_id', 'CGA', 'DOL', 'Current Weight',
       'Current Height', 'Current HC', 'Scavenged/Fresh?', 'Type of Milk',
       'HMF', 'TPN', 'Iron', 'Iron Date & time', 'Feeding Duration', 'Linked?',
       'feeding time', 'Collection date/aliquot time for all samples',
       '# Aliquots', 'Additional Comments',
       'Milk Prep Room Expiration Date & Time'],
      dtype='object')

### Clean Columns

In [7]:
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)


In [8]:
df.Iron.value_counts()

Iron
Y    54
N    26
Name: count, dtype: int64

In [9]:
df["Type of Milk"].value_counts()

Type of Milk
MBM          108
DBM           23
MBM + DBM      6
DBM+MBM        4
Name: count, dtype: int64

In [10]:
df['Type of Milk'] = df['Type of Milk'].replace({'MBM': 'MOM', 'MBM + DBM': 'MOM+DBM', 'DBM+MBM': 'MOM+DBM'})

In [11]:
df["Type of Milk"].value_counts()

Type of Milk
MOM        108
DBM         23
MOM+DBM     10
Name: count, dtype: int64

In [12]:
df['Scavenged/Fresh?'] = df['Scavenged/Fresh?'].str.strip().str.capitalize()

In [13]:
df["Scavenged/Fresh?"].value_counts()

Scavenged/Fresh?
Scavenged     139
Scaveneged      2
Name: count, dtype: int64

In [14]:
df['Scavenged/Fresh?'] = df['Scavenged/Fresh?'].replace({'Scaveneged': 'Scavenged'})

In [15]:
df["Scavenged/Fresh?"].value_counts()

Scavenged/Fresh?
Scavenged    141
Name: count, dtype: int64

In [16]:
df.HMF.value_counts()

HMF
Y                 127
Nutramigen          4
Y + Nutramigen      4
N                   2
N+Nutramigen        2
Y+Nutramigen        1
Name: count, dtype: int64

In [17]:
df['HMF'] = df['HMF'].replace({'Y + Nutramigen': 'Y+Nutramigen'})

In [18]:
df.HMF.value_counts()

HMF
Y               127
Y+Nutramigen      5
Nutramigen        4
N                 2
N+Nutramigen      2
Name: count, dtype: int64

In [19]:
df['Iron'] = df['Iron'].replace(r'^\s*Y\s*$', 'Y', regex=True)

In [20]:
df["Iron"].value_counts()

Iron
Y    54
N    26
Name: count, dtype: int64

In [21]:
df["Additional Comments"].value_counts()

Additional Comments
Scavenged Feeding Tube              70
Residual from Milk Prep Room        68
Scavenged Bottle - Residual Feed     2
Scavenged Feeding Syringe            1
Name: count, dtype: int64

In [22]:
df['Sample Source'] = df['Additional Comments'].replace({'Residual from Milk Prep Room': 'Prepped in Milk Room', 'Scavenged Feeding Tube': 'Scavenged', 'Scavenged Bottle - Residual Feed': 'Scavenged', 'Scavenged Feeding Syringe': 'Scavenged'})

In [23]:
df["Sample Source"].value_counts()

Sample Source
Scavenged               73
Prepped in Milk Room    68
Name: count, dtype: int64

In [24]:
df = df.drop(columns=['Additional Comments'])

In [25]:
def extract_numeric_aliquots(value):
    match = re.match(r'^\d+', str(value))
    return int(match.group()) if match else None

df['Aliquots_num'] = df['# Aliquots'].apply(extract_numeric_aliquots)

In [26]:
df = df.drop(columns=['# Aliquots'])

In [27]:
df

Unnamed: 0,Subject ID,sample_unique_id,CGA,DOL,Current Weight,Current Height,Current HC,Scavenged/Fresh?,Type of Milk,HMF,TPN,Iron,Iron Date & time,Feeding Duration,Linked?,feeding time,Collection date/aliquot time for all samples,Milk Prep Room Expiration Date & Time,Sample Source,Aliquots_num
0,NB00237,NB00237_M_10,35.0,74.0,2440.0,44.0,29.0,Scavenged,MOM,Y,N,,,,Y,Prepped \n4/22/2025 PM,4/23/2025\nDOL 21 11:13,,Prepped in Milk Room,11
1,NB00237,NB00237_M_8,34.5,72.0,2350.0,43.5,28.5,Scavenged,MOM,Y,N,,,,Y,Prepped 4/20/25 PM,2025-04-21 10:13:00,,Prepped in Milk Room,6
2,NB00237,NB00237_M_17,36.6,87.0,3010.0,46.1,30.0,Scavenged,MOM,Y,N,Y,2025-05-06 20:57:00,unknown,Y,2025-05-06 08:56:00,2025-05-06 13:47:00,Unknown,Scavenged,2
3,NB00237,NB00237_M_19,37.1,89.0,3110.0,46.3,30.5,Scavenged,MOM,Y,N,Y,2025-05-08 20:43:00,45 min,Y,2025-05-08 12:03:00,2025-05-08 14:07:00,2025-05-08 16:49:00,Scavenged,3
4,NB00237,NB00237_M_20,37.5,93.0,3380.0,46.3,30.5,Scavenged,MOM,Y,N,,,,Y,Prepped 5/11/25 PM,2025-05-12 10:58:00,,Prepped in Milk Room,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,NB00469,NB00469_M_2,34.1,25.0,2040.0,42.5,30.3,Scavenged,MOM,Y+Nutramigen,N,Y,2025-05-09 21:09:00,unknown,Y,2025-05-09 08:30:00,2025-05-09 12:12:00,2025-05-09 16:06:00,Scavenged,2
137,NB00486,NB00486_M_1,32.1,9.0,1055.0,36.0,26.7,Scavenged,DBM,Y,,,,,Y,Prepped 5/13/25 PM,2025-05-14 10:45:00,,Prepped in Milk Room,8
138,NB00486,NB00486_M_2,32.1,9.0,1055.0,36.0,26.7,Scavenged,DBM,Y,N,N,,unknown,Y,5/14/2025 done @ 08:11,2025-05-14 10:45:00,2025-05-14 17:37:00,Scavenged,1
139,NB00487,NB00487_M_1,34.2,7.0,1940.0,44.3,29.0,Scavenged,MOM+DBM,Y,N,,,,Y,Prepped 5/13/25 PM,2025-05-14 11:00:00,,Prepped in Milk Room,7


In [28]:
df.to_excel("Cleaned Data/cleaned_linkedmeta_updated.xlsx", index=False)

In [29]:
subject_counts = df['Subject ID'].value_counts()
num_subjects_more_than_3 = (subject_counts > 3).sum()
print(num_subjects_more_than_3)

14


## Area Counts - LINKED

In [30]:
AC = pd.read_excel("Raw Data/Linked AC.xlsx")
AC

Unnamed: 0,sample_unique_id,237_1,238_1,354_1,359_1,360_1,378_1,380_1,393_1,398_1,...,423D_2,423M_2,438_2_M2,456_2,467_2,468_2,469_2,486_2,487_2,438_2_M5
0,2'FL,3.263779,2.595116,0.033812,0.046632,0.046432,6.447691,0.059855,4.269231,3.216195,...,1.828288,4.457615,2.051177,5.080108,2.227513,3.54932,4.362305,1.91352,2.561377,2.369491
1,DFLAC,0.245247,0.661277,0.008166,0.014783,0.015139,0.433841,0.035982,0.238464,0.31462,...,0.149705,0.170626,0.259814,0.32666,0.182753,0.607791,0.315138,0.260761,0.198488,0.180661
2,3'SL,0.246138,0.358765,0.245506,0.243336,0.138026,0.187513,0.39382,0.209507,0.174076,...,0.127119,0.126112,0.146599,0.164332,0.150403,0.225837,0.132063,0.147134,0.193828,0.179458
3,6'SL,0.245952,0.854194,1.238738,0.918798,0.890766,0.436425,0.666566,0.700846,0.235143,...,0.181737,0.837421,0.225997,0.553977,0.147072,0.825833,0.886914,0.196056,0.355669,0.142262
4,LNT,0.21509,1.49763,0.996516,2.487438,1.777826,1.187034,1.669319,1.035175,0.455073,...,0.298689,0.91261,0.538842,0.512072,0.336612,1.492627,0.741349,0.607328,0.731639,0.365443
5,LNnt,0.108374,0.206503,0.049168,0.128543,0.108809,0.505145,0.526597,0.18466,0.186503,...,0.053354,0.188941,0.071368,0.65906,0.06326,0.327358,0.181772,0.064394,0.083748,0.074072
6,LNFPI,0.35715,0.742362,0.10675,0.184628,0.10845,2.622792,0.120719,1.136789,0.435744,...,0.18907,1.532227,0.287791,0.537471,0.243527,0.930058,1.462318,0.25061,0.686389,0.254785
7,LNFP II,0.221063,0.538425,0.714393,1.148745,0.668599,0.241565,1.285885,0.474239,0.393896,...,0.189805,0.246141,0.434911,0.399113,0.244543,0.799113,0.275978,0.3281,0.271961,0.223444
8,LNFPIII,0.018446,0.02809,0.015494,0.041919,0.023067,0.075603,0.029008,0.04586,0.012726,...,0.009894,0.023196,0.008101,0.010085,0.007293,0.03505,0.028002,0.010156,0.004596,0.00744
9,LSTc,0.192892,0.41329,0.252557,0.287947,0.210059,0.512886,0.527577,0.274792,0.188848,...,0.130409,0.437021,0.065033,0.420167,0.056001,0.498346,0.408036,0.071311,0.12511,0.060991


In [31]:
AC_long = AC.set_index('sample_unique_id').transpose().reset_index()
AC_long = AC_long.rename(columns={'index': 'Lab_ID_full'})
AC_long

sample_unique_id,Lab_ID_full,2'FL,DFLAC,3'SL,6'SL,LNT,LNnt,LNFPI,LNFP II,LNFPIII,LSTc,DFLNT,DSLNT,DFLNH,FDSLNH,DSLNH
0,237_1,3.263779,0.245247,0.246138,0.245952,0.21509,0.108374,0.35715,0.221063,0.018446,0.192892,0.325921,0.042409,0.076432,0.057754,0.055316
1,238_1,2.595116,0.661277,0.358765,0.854194,1.49763,0.206503,0.742362,0.538425,0.02809,0.41329,1.241608,0.243189,0.050449,0.029639,0.216274
2,354_1,0.033812,0.008166,0.245506,1.238738,0.996516,0.049168,0.10675,0.714393,0.015494,0.252557,0.147197,0.172833,0.184163,0.411469,0.399198
3,359_1,0.046632,0.014783,0.243336,0.918798,2.487438,0.128543,0.184628,1.148745,0.041919,0.287947,0.14717,0.203755,0.105683,0.298478,0.191455
4,360_1,0.046432,0.015139,0.138026,0.890766,1.777826,0.108809,0.10845,0.668599,0.023067,0.210059,0.081223,0.114285,0.161863,0.433718,0.252726
5,378_1,6.447691,0.433841,0.187513,0.436425,1.187034,0.505145,2.622792,0.241565,0.075603,0.512886,1.446029,0.195917,0.058485,0.090928,0.098906
6,380_1,0.059855,0.035982,0.39382,0.666566,1.669319,0.526597,0.120719,1.285885,0.029008,0.527577,0.308882,0.208685,0.142992,0.194804,0.289149
7,393_1,4.269231,0.238464,0.209507,0.700846,1.035175,0.18466,1.136789,0.474239,0.04586,0.274792,0.678592,0.249529,0.050855,0.048249,0.150707
8,398_1,3.216195,0.31462,0.174076,0.235143,0.455073,0.186503,0.435744,0.393896,0.012726,0.188848,0.541754,0.096023,0.073633,0.05112,0.048748
9,399_1,2.27657,0.255627,0.144714,0.288106,0.699909,0.080879,0.343313,0.396994,0.013983,0.076607,0.460929,0.087741,0.075785,0.067164,0.057831


In [32]:
AC_long = AC_long.rename(columns={
    "2'FL": "2FL",
    "3'SL": "3SL",
    "6'SL": "6SL",
})

In [33]:
AC_long.columns

Index(['Lab_ID_full', '2FL', 'DFLAC', '3SL', '6SL', 'LNT', 'LNnt', 'LNFPI',
       'LNFP II', 'LNFPIII', 'LSTc', 'DFLNT', 'DSLNT', 'DFLNH', 'FDSLNH',
       'DSLNH'],
      dtype='object', name='sample_unique_id')

In [38]:
AC_long.to_excel("Raw Data/AC_long.xlsx", index=False)

In [39]:
AC_long

sample_unique_id,Lab_ID_full,2FL,DFLAC,3SL,6SL,LNT,LNnt,LNFPI,LNFP II,LNFPIII,LSTc,DFLNT,DSLNT,DFLNH,FDSLNH,DSLNH
0,237_1,3.263779,0.245247,0.246138,0.245952,0.21509,0.108374,0.35715,0.221063,0.018446,0.192892,0.325921,0.042409,0.076432,0.057754,0.055316
1,238_1,2.595116,0.661277,0.358765,0.854194,1.49763,0.206503,0.742362,0.538425,0.02809,0.41329,1.241608,0.243189,0.050449,0.029639,0.216274
2,354_1,0.033812,0.008166,0.245506,1.238738,0.996516,0.049168,0.10675,0.714393,0.015494,0.252557,0.147197,0.172833,0.184163,0.411469,0.399198
3,359_1,0.046632,0.014783,0.243336,0.918798,2.487438,0.128543,0.184628,1.148745,0.041919,0.287947,0.14717,0.203755,0.105683,0.298478,0.191455
4,360_1,0.046432,0.015139,0.138026,0.890766,1.777826,0.108809,0.10845,0.668599,0.023067,0.210059,0.081223,0.114285,0.161863,0.433718,0.252726
5,378_1,6.447691,0.433841,0.187513,0.436425,1.187034,0.505145,2.622792,0.241565,0.075603,0.512886,1.446029,0.195917,0.058485,0.090928,0.098906
6,380_1,0.059855,0.035982,0.39382,0.666566,1.669319,0.526597,0.120719,1.285885,0.029008,0.527577,0.308882,0.208685,0.142992,0.194804,0.289149
7,393_1,4.269231,0.238464,0.209507,0.700846,1.035175,0.18466,1.136789,0.474239,0.04586,0.274792,0.678592,0.249529,0.050855,0.048249,0.150707
8,398_1,3.216195,0.31462,0.174076,0.235143,0.455073,0.186503,0.435744,0.393896,0.012726,0.188848,0.541754,0.096023,0.073633,0.05112,0.048748
9,399_1,2.27657,0.255627,0.144714,0.288106,0.699909,0.080879,0.343313,0.396994,0.013983,0.076607,0.460929,0.087741,0.075785,0.067164,0.057831


In [41]:
AC_volumes = pd.read_excel("Raw Data/Linked AC.xlsx", sheet_name="volumes")

In [42]:
AC_volumes['Lab_ID_full'] = AC_volumes['Lab ID'].astype(str) + AC_volumes['Unnamed: 4'].astype(str)

In [43]:
AC_volumes

Unnamed: 0.1,Unnamed: 0,Subject ID,Prepped,Lab ID,Unnamed: 4,Lab_ID_full
0,1.0,NB00237,M_2,237,_1,237_1
1,2.0,NB00238,M_2,238,_1,238_1
2,3.0,NB00354,M_3,354,_1,354_1
3,4.0,NB00359,M_5,359,_1,359_1
4,5.0,NB00360,M_4,360,_1,360_1
5,6.0,NB00378,M_1,378,_1,378_1
6,7.0,NB00380,M_1,380,_1,380_1
7,8.0,NB00393,M_3,393,_1,393_1
8,9.0,NB00398,M_1,398,_1,398_1
9,10.0,NB00399,M_1,399,_1,399_1


In [44]:
AC_volumes['sample_unique_id'] = AC_volumes['Subject ID'].astype(str) + '_' + AC_volumes['Prepped'].astype(str)

In [45]:
AC_volumes

Unnamed: 0.1,Unnamed: 0,Subject ID,Prepped,Lab ID,Unnamed: 4,Lab_ID_full,sample_unique_id
0,1.0,NB00237,M_2,237,_1,237_1,NB00237_M_2
1,2.0,NB00238,M_2,238,_1,238_1,NB00238_M_2
2,3.0,NB00354,M_3,354,_1,354_1,NB00354_M_3
3,4.0,NB00359,M_5,359,_1,359_1,NB00359_M_5
4,5.0,NB00360,M_4,360,_1,360_1,NB00360_M_4
5,6.0,NB00378,M_1,378,_1,378_1,NB00378_M_1
6,7.0,NB00380,M_1,380,_1,380_1,NB00380_M_1
7,8.0,NB00393,M_3,393,_1,393_1,NB00393_M_3
8,9.0,NB00398,M_1,398,_1,398_1,NB00398_M_1
9,10.0,NB00399,M_1,399,_1,399_1,NB00399_M_1


In [46]:
AC_volumes = AC_volumes[['Lab_ID_full', 'sample_unique_id']]

In [None]:
AC_long = AC_long.merge(AC_volumes, on='Lab_ID_full', how='left')

Unnamed: 0,Lab_ID_full,2FL,DFLAC,3SL,6SL,LNT,LNnt,LNFPI,LNFP II,LNFPIII,LSTc,DFLNT,DSLNT,DFLNH,FDSLNH,DSLNH,sample_unique_id
0,237_1,3.263779,0.245247,0.246138,0.245952,0.21509,0.108374,0.35715,0.221063,0.018446,0.192892,0.325921,0.042409,0.076432,0.057754,0.055316,NB00237_M_2
1,238_1,2.595116,0.661277,0.358765,0.854194,1.49763,0.206503,0.742362,0.538425,0.02809,0.41329,1.241608,0.243189,0.050449,0.029639,0.216274,NB00238_M_2
2,354_1,0.033812,0.008166,0.245506,1.238738,0.996516,0.049168,0.10675,0.714393,0.015494,0.252557,0.147197,0.172833,0.184163,0.411469,0.399198,NB00354_M_3
3,359_1,0.046632,0.014783,0.243336,0.918798,2.487438,0.128543,0.184628,1.148745,0.041919,0.287947,0.14717,0.203755,0.105683,0.298478,0.191455,NB00359_M_5
4,360_1,0.046432,0.015139,0.138026,0.890766,1.777826,0.108809,0.10845,0.668599,0.023067,0.210059,0.081223,0.114285,0.161863,0.433718,0.252726,NB00360_M_4
5,378_1,6.447691,0.433841,0.187513,0.436425,1.187034,0.505145,2.622792,0.241565,0.075603,0.512886,1.446029,0.195917,0.058485,0.090928,0.098906,NB00378_M_1
6,380_1,0.059855,0.035982,0.39382,0.666566,1.669319,0.526597,0.120719,1.285885,0.029008,0.527577,0.308882,0.208685,0.142992,0.194804,0.289149,NB00380_M_1
7,393_1,4.269231,0.238464,0.209507,0.700846,1.035175,0.18466,1.136789,0.474239,0.04586,0.274792,0.678592,0.249529,0.050855,0.048249,0.150707,NB00393_M_3
8,398_1,3.216195,0.31462,0.174076,0.235143,0.455073,0.186503,0.435744,0.393896,0.012726,0.188848,0.541754,0.096023,0.073633,0.05112,0.048748,NB00398_M_1
9,399_1,2.27657,0.255627,0.144714,0.288106,0.699909,0.080879,0.343313,0.396994,0.013983,0.076607,0.460929,0.087741,0.075785,0.067164,0.057831,NB00399_M_1


In [50]:
cols = ['sample_unique_id'] + [col for col in AC_long.columns if col != 'sample_unique_id']
AC_long = AC_long[cols]
AC_long

Unnamed: 0,sample_unique_id,Lab_ID_full,2FL,DFLAC,3SL,6SL,LNT,LNnt,LNFPI,LNFP II,LNFPIII,LSTc,DFLNT,DSLNT,DFLNH,FDSLNH,DSLNH
0,NB00237_M_2,237_1,3.263779,0.245247,0.246138,0.245952,0.21509,0.108374,0.35715,0.221063,0.018446,0.192892,0.325921,0.042409,0.076432,0.057754,0.055316
1,NB00238_M_2,238_1,2.595116,0.661277,0.358765,0.854194,1.49763,0.206503,0.742362,0.538425,0.02809,0.41329,1.241608,0.243189,0.050449,0.029639,0.216274
2,NB00354_M_3,354_1,0.033812,0.008166,0.245506,1.238738,0.996516,0.049168,0.10675,0.714393,0.015494,0.252557,0.147197,0.172833,0.184163,0.411469,0.399198
3,NB00359_M_5,359_1,0.046632,0.014783,0.243336,0.918798,2.487438,0.128543,0.184628,1.148745,0.041919,0.287947,0.14717,0.203755,0.105683,0.298478,0.191455
4,NB00360_M_4,360_1,0.046432,0.015139,0.138026,0.890766,1.777826,0.108809,0.10845,0.668599,0.023067,0.210059,0.081223,0.114285,0.161863,0.433718,0.252726
5,NB00378_M_1,378_1,6.447691,0.433841,0.187513,0.436425,1.187034,0.505145,2.622792,0.241565,0.075603,0.512886,1.446029,0.195917,0.058485,0.090928,0.098906
6,NB00380_M_1,380_1,0.059855,0.035982,0.39382,0.666566,1.669319,0.526597,0.120719,1.285885,0.029008,0.527577,0.308882,0.208685,0.142992,0.194804,0.289149
7,NB00393_M_3,393_1,4.269231,0.238464,0.209507,0.700846,1.035175,0.18466,1.136789,0.474239,0.04586,0.274792,0.678592,0.249529,0.050855,0.048249,0.150707
8,NB00398_M_1,398_1,3.216195,0.31462,0.174076,0.235143,0.455073,0.186503,0.435744,0.393896,0.012726,0.188848,0.541754,0.096023,0.073633,0.05112,0.048748
9,NB00399_M_1,399_1,2.27657,0.255627,0.144714,0.288106,0.699909,0.080879,0.343313,0.396994,0.013983,0.076607,0.460929,0.087741,0.075785,0.067164,0.057831


In [51]:
AC_long = AC_long.drop(columns=['Lab_ID_full'])

### Merge AC to Metadata

In [52]:
AC_long

Unnamed: 0,sample_unique_id,2FL,DFLAC,3SL,6SL,LNT,LNnt,LNFPI,LNFP II,LNFPIII,LSTc,DFLNT,DSLNT,DFLNH,FDSLNH,DSLNH
0,NB00237_M_2,3.263779,0.245247,0.246138,0.245952,0.21509,0.108374,0.35715,0.221063,0.018446,0.192892,0.325921,0.042409,0.076432,0.057754,0.055316
1,NB00238_M_2,2.595116,0.661277,0.358765,0.854194,1.49763,0.206503,0.742362,0.538425,0.02809,0.41329,1.241608,0.243189,0.050449,0.029639,0.216274
2,NB00354_M_3,0.033812,0.008166,0.245506,1.238738,0.996516,0.049168,0.10675,0.714393,0.015494,0.252557,0.147197,0.172833,0.184163,0.411469,0.399198
3,NB00359_M_5,0.046632,0.014783,0.243336,0.918798,2.487438,0.128543,0.184628,1.148745,0.041919,0.287947,0.14717,0.203755,0.105683,0.298478,0.191455
4,NB00360_M_4,0.046432,0.015139,0.138026,0.890766,1.777826,0.108809,0.10845,0.668599,0.023067,0.210059,0.081223,0.114285,0.161863,0.433718,0.252726
5,NB00378_M_1,6.447691,0.433841,0.187513,0.436425,1.187034,0.505145,2.622792,0.241565,0.075603,0.512886,1.446029,0.195917,0.058485,0.090928,0.098906
6,NB00380_M_1,0.059855,0.035982,0.39382,0.666566,1.669319,0.526597,0.120719,1.285885,0.029008,0.527577,0.308882,0.208685,0.142992,0.194804,0.289149
7,NB00393_M_3,4.269231,0.238464,0.209507,0.700846,1.035175,0.18466,1.136789,0.474239,0.04586,0.274792,0.678592,0.249529,0.050855,0.048249,0.150707
8,NB00398_M_1,3.216195,0.31462,0.174076,0.235143,0.455073,0.186503,0.435744,0.393896,0.012726,0.188848,0.541754,0.096023,0.073633,0.05112,0.048748
9,NB00399_M_1,2.27657,0.255627,0.144714,0.288106,0.699909,0.080879,0.343313,0.396994,0.013983,0.076607,0.460929,0.087741,0.075785,0.067164,0.057831


In [53]:
df

Unnamed: 0,Subject ID,sample_unique_id,CGA,DOL,Current Weight,Current Height,Current HC,Scavenged/Fresh?,Type of Milk,HMF,TPN,Iron,Iron Date & time,Feeding Duration,Linked?,feeding time,Collection date/aliquot time for all samples,Milk Prep Room Expiration Date & Time,Sample Source,Aliquots_num
0,NB00237,NB00237_M_10,35.0,74.0,2440.0,44.0,29.0,Scavenged,MOM,Y,N,,,,Y,Prepped \n4/22/2025 PM,4/23/2025\nDOL 21 11:13,,Prepped in Milk Room,11
1,NB00237,NB00237_M_8,34.5,72.0,2350.0,43.5,28.5,Scavenged,MOM,Y,N,,,,Y,Prepped 4/20/25 PM,2025-04-21 10:13:00,,Prepped in Milk Room,6
2,NB00237,NB00237_M_17,36.6,87.0,3010.0,46.1,30.0,Scavenged,MOM,Y,N,Y,2025-05-06 20:57:00,unknown,Y,2025-05-06 08:56:00,2025-05-06 13:47:00,Unknown,Scavenged,2
3,NB00237,NB00237_M_19,37.1,89.0,3110.0,46.3,30.5,Scavenged,MOM,Y,N,Y,2025-05-08 20:43:00,45 min,Y,2025-05-08 12:03:00,2025-05-08 14:07:00,2025-05-08 16:49:00,Scavenged,3
4,NB00237,NB00237_M_20,37.5,93.0,3380.0,46.3,30.5,Scavenged,MOM,Y,N,,,,Y,Prepped 5/11/25 PM,2025-05-12 10:58:00,,Prepped in Milk Room,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,NB00469,NB00469_M_2,34.1,25.0,2040.0,42.5,30.3,Scavenged,MOM,Y+Nutramigen,N,Y,2025-05-09 21:09:00,unknown,Y,2025-05-09 08:30:00,2025-05-09 12:12:00,2025-05-09 16:06:00,Scavenged,2
137,NB00486,NB00486_M_1,32.1,9.0,1055.0,36.0,26.7,Scavenged,DBM,Y,,,,,Y,Prepped 5/13/25 PM,2025-05-14 10:45:00,,Prepped in Milk Room,8
138,NB00486,NB00486_M_2,32.1,9.0,1055.0,36.0,26.7,Scavenged,DBM,Y,N,N,,unknown,Y,5/14/2025 done @ 08:11,2025-05-14 10:45:00,2025-05-14 17:37:00,Scavenged,1
139,NB00487,NB00487_M_1,34.2,7.0,1940.0,44.3,29.0,Scavenged,MOM+DBM,Y,N,,,,Y,Prepped 5/13/25 PM,2025-05-14 11:00:00,,Prepped in Milk Room,7


In [55]:
merged_df = df.merge(AC_long, on='sample_unique_id', how='left')
merged_df

Unnamed: 0,Subject ID,sample_unique_id,CGA,DOL,Current Weight,Current Height,Current HC,Scavenged/Fresh?,Type of Milk,HMF,...,LNnt,LNFPI,LNFP II,LNFPIII,LSTc,DFLNT,DSLNT,DFLNH,FDSLNH,DSLNH
0,NB00237,NB00237_M_10,35.0,74.0,2440.0,44.0,29.0,Scavenged,MOM,Y,...,,,,,,,,,,
1,NB00237,NB00237_M_8,34.5,72.0,2350.0,43.5,28.5,Scavenged,MOM,Y,...,,,,,,,,,,
2,NB00237,NB00237_M_17,36.6,87.0,3010.0,46.1,30.0,Scavenged,MOM,Y,...,,,,,,,,,,
3,NB00237,NB00237_M_19,37.1,89.0,3110.0,46.3,30.5,Scavenged,MOM,Y,...,,,,,,,,,,
4,NB00237,NB00237_M_20,37.5,93.0,3380.0,46.3,30.5,Scavenged,MOM,Y,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,NB00469,NB00469_M_2,34.1,25.0,2040.0,42.5,30.3,Scavenged,MOM,Y+Nutramigen,...,0.181772,1.462318,0.275978,0.028002,0.408036,0.712330,0.152412,0.055915,0.057708,0.179686
137,NB00486,NB00486_M_1,32.1,9.0,1055.0,36.0,26.7,Scavenged,DBM,Y,...,0.064261,0.277044,0.352673,0.011546,0.076079,0.398567,0.089176,0.096767,0.103252,0.057142
138,NB00486,NB00486_M_2,32.1,9.0,1055.0,36.0,26.7,Scavenged,DBM,Y,...,0.064394,0.250610,0.328100,0.010156,0.071311,0.372029,0.079250,0.090050,0.094420,0.054976
139,NB00487,NB00487_M_1,34.2,7.0,1940.0,44.3,29.0,Scavenged,MOM+DBM,Y,...,0.117482,0.819799,0.352851,0.019452,0.156635,0.372911,0.149990,0.100168,0.109350,0.084917
