In [25]:
import numpy as np
import pandas as pd

In [26]:
# Load the raw PDX tumor-volume table.
df = pd.read_csv('../data/welm_pdx.csv')
n_rows = len(df)
print('Number of rows: ' + str(n_rows))

# Every distinct (Sample, Drug, Replicate Number) combination is counted once;
# the notebook reports that count as the number of mice.
unique_keys = df.drop_duplicates(subset=['Sample', 'Drug', 'Replicate Number'])
num_sample_drug_replicate = len(unique_keys)
print('Number of mice: ' + str(num_sample_drug_replicate))

Number of rows: 5955
Number of mice: 399


In [27]:
# Preview the first rows to check the column layout of the raw table.
df.head()

Unnamed: 0,Day,Drug,Other Info,Replicate Number,Sample,Tumor Volume mm3,source_file,excel_sheet
0,1.0,Navitoclax,,M0,HCI-010,163.9208,43018_2022_337_MOESM7_ESM.xlsx,6d left
1,4.0,Navitoclax,,M0,HCI-010,158.374381,43018_2022_337_MOESM7_ESM.xlsx,6d left
2,8.0,Navitoclax,,M0,HCI-010,197.154048,43018_2022_337_MOESM7_ESM.xlsx,6d left
3,11.0,Navitoclax,,M0,HCI-010,158.582177,43018_2022_337_MOESM7_ESM.xlsx,6d left
4,15.0,Navitoclax,,M0,HCI-010,176.645,43018_2022_337_MOESM7_ESM.xlsx,6d left


### Give each Sample-Drug-Replicate Number combination a unique integer MID

In [28]:
def enumerate_mid_names(mid_names):
    """Map each entry of ``mid_names`` to its position in the sequence.

    Parameters
    ----------
    mid_names : sequence
        Sized, integer-indexable collection of hashable keys.

    Returns
    -------
    dict
        ``{mid_names[i]: i}`` for every position ``i``. If a key occurs
        more than once, the last position wins.
    """
    return {mid_names[pos]: pos for pos in range(len(mid_names))}

old_len = len(df)
# Assign each Sample-Drug-Replicate an MID.
# The key is the (Sample, Drug, Replicate Number) tuple of each row, built
# directly from the three columns. This replaces the previous
# groupby(...).apply(lambda x: x.name) + self-merge, which
#   * relied on apply operating on the grouping columns (deprecated in recent
#     pandas releases), and
#   * could not be re-run: once 'MID' existed the merge produced
#     'MID_x' / 'MID_y' and the following cells failed with a KeyError.
# Assigning the column overwrites any existing 'MID', so this cell is safe to
# re-run, and the row count cannot change.
# Note: unlike the groupby-based version (which drops NaN keys by default),
# rows with a missing key value are kept here.
id_cols = ['Sample', 'Drug', 'Replicate Number']
df['MID'] = list(zip(*(df[col] for col in id_cols)))

In [29]:
# Keep the distinct (Sample, Drug, Replicate Number) key tuples before the MID
# column is replaced by integers in the next cell; later cells index into this
# array to look a key up by position.
mid_names = df['MID'].unique()

In [30]:
# Number the distinct MID keys by their position in df['MID'].unique(), then
# overwrite the key column with those integers.
unique_mid_keys = df['MID'].unique()
mid_dict = enumerate_mid_names(unique_mid_keys)
df['MID'] = df['MID'].map(mid_dict)

In [20]:
# Sanity checks: one MID per distinct Sample-Drug-Replicate combination, and
# the MID assignment neither added nor removed rows.
assert num_sample_drug_replicate == df['MID'].nunique()
assert old_len == df.shape[0]

### Rename and select columns

In [6]:
# Columns kept in the exported table, in output order.
cols = ['MID', 'Sample', 'Drug', 'Day', 'Volume']

# Shorten the volume column name, then take only the export columns.
df = df.rename(columns={'Tumor Volume mm3': 'Volume'})
df_out = df.loc[:, cols]
df_out.head()

Unnamed: 0,MID,Sample,Drug,Day,Volume
0,0,HCI-010,Navitoclax,1.0,163.9208
1,0,HCI-010,Navitoclax,4.0,158.374381
2,0,HCI-010,Navitoclax,8.0,197.154048
3,0,HCI-010,Navitoclax,11.0,158.582177
4,0,HCI-010,Navitoclax,15.0,176.645


In [7]:
# Write the MID-annotated table without the DataFrame index.
# NOTE(review): the raw-data outputs further down show the same
# Sample/Drug/Replicate with identical Day and volume values under two
# excel_sheet values, so this file presumably contains repeated
# (MID, Day, Volume) rows — confirm whether they should be dropped before export.
df_out.to_csv('../data/welm_pdx_w_mid.csv', index=False)

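### Investigating odd MIDs

The two keys inspected below (positions 21 and 24 in `mid_names`) each have rows from more than one `excel_sheet` in the raw file, and the displayed rows repeat the same Day / Tumor Volume values across sheets.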

In [33]:
# Reload the raw CSV into a separate frame, independent of the `df` that was
# modified in the cells above.
d = pd.read_csv('../data/welm_pdx.csv')
d.head()

Unnamed: 0,Day,Drug,Other Info,Replicate Number,Sample,Tumor Volume mm3,source_file,excel_sheet
0,1.0,Navitoclax,,M0,HCI-010,163.9208,43018_2022_337_MOESM7_ESM.xlsx,6d left
1,4.0,Navitoclax,,M0,HCI-010,158.374381,43018_2022_337_MOESM7_ESM.xlsx,6d left
2,8.0,Navitoclax,,M0,HCI-010,197.154048,43018_2022_337_MOESM7_ESM.xlsx,6d left
3,11.0,Navitoclax,,M0,HCI-010,158.582177,43018_2022_337_MOESM7_ESM.xlsx,6d left
4,15.0,Navitoclax,,M0,HCI-010,176.645,43018_2022_337_MOESM7_ESM.xlsx,6d left


In [32]:
# Key tuple stored at position 21 of mid_names (positions correspond to the
# integer MIDs assigned from df['MID'].unique()).
mid_names[21]

('HCI-015', 'Vehicle', 'M0')

In [36]:
# Pull every raw row for the key shown above (HCI-015 / Vehicle / M0).
# In the recorded output these rows come from two excel_sheet values
# ('6d mid' and '6f top right') with matching values for days 1, 4 and 8.
key_filter = (
    d['Sample'].eq('HCI-015')
    & d['Drug'].eq('Vehicle')
    & d['Replicate Number'].eq('M0')
)
s = d[key_filter]
s

Unnamed: 0,Day,Drug,Other Info,Replicate Number,Sample,Tumor Volume mm3,source_file,excel_sheet
144,1.0,Vehicle,,M0,HCI-015,138.915,43018_2022_337_MOESM7_ESM.xlsx,6d mid
145,4.0,Vehicle,,M0,HCI-015,145.330504,43018_2022_337_MOESM7_ESM.xlsx,6d mid
146,8.0,Vehicle,,M0,HCI-015,182.656688,43018_2022_337_MOESM7_ESM.xlsx,6d mid
147,11.0,Vehicle,,M0,HCI-015,203.187798,43018_2022_337_MOESM7_ESM.xlsx,6d mid
148,15.0,Vehicle,,M0,HCI-015,306.954364,43018_2022_337_MOESM7_ESM.xlsx,6d mid
149,18.0,Vehicle,,M0,HCI-015,338.624248,43018_2022_337_MOESM7_ESM.xlsx,6d mid
150,22.0,Vehicle,,M0,HCI-015,389.207,43018_2022_337_MOESM7_ESM.xlsx,6d mid
396,1.0,Vehicle,,M0,HCI-015,138.915,43018_2022_337_MOESM7_ESM.xlsx,6f top right
397,4.0,Vehicle,,M0,HCI-015,145.330504,43018_2022_337_MOESM7_ESM.xlsx,6f top right
398,8.0,Vehicle,,M0,HCI-015,182.656688,43018_2022_337_MOESM7_ESM.xlsx,6f top right


In [37]:
# Key tuple stored at position 24 of mid_names (positions correspond to the
# integer MIDs assigned from df['MID'].unique()).
mid_names[24]

('HCI-015', 'Vehicle', 'M3')

In [38]:
# Pull every raw row for the key shown above (HCI-015 / Vehicle / M3).
# In the recorded output these rows come from two excel_sheet values
# ('6d mid' and '6f top right') with matching values for days 1, 4 and 8.
key_filter = (
    d['Sample'].eq('HCI-015')
    & d['Drug'].eq('Vehicle')
    & d['Replicate Number'].eq('M3')
)
f = d[key_filter]
f

Unnamed: 0,Day,Drug,Other Info,Replicate Number,Sample,Tumor Volume mm3,source_file,excel_sheet
165,1.0,Vehicle,,M3,HCI-015,130.235,43018_2022_337_MOESM7_ESM.xlsx,6d mid
166,4.0,Vehicle,,M3,HCI-015,101.71303,43018_2022_337_MOESM7_ESM.xlsx,6d mid
167,8.0,Vehicle,,M3,HCI-015,133.490286,43018_2022_337_MOESM7_ESM.xlsx,6d mid
168,11.0,Vehicle,,M3,HCI-015,123.037704,43018_2022_337_MOESM7_ESM.xlsx,6d mid
169,15.0,Vehicle,,M3,HCI-015,106.660069,43018_2022_337_MOESM7_ESM.xlsx,6d mid
170,18.0,Vehicle,,M3,HCI-015,135.502848,43018_2022_337_MOESM7_ESM.xlsx,6d mid
171,22.0,Vehicle,,M3,HCI-015,106.1705,43018_2022_337_MOESM7_ESM.xlsx,6d mid
417,1.0,Vehicle,,M3,HCI-015,130.235,43018_2022_337_MOESM7_ESM.xlsx,6f top right
418,4.0,Vehicle,,M3,HCI-015,101.71303,43018_2022_337_MOESM7_ESM.xlsx,6f top right
419,8.0,Vehicle,,M3,HCI-015,133.490286,43018_2022_337_MOESM7_ESM.xlsx,6f top right
