# Normalizes the growth data and then fits $e^{bx+d}$

First, this notebook normalizes the growth data based on the first measurement, i.e., for each mouse, each measurement is divided by that mouse's first measurement, so that the first measurement is equal to 1. I further multiply these by 100. This allows us, when running later simulations, to initialize our population size to be equal to 100. (Note: the scaling cell below, as written, only divides by the first measurement — its displayed output starts at 1.0 — so confirm whether the factor of 100 is applied elsewhere.) I allow for the option of saving the scaled data out to the file name set below. The code assumes that the folders in which data and results are to be saved already exist.

Second, I fit the equation $e^{bx+d}$ to the data, where $x$ is the day of the measurement. $b$ represents the growth rate and $d$ represents the scaled shift. Since $e^{bx+d} = e^{b(x + d/b)}$, $d/b$ gives the shift in terms of days, so any following analysis will use $d/b$ rather than $d$ directly.

In [1]:
import numpy as np
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score
import pandas as pd
import argparse
import json

In [2]:
# Files
growth_data_file = "../../data/growth_data_df.csv"  # input: measurements to normalize and fit
save_scaled = True  # when True, the normalized data is written to save_scaled_file
save_scaled_file = "../../data/growth_data_scaled.csv"  # output: normalized data
fit_save_file = "../../results/growth_fit_results_scaled/exp_growth.csv"  # output: fit table

# Parameters
# Initial guesses (init_*) and [lower, upper] bounds (bounds_*) for the two
# coefficients of the fitted model exp(b*x + d).
params = dict(
    init_b=0.1,
    init_d=0.1,
    bounds_b=[0, np.inf],
    bounds_d=[-np.inf, np.inf],
)

In [3]:
# Read the growth measurements (one row per mouse per day) and display them.
growth_data = pd.read_csv(filepath_or_buffer=growth_data_file)
growth_data

Unnamed: 0,group,id,day,size
0,Grp. A1 B6 (100% C1),424,7,25.725600
1,Grp. A1 B6 (100% C1),424,10,36.392700
2,Grp. A1 B6 (100% C1),424,14,46.803512
3,Grp. A1 B6 (100% C1),424,17,98.260390
4,Grp. A1 B6 (100% C1),424,21,112.673203
...,...,...,...,...
732,Grp. B5 nude (100% C11),483,14,46.261412
733,Grp. B5 nude (100% C11),483,17,70.397887
734,Grp. B5 nude (100% C11),483,21,111.582900
735,Grp. B5 nude (100% C11),483,24,256.730952


In [4]:
# Scale growth data
# Each mouse's sizes are divided by that mouse's first (earliest-day) size, so
# every growth curve starts at 1.
#
# Work on a copy: a plain assignment would only alias `growth_data`, and the
# scaling would then silently overwrite the raw measurements as well.
scaled_growth_data = growth_data.copy()

# Baseline per row = size at the earliest day for that row's mouse. Sorting by
# day first means the baseline does not depend on the row order of the input
# file (the fitting step sorts by day too). The stable sort keeps file order
# for rows that share a day.
baseline_size = (
    growth_data.sort_values("day", kind="stable")
    .groupby("id")["size"]
    .transform(lambda sizes: sizes.iloc[0])
)

# The division aligns on the row index, so each row is paired with its own
# mouse's baseline even though `baseline_size` is in day-sorted order.
# NOTE(review): the notebook introduction mentions a further factor of 100;
# it is not applied here — confirm which scaling is intended.
scaled_growth_data["size"] = growth_data["size"] / baseline_size
scaled_growth_data

Unnamed: 0,group,id,day,size
0,Grp. A1 B6 (100% C1),424,7,1.000000
1,Grp. A1 B6 (100% C1),424,10,1.414649
2,Grp. A1 B6 (100% C1),424,14,1.819336
3,Grp. A1 B6 (100% C1),424,17,3.819557
4,Grp. A1 B6 (100% C1),424,21,4.379809
...,...,...,...,...
732,Grp. B5 nude (100% C11),483,14,2.365734
733,Grp. B5 nude (100% C11),483,17,3.600034
734,Grp. B5 nude (100% C11),483,21,5.706169
735,Grp. B5 nude (100% C11),483,24,13.128805


In [20]:
# Optionally write the normalized data to disk (no index column is written).
if save_scaled:
    print("Saving scaled data to", save_scaled_file)
    scaled_growth_data.to_csv(path_or_buf=save_scaled_file, index=False)

../../data/growth_data_scaled_params.json
Saving scaled data to ../../data/growth_data_scaled.csv


In [6]:
# Exponential model that the growth data is fitted to
def exp_funct(data, b, d):
    """Evaluate the exponential model exp(b * data + d).

    Parameters
    ----------
    data : scalar or array-like supporting arithmetic
        Input value(s) x of the model.
    b : float
        Coefficient multiplying x in the exponent.
    d : float
        Constant term in the exponent.

    Returns
    -------
    The value of np.exp(b * data + d), computed elementwise for array input.
    """
    exponent = b * data + d
    return np.exp(exponent)

In [7]:
# Fit exp(b*x + d) to every mouse's scaled growth curve and collect one result
# row per mouse: group, id, fitted b and d, R^2 of the fit, and the last
# measured day.
results = []
for mouse_id in scaled_growth_data["id"].unique():
    # Measurements of this mouse only, in chronological order.
    curr_data = (
        scaled_growth_data[scaled_growth_data["id"] == mouse_id]
        .sort_values("day")
        .reset_index(drop=True)
    )
    x_data = curr_data["day"]
    y_data = curr_data["size"]

    # Bounded least-squares fit; bounds are given as ([lower_b, lower_d], [upper_b, upper_d]).
    popt, pcov = curve_fit(exp_funct, x_data, y_data,
                    p0=(params["init_b"], params["init_d"]),
                    bounds=([params["bounds_b"][0], params["bounds_d"][0]],
                            [params["bounds_b"][1], params["bounds_d"][1]]))

    # Model prediction at the measured days, evaluated in one vectorized call.
    opt_line = exp_funct(x_data, *popt)

    r = r2_score(y_data, opt_line)
    results.append([curr_data.loc[0, "group"], curr_data.loc[0, "id"], *popt, r, x_data.iloc[-1]])

cols = ["group", "id", "b", "d", "rscore", "last_day"]
df = pd.DataFrame(results, columns=cols)

# Exclude the weird one.
# The ids display as integers, so the comparison is done numerically: comparing
# an integer column against the string "438" is never equal and would leave the
# mouse in the table. `to_numeric` also makes this work if the ids were read
# as strings.
excluded_mouse_id = 438
df = df[pd.to_numeric(df["id"], errors="coerce") != excluded_mouse_id]
df

Unnamed: 0,group,id,b,d,rscore,last_day
0,Grp. A1 B6 (100% C1),424,0.065348,-0.585788,0.904443,52
1,Grp. A1 B6 (100% C1),425,0.053068,-0.750638,0.992446,87
2,Grp. A1 B6 (100% C1),426,0.04545,0.242624,0.964727,49
3,Grp. A1 B6 (100% C1),427,0.055895,-0.611857,0.987059,70
4,Grp. A1 B6 (100% C1),428,0.03191,-1.300194,0.630344,105
5,Grp. A1 B6 (100% C1),429,0.046476,0.159339,0.892269,38
6,Grp. A2 B6 (80% C1; 20% C11),430,0.051135,-0.058465,0.975637,45
7,Grp. A2 B6 (80% C1; 20% C11),431,0.083802,-2.677513,0.969811,77
8,Grp. A2 B6 (80% C1; 20% C11),432,0.087246,-5.300776,0.970396,105
9,Grp. A2 B6 (80% C1; 20% C11),433,0.037504,-0.116996,0.977266,101


In [24]:
# Write the fit table to disk (no index column is written).
print("Saving fit results to", fit_save_file)
df.to_csv(path_or_buf=fit_save_file, index=False)

# Store the fitting parameters next to the results: the last four characters
# of the results path (".csv" for the configured file name) are replaced by
# "_params.json".
params_json_file = fit_save_file[:-4] + "_params.json"
with open(params_json_file, "w") as params_out:
    json.dump(params, params_out, indent=4)

Saving fit results to ../../results/growth_fit_results_scaled/exp_growth.csv
