# Monoculture Logistic Growth Fit
This notebook performs a clean logistic growth fit on the two untreated monoculture datasets.

What you get:
- Data loading and preprocessing (the day is parsed from image names when no day column exists; values are expected to be pre-averaged per day)
- Logistic ODE fit with parameter bounds
- Sum of Squared Residuals (SSR) and Bayesian Information Criterion (BIC)
- Plot of data with fitted model curve and a concise parameter report

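> Notes:
> - Uses CSV, DataFrames, DifferentialEquations, DiffEqParamEstim, Optimization, OptimizationOptimJL, BlackBoxOptim, and Plots.
> - The first cell installs any missing packages automatically. To add one manually, run `using Pkg; Pkg.add("CSV")` (etc.) in your Julia REPL.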

In [None]:
# 1) Packages
# Try to import every dependency into Main; whatever fails to import is
# installed through Pkg and then imported a second time.
using Pkg
pkgs = [
    "CSV", "DataFrames", "Statistics",
    "DifferentialEquations", "DiffEqParamEstim",
    "Optimization", "OptimizationOptimJL",
    "BlackBoxOptim", "Plots",
]
for pkgname in pkgs
    # Build the `import <pkgname>` expression once and reuse it for the retry.
    import_stmt = :(import $(Symbol(pkgname)))
    try
        Core.eval(Main, import_stmt)
    catch
        Pkg.add(pkgname)
        Core.eval(Main, import_stmt)
    end
end
println("✔ Packages ready: ", join(pkgs, ", "))

In [None]:
# 2) Using imports
using CSV, DataFrames, Statistics
using DifferentialEquations
using DiffEqParamEstim
using Optimization, OptimizationOptimJL
using BlackBoxOptim
using Plots
using Printf

In [None]:
# 3) Models
"""
    logistic!(du, u, p, t)

In-place right-hand side of the logistic growth ODE for a single state.

`p` is unpacked as `(r, K)`; the derivative `r * u[1] * (1 - u[1] / K)` is written
into `du[1]`. The time argument `t` is not used.
"""
function logistic!(du, u, p, t)
    rate, capacity = p
    n = u[1]
    du[1] = rate * n * (1 - n / capacity)
end

In [None]:
# 4) Main functions (fast path; CSVs already contain day averages)
# Immutable summary of one logistic fit; `fit_logistic` builds it from the best
# parameter vector, the best loss, and the BIC it computes.
struct FitResult
    # First fitted parameter (unpacked as `r` in `logistic!`).
    r::Float64
    # Second fitted parameter (unpacked as `K` in `logistic!`).
    K::Float64
    # Sum of squared residuals between the data and the model prediction.
    SSR::Float64
    # Bayesian Information Criterion; `fit_logistic` stores NaN when the number
    # of data points does not exceed the number of parameters.
    BIC::Float64
end

"""
    load_monoculture_csv(path::AbstractString; scale::Real=1)

Read a monoculture time-series CSV and return `(x, y)`: two `Vector{Float64}` holding
the day values and the measurements, both ordered by increasing day.

# Column detection
- Day: the first column whose name is `day` or `days` (case-insensitive). If there is
  none and an `Image` column exists, the day is parsed from each image name with the
  pattern `day<digits>` (case-insensitive); rows whose name does not match are dropped.
- Value: the first of `Average`, `Area µm^2`, `Value`, `Cells` that is present.

# Keywords
- `scale`: every value is divided by this factor. The default `1` leaves the values
  unchanged. NOTE(review): an earlier comment mentioned 157.7 as an area-to-count
  factor; pass `scale=157.7` if the value column holds areas and counts are wanted.

# Throws
- `ErrorException` when no day column can be found or derived, or when none of the
  candidate value columns is present.
"""
function load_monoculture_csv(path::AbstractString; scale::Real=1)
    df = CSV.read(path, DataFrame)

    # Compare column names as strings: `names(df)` yields strings in current
    # DataFrames releases, and a Symbol never compares equal to a String.
    colnames = String.(names(df))

    day_idx = findfirst(c -> lowercase(c) in ("day", "days"), colnames)
    if day_idx !== nothing
        # Use the column under the name it actually has (e.g. "Day" or "days").
        daycol = colnames[day_idx]
    elseif "Image" in colnames
        df[!, :day] = map(df.Image) do label
            ismissing(label) && return missing
            m = match(r"(?i)day(\d+)", String(label))
            return m === nothing ? missing : parse(Int, m.captures[1])
        end
        # Dropping on the DataFrame keeps day and value rows aligned.
        df = dropmissing(df, :day)
        daycol = "day"
    else
        error("Could not find a 'day' column or derive from Image column in $(basename(path))")
    end

    val_candidates = ["Average", "Area µm^2", "Value", "Cells"]
    val_idx = findfirst(in(colnames), val_candidates)
    if val_idx === nothing
        error("No suitable value column found in $(basename(path)). Expected one of: " *
              join(val_candidates, ", "))
    end
    valcol = val_candidates[val_idx]

    x = Float64.(df[!, Symbol(daycol)])
    y = Float64.(df[!, Symbol(valcol)]) ./ scale

    # Return both vectors ordered by day.
    perm = sortperm(x)
    return x[perm], y[perm]
end

"""
    fit_logistic(x, y; r_bounds=(0.0, 2.0), K_bounds=(100.0, 1e6))

Fit the logistic ODE (`logistic!`) to observations `y` taken at times `x` and return
`(best_p, best_sol, FitResult)`.

The initial condition is `y[1]` and the integration span is `(x[1], x[end])`, so `x`
is expected to be sorted in increasing order. Six independent `bboptimize` restarts are
run (each limited to 15 seconds) inside the box `r_bounds × K_bounds`; the candidate
with the smallest sum of squared residuals at the data times wins.

# Returns
- `best_p`: the winning parameter vector `[r, K]`.
- `best_sol`: the ODE solution for `best_p`, saved at the data times.
- `FitResult(r, K, SSR, BIC)` with `BIC = n*log(SSR/n) + k*log(n)` for `k = 2`
  parameters and `n` data points, or `NaN` when `n <= k`.

# Throws
- `DimensionMismatch` if `x` and `y` differ in length.
- `ArgumentError` if the data are empty.
- `ErrorException` if no restart produced a usable fit.
"""
function fit_logistic(x::AbstractVector{<:Real}, y::AbstractVector{<:Real};
                      r_bounds=(0.0, 2.0), K_bounds=(100.0, 1e6))
    length(x) == length(y) || throw(DimensionMismatch(
        "x and y must have the same length (got $(length(x)) and $(length(y)))"))
    isempty(x) && throw(ArgumentError("cannot fit a logistic curve to empty data"))

    ts = collect(Float64, x)
    obs = collect(Float64, y)
    tspan = (ts[1], ts[end])
    u0 = [obs[1]]
    bounds = [r_bounds, K_bounds]
    solver = Rosenbrock23()

    # DiffEqParamEstim documents L2Loss data with one column per time point, so
    # the single-state series is passed as a 1×n matrix.
    data = reshape(obs, 1, :)

    # Uniform draw inside a (lo, hi) bound using only Base.rand; the file does
    # not import Distributions, so `Uniform` is not guaranteed to be in scope.
    draw(b) = first(b) + rand() * (last(b) - first(b))

    best_p, best_sol, best_loss = nothing, nothing, Inf
    for _ in 1:6
        p0 = [draw(r_bounds), draw(K_bounds)]
        prob = ODEProblem(logistic!, u0, tspan, p0)
        obj = build_loss_objective(prob, solver, L2Loss(ts, data),
                                   Optimization.AutoForwardDiff())
        res = bboptimize(obj; SearchRange=bounds, MaxTime=15.0, TraceMode=:silent)
        cand = best_candidate(res)
        cand_sol = solve(remake(prob, p=cand), solver;
                         saveat=ts, reltol=1e-9, abstol=1e-9)
        pred = getindex.(cand_sol.u, 1)
        # A solve that stopped early has fewer saved points than observations;
        # skip that candidate instead of aborting the remaining restarts.
        length(pred) == length(obs) || continue
        loss = sum(abs2, obs .- pred)
        if loss < best_loss
            best_p, best_sol, best_loss = cand, cand_sol, loss
        end
    end

    best_p === nothing &&
        error("fit_logistic: no restart produced a finite sum of squared residuals")

    n = length(obs)
    k = 2
    bic = n > k ? n * log(best_loss / n) + k * log(n) : NaN
    return best_p, best_sol, FitResult(best_p[1], best_p[2], best_loss, bic)
end

"""
    plot_data_and_fit(x, y, sol; title_str="Logistic Fit")

Draw the observations `(x, y)` as a scatter series labelled "Data", overlay the first
component of every state in `sol.u` against `sol.t` as a line labelled "Model", and
display the figure.
"""
function plot_data_and_fit(x, y, sol; title_str="Logistic Fit")
    model_vals = [state[1] for state in sol.u]
    fig = scatter(x, y; label="Data", xlabel="Day", ylabel="Cells", title=title_str)
    plot!(fig, sol.t, model_vals; label="Model", lw=2)
    display(fig)
end

In [None]:
# 5) Data plot (single CSV quick check)
# Choose one monoculture untreated CSV
# The path is built from @__DIR__.
# NOTE(review): confirm what @__DIR__ resolves to when run from a notebook kernel.
naive_csv = joinpath(@__DIR__, "Datasets", "Untreated MonoCulture", "A2780 RFP_20and30_untreated_4500thresh_cell_area_measurements 1.csv")
# load_monoculture_csv returns the day values and the measurements, sorted by day.
x_demo, y_demo = load_monoculture_csv(naive_csv)
# Raw-data scatter only (no model curve); the plot is the cell's last expression.
scatter(x_demo, y_demo; xlabel="Day", ylabel="Cells", title=basename(naive_csv), label="Data")

In [None]:
# 6) Main run cell (fit + plot for the demo CSV)
# Requires x_demo, y_demo and naive_csv from the data-plot cell.
# fit_logistic returns the best parameter vector, the matching ODE solution,
# and a FitResult with r, K, SSR and BIC.
p̂_demo, sol_demo, fr_demo = fit_logistic(x_demo, y_demo)
# Report the fitted parameters and the goodness-of-fit numbers.
@printf("Demo Fit — r = %.6f, K = %.2f\n", fr_demo.r, fr_demo.K)
@printf("SSR = %.4f\n", fr_demo.SSR)
@printf("BIC = %.3f\n", fr_demo.BIC)
# Overlay the fitted model curve on the data.
plot_data_and_fit(x_demo, y_demo, sol_demo; title_str="Demo Logistic Fit: " * basename(naive_csv))

In [None]:
# 7) Optional: Fit the other untreated monoculture CSV as well
# Same load -> fit -> report -> plot sequence as the demo cell, applied to the
# second dataset.
resist_csv = joinpath(@__DIR__, "Datasets", "Untreated MonoCulture", "A2780cis GFP_20and30_untreated_6500Thesh_cell_area_measurements.csv")
x_r, y_r = load_monoculture_csv(resist_csv)
p̂_r, sol_r, fr_r = fit_logistic(x_r, y_r)
@printf("Resistant Fit — r = %.6f, K = %.2f\n", fr_r.r, fr_r.K)
@printf("SSR = %.4f\n", fr_r.SSR)
@printf("BIC = %.3f\n", fr_r.BIC)
plot_data_and_fit(x_r, y_r, sol_r; title_str="Resistant Logistic Fit: " * basename(resist_csv))
# Side-by-side summary of both fits; the "Naive" row reads fr_demo, so the demo
# fit cell must have been run first.
println("\nSummary:")
@printf("Naive:  r=%.6f, K=%.2f, SSR=%.4f, BIC=%.3f\n", fr_demo.r, fr_demo.K, fr_demo.SSR, fr_demo.BIC)
@printf("Resist: r=%.6f, K=%.2f, SSR=%.4f, BIC=%.3f\n", fr_r.r, fr_r.K, fr_r.SSR, fr_r.BIC)