# Interface for recording individuals entering and exiting the building

In [None]:
using CSV,DataFrames
include("auxilliary.jl"); include("poimcmc.jl");

## Inputs

In [None]:
# Buildings to process; each entry is used below as the `addr_1` group key.
# NOTE(review): the empty string is presumably a placeholder to be filled in — confirm.
blds = [""];
# Date labels; each one is used below both as a column name of the individual/aggregate
# tables and as the argument of `myparsedustfname`.
dts = [""];

## Ancillary Functions

In [None]:
"""
    dtrg(v)

Return the tuple `(earliest, latest)` of the dates found in `v`.

Every non-missing entry of `v` is parsed with the `"m/d/yyyy"` format; `missing`
entries are skipped. When `v` holds no non-missing entry the result is
`(missing, missing)`.
"""
function dtrg(v)
    parsed = [Date(x, "m/d/yyyy") for x in skipmissing(v)]
    # No sentinel dates: an empty input is detected directly, so every real date
    # (however early or late) is reported correctly.
    isempty(parsed) && return (missing, missing)
    return extrema(parsed)
end;

"""
    myparsebeg(v)

Return the first element of `dtrg(v)`, i.e. the earliest date found in `v`.
"""
myparsebeg(v) = first(dtrg(v))

"""
    myparseend(v)

Return the second element of `dtrg(v)`, i.e. the latest date found in `v`.
"""
myparseend(v) = last(dtrg(v));

"""
    myuppercase(v)

Return `uppercase(v)`; a `missing` argument is returned unchanged.
"""
myuppercase(v) = ismissing(v) ? v : uppercase(v);

"""
    mysplit(s::String)

Return the part of `s` that precedes its first space, parsed as a `Date`
using `Date`'s default string format.
"""
function mysplit(s::String)
    head = first(split(s, " "))
    return Date(head)
end;

"""
    myparsedustfname(s::String; year::Integer=2022)

Convert a label such as `"Jan05"` into a `Date`.

The first three characters of `s` must be a capitalised English month abbreviation
(`"Jan"` through `"Dec"`) and characters 4-5 the day of the month; anything after
that is ignored. The label carries no year, so it is taken from `year`.

# Throws
- `ArgumentError` if `s` has fewer than five code units or does not start with a
  recognised month abbreviation.
"""
function myparsedustfname(s::String; year::Integer=2022)
    ncodeunits(s) >= 5 ||
        throw(ArgumentError("expected a label like \"Jan05\", got \"$s\""))
    abbrevs = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    month = findfirst(==(s[1:3]), abbrevs)
    month === nothing &&
        throw(ArgumentError("unrecognised month abbreviation in \"$s\""))
    # Assemble the same "yyyy-mm-dd" text that was previously handed to `Date`.
    return Date(string(year, "-", lpad(month, 2, '0'), "-", s[4:5]))
end

"""
    ressplit(s::Union{String,Missing})

Extract the residential building from a goldstar dorm entry.

`s` is split on `" - "` and the second piece is returned as a `String`.
A `missing` entry is passed through unchanged.
"""
function ressplit(s::Union{String,Missing})
    ismissing(s) && return s
    pieces = split(s, " - ")
    return String(pieces[2])
end;
    
"""
    btwn(x::Date, d1::Date, d2::Date)

Return `true` when `x` lies in the closed interval from `d1` to `d2`.
"""
btwn(x::Date, d1::Date, d2::Date) = d1 <= x <= d2;

"""
    isocmplymap(drm::String, dt::Date, df::DataFrame)

Map an iso start date `dt` into the range of Chance's dust compliance for dorm `drm`.

The rows of `df` whose `"dorm"` equals `drm` are sorted by `"end_date"`. Consecutive
end dates delimit half-open periods `(previous end, end]`, with the first period
starting 7 days before the earliest end date. The end date of the period containing
`dt` is returned, or `missing` when `dt` falls in none of them.
"""
function isocmplymap(drm::String, dt::Date, df::DataFrame)
    dormrows = DataFrame(groupby(df, "dorm")[(dorm=drm,)])
    sort!(dormrows, "end_date")
    ends = dormrows[:, "end_date"]

    # Prepend the lower boundary of the first period.
    edges = vcat(ends[1] - Day(7), ends)

    for k in 2:length(edges)
        if edges[k - 1] < dt <= edges[k]
            return edges[k]
        end
    end

    return missing
end
"""
    isocmplymap(drm::Vector{String}, dt::Vector{Date}, df::DataFrame)

Apply the scalar `isocmplymap` to each pair `(drm[k], dt[k])`, for every index of
`drm`, and collect the results in a vector.
"""
function isocmplymap(drm::Vector{String}, dt::Vector{Date}, df::DataFrame)
    return [isocmplymap(drm[k], dt[k], df) for k in eachindex(drm)]
end;

## Load the building dust and individual isolation histories

In [None]:
# Load the building dust table. Later cells group it by "addr_1" and "end_date" and
# read its "Result (cp/mg)" column.
dfbld = CSV.read("blddust.csv",DataFrame)

In [None]:
# Load the per-individual table. Later cells group it by "addr_1" and read the columns
# named in `dts` together with "isostart_date".
dfindiv = CSV.read("indivposbybld.csv",DataFrame);
# Preview the first three rows.
first(dfindiv,3)

In [None]:
# For every building (`addr_1` group), sum each of the date columns listed in `dts`;
# the summed columns keep their original names.
println("Aggregate infection counts by building:")
gdf = groupby(dfindiv, "addr_1");
dfagg = combine(gdf, [col => sum => col for col in dts])

## Initialize an MCMC run to obtain a prm to write the individual histories into
Note: data() should already match the number infected to the date ranges we are using in the above table

In [None]:
# Call `mcmcrun` with first argument 1 and Δprg=1.0. The function is not defined in this
# notebook (presumably it comes from the included poimcmc.jl — confirm).
# NOTE(review): the next cell reads MCMCsmp.csv, which this call presumably writes — confirm.
mcmcrun(1;Δprg=1.0);

In [None]:
# Read the header-less sample file and rename its two columns to "name" and "value".
df = CSV.read("MCMCsmp.csv",DataFrame,header=false); rename!(df,["Column1"=>"name","Column2"=>"value"]);
# Replace the "name" column by its Symbol equivalents (it is handed to `rdprm` below).
df[!,"name"].=Symbol.(df[!,"name"]); 
# Show the first row.
first(df)

### Set the leave for isolation date to match observed
Note: **the serial index runs in lexicographic order, cycling first over dates and then over buildings, i.e. entries are grouped by building**.

In [None]:
# Build `prm` (indexed by Symbol keys in the cells below) and `vkeys` from the
# "value" and "name" columns via `rdprm`, which is defined outside this notebook;
# then read the `:nmax` entry as an Int64.
prm,vkeys = rdprm(df[!,"value"],df[!,"name"]); nmax=Int64(prm[:nmax]);
println("Max number of infected individuals: $nmax")

Below we loop over buildings and dates and write the number of infected individuals and the dust measurement for each building and date into prm.

In [None]:
# Fill prm with one infected count (n<pos>) and one dust reading (Y<pos>) for every
# (building, date) pair. `pos` is a running serial index that advances over the dates
# of one building before moving on to the next building.
pos = 0
gdfagg = groupby(dfagg, "addr_1")
gdfbld = groupby(dfbld, ["addr_1", "end_date"])
for bld in blds, dt in dts
    pos += 1
    dustkey = (addr_1=bld, end_date=myparsedustfname(dt))

    # Aggregate count for this building in the column named by the date label.
    prm[Symbol(:n, pos)] = gdfagg[(addr_1=bld,)][1, dt]
    # Dust result for this building on the parsed end date.
    prm[Symbol(:Y, pos)] = gdfbld[dustkey][1, "Result (cp/mg)"]
end;

Below we loop over all buildings and dates and write those individuals into the prm file, following the listing in the aggregate infection counts by building.

In [None]:
# For every (building, date) pair, walk the building's individuals and, for each one
# flagged in the date column, store the leave time tℓ<pos>: the number of days from
# (parsed date - 7 days) to the individual's "isostart_date".
gdf = groupby(dfindiv, "addr_1")
pos = 0
for bld in blds
    for dt in dts
        weekstart = myparsedustfname(dt) - Day(7)
        residents = gdf[(addr_1=bld,)]
        for k in 1:nrow(residents)
            residents[k, dt] || continue
            pos += 1
            offset = residents[k, "isostart_date"] - weekstart
            # Store the bare day count held in the period's `value` field.
            prm[Symbol(:tℓ, pos)] = getfield(offset, :value)
        end
    end
end

Alternatively, we can fix the enter and exit times according to an equilibrium schedule. Below we assume a steady flow of individuals: 4 enter per day on each day from t=-9 to t=10, and each leaves 10 days after entering. The loop therefore runs over 80 individuals (20 days × 4 individuals per day).

# Equilibrium schedule for 80 individuals: gen[1] is the day counter and gen[2] the
# slot within that day (1 to 4). Individual i enters at te<i> = gen[1] - 10 and leaves
# 10 days later at tℓ<i>.
gen = [1, 0]
for i in 1:80
    if gen[2] >= 4
        # Day is full: move to the next day and restart the slot counter.
        gen[1] += 1
        gen[2] = 1
    else
        gen[2] += 1
    end

    tekey = Symbol(:te, i)
    tℓkey = Symbol(:tℓ, i)

    prm[tekey] = gen[1] - 10
    prm[tℓkey] = prm[tekey] + 10
end

In [None]:
# Hand `prm`, `vkeys` and the "value" column of `df` to `wrtprm!`, which is defined
# outside this notebook. NOTE(review): presumably this writes the updated prm entries
# back into the column in `vkeys` order — confirm.
wrtprm!(prm,vkeys,df[!,"value"]);

In [None]:
# Write `df` back to MCMCsmp.csv (the file read earlier), without a header row.
CSV.write("MCMCsmp.csv",df,writeheader=false);