In [None]:
import os
import time
import tqdm
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Patch, FancyArrowPatch
import logging
import yaml
import json
import joblib
from sklearn.impute import SimpleImputer
from scipy.stats.contingency import crosstab
import networkx as nx
from matplotlib.lines import Line2D
import umap
import itertools
from sklearn.preprocessing import power_transform

#home = os.path.expanduser("~")
sys.path.append(os.getcwd())
from functions.load_model import load_tolist
import functions.visualise as vis
import functions.process as proc
from functions.io import setup_logger, makedir
from functions import FeatureEngine
sys.path.append(os.path.expanduser('~'))
from PpaPy.processing.preprocess import addhistory, select_features
from functions.modelfunctions import add_power_transform, select_features, addhistory
import argparse

import pickle
from sklearn import set_config
 
from numba import jit
# set invalid (division by zero error) to ignore
np.seterr(invalid='ignore')


class NpIntEncoder(json.JSONEncoder):
    """JSON encoder that serialises NumPy integer scalars as plain Python ints."""

    def default(self, obj):
        """Return a JSON-serialisable replacement for ``obj``.

        NumPy integers are converted with ``int``. Every other type is handed
        to the base class, which raises ``TypeError``.
        """
        if not isinstance(obj, np.integer):
            return super().default(obj)
        return int(obj)

class NanConverter(json.JSONEncoder):
    """JSON encoder that writes float NaN values as ``null``.

    The standard encoder emits the non-standard token ``NaN``. This encoder
    replaces every float NaN found in (nested) dicts, lists and tuples with
    ``None`` before encoding. Both ``encode`` (used by ``json.dumps``) and
    ``iterencode`` (used by ``json.dump``) are covered.
    """

    def nan2None(self, obj):
        """Return a copy of ``obj`` in which every float NaN is replaced by None.

        Dicts are rebuilt with the same keys; lists and tuples are rebuilt as
        lists (JSON has a single array type). Any other object is returned
        unchanged.
        """
        if isinstance(obj, dict):
            return {k: self.nan2None(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [self.nan2None(v) for v in obj]
        if isinstance(obj, float) and np.isnan(obj):
            return None
        return obj

    def encode(self, obj, *args, **kwargs):
        """Encode ``obj`` to a JSON string after replacing NaN with None."""
        return super().encode(self.nan2None(obj), *args, **kwargs)

    def iterencode(self, obj, *args, **kwargs):
        """Yield JSON chunks for ``obj`` after replacing NaN with None.

        ``json.dump`` streams through ``iterencode`` rather than ``encode``;
        without this override NaN values would be written verbatim there.
        The conversion is idempotent, so being applied again when ``encode``
        delegates to ``iterencode`` does not change the result.
        """
        return super().iterencode(self.nan2None(obj), *args, **kwargs)
    
# %% [markdown]
# In the following section, please specify where your input files are stored and where the output should be saved.

In [None]:

# %%
# Root directory of the input data. Depending on `inpath_with_subfolders` it is
# either used directly or scanned for matching sub-folders.
inpath = "/gpfs/soma_fs/scratch/src/boeger/data_gueniz/"
# True: every entry of `inpath` whose name contains one of `inpath_pattern`
# becomes a separate input folder. False: `inpath` itself is the only input folder.
inpath_with_subfolders = True
# Substrings used to select the sub-folders of `inpath`; the same patterns are
# used further down to pick the matching folders below the output directory.
inpath_pattern = ["Exp1_WT_OP50"]
# Base output directory; it is passed to makedir() in the next cell.
args_out = "/gpfs/soma_fs/scratch/src/boeger/PpaPred_eren_35727184"

In [None]:
base_outpath = makedir(args_out)

# %%
date = time.strftime("%Y%m%d")
datestr = time.strftime("%Y%m%d-%HH%MM")
home = os.path.expanduser("~")

# Expand `inpath` into a list of input folders.
# The isinstance guard keeps this cell idempotent: after the first run `inpath`
# is already a list and is left untouched instead of being passed to os.listdir.
if isinstance(inpath, str):
    if inpath_with_subfolders:
        # os.listdir returns entries in arbitrary order; sort so that the order
        # of `inpath` (and therefore of `outpath`) is reproducible.
        inpath = sorted(
            os.path.join(inpath, sub)
            for sub in os.listdir(inpath)
            if any(pat in sub for pat in inpath_pattern)
        )
    else:
        inpath = [inpath]

# One output folder per input folder, named after the input folder.
outpath = []
for p in inpath:
    # normpath strips a trailing separator; without it basename(".../folder/")
    # is "" and the output would land directly in base_outpath.
    in_folder = os.path.basename(os.path.normpath(p))
    outpath.append(makedir(os.path.abspath(os.path.join(base_outpath, in_folder))))


# %%
# In the following section, standard model parameters are set. Change those only if necessary.
# Changes to the config file are preferable.
# Use a context manager so the config file handle is closed after parsing.
with open("config.yml", "r") as config_file:
    config = yaml.safe_load(config_file)

cluster_color = config['cluster_color']
cluster_group = config['cluster_group_man']
cluster_label = config['cluster_names']
# Map every cluster name to "<name>, <group>". Names and groups are paired by
# position, i.e. by the order of the values in the two config mappings.
clu_group_label = {
    name: f'{name}, {group}'
    for name, group in zip(cluster_label.values(), cluster_group.values())
}
skip_already = config['settings']['skip_already']
overwrite = True

model_path = config['settings']['model']
# Version tag = second "_"-separated token of the model file name, up to the first ".".
version = os.path.basename(model_path).split("_")[1].split(".")[0]
ASpath = config['settings']['ASpath']
smooth = config['settings']['fbfill']
fps = config['settings']['fps']

# lists to store already processed files in
prediction_done = []

# %% [markdown]
# 1. Feature Engineering
# In the following section, additional features are calculated.
# The engineered data files are saved under the specified outpath/subfolder.
# (with subfolder being the inpath folder name postfixed by _engine)

In [None]:
# Display the resolved list of input folders as a quick visual check.
inpath

In [None]:
# Run the feature engineering step on every input folder.
# Note that skip_already is hard-coded to False here, independent of the value
# read from the config file.
XYs, CLines = FeatureEngine.run(
    inpath,
    outpath,
    return_XYCLine=True,
    skip_engine=False,
    skip_already=False,
    out_fn_suffix='prediction',
)  # skip_engine skip_already

# %%

# Make scikit-learn transformers return pandas objects instead of NumPy arrays.
set_config(transform_output="pandas")
# NOTE: joblib.load unpickles its input and can execute arbitrary code; only
# load model files from a trusted source.
# Pass the path directly (as for ASpath below) so joblib opens and closes the
# file itself instead of leaving an unclosed handle behind.
model = joblib.load(model_path)
augsel = joblib.load(ASpath)
# Mean imputer for NaN entries in the feature table.
imp = SimpleImputer(missing_values=np.nan, strategy='mean')

# %%
# Collect the full path of every file that sits in a folder below base_outpath
# whose folder name contains one of the input patterns.
all_engine = [
    os.path.join(folder, filename)
    for folder, _subdirs, filenames in os.walk(base_outpath)
    if any(pattern in os.path.basename(folder) for pattern in inpath_pattern)
    for filename in filenames
]

# %% [markdown]
# ## 3. Prediction

In [None]:
# %%
# Select a single file to work on: the loop stops at the first eligible entry
# of all_engine and leaves `fpath`, `fn` and `dir_engine` set for the cells below.
# NOTE(review): if no entry is eligible the loop finishes without `break` and
# `fpath` simply keeps the last path that was iterated.
for fpath in tqdm.tqdm(all_engine):
    fn = os.path.basename(fpath)
    dir_engine = os.path.dirname(fpath)
    # NOTE(review): `outpath` is built as a list of folders earlier in this
    # notebook, so os.listdir(outpath) would raise TypeError whenever
    # skip_already is truthy — confirm which folder should be checked here.
    if skip_already and fn in os.listdir(outpath):
        continue
    # Eligible = not a hidden file, not listed in prediction_done, and an existing regular file.
    if not fn[0] == '.' and not fn in prediction_done and os.path.isfile(fpath):
        print(fn)
        break

In [None]:
# Load the selected file; only the first element returned by load_tolist is used.
# NOTE(review): droplabelcol=False presumably keeps the label column — confirm in functions.load_model.
d = load_tolist(fpath, droplabelcol=False)[0]

In [None]:
# Build the model input: augmentation/selection first, then mean imputation
# (model seems to run well without the imputation step).
# NOTE(review): both steps call fit_transform on the data being predicted —
# confirm that refitting at prediction time is intended.
X = imp.fit_transform(augsel.fit_transform(d))
#X = X.add_suffix('_tr') # not longer needed once new model has been trained

pred = model.predict(X)
proba = model.predict_proba(X)

# Confidence filter: take the highest class probability per row, smooth it with
# a 30-row rolling mean and mark rows whose smoothed value is below 0.5 as -1.
proba_max = np.amax(proba, axis=1)
proba_max_mean = (
    pd.DataFrame(proba_max)
    .rolling(30, min_periods=1)
    .mean()
    .values
)
proba_low50 = np.all(proba_max_mean < .5, axis=1)
pred[proba_low50] = -1

# Align predictions and probabilities with the index of `d`: index entries of
# `d` that are missing from X are back-filled (at most 29 consecutive rows);
# whatever is still missing becomes -1 (prediction) or 0 (probabilities).
pred = (
    pd.Series(pred, index=X.index, name='prediction')
    .reindex(d.index, method='bfill', limit=29)
    .fillna(-1)
)
proba = (
    pd.DataFrame(
        proba,
        index=X.index,
        columns=[f'proba_{i}' for i in range(proba.shape[1])],
    )
    .reindex(d.index, method='bfill', limit=29)
    .fillna(0)
)

# Combined output: loaded data, prediction column and one proba_<i> column per class.
p_out = pd.concat([d, pred, proba], axis=1) #d, 

In [None]:
# Inspect the column names of the combined output frame.
p_out.columns

In [None]:
# Re-read the selected file as JSON so the predictions can be merged into it.
# Fail loudly when the file is missing: silently skipping the read would leave
# `recording` undefined (or stale from an earlier run) for the cells below.
if not os.path.isfile(fpath):
    raise FileNotFoundError(f"Recording file not found: {fpath}")
with open(fpath, "r") as jsonfile:
    recording = json.load(jsonfile)

In [None]:
# Display the loaded JSON content.
# NOTE(review): this prints the whole object into the cell output — consider showing only its keys.
recording

In [None]:
# Merge the output table into the recording: every column of p_out becomes a
# top-level key (existing keys with the same name are overwritten).
prediction_columns = p_out.to_dict()
recording.update(prediction_columns)
# Serialise with NanConverter so NaN values end up as null in the JSON text.
jsnF = json.dumps(recording, cls=NanConverter, indent=4)

In [None]:
# Write the updated recording back to fpath.
# The text goes to a temporary file in the same folder first and is then moved
# over the original with os.replace, so an interrupted write cannot leave a
# truncated recording behind. The temporary name starts with "." so that a
# leftover file is ignored by the hidden-file check of the selection loop.
tmp_path = os.path.join(os.path.dirname(fpath), f".{os.path.basename(fpath)}.tmp")
try:
    with open(tmp_path, "w") as outfile:
        outfile.write(jsnF)
    os.replace(tmp_path, fpath)
finally:
    # Only still present if writing or replacing failed.
    if os.path.exists(tmp_path):
        os.remove(tmp_path)

In [None]:
# Display the dict form of the output table (the structure merged into the recording).
# NOTE(review): this prints every value of the table into the cell output.
p_out.to_dict()