In [1]:
from dataclasses import dataclass
from typing import Dict, List
import pandas as pd
from collections import defaultdict
from pathlib import Path

In [28]:
import json
import gzip

class PLIERResults(object):
    def __init__(
        self,
        B: pd.DataFrame = pd.DataFrame(),
        Z: pd.DataFrame = pd.DataFrame(),
        U: pd.DataFrame = pd.DataFrame(),
        C: pd.DataFrame = pd.DataFrame(),
        L1: float = 0.0,
        L2: float = 0.0,
        L3: float = 0.0,
        heldOutGenes: Dict[str, List[str]] = defaultdict(list),
        withPrior: Dict[str, int] = defaultdict(int),
        Uauc: pd.DataFrame = pd.DataFrame(),
        Up: pd.DataFrame = pd.DataFrame(),
        summary: pd.DataFrame = pd.DataFrame(),
        residual: pd.DataFrame = pd.DataFrame(),
    ):
        self.residual=residual
        self.B = B
        self.Z = Z
        self.U = U
        self.C = C
        self.L1 = L1
        self.L2 = L2
        self.L3 = L3
        self.heldOutGenes = heldOutGenes
        self.withPrior = withPrior
        self.Uauc = Uauc
        self.Up = Up
        self.summary = summary
        
    def to_dict(self):
        return {
            "B": self.B.to_dict(),
            "Z": self.Z.to_dict(),
            "U": self.U.to_dict(),
            "C": self.C.to_dict(),
            "L1": self.L1,
            "L2": self.L2,
            "L3": self.L3,
            "heldOutGenes": self.heldOutGenes,
            "withPrior": self.withPrior,
            "residual": self.residual.to_dict(),
            "Uauc": self.Uauc.to_dict(),
            "Up": self.Up.to_dict(),
            "summary": self.summary.to_dict()
        }
    
    def from_dict(self, source):
        if "B" in source:
            self.B = pd.DataFrame.from_dict(source["B"])
        if "Z" in source:
            self.Z = pd.DataFrame.from_dict(source["Z"])
        if "U" in source:
            self.U = pd.DataFrame.from_dict(source["U"])
        if "C" in source:
            self.C = pd.DataFrame.from_dict(source["C"])
        
        if "L1" in source:
            self.L1 = source["L1"]
        if "L2" in source:
            self.L2 = source["L2"]
        if "L3" in source:
            self.L3 = source["L3"]

        if "heldOutGenes" in source:
            self.heldOutGenes = source["heldOutGenes"]
        if "withPrior" in source:
            self.withPrior = source["withPrior"]
        
        if "residual" in source:
            self.residual = pd.DataFrame.from_dict(source["residual"])
        if "Uauc" in source:
            self.Uauc = pd.DataFrame.from_dict(source["Uauc"])
        if "Up" in source:
            self.Up = pd.DataFrame.from_dict(source["Up"])
        if "summary" in source:
            self.summary = pd.DataFrame.from_dict(source["summary"])

    
    def to_disk(self, loc: Path, compress: bool=False):
        if compress or loc.suffix == ".gz":
            with gzip.open(loc, 'wt', encoding='UTF-8') as zipfile:
                json.dump(self.to_dict(), zipfile)
        else:
            with open(loc, "w") as jsonfile:
                json.dump(self.to_dict())

    @classmethod
    def from_disk(cls, loc: Path):
        try:
            with open(loc, "r") as infile:
                input_dict = json.load(fp = infile)
        except UnicodeDecodeError:
            with gzip.open(loc, 'rt', encoding='UTF-8') as infile:
                input_dict = json.load(fp = infile.read())
        print(input_dict.keys())
        pr = cls()
        pr.from_dict(input_dict)
        return pr
            

In [3]:
data_dir = Path("/workspaces/pyplier")

In [4]:
with open(data_dir / "plierresheldoutgenes.json", "r") as infile:
    heldOutGenes = json.load(infile)

In [5]:
from rich import print as rprint

In [14]:
rprint(heldOutGenes)

{
    'IRIS_Bcell-Memory_IgG_IgA': ['RAB30', 'RAB30', 'PKIG', 'PNOC', 'TPD52', 'HEY1', 'ALOX5', 'BLNK', 'POU2AF1', 'CD19', 'GRAMD1C', 'PNOC'],
    'IRIS_Bcell-Memory_IgM': ['PKIG', 'EBF1', 'IGJ', 'CD22', 'TCF4', 'MARCH1', 'AFF3', 'HHEX', 'QRSL1', 'MS4A1', 'EAF2', 'PCDH9'],
    'IRIS_Bcell-naive': [
        'RAB30',
        'BCL7A',
        'RALGPS2',
        'WASF1',
        'QRSL1',
        'WASF1',
        'DENND5B',
        'RALGPS2',
        'BANK1',
        'TCF4',
        'CPNE5',
        'GGA2',
        'LILRA4',
        'CD19'
    ],
    'IRIS_CD4Tcell-N0': ['SNPH', 'CTLA4', 'CD160', 'CD160', 'SNPH', 'LDLR', 'PASK'],
    'IRIS_CD4Tcell-Th1-restimulated12hour': ['BAG2', 'JAKMIP1', 'COL6A3', 'TMEM97'],
    'IRIS_CD4Tcell-Th1-restimulated48hour': ['FOXM1', 'CENPF', 'BUB1B', 'CPOX', 'NDFIP2'],
    'IRIS_CD4Tcell-Th2-restimulated12hour': ['TPR', 'HOMER2', 'HPGD', 'HOMER2', 'RGS9'],
    'IRIS_CD4Tcell-Th2-restimulated48hour': ['DSCC1', 'NPHP4', 'CTLA4', 'BUB1B', 'AURKB', 'NDFIP2'],
 

In [20]:
with gzip.open("/workspaces/pyplier/heldoutgenes.json.gz", 'wb') as outfile:
    json_str = json.dumps(heldOutGenes)
    json_bytes = json_str.encode('utf-8')
    outfile.write(json_bytes)

In [7]:
B = pd.read_csv(data_dir / "plierresb.csv", index_col=0)
Z = pd.read_csv(data_dir / "plierresz.csv", index_col=0)
U = pd.read_csv(data_dir / "plierresu.csv", index_col=0)
C = pd.read_csv(data_dir / "plierresc.csv", index_col=0)
L1 = 18.160582839402732
L2 = 36.321165678805464
L3 = 0.00033546262790251185

with open(data_dir / "plierresheldoutgenes.json", "r") as infile:
    heldOutGenes = json.load(infile)

withPrior = pd.DataFrame()
Uauc = pd.DataFrame()
Up = pd.DataFrame()
summary = pd.DataFrame()
residual = pd.read_csv(data_dir / "plierresresidual.csv", index_col=0)

In [8]:
pr = PLIERResults(
    B=B,
    Z=Z,
    U=U,
    C=C,
    L1=L1,
    L2=L2,
    L3=L3,
    heldOutGenes=heldOutGenes,
    residual=residual
)

In [28]:
new_plierDict = pr.to_dict()

In [None]:
    # def to_disk(self, loc: Path, compress: bool=False):
#         if compress or loc.suffix == ".gz":
#             with gzip.open(loc, 'wt', encoding='UTF-8') as zipfile:
#                 json.dump(self.to_dict(), zipfile)
#         else:
#             with open(loc, "w") as jsonfile:
#                 json.dump(self.to_dict())

In [29]:
with gzip.open("/workspaces/pyplier/test2.json.gz", 'wt', encoding='UTF-8') as zipfile:
    json.dump(new_plierDict, zipfile)

In [9]:
plierDict = {
    "B": B.to_dict(),
    "Z": Z.to_dict(),
    "U": U.to_dict(),
    "C": C.to_dict(),
    "L1": L1,
    "L2": L2,
    "L3": L3,
    "heldOutGenes": heldOutGenes,
    "withPrior": withPrior.to_dict(),
    "residual": residual.to_dict(),
    "Uauc": Uauc.to_dict(),
    "Up": Up.to_dict(),
    "summary": summary.to_dict()
}

In [10]:
with open("/workspaces/pyplier/test_save.json", "w") as out:
    json.dump(obj=plierDict, fp=out)

In [31]:
with open("/workspaces/pyplier/test_save.json.gz", "r") as infile:
    test_load = json.load(infile)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte

In [6]:
input_file = Path("/workspaces/pyplier/test_save.json.gz")
input_file.exists()

True

In [34]:
try:
    with open(input_file, "r") as infile:
        input_dict = json.load(fp = infile)
except UnicodeDecodeError:
    with gzip.open(input_file, 'rt', encoding='UTF-8') as infile:
        input_dict = json.load(fp = infile)

In [35]:
input_dict.keys()

dict_keys(['B', 'Z', 'U', 'C', 'L1', 'L2', 'L3', 'heldOutGenes', 'withPrior', 'residual', 'Uauc', 'Up', 'summary'])

In [None]:
rprint(plierDict)

In [26]:
pr_from_file = PLIERResults.from_disk(input_file)

dict_keys(['B', 'Z', 'U', 'C', 'L1', 'L2', 'L3', 'heldOutGenes', 'withPrior', 'residual', 'Uauc', 'Up', 'summary'])


In [24]:
class otherthing(PLIERResults):
    pass

type(pr_from_file.from_disk(input_file))

<class 'NoneType'>

In [27]:
pr_from_file.B

Unnamed: 0,BD8001,BD8002,BD8003,BD8004,BD8005,BD8006,BD8007,BD8008,BD8009,BD8010,BD8011,BD8012,BD8013,BD8015,BD8017,BD8018,BD8019,BD8020,BD8021,BD8024,BD8025,BD8026,BD8027,BD8028,BD8029,BD8030,BD8031,BD8032,BD8033,BD8034,BD8038,BD8041,BD8042,BD8043,BD8044,BD8045
LV1,0.9863,-0.30895,2.16036,3.080646,0.025228,0.833987,-2.022368,-1.096954,0.12542,1.998529,-0.954977,-0.316529,-0.522068,-0.92301,-1.79845,-2.234985,-0.225439,1.193261,-1.5481,-2.61942,-1.213619,0.681168,1.336698,0.53644,1.148713,0.880375,1.719309,-0.299897,1.227389,1.32458,1.390904,-0.190599,0.180255,-0.718377,-0.483662,-3.352157
LV2,-0.56664,-1.099101,0.404922,1.401323,1.073257,-0.178624,-2.359848,0.722254,-2.518698,-0.520384,2.93565,0.26119,-2.846205,1.252313,1.568029,-1.331439,1.703217,0.098786,1.570987,0.271047,1.460057,-0.064101,0.006073,-0.493105,0.390782,0.08783,2.100276,0.144553,-1.213307,-3.67682,0.490946,-2.153242,0.576816,0.157424,-0.135442,0.479225
LV3,-0.157657,-1.153294,1.094822,-3.76902,-0.968197,-0.962799,-0.695741,-1.489502,-0.145592,-0.413987,1.554045,2.631696,-0.54919,0.736608,0.759483,2.478072,-0.640586,1.690838,0.827723,1.703912,0.916515,-0.906946,-0.889409,-1.712339,1.56782,-2.390672,-0.798146,-1.245389,-1.145045,-1.398209,2.272711,0.773739,1.030818,1.316876,-0.192984,0.269025
LV4,-1.244684,-1.118928,0.195779,-0.379635,0.478649,0.775916,-0.927209,0.521021,-0.390162,-1.461589,-0.389813,0.421494,1.428208,0.150419,-1.171413,0.655989,1.066713,1.389247,-1.064119,-1.072026,-0.092564,0.711021,0.989974,0.713624,-0.013859,-0.23095,-0.066287,-4.19945,1.22111,2.143023,0.195608,0.633927,0.026751,0.565456,-0.222137,-0.239102
LV5,0.645472,0.289995,-0.990165,0.097889,0.703119,-0.367105,1.04564,0.618265,-1.170464,1.006402,-1.938834,-1.277019,0.665557,0.54286,-0.924301,-0.199592,0.784501,0.46778,-1.8148,1.018403,0.021101,1.001145,0.047817,-1.370681,-0.092734,0.218021,0.800532,-1.0829,-0.526759,-0.216904,-0.042812,0.470865,-1.769699,1.249627,0.892275,1.197502
LV6,-1.99307,0.013305,-0.424503,-0.873381,0.724902,0.32812,0.954012,-0.134793,0.902481,-0.241265,0.474741,-1.475072,-0.353985,-0.30672,0.071372,1.536024,0.729541,0.06829,-0.805668,-0.08817,-0.248041,0.523802,0.152317,1.079903,-0.083315,-0.483877,0.136054,-0.226393,1.033804,1.906461,0.392577,-0.040845,0.396397,-0.910129,-0.524784,-2.210094
LV7,2.918699,2.600608,1.034696,2.491937,0.785472,2.154179,-1.450202,0.170517,2.106509,-1.83313,-2.67348,-0.981773,0.413544,-0.791597,-3.818782,-3.714366,-0.107381,0.694549,-2.107645,-2.534747,-1.795829,0.663875,0.81052,0.626689,0.457997,1.123416,0.700622,2.253331,0.485196,1.402392,-0.002745,0.819477,0.256473,-0.763667,0.444805,-2.840159
LV8,0.07417,0.052181,4.408299,-0.260124,-0.950783,-0.691262,1.421729,1.185023,0.701779,0.054212,-1.095768,-0.850605,0.566431,-0.694514,-0.685063,-1.099907,-0.833331,-1.302466,-0.673328,-0.435204,0.387082,-0.400899,0.005602,-0.032233,-0.326764,0.145505,-0.907378,1.260107,0.507019,0.589475,-0.566974,0.622462,-0.918474,-0.027298,0.701433,0.069866
LV9,0.792096,0.541818,-0.069737,0.504711,-0.022598,0.540631,0.485001,0.412369,-0.148949,-0.871733,-1.103118,0.095483,-0.377667,-1.480978,0.607858,-0.308053,0.212525,0.190339,0.848921,-0.528706,-0.323324,0.643758,0.325211,-0.064664,0.964001,-0.067909,0.210219,0.804391,-0.661939,-0.331679,0.367758,-0.485719,-0.064046,-0.077867,-0.264017,-1.294389
LV10,-0.572362,-0.069346,-0.403823,1.247412,0.338802,-0.784478,-1.029439,0.523201,-0.495043,-0.566765,3.173322,0.489595,-0.461222,-0.352653,0.017748,-1.502009,1.054755,-0.607696,0.902278,-0.322161,1.089952,0.17119,-0.432101,0.304853,0.18052,0.493967,1.964763,0.398422,-0.12107,-1.540432,0.138767,-0.641646,-0.341695,-1.326342,-0.930813,0.011547


In [29]:
problem = PLIERResults.from_disk(Path("/workspaces/pyplier/crossval_plierres.json"))

JSONDecodeError: Expecting value: line 1 column 55172188 (char 55172187)

In [31]:
list(pd.Index(['a','b','c']))

['a', 'b', 'c']