# Testing Scenarios

In [None]:
import pandas as pd
import numpy as np
import lamindb as ln
import lamindb.schema as lns
from lnschema_wetlab.dev import parse_and_insert_df
from lndb_setup import init

# Set up the instance used by every cell below: storage location "testdb"
# plus the listed schema modules (presumably created on first run -- confirm
# against lndb_setup.init).
init(
    storage="testdb",
    schema=",".join(("wetlab", "bionty", "bfx", "retro")),
)

In [None]:
# Biosample fixture: five rows, all from experiment "x80", donor "Donor 5",
# species "human". Columns that hold one value throughout are built by
# repetition; np.nan marks cells that have no value.
biosample = pd.DataFrame(
    {
        "Name": [
            "dm3_pre_activation",
            "d0_post_activation",
            "d1_GFP",
            "d1_OSKMNL_arm1",
            "d1_OSKMNL_arm_2",
        ],
        "Experiment": ["x80"] * 5,
        "Condition": [
            "pre-activation",
            "post-activation",
            "GFP",
            "OSKMNL_x1",
            "OSKMNL_x3",
        ],
        "Transfection": [np.nan] * 2 + ["GFP-LNP"] + ["OSKMNL-LNP"] * 2,
        "Day": [-3, 0, 1, 1, 1],
        "Donor": ["Donor 5"] * 5,
        "Species": ["human"] * 5,
        # Mixing NaN with integers makes this a float column.
        "CMO": [np.nan, np.nan, 301, 302, 303],
        "Gene Expression": ["Gm3", "G0"] + ["G1"] * 3,
        "CMO File": [np.nan] * 2 + ["T1"] * 3,
        "CSP File": [np.nan] * 5,
    }
)

# Techsample fixture: five gene-expression FASTQ samples from batch 1.
techsample = pd.DataFrame(
    {
        "Sample Name": ["Gm3", "G0", "G1", "G2", "G3"],
        "Sample ID": ["S1", "S2", "S3", "S4", "S5"],
        "Batch": [1] * 5,
        "File Type": ["fastq"] * 5,
        "Sample Type": ["Gene Expression"] * 5,
        "Percent Total Read Allocation": ["3.28%", "4.69%"] + ["13.14%"] * 3,
    }
)
# Each read file is named "<Sample Name>_<Sample ID>_L003_<read>_001.fastq.gz",
# so both path columns are derived from the name and ID columns.
techsample["Filepath R1"] = (
    techsample["Sample Name"] + "_" + techsample["Sample ID"] + "_L003_R1_001.fastq.gz"
)
techsample["Filepath R2"] = (
    techsample["Sample Name"] + "_" + techsample["Sample ID"] + "_L003_R2_001.fastq.gz"
)

In [None]:
# Test case #1: no duplicate entries
# Each frame is inserted twice on purpose: the row counts checked below only
# hold if the second insert of the same data adds nothing.
res1 = parse_and_insert_df(biosample, "biosample")
res2 = parse_and_insert_df(biosample, "biosample")
res3 = parse_and_insert_df(techsample, "techsample")
res4 = parse_and_insert_df(techsample, "techsample")

species = ln.select(lns.bionty.Species).all()
biosamples = ln.select(lns.retro.Biosample).all()
techsamples = ln.select(lns.retro.Techsample).all()

# One stored record per distinct input value / input row.
assert len(species) == len(biosample["Species"].unique())
assert len(biosamples) == len(biosample)
# Compare the stored records against the input frame `techsample`; comparing
# `techsamples` with itself would always pass.
assert len(techsamples) == len(techsample)

In [None]:
# Test case #2: target string unmatched
# A target name that is not recognised is expected to raise ValueError.
import pytest

with pytest.raises(ValueError):
    parse_and_insert_df(biosample, "biosample_inexistent")

In [None]:
# Test case #3: cast integers according to schema
# Put an integer into the otherwise textual "Species" column and check that it
# is stored as the string "1".
biosample2 = biosample.copy()
# Make the column explicitly object dtype so it can hold a non-string value
# whatever dtype pandas inferred for it.
biosample2["Species"] = biosample2["Species"].astype(object)
# A single .loc assignment writes into the frame itself; the chained form
# biosample2["Species"][0] = 1 may only modify a temporary copy.
biosample2.loc[0, "Species"] = 1
res = parse_and_insert_df(biosample2, "biosample")
# The integer must be retrievable under its string form.
species = ln.select(lns.bionty.Species, common_name="1").one()
sample = ln.select(lns.retro.Biosample, species_id=species.id).all()
assert len(sample) > 0