# Testing Scenarios

In [None]:
# Initialise the instance used by this notebook through the lndb CLI,
# with storage "testsample" and the schema modules "bionty" and "wetlab".
!lndb init --storage "testsample" --schema "bionty,wetlab"

In [None]:
import pandas as pd
import numpy as np
import lamindb as ln
import lamindb.schema as lns
from lnschema_wetlab.dev import parse_and_insert_df
import pytest

In [None]:
# Biosample fixture: five human CD8+T samples.
# Missing values (np.nan) are included on purpose in "Experiment" and
# "Custom 3" so the parser is exercised on incomplete rows.
biosample = pd.DataFrame(
    {
        "Name": [
            "hc_dexameth",
            "cv_hydrocort_1",
            "cv_hydrocort_2",
            "hc_predniso",
            "hc_triamcino",
        ],
        "Species": ["human", "human", "human", "human", "human"],
        "Cell Type": ["CD8+T", "CD8+T", "CD8+T", "CD8+T", "CD8+T"],
        # "Experiment" used to be listed twice in this literal; only the later
        # value (ending in NaN) ever took effect, so that is the one kept.
        "Experiment": ["001", "002", "003", "004", np.nan],
        "Donor": ["021", "027", "010", "049", "002"],
        "Disease": ["U07.1", "U07.1", "U07.1", "I51.9", "K83.9"],
        "Perturbation": ["A01AC02", "A01AC03", "A01AC03", "A01AC54", "A01AC01"],
        "Custom 1": ["healthy", "acute", "acute", "convalescent", "recovered"],
        "Custom 2": ["control", "covid-19", "covid-19", "control", "control"],
        "Custom 3": [12.11, np.nan, 0.87, np.nan, 11.91],
    }
)

# Techsample fixture: five paired-end fastq samples (TS001..TS005) in two
# batches. "Custom 1" contains one missing value on purpose.
techsample = pd.DataFrame(
    {
        "Name": [f"TS{idx:03d}" for idx in range(1, 6)],
        "Batch": [1] * 3 + [2] * 2,
        "File Type": ["fastq"] * 5,
        # R1 / R2 file names differ only in the T1 / T2 tag and sample index.
        "Filepath R1": [f"SRX1603629_T1_{idx}.fastq.gz" for idx in range(1, 6)],
        "Filepath R2": [f"SRX1603629_T2_{idx}.fastq.gz" for idx in range(1, 6)],
        "Custom 1": ["13.42%", "2.43%", "4.57%", np.nan, "9.36%"],
        "Custom 2": ["Gene Expression"] * 5,
    }
)

## Test case #1: no duplicate entries

In [None]:
# Insert each table twice. The assertions below require that the second,
# identical insert does not add any rows.
res1 = parse_and_insert_df(biosample, "biosample")
res2 = parse_and_insert_df(biosample, "biosample")
res3 = parse_and_insert_df(techsample, "techsample")
res4 = parse_and_insert_df(techsample, "techsample")

# Read back everything that ended up in the database.
species = ln.select(lns.bionty.Species).all()
biosamples = ln.select(lns.wetlab.Biosample).all()
techsamples = ln.select(lns.wetlab.Techsample).all()

# Stored row counts must equal the sizes of the input frames.
assert len(species) == len(biosample["Species"].unique())
assert len(biosamples) == len(biosample)
# Compare against the input frame `techsample`; comparing the query result
# `techsamples` with itself would always pass and test nothing.
assert len(techsamples) == len(techsample)

## Test case #2: target string unmatched

In [None]:
# The target name "biosample_inexistent" is expected not to match anything
# the parser knows, so the call must raise ValueError.
with pytest.raises(ValueError):
    res = parse_and_insert_df(biosample, "biosample_inexistent")

## Test case #3: cast integers according to schema

In [None]:
# Work on a copy so the shared `biosample` fixture is left untouched.
biosample2 = biosample.copy()
# Make the column explicitly object-typed so it can hold both str and int.
biosample2["Species"] = biosample2["Species"].astype(object)
# Write the cell with a single .loc call. Chained indexing
# (df["Species"][0] = 1) may assign into a temporary object and leave
# `biosample2` unchanged.
biosample2.loc[0, "Species"] = 1
res = parse_and_insert_df(biosample2, "biosample")

# The integer 1 is expected to be stored as the string "1" and to be
# referenced by at least one inserted biosample.
species = ln.select(lns.bionty.Species, common_name="1").one()
sample = ln.select(lns.wetlab.Biosample, species_id=species.id).all()
assert len(sample) > 0