# What drives error?
> Investigating the sources of error, genome by genome.

In [1]:
import sys
# Extend the module search path; presumably this is what lets the `src.*`
# imports used by this notebook resolve — TODO confirm.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# will not run unchanged on another machine.
sys.path.append('/home/phil/aptr')
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs so edits to project code take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [5]:
from src.simulation import simulate_from_ids
from src.torch_solver import TorchSolver, solve_table
from src.database import RnaDB

import torch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from tqdm import tqdm

# Simulating and assessing:

## Single-genome solutions:

In [30]:
# For each of the first ten complete genomes: simulate samples containing only
# that genome, solve for (abundance, PTR), and record the absolute error of
# every returned solution against the simulated values.
db = RnaDB()

# Column order of the final table. "total_abundances" is listed explicitly so
# the column exists even when no rows were collected.
result_columns = [
    "genome", "a", "b", "a_hat", "b_hat", "loss", "a_err", "b_err",
    "total_abundances",
]

# Rows are gathered in a plain list and converted to a DataFrame once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# copied the entire frame on every call.
records = []
try:
    for genome in tqdm(db.complete_genomes[:10]):
        samples, ptrs, abundances, otus = simulate_from_ids(
            ids=[genome], n_samples=10, scale=1e5, verbose=False
        )
        solutions = solve_table(otus, [genome], db=db, verbose=False)

        # NOTE(review): every solution is scored against element [0, 0] of the
        # simulated arrays. If `solutions` yields one entry per sample, the
        # reference values should probably be indexed per sample instead —
        # confirm against the shapes returned by simulate_from_ids.
        a_true = abundances[0, 0]
        b_true = ptrs[0, 0]
        total_abundances = abundances.sum()

        for (a_hat, b_hat, losses) in solutions:
            records.append({
                "genome": genome,
                "a": a_true,
                "b": b_true,
                "a_hat": a_hat,
                "b_hat": b_hat,
                "loss": losses[-1],  # last entry of the returned loss sequence
                "a_err": np.abs(a_hat - a_true),
                "b_err": np.abs(b_hat - b_true),
                "total_abundances": total_abundances,
            })
finally:
    # Build the table even when the loop is interrupted (each genome takes a
    # while to solve), so the rows collected so far are still available in
    # `results`; the interrupt itself still propagates.
    results = pd.DataFrame(records, columns=result_columns)

 10%|█         | 1/10 [00:25<03:48, 25.37s/it]


KeyboardInterrupt: 