# Torch sanity checks
> Verifying that the `TorchSolver` class is working as expected.

## Data generation

In [1]:
import sys
# sys.path.append('/home/phil/aptr')
# sys.path.append('~/aptr')
sys.path.append('/Users/phil/Columbia/aPTR')
%load_ext autoreload
%autoreload 2

In [40]:
import numpy as np
import pandas as pd

from src.torch_solver import TorchSolver
from src.database import RnaDB

from src.solve_table import solve_all, score_predictions
from src.simulation import simulate_from_ids

In [3]:
# Load a database object
# NOTE: constructing RnaDB currently triggers a pandas SettingWithCopyWarning
# (visible in this cell's output); it is emitted during construction, not by
# anything in this notebook. Later cells query the table through `db.db`.

db = RnaDB()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [56]:
# Get 10 complete genomes (a single contig is used as the proxy for "complete")
SEED = 42  # fixed so the same genomes are drawn on every Restart & Run All
N_GENOMES = 10
N_SAMPLES = 5
# NOTE(review): this path lives under /home/phil while the repo is added to
# sys.path from /Users/phil/Columbia/aPTR -- confirm it exists on this machine.
FASTA_PATH = "/home/phil/aptr/data/seqs"

# Seed NumPy's legacy global RNG, which np.random.choice draws from.
# (If simulate_from_ids also uses the global RNG, this makes it repeatable
# too -- TODO confirm.)
np.random.seed(SEED)

single_contig = db.db["n_contigs"] == 1
genomes = db.db.loc[single_contig, "genome"].unique()
genomes_to_use = np.random.choice(genomes, N_GENOMES, replace=False)

# Simulate reads from those genomes
reads, ptrs, coverages, otus = simulate_from_ids(
    db=db.db,
    ids=genomes_to_use,
    fasta_path=FASTA_PATH,
    n_samples=N_SAMPLES,
    scale=1e6,
    shuffle=False,  # Suppress shuffling to conserve memory
    fastq=False,
)


Generating sample 0 for organism 1196835.3...
Generating sample 0 for organism 1316932.3...
Generating sample 0 for organism 509193.4...
Generating sample 0 for organism 584708.3...
Generating sample 0 for organism 1250278.4...
Generating sample 0 for organism 411154.11...
Generating sample 0 for organism 645991.3...
Generating sample 0 for organism 563040.3...
Generating sample 0 for organism 1295.23...
Generating sample 0 for organism 759620.7...
Generating sample 1 for organism 1196835.3...
Generating sample 1 for organism 1316932.3...
Generating sample 1 for organism 509193.4...
Generating sample 1 for organism 584708.3...
Generating sample 1 for organism 1250278.4...
Generating sample 1 for organism 411154.11...
Generating sample 1 for organism 645991.3...
Generating sample 1 for organism 563040.3...
Generating sample 1 for organism 1295.23...
Generating sample 1 for organism 759620.7...


TypeError: Can only append a Series if ignore_index=True or if the Series has a name

Now let's take a second to look at all of the simulation outputs:

In [None]:
# The simulation was called with fastq=False, so no reads are expected back.
print(reads) # Should be None, since we didn't ask for fastq output

None


In [None]:
print(ptrs)  # A #{genomes} x #{samples} matrix of PTRs
# In this case, should be 10x5: one row per selected genome, one column per
# simulated sample (n_samples=5 in the simulation call)
print(ptrs.shape)
# Enforce the expectation instead of relying on the reader to eyeball it
assert ptrs.shape == (len(genomes_to_use), 5), ptrs.shape

[[1.9772368  1.76662221 1.769148   1.3509318  1.08862458]
 [1.97204644 1.30878618 1.36128514 1.36886784 1.65894057]
 [1.04069026 1.43489619 1.52370339 1.01070249 1.75068667]
 [1.6459498  1.25843575 1.88084104 1.63713746 1.25630302]
 [1.7725251  1.70385484 1.06096726 1.97464839 1.46689938]
 [1.26099685 1.98234143 1.09698355 1.95323754 1.14458104]
 [1.93939513 1.38012865 1.99150981 1.28887478 1.79750956]
 [1.42500306 1.26934621 1.2647923  1.53436258 1.98445658]
 [1.39492184 1.32117958 1.05020632 1.12999548 1.73007173]
 [1.07873129 1.87712176 1.16008953 1.80097735 1.85876405]]
(10, 5)


In [None]:
print(coverages)
print(coverages.shape)  # Should also be 10x5, positive integers
# Enforce the stated expectations so a regression fails loudly
assert coverages.shape == (len(genomes_to_use), 5), coverages.shape
assert np.issubdtype(coverages.dtype, np.integer), coverages.dtype
assert (coverages > 0).all(), "found a non-positive coverage"

[[  88695 1082148 2576140 1608723 1257447]
 [ 954683 1365528  589710   43353 1345653]
 [ 310401  722464  463345   81319  714309]
 [1784242 1003138 1553514  118068  418135]
 [ 721223 3614648  560734   23609  242360]
 [1371887 1202312 1968761   30253  415599]
 [1215136 1244031 1394500  730264 3745876]
 [1121273  228286   27227  288086  405076]
 [ 954716 2440629 2627437  644246 1591459]
 [1450546 1320007  447232  921115  931033]]
(10, 5)


In [None]:
# OTU table returned by the simulation. Per the output below, columns 0-4 are
# the simulated samples and rows are keyed by hash-like identifiers
# (presumably one per OTU sequence -- TODO confirm).
print(otus)

                                       0      1       2      3       4
c2275b5ec6336c90459dc62694d35a62    18.0  156.0   353.0  228.0   170.0
af5156eefea2edb2841e12d26b9e42c1     5.0   42.0    92.0   53.0    48.0
d59b9ceb43b2cecf6f271ca9d061a261   219.0  259.0   114.0    4.0   284.0
7fffb37507dfeb335b3ca1e5c26f811c   203.0  255.0   112.0    7.0   269.0
52e22440e10e89ef7fe7e6314d287e54   194.0  241.0   105.0    5.0   257.0
c321f4407428f33a0f7230f0ac1728de   314.0  795.0   530.0   73.0   810.0
5d682dd2c61aad36a8f9950d40e8abd6    34.0  106.0    74.0   10.0   110.0
e4e95a1eab44ad1660675d16bb4e22ce  1613.0  837.0  1352.0  108.0   380.0
a0ba41cf466f512dab0a4fb366453831  1591.0  888.0  1488.0  119.0   344.0
41bfe4ce2247591bf64622d049a8bc40    58.0  309.0    30.0    3.0    21.0
050830653ba7fd56094f50bbc9b24aad    47.0  267.0    28.0    0.0    18.0
3fd7c8d4fcfb4302f7f3ba2017d11909    70.0  345.0    63.0    3.0    16.0
495e7390646ee81fca804981ba96bf81    65.0  276.0    41.0    1.0    16.0
57ffd9

## Torch checks

In [None]:
Remember: the `TorchSolver` object expects a `genomes` dict with the following keys:
* `seqs` -> DNA sequences for the RNAs (dummy sequences are fine)

In [None]:
# Here we initialize a TorchSolver with the genomes and coverages from before

# NOTE(review): the original passed `otus[""]`, but the OTU table printed
# earlier has columns 0-4, so an empty-string key cannot select anything.
# The first sample (by position) is used here as a stand-in -- confirm what
# input `set_vals` actually expects for `coverages`.
SAMPLE_IDX = 0

solver = TorchSolver()
solver.set_vals(
    genomes=genomes_to_use,
    coverages=otus.iloc[:, SAMPLE_IDX],
)

NameError: name 'genomes' is not defined