In [1]:
import random
import shutil
import os

import torch
import torch.nn as nn

import boda

In [2]:
def load_model(artifact_path):
    """Unpack a model artifact and return the loaded model in eval mode.

    Args:
        artifact_path: Location of the model archive, passed straight to
            ``boda.common.utils.unpack_artifact``.

    Returns:
        The object produced by ``boda.common.utils.model_fn('./artifacts')``,
        switched to eval mode and moved to CUDA when at least one CUDA
        device is reported by ``torch.cuda.device_count()``.
    """
    model_dir = './artifacts'
    has_gpu = torch.cuda.device_count() >= 1

    # Remove any previous ./artifacts directory before unpacking.
    if os.path.isdir(model_dir):
        shutil.rmtree(model_dir)

    # NOTE(review): presumably the archive unpacks into ./artifacts, since
    # that is the directory handed to model_fn below — confirm.
    boda.common.utils.unpack_artifact(artifact_path)

    model = boda.common.utils.model_fn(model_dir)
    model.eval()
    if has_gpu:
        model.cuda()

    return model


In [3]:
# gs:// URI of the packaged Malinois model archive (.tar.gz).
malinois_path = 'gs://tewhey-public-data/CODA_resources/malinois_model__20211113_021200__287348.tar.gz'
# load_model unpacks the archive and returns the model in eval mode
# (moved to CUDA when a CUDA device is present).
my_model = load_model(malinois_path)

Copying gs://tewhey-public-data/CODA_resources/malinois_model__20211113_021200__287348.tar.gz...
| [1 files][ 49.3 MiB/ 49.3 MiB]                                                
Operation completed over 1 objects/49.3 MiB.                                     
archive unpacked in ./


Loaded model from 20211113_021200 in eval mode


In [7]:
# Constants for flanks: fixed context placed on either side of every random
# insert. The right flank is the first 200 characters of MPRA_DOWNSTREAM and
# the left flank is the last 200 characters of MPRA_UPSTREAM.
right_flank = boda.common.constants.MPRA_DOWNSTREAM[:200]
left_flank = boda.common.constants.MPRA_UPSTREAM[-200:]

# Seed Python's global `random` generator so the sequences drawn below are
# the same every time this cell is run.
random.seed(42)

def generate_random_sequence(length):
    """Return a random string of ``length`` characters drawn from ``'ATCG'``.

    Each character is picked independently with ``random.choice``, so the
    result depends on the current state of the global ``random`` generator.
    A ``length`` of zero or less yields the empty string.
    """
    bases = []
    for _ in range(length):
        bases.append(random.choice('ATCG'))
    return ''.join(bases)

def generate_random_sequences_with_flanks(num_sequences, sequence_length):
    """Build ``num_sequences`` random inserts, each wrapped in the fixed flanks.

    Args:
        num_sequences: How many sequences to produce.
        sequence_length: Length of each random insert, excluding the flanks.

    Returns:
        A list whose elements are ``left_flank + insert + right_flank``, where
        every insert is a fresh ``generate_random_sequence(sequence_length)``
        draw and the flanks are the module-level constants.
    """
    return [
        left_flank + generate_random_sequence(sequence_length) + right_flank
        for _ in range(num_sequences)
    ]

# Experiment size: how many sequences to draw and the length of each random insert.
N = 10
sequence_length = 200

random_sequences_for_diffusion = generate_random_sequences_with_flanks(N, sequence_length)

# Persist as a single-column TSV: a "Sequence" header, then one sequence per line.
tsv_file_path = "random_sequences_for_diffusion.tsv"
with open(tsv_file_path, "w") as tsv_file:
    tsv_file.writelines(
        line + "\n" for line in ["Sequence", *random_sequences_for_diffusion]
    )

print(f"Sequences saved to {tsv_file_path}")
print(random_sequences_for_diffusion)

Sequences saved to random_sequences_for_diffusion.tsv
['GTACGGGAGGTATTGGACAGGCCGCAATAAAATATCTTTATTTTCATTACATCTGTGTGTTGGTTTTTTGTGTGAATCGATAGTACTAACATACGCTCTCCATCAAAACAAAACGAAACAAAACAAACTAGCAAAATAGGCTGTCCCCAGTGCAAGTGCAGGTGCCAGAACATTTCTCTGGCCTAACTGGCCGCTTGACGAACTTTAAGAAATTATGTGCATGCCTTCAAGACCCAGAGACCTAATCATAGCGCTCCTCATTTGGCTCATACGCATCTGGGTCTTCGGCTTGAAATTGAGGGCAACCACGTGACTACTTCTACGAACCTATAAGATTGTCGTTCGCGGATTACATTAAATAACATCGTTGTGGTAAGCGGGAAAGCATTTGTGTCGTAGACACTGCGGCTCCTGCGATCTAACTGGCCGGTACCTGAGCTCGCTAGCCTCGAGGATATCAAGATCTGGCCTCGGCGGCCAAGCTTAGACACTAGAGGGTATATAATGGAAGCTCGACTTCCAGCTTGGCAATCCGGTACTGTTGGTAAAGCCACCATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATC', 'GTACGGGAGGTATTGGACAGGCCGCAATAAAATATCTTTATTTTCATTACATCTGTGTGTTGGTTTTTTGTGTGAATCGATAGTACTAACATACGCTCTCCATCAAAACAAAACGAAACAAAACAAACTAGCAAAATAGGCTGTCCCCAGTGCAAGTGCAGGTGCCAGAACATTTCTCTGGCCTAACTGGCCGCTTGACGAAATTGGGTGATGAGCGCGGTTCTAACAAGTAATAATGATAAGCCTCTCGTCGCAAGAATCTCATCCTGCACATCAATCCTCTCGCAAGCAACTCTGGAAATACTGTACCACTTACGTTTTGATCGTCTAGAGTTGCCTT