In [1]:
import pandas as pd
import random
import math
import numpy as np

In [28]:
class DNA: #inside object
    """A DNA sequence that can replicate with random point mutations.

    The sequence is stored upper-cased. Every call to replicate() advances
    the generation counter and records which bases were substituted, so the
    mutation history can be printed with get_history().
    """

    def __init__(self, sequence, snp_freq = 10e-9): #Constructor, make object, set up
        """Create a generation-0 sequence.

        sequence -- str of bases; stored upper-cased.
        snp_freq -- per-base probability of a substitution in one
                    replication (float). Note: the default 10e-9 is 1e-8.
        """
        self.__sequence = sequence.upper() #str
        # One (generation, changes) record per replication. The history
        # table is built from these on demand in get_history() rather than
        # growing a DataFrame one row at a time.
        self.__previous = [] #list of (int, list of str)
        self.__generation = 0 #int
        self.__mut_freq = snp_freq #float

    def replicate(self):
        """Copy the sequence once, introducing random substitutions.

        The number of mutated positions is drawn from a binomial distribution
        (one trial per base, success probability = the mutation frequency).
        Each chosen position is replaced by a different base from 'ATCG'.
        The changes are recorded in the history, the stored sequence is
        replaced by the mutated copy, and the generation counter goes up by 1.

        Returns the list of mutated 0-based positions (empty if none).
        """
        self.__generation += 1
        seq_len = len(self.__sequence)
        #sample from a binomial distribution to figure out how many bases change;
        #int() turns the NumPy integer into a plain int for random.sample
        num_muts = int(np.random.binomial(seq_len, self.__mut_freq))
        #pick distinct random locations (sampling without replacement)
        mut_locs = random.sample(range(seq_len), num_muts)
        #edit a list of bases instead of re-slicing the whole string per mutation
        bases = list(self.__sequence)
        for location in mut_locs: #an empty list simply skips the loop
            new_base = random.choice('ATCG')
            #redraw until the base really differs from the current one
            while new_base == self.__sequence[location]:
                new_base = random.choice('ATCG')
            bases[location] = new_base
        new_seq = ''.join(bases)
        #record the substitutions made in this generation
        self.__previous.append((self.__generation, self.get_position_changes(new_seq)))
        self.__sequence = new_seq
        return mut_locs

    def transcribe(self):
        """Return the sequence with every 'T' replaced by 'U'."""
        return self.__sequence.replace('T','U')

    def rev_comp(self):
        """Return the reverse complement (A<->T, C<->G) of the sequence."""
        return self.__sequence[::-1].translate(str.maketrans('ACTG','TGAC'))

    def gc_content(self):
        """Return the fraction of bases that are G or C (0.0 if empty)."""
        if not self.__sequence:
            return 0.0 #avoid dividing by zero for an empty sequence
        gc_count = sum(1 for letter in self.__sequence if letter in 'GC')
        return gc_count/len(self.__sequence)

    def get_mut_freq(self):
        """Return the per-base mutation frequency given to the constructor."""
        return self.__mut_freq

    def get_generation(self):
        """Return how many times replicate() has been called."""
        return self.__generation

    def get_sequence(self):
        """Return the current (upper-cased) sequence string."""
        return self.__sequence

    def get_history(self):
        """Return the mutation history as text.

        The text is the string form of a DataFrame indexed by 'Generation'
        with one 'Changes' column holding that generation's list of changes.
        """
        generations = [generation for generation, _ in self.__previous]
        changes = [changed for _, changed in self.__previous]
        history = pd.DataFrame({'Changes': changes},
                               index = pd.Index(generations, name = 'Generation'))
        return str(history)

    def get_position_changes(self, other_seq):
        """List the substitutions that turn this sequence into other_seq.

        Each entry is old base + 1-based position + new base, e.g. 'C2T'.
        Only the overlapping prefix is compared, so an other_seq of a
        different length does not raise IndexError.
        """
        return [old + str(position) + new
                for position, (old, new)
                in enumerate(zip(self.__sequence, other_seq), start = 1)
                if old != new]

    def __str__(self):
        """Return a short description containing the sequence length."""
        return 'Sequence with length ' + str(len(self.__sequence))

In [29]:
# Example 11-base DNA sequence; passed to the DNA constructor in the next cell.
seq = 'ACTGGATTGCA'

In [30]:
#How do we test the DNA class?
#outside object, main program
#create an object!
my_dna = DNA(seq, 0.1) #constructor call: sequence = seq, snp_freq = 0.1
#print(my_dna.rev_comp())
#print(my_dna.transcribe())
#print(my_dna.gc_content())
#print(my_dna.get_position_changes('ATTT'))


['C2T', 'G4T']


In [None]:
class RNA:
    """An RNA sequence that can be translated into a string of amino acids."""

    def __init__ (self, sequence):
        """Store the upper-cased sequence and load the codon table.

        The table is read from the tab-separated file 'codons_R.txt' in the
        working directory; its 'Codon' column is mapped to its 'Letter'
        column.
        """
        self.__sequence = sequence.upper()
        aminos = pd.read_csv('codons_R.txt', sep = '\t') #https://github.com/zhanxw/anno/blob/master/codon.txt
        self.__aa_table = dict(zip(aminos['Codon'], aminos['Letter']))

    def translate(self, frame = 1, find_orf = True, dashes = False ): #just consider frames 1 through 3 for now
        """Translate the sequence codon by codon using the codon table.

        frame    -- 1-based reading frame; only used when find_orf is False.
        find_orf -- when True, start at the first 'AUG' and ignore frame.
        dashes   -- when True, a '-' is appended after every amino acid
                    (including the last one).

        Returns the peptide as a str. Returns '' when find_orf is True and
        there is no 'AUG', or when frame is below 1. A trailing partial codon
        is ignored. Translation runs to the end of the sequence; it does not
        stop at stop codons. Raises KeyError for a codon missing from the
        table.
        """
        if find_orf:
            start = self.__sequence.find('AUG') #0-based index, -1 if absent
        else:
            start = frame - 1 #1-based frame -> 0-based offset
        if start < 0:
            return ''
        suffix = '-' if dashes else ''
        pieces = []
        #stop at len - 2 so that every slice is a complete 3-base codon
        for i in range(start, len(self.__sequence) - 2, 3):
            codon = self.__sequence[i:i+3]
            pieces.append(self.__aa_table[codon] + suffix)
        return ''.join(pieces)

    def get_sequence(self):
        """Return the (upper-cased) RNA sequence string."""
        return self.__sequence

    def __str__(self):
        """Return a short description containing the sequence length."""
        return 'Sequence with length ' + str(len(self.__sequence))

In [None]:
def rand_RNA_seq(length):
    """Return a random RNA string of `length` bases.

    Each base is drawn independently with random.choice from 'ACUG'.
    A length of zero or less gives an empty string.
    """
    bases = []
    for _ in range(length):
        bases.append(random.choice('ACUG'))
    return ''.join(bases)

In [None]:
#Test the RNA class
