# Encoding

In [1]:
# Phase III - Version 1 - Termination Sequence Added
import numpy as np
import pandas as pd
import itertools

In [2]:
# Read in the ASCII Table
ASCII_List_Raw=pd.read_csv("ASCII_QUAT_CODES.csv", dtype=object,usecols=[0,1,2,3],skiprows=0, nrows=260,encoding='utf-8')

display(ASCII_List_Raw.head())
display(ASCII_List_Raw.tail())

Unnamed: 0,Dec,Char,QUAT CODE,QUAT NUCLEOTIDES
0,0,NUL,0,CGTA
1,1,SOH,1,CGTC
2,2,STX,2,CGTG
3,3,ETX,3,CGTCG
4,4,EOT,10,CGAC


Unnamed: 0,Dec,Char,QUAT CODE,QUAT NUCLEOTIDES
251,251,?,3323,GGCCAGG
252,252,?,3330,GGCCTTA
253,253,²,3331,GGCCTTC
254,254,?,3332,GGCCTTG
255,255,,3333,GGCCTTCG


# Functions

In [3]:
def get_QUAT_NUCLEOTIDES(df, value):
    ''' Takes a character as the input and returns the corresponding QUAT NUCLEOTIDE.'''
    
    # Deal with the special case when the entry is a spacebar
    if value==" ":
        value="space"
    
    elif value=="'":
        pass
    elif value=="\n": # If the character is a new line, swap it for a LF ASSCI instead
        value="LF"
#         return True
#     elif value=="SOMETHING WE CANT TRANSLATE"
#         return True

    # Get bool dataframe with True at positions where the given value exists
    result = df.isin([value])
    # Find the row where result is true
    row = list(result["Char"][result["Char"] == True].index)
    # Find the corresponding QUAT NUCLEOTIDE at the row
    nucleotideCode=df.loc[row,"QUAT NUCLEOTIDES"].tolist()[0]

    return nucleotideCode

In [4]:
# get_QUAT_NUCLEOTIDES-Test
a=get_QUAT_NUCLEOTIDES(ASCII_List_Raw," ")
b=get_QUAT_NUCLEOTIDES(ASCII_List_Raw,"b") 
c=get_QUAT_NUCLEOTIDES(ASCII_List_Raw,"\n") 
print(a,b,c)

# print(type(a))

CACG GCGC CGCA


In [5]:
def noTripleRepeats(code): #This is actually for AA repeats
    """Takes a tuple of letters as an input and returns 0 if there are no triple repeats such as AAB
    and a 1 if there are triple repeats such as AAA"""
    
    # Convert tuple to string:
    testString=""
    for value in code:
        testString=testString+value
    
    # Brains of checking is below
    x=len(testString)
    for i,j in enumerate(code):
        if i==(x-2) or x<=2:
            break
        initialLetter=j
        secondLetter=code[i+1]
        thirdLetter=code[i+2]
        if (initialLetter == secondLetter) and (secondLetter==thirdLetter):
            return 1
    return 0

In [6]:
# noTripleRepeats test
noTripleRepeats(('A', 'A', 'G','T','T','G','C',"C","C"))
noTripleRepeats(('A', 'A', 'G','T','T','G','C',"C","G"))

0

In [7]:
def noDoubleRepeats(code): #This is actually for AA repeats
    """Takes a string of letters as an input and returns 0 if there are no double repeats such as AB
    and a 1 if there are double repeats AA"""
    x=len(code)
    for i,j in enumerate(code):
        if i==(x-1) or x<=1:
            break
        initialLetter=j
        secondLetter=code[i+1]
        if (initialLetter == secondLetter):
            return 1
    return 0

In [8]:
# noDoubleRepeats test
noDoubleRepeats("TCTATCGACA")

0

In [9]:
def codingStrategy(maximumFragments):
    """Need to keep the total length of DNA fragment to its lowest. 
    Each DNA fragment consists of three parts: 
    1) The Address Code at the front 
    2) The Encoded Information
    3) The Termination Sequence
    
    This function is designed to output the smallest address file possible based on the total length of the 
    message. Each one of these address files needs to be unique and have no triple repeats """
    
    noRepeatsList=[]
    # Start at the smallest and work upwards
    for x in range(0,20):
        if maximumFragments<=4**x:
            addressLength=x
            
            # Now create the combinations with this number of digits available
            checkList=list(itertools.product(["A","G","C","T"], repeat=addressLength)) # Repeat here corresponds to the length of the address code such that if repeats = 2 then expect [(1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3), (3, 1), (3, 2), (3, 3)] whereas if repeats =3 then expect [(1,1,1), (1,1,2) etc]
            # Note the above code produces a list similar to:
            # [(A,A), (A,B) etc] for repeat =2
            # [(A,A,A), (A,A,B) etc] for repeat =3
            
            # Loop through each tuple in above list and check to see if there is triple repeats. Then put the suitable  
            for entry in checkList:
                if noTripleRepeats(entry)==0:
                    stringCode=""
                    
                    # Build list of strings that can be used as address codes. Put these in the noRepeatsList
                    for letter in entry:
                        stringCode=stringCode+letter
                    noRepeatsList.append(stringCode)
                    
                else: # The tuple has a triple repeat and therefore should not be converted to string address code
                    pass
            
            # Check that no number of codes is still adequate:
            if maximumFragments<=len(noRepeatsList):
                return noRepeatsList
            else:
                return codingStrategy(maximumFragments+1)

In [10]:
# Coding strategy test
a=codingStrategy(60)
print(a)
print(len(a))

['AAG', 'AAC', 'AAT', 'AGA', 'AGG', 'AGC', 'AGT', 'ACA', 'ACG', 'ACC', 'ACT', 'ATA', 'ATG', 'ATC', 'ATT', 'GAA', 'GAG', 'GAC', 'GAT', 'GGA', 'GGC', 'GGT', 'GCA', 'GCG', 'GCC', 'GCT', 'GTA', 'GTG', 'GTC', 'GTT', 'CAA', 'CAG', 'CAC', 'CAT', 'CGA', 'CGG', 'CGC', 'CGT', 'CCA', 'CCG', 'CCT', 'CTA', 'CTG', 'CTC', 'CTT', 'TAA', 'TAG', 'TAC', 'TAT', 'TGA', 'TGG', 'TGC', 'TGT', 'TCA', 'TCG', 'TCC', 'TCT', 'TTA', 'TTG', 'TTC']
60


In [11]:
def complimentaryPrimer(primer):
    """ Takes an address code as an input and gives the complimentary primer as the output"""
    compPrimer=""
    for letter in list(primer):
        if letter == "A":
            comp="T"
        elif letter == "C":
            comp="G"
        elif letter == "T":
            comp="A"
        elif letter == "G":
            comp="C"
        compPrimer+=comp
    return compPrimer

In [12]:
# ComplimentaryPrimer Test Box
b=complimentaryPrimer("ATCGGCCGAG")
print(b)

TAGCCGGCTC


In [13]:
def binaryConverter(addressCode):
    """ Takes an address code as input and returns the binary value as an output.
    The binary code can be found on the online MODEL spreadsheet"""
    
    binaryOutput=""
    for letter in list(addressCode):
        if letter == "A":
            binary="00"
        elif letter == "C":
            binary="11"
        elif letter == "T":
            binary="01"
        elif letter == "G":
            binary="10"
        binaryOutput+=binary
    return binaryOutput

In [14]:
# Binary Converter Test Box
c=binaryConverter("CCGG")
print(c)

11111010


In [15]:
def gcPercentage(fragment):
    number=fragment.count("GC")
    totalLength=len(fragment)
    totalGCLetter=number*2
    percentage=(totalGCLetter/totalLength)*100
    return percentage

In [16]:
def gcPercentageSimple(fragment):
    g_Count=fragment.count("G")
    c_Count=fragment.count("C")
    fragment_length=len(fragment)
    percentage=((g_Count+c_Count)/fragment_length)*100
    return percentage

In [17]:
# Percentage Test Box
test_percent=("AAGAGTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGCGCTCGCGAGCTTCAT")
print(gcPercentage(test_percent))
print(gcPercentageSimple(test_percent))

33.84615384615385
67.6923076923077


In [41]:
# Need a function to add a unique code to each DNA fragment

In [42]:
def get_Unique_AddressI(df,nucleotideList,uniqueAddresses):
    
    # Add the list of unique addresses to the nucleotidelist for this fragment
    codeList=nucleotideList+uniqueAddresses
    
    try:
        # Find the dataframe which is left over from codes in nucleotideList
        remainingCodesFrame = df[~df["QUAT NUCLEOTIDES"].isin(codeList)]
        # Make a list of the remaining codes
        remainingCodesList = remainingCodesFrame["QUAT NUCLEOTIDES"].tolist()
        # Take the first value for this list 
        uniqueCode=remainingCodesList[0]

    except:
        return
    
    return uniqueCode

In [43]:
def get_Unique_AddressII(uniqueAddresses,uniqueAddressesLength,counter):
    
    # Take the uniqueAddress list and go through and add a nucleotide:
    uniqueAddresses[counter]
    
    
    counter+=1
    return uniqueCode,counter

In [44]:
uniqueAddresses=[]
counter=0
for i in range(245):
    try:
        a=get_Unique_Address(ASCII_List_Raw,usedCodes,uniqueAddresses)
        print(a)
        uniqueAddresses.append(a)
        uniqueAddressesLength=len(uniqueAddresses)
    except:
        counter+=1

# Main Body

## Loop through ASCII - Quat Codes
### Confirm that there is no Quat Code that ends in "AA" 

In [45]:
# Below loops over the ASCII Table and confirms that there are no fragments which could end in a "AA". This means that we can use something like "AT" for the termination code.

for value in ASCII_List_Raw.loc[:,"QUAT NUCLEOTIDES"]:
    if noDoubleRepeats(value[-2:])==1:
        print("Found one here: ", value[-2:], " - At the end of: ", value)

Found one here:  GG  - At the end of:  CGAGG
Found one here:  TT  - At the end of:  CGCTT
Found one here:  TT  - At the end of:  CGCCTT
Found one here:  GG  - At the end of:  CTAGG
Found one here:  TT  - At the end of:  CTCTT
Found one here:  CC  - At the end of:  CTGCC
Found one here:  CC  - At the end of:  CTCGCC
Found one here:  TT  - At the end of:  CACTT
Found one here:  CC  - At the end of:  CAGCC
Found one here:  CC  - At the end of:  CAGGCC
Found one here:  GG  - At the end of:  CTTAGG
Found one here:  TT  - At the end of:  CTTCTT
Found one here:  CC  - At the end of:  CTTGCC
Found one here:  CC  - At the end of:  CTTCGCC
Found one here:  GG  - At the end of:  GTAGG
Found one here:  TT  - At the end of:  GTCTT
Found one here:  CC  - At the end of:  GTGCC
Found one here:  CC  - At the end of:  GTCGCC
Found one here:  TT  - At the end of:  GACTT
Found one here:  CC  - At the end of:  GAGCC
Found one here:  CC  - At the end of:  GAGGCC
Found one here:  CC  - At the end of:  GCGCC


## Phrase Input

In [46]:
rawInput="""I am happy to join with you today in what will go down in history as the greatest demonstration for freedom in the history of our nation.

Five score years ago, a great American, in whose symbolic shadow we stand today, signed the Emancipation Proclamation. This momentous decree came as a great beacon light of hope to millions of Negro slaves who had been seared in the flames of withering injustice. It came as a joyous daybreak to end the long night of their captivity.

But one hundred years later, the Negro still is not free. One hundred years later, the life of the Negro is still sadly crippled by the manacles of segregation and the chains of discrimination. One hundred years later, the Negro lives on a lonely island of poverty in the midst of a vast ocean of material prosperity. One hundred years later, the Negro is still languished in the corners of American society and finds himself an exile in his own land. And so we've come here today to dramatize a shameful condition.

In a sense we've come to our nation's capital to cash a check. When the architects of our republic wrote the magnificent words of the Constitution and the Declaration of Independence, they were signing a promissory note to which every American was to fall heir. This note was a promise that all men, yes, black men as well as white men, would be guaranteed the "unalienable Rights" of "Life, Liberty and the pursuit of Happiness." It is obvious today that America has defaulted on this promissory note, insofar as her citizens of color are concerned. Instead of honoring this sacred obligation, America has given the Negro people a bad check, a check which has come back marked "insufficient funds."

But we refuse to believe that the bank of justice is bankrupt. We refuse to believe that there are insufficient funds in the great vaults of opportunity of this nation. And so, we've come to cash this check, a check that will give us upon demand the riches of freedom and the security of justice.

We have also come to this hallowed spot to remind America of the fierce urgency of Now. This is no time to engage in the luxury of cooling off or to take the tranquilizing drug of gradualism. Now is the time to make real the promises of democracy. Now is the time to rise from the dark and desolate valley of segregation to the sunlit path of racial justice. Now is the time to lift our nation from the quicksands of racial injustice to the solid rock of brotherhood. Now is the time to make justice a reality for all of God's children.

It would be fatal for the nation to overlook the urgency of the moment. This sweltering summer of the Negro's legitimate discontent will not pass until there is an invigorating autumn of freedom and equality. Nineteen sixty-three is not an end, but a beginning. And those who hope that the Negro needed to blow off steam and will now be content will have a rude awakening if the nation returns to business as usual. And there will be neither rest nor tranquility in America until the Negro is granted his citizenship rights. The whirlwinds of revolt will continue to shake the foundations of our nation until the bright day of justice emerges.

But there is something that I must say to my people, who stand on the warm threshold which leads into the palace of justice: In the process of gaining our rightful place, we must not be guilty of wrongful deeds. Let us not seek to satisfy our thirst for freedom by drinking from the cup of bitterness and hatred. We must forever conduct our struggle on the high plane of dignity and discipline. We must not allow our creative protest to degenerate into physical violence. Again and again, we must rise to the majestic heights of meeting physical force with soul force.

The marvelous new militancy which has engulfed the Negro community must not lead us to a distrust of all white people, for many of our white brothers, as evidenced by their presence here today, have come to realize that their destiny is tied up with our destiny. And they have come to realize that their freedom is inextricably bound to our freedom.

We cannot walk alone.

And as we walk, we must make the pledge that we shall always march ahead.

We cannot turn back.

There are those who are asking the devotees of civil rights, "When will you be satisfied?" We can never be satisfied as long as the Negro is the victim of the unspeakable horrors of police brutality. We can never be satisfied as long as our bodies, heavy with the fatigue of travel, cannot gain lodging in the motels of the highways and the hotels of the cities. **We cannot be satisfied as long as the negro's basic mobility is from a smaller ghetto to a larger one. We can never be satisfied as long as our children are stripped of their self-hood and robbed of their dignity by signs stating: "For Whites Only."** We cannot be satisfied as long as a Negro in Mississippi cannot vote and a Negro in New York believes he has nothing for which to vote. No, no, we are not satisfied, and we will not be satisfied until "justice rolls down like waters, and righteousness like a mighty stream."1

I am not unmindful that some of you have come here out of great trials and tribulations. Some of you have come fresh from narrow jail cells. And some of you have come from areas where your quest -- quest for freedom left you battered by the storms of persecution and staggered by the winds of police brutality. You have been the veterans of creative suffering. Continue to work with the faith that unearned suffering is redemptive. Go back to Mississippi, go back to Alabama, go back to South Carolina, go back to Georgia, go back to Louisiana, go back to the slums and ghettos of our northern cities, knowing that somehow this situation can and will be changed.

Let us not wallow in the valley of despair, I say to you today, my friends.

And so even though we face the difficulties of today and tomorrow, I still have a dream. It is a dream deeply rooted in the American dream.

I have a dream that one day this nation will rise up and live out the true meaning of its creed: "We hold these truths to be self-evident, that all men are created equal."

I have a dream that one day on the red hills of Georgia, the sons of former slaves and the sons of former slave owners will be able to sit down together at the table of brotherhood.

I have a dream that one day even the state of Mississippi, a state sweltering with the heat of injustice, sweltering with the heat of oppression, will be transformed into an oasis of freedom and justice.

I have a dream that my four little children will one day live in a nation where they will not be judged by the color of their skin but by the content of their character.

I have a dream today!

I have a dream that one day, down in Alabama, with its vicious racists, with its governor having his lips dripping with the words of "interposition" and "nullification" -- one day right there in Alabama little black boys and black girls will be able to join hands with little white boys and white girls as sisters and brothers.

I have a dream today!

I have a dream that one day every valley shall be exalted, and every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight; "and the glory of the Lord shall be revealed and all flesh shall see it together."2

This is our hope, and this is the faith that I go back to the South with.

With this faith, we will be able to hew out of the mountain of despair a stone of hope. With this faith, we will be able to transform the jangling discords of our nation into a beautiful symphony of brotherhood. With this faith, we will be able to work together, to pray together, to struggle together, to go to jail together, to stand up for freedom together, knowing that we will be free one day.

And this will be the day -- this will be the day when all of God's children will be able to sing with new meaning:

My country 'tis of thee, sweet land of liberty, of thee I sing. Land where my fathers died, land of the Pilgrim's pride,    From every mountainside, let freedom ring!

And if America is to be a great nation, this must become true.

And so let freedom ring from the prodigious hilltops of New Hampshire.

Let freedom ring from the mighty mountains of New York.

Let freedom ring from the heightening Alleghenies of Pennsylvania.

Let freedom ring from the snow-capped Rockies of Colorado.

Let freedom ring from the curvaceous slopes of California.

But not only that:

Let freedom ring from Stone Mountain of Georgia.

Let freedom ring from Lookout Mountain of Tennessee.

Let freedom ring from every hill and molehill of Mississippi.

From every mountainside, let freedom ring.

And when this happens, and when we allow freedom ring, when we let it ring from every village and every hamlet, from every state and every city, we will be able to speed up that day when all of God's children, black men and white men, Jews and Gentiles, Protestants and Catholics, will be able to join hands and sing in the words of the old Negro spiritual:

Free at last! Free at last!

Thank God Almighty, we are free at last!
"""


In [47]:
rawInput="""I am happy to join with you today in what will go down in history as the greatest demonstration for freedom in the history of our nation.

Five score years ago, a great American, in whose symbolic shadow we stand today, signed the Emancipation Proclamation. This momentous decree came as a great beacon light of hope to millions of Negro slaves who had been seared in the flames of withering injustice. It came as a joyous daybreak to end the long night of their captivity.

But one hundred years later, the Negro still is not free. One hundred years later, the life of the Negro is still sadly crippled by the manacles of segregation and the chains of discrimination. One hundred years later, the Negro lives on a lonely island of poverty in the midst of a vast ocean of material prosperity. One hundred years later, the Negro is still languished in the corners of American society and finds himself an exile in his own land. And so we've come here today to dramatize a shameful condition.

In a sense we've come to our nation's capital to cash a check. When the architects of our republic wrote the magnificent words of the Constitution and the Declaration of Independence, they were signing a promissory note to which every American was to fall heir. This note was a promise that all men, yes, black men as well as white men, would be guaranteed the "unalienable Rights" of "Life, Liberty and the pursuit of Happiness." It is obvious today that America has defaulted on this promissory note, insofar as her citizens of color are concerned. Instead of honoring this sacred obligation, America has given the Negro people a bad check, a check which has come back marked "insufficient funds."
"""

In [48]:
rawInput="I have a dream that my four little children will one day live in a nation where they will not be judged by the color of their skin but by the content of their character."

In [80]:
rawInput="I have a dream"

In [81]:
# Need to swap out characters that are not recognised by Python such as invalid quotation marks
transl_table = dict( [ (ord(x), ord(y)) for x,y in zip( u"‘’´“”–-",  u"'''\"\"--") ] ) 
translatedInput=rawInput.translate(transl_table)
print(translatedInput)

I have a dream


In [82]:
# Break down the phrase into a list of each indivdual character. 
phrase=list(translatedInput)
print(phrase)

['I', ' ', 'h', 'a', 'v', 'e', ' ', 'a', ' ', 'd', 'r', 'e', 'a', 'm']


# Stage I
## Convert phrase into a list of DNA fragments no longer than 120 nucleotide bases long

In [83]:
# Edited on 04/09 to include the termination sequence for fragments
# Edited on 23/10 to reduce the length of fragment to 118 so that with 2 nucleotides for the termination total goes to 120
# Edoted pm 26/10 to includ the characters during coding 

nucleotideFragments=[]
usedCodes=[]
usedCodesList=[]
nucleotideFragmentsDict={}
nucleotideFragmentsDictLong={}

characterFragments=[]
characterSequence=""


nucleotideSequence=""
uniqueAddresses=[]

counter=0

lengthOfPhrase=len(phrase)
terminationFragment="AT"

for index, letter  in enumerate(phrase):

    nextCode=get_QUAT_NUCLEOTIDES(ASCII_List_Raw,letter)
    
    # Use this in future if we can't handle a symbol
    if (nextCode)==True:
#         print(nextCode)
        continue
    
    if len(nucleotideSequence)+len(nextCode)<=118:
        
        characterSequence=characterSequence+letter
        
        nucleotideSequence=nucleotideSequence+nextCode
        usedCodes.append(nextCode)
        if index==lengthOfPhrase-1:    
            nucleotideSequence=nucleotideSequence+terminationFragment #Added on 04/09/202
            
            characterFragments.append(characterSequence)
            nucleotideFragments.append(nucleotideSequence)
            usedCodesList.append(usedCodes)
            
            nucleotideFragmentsDict[counter]=nucleotideSequence
            nucleotideFragmentsDictLong[counter]=usedCodes
            
            usedCodes=[]
            counter+=1
            
    else:
        characterSequence=characterSequence
        characterFragments.append(characterSequence)
        
        characterSequence=""
        characterSequence=characterSequence+letter

        
        nucleotideSequence=nucleotideSequence+terminationFragment #Added on 04/09/202
        
        nucleotideFragments.append(nucleotideSequence)     
        
        nucleotideFragmentsDict[counter]=nucleotideSequence
        nucleotideFragmentsDictLong[counter]=usedCodes
        counter+=1
        
        usedCodesList.append(usedCodes)
        
        usedCodes=[]
        usedCodes.append(nextCode)
        
        nucleotideSequence=""
        nucleotideSequence=nucleotideSequence+nextCode
    
 
    
    


## Output Stage I

In [84]:
print("\n")
for a,z in enumerate((nucleotideFragments)):
    print (a,z)
print("--------------------------------")  
for i in nucleotideFragmentsDict:
    print(i,":",nucleotideFragmentsDict[i])
# print("--------------------------------")
# for i in nucleotideFragmentsDictLong:
#     print(i,":",nucleotideFragmentsDictLong[i])

print("--------------------------------")

for i in nucleotideFragmentsDictLong:
    print(i,":",nucleotideFragmentsDictLong[i])

print("--------------------------------")


for i in characterFragments:
    print(i)


print("--------------------------------")
nucleotideSum=0
for i in nucleotideFragmentsDict:
    print("The length of fragment: ", i, " is ", len(nucleotideFragmentsDict[i]))
    nucleotideSum+=len(nucleotideFragmentsDict[i])
print ("Total nucleotide length for phrase is: ", nucleotideSum, " bases")



0 GTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGCGCTCGCGAGCTTCAT
--------------------------------
0 : GTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGCGCTCGCGAGCTTCAT
--------------------------------
0 : ['GTGA', 'CACG', 'GCAC', 'GCGA', 'GCCTG', 'GCTC', 'CACG', 'GCGA', 'CACG', 'GCTA', 'GCCGC', 'GCTC', 'GCGA', 'GCTTC']
--------------------------------
I have a dream
--------------------------------
The length of fragment:  0  is  61
Total nucleotide length for phrase is:  61  bases


# Stage II
## Loop over list of DNA fragments and assign a unique primer to each fragment
### This fragment is tested so that it is unique and also does not feature in the information itself. The complimentary primer is also tested

In [88]:
# ORIGINAL BEFORE ADDITIONAL CHECK 23/10/2020

DNAfragmentList=nucleotideFragments


uniqueAddressCounter=0
uniqueAddress=[]
uniqueCompAddress=[]
fragmentDict={}


# Try to produce the unique codes with the smallest nucleotides possible
try:
    uniqueAddressList=codingStrategy(len(DNAfragmentList))
#     print(uniqueAddressList)
    
    for i in DNAfragmentList:
        addressCode=uniqueAddressList[uniqueAddressCounter]
        compAddressCode=complimentaryPrimer(uniqueAddressList[uniqueAddressCounter])
        
        if (addressCode in i) or (compAddressCode in i) or (addressCode in uniqueAddress):
#             print("Issue Here", addressCode, compAddressCode)
            startingUAC=uniqueAddressCounter    
            while (addressCode in i) or (compAddressCode in i) or (addressCode in uniqueAddress):
#                 print("Found a case")
#                 print(addressCode,compAddressCode)
                uniqueAddressCounter+=1
                addressCode=uniqueAddressList[uniqueAddressCounter]
                compAddressCode=complimentaryPrimer(uniqueAddressList[uniqueAddressCounter])


            fragmentDict[addressCode]=i

            uniqueAddress.append(addressCode)
            uniqueCompAddress.append(compAddressCode)
            uniqueAddressCounter=startingUAC

        else:
#             print("No problem here")
            fragmentDict[addressCode]=i

            uniqueAddress.append(addressCode)
            uniqueCompAddress.append(compAddressCode)
            uniqueAddressCounter+=1

# If not, restart the process with longer unique address codes. This is needed if the uniqueAddress list values are found in the DNA information frequently            
except Exception as e:
    print(e)
    print("Error occured so went the second route")
    uniqueAddress=[]
    uniqueCompAddress=[]
    fragmentDict={}
    uniqueAddressCounter=0
    
#     for x in range(0,21):
#         if len(DNAfragmentList)<4**x:
#             print(x)
#             uniqueAddressList=codingStrategy((4**x))
#             break

    # Force a coding strategy below:
    y=3
    uniqueAddressList=codingStrategy(4**y)
    
    
    print(uniqueAddressList)
    for i in DNAfragmentList:
        addressCode=uniqueAddressList[uniqueAddressCounter]
        compAddressCode=complimentaryPrimer(uniqueAddressList[uniqueAddressCounter])
#         print("Next Fragment")
        if (addressCode in i) or (compAddressCode in i) or (addressCode in uniqueAddress):
            startingUAC=uniqueAddressCounter    
            while (addressCode in i) or (compAddressCode in i) or (addressCode in uniqueAddress):
#                 print("Found a case")
#                 print(addressCode,compAddressCode)
                uniqueAddressCounter+=1
                addressCode=uniqueAddressList[uniqueAddressCounter]
                compAddressCode=complimentaryPrimer(uniqueAddressList[uniqueAddressCounter])


            fragmentDict[addressCode]=i

            uniqueAddress.append(addressCode)
            uniqueCompAddress.append(compAddressCode)
            uniqueAddressCounter=startingUAC

        else:
            fragmentDict[addressCode]=i

            uniqueAddress.append(addressCode)
            uniqueCompAddress.append(compAddressCode)
            uniqueAddressCounter+=1


list index out of range
Error occured so went the second route
['AAGA', 'AAGG', 'AAGC', 'AAGT', 'AACA', 'AACG', 'AACC', 'AACT', 'AATA', 'AATG', 'AATC', 'AATT', 'AGAA', 'AGAG', 'AGAC', 'AGAT', 'AGGA', 'AGGC', 'AGGT', 'AGCA', 'AGCG', 'AGCC', 'AGCT', 'AGTA', 'AGTG', 'AGTC', 'AGTT', 'ACAA', 'ACAG', 'ACAC', 'ACAT', 'ACGA', 'ACGG', 'ACGC', 'ACGT', 'ACCA', 'ACCG', 'ACCT', 'ACTA', 'ACTG', 'ACTC', 'ACTT', 'ATAA', 'ATAG', 'ATAC', 'ATAT', 'ATGA', 'ATGG', 'ATGC', 'ATGT', 'ATCA', 'ATCG', 'ATCC', 'ATCT', 'ATTA', 'ATTG', 'ATTC', 'GAAG', 'GAAC', 'GAAT', 'GAGA', 'GAGG', 'GAGC', 'GAGT', 'GACA', 'GACG', 'GACC', 'GACT', 'GATA', 'GATG', 'GATC', 'GATT', 'GGAA', 'GGAG', 'GGAC', 'GGAT', 'GGCA', 'GGCG', 'GGCC', 'GGCT', 'GGTA', 'GGTG', 'GGTC', 'GGTT', 'GCAA', 'GCAG', 'GCAC', 'GCAT', 'GCGA', 'GCGG', 'GCGC', 'GCGT', 'GCCA', 'GCCG', 'GCCT', 'GCTA', 'GCTG', 'GCTC', 'GCTT', 'GTAA', 'GTAG', 'GTAC', 'GTAT', 'GTGA', 'GTGG', 'GTGC', 'GTGT', 'GTCA', 'GTCG', 'GTCC', 'GTCT', 'GTTA', 'GTTG', 'GTTC', 'CAAG', 'CAAC', 'CAAT', 

In [86]:
# Add the termination fragments 
# terminationFragments=["AT" for i in DNAfragmentList]
# print (terminationFragments)

In [55]:
# UNIQUE PROBLEM ERROR HERE

In [56]:
# This demonstrates that the unique address codes and or comp codes can be found in other DNA fragments
for k,l in zip (uniqueAddress,DNAfragmentList):
    print(k, " - ", l)
    if k in l:
        print("Found something with address code")
    if complimentaryPrimer(k) in l:
        print("Found something with comp code")

AAGA  -  GTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGCGCTCGCGAGCTTCCACGGCCTAGCACGCGAGCCTACACGGCTTCGCCAGCACGGCTGGCTTCGGCCTCAT
AAGG  -  GCCGCCACGGCTTAGCAGGCCTAGCCTAGCTTAGCTCCACGGCGCCGCACGCAGGCTTAGCTAGCCGCGCTCGCTTGCACGGCCTCGGCAGGCTTAGCTTACACGGCTTCGGCTTGAT
AAGT  -  GCTCCACGGCTAGCGAGCCAGCACGGCTTAGCAGGCCTGGCTCCACGGCAGGCTTGCACGGCGACACGGCTTGGCGAGCCTAGCAGGCTTCGGCTTGCACGGCCTCGGCACGCTCAT
AACA  -  GCCGCGCTCCACGGCCTAGCACGCTCGCCAGCACGGCCTCGGCAGGCTTAGCTTACACGGCTTGGCTTCGGCCTACACGGCGCGCTCCACGGCATGCCTCGCTAGCTCGGCTCGCTAAT
AACG  -  CACGGCGCGCCAGCACGGCCTAGCACGCTCCACGGCGCCGCTTCGGCTTAGCTTCGGCCGCCACGGCTTCGGCTGCACGGCCTAGCACGCTCGCAGGCCGCCACGGCCGCCGCAGGAT
AACT  -  GCAGGCTTGCACGGCGCGCCTCGCCTACACGGCGCGCCAGCACGGCCTAGCACGCTCCACGGCGCCGCTTCGGCTTGGCCTAGCTCGCTTGGCCTACACGGCTTCGGCTGCACGAT
AAGC  -  GCCTAGCACGCTCGCAGGCCGCCACGGCGCCGCACGCGAGCCGCGCGAGCGCCGCCTAGCTCGCCGCCAGGCAT


In [57]:
# Problem here whereby the unique address codes are being found in the fragments. Hmmmm...... 
counter=0
innerCounter=[]
for numi,i in enumerate(uniqueAddress):
    for numj,j in enumerate(nucleotideFragments):
        if i in j:
            print("Problem Here, ",i," found out ",numi, " found in ",j," found out ", numj)
            counter=counter+1
            innerCounter.append(numi)
        elif complimentaryPrimer(i) in j:
            print("Comp Primer Problem Here, ",complimentaryPrimer(i)," found out ",numi, " found in ",j," found out ", numj)
            counter=counter+1
            innerCounter.append(numi)

print(a)

Comp Primer Problem Here,  TTCC  found out  1  found in  GTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGCGCTCGCGAGCTTCCACGGCCTAGCACGCGAGCCTACACGGCTTCGCCAGCACGGCTGGCTTCGGCCTCAT  found out  0
Comp Primer Problem Here,  TTGC  found out  4  found in  GCCGCCACGGCTTAGCAGGCCTAGCCTAGCTTAGCTCCACGGCGCCGCACGCAGGCTTAGCTAGCCGCGCTCGCTTGCACGGCCTCGGCAGGCTTAGCTTACACGGCTTCGGCTTGAT  found out  1
Comp Primer Problem Here,  TTGC  found out  4  found in  GCTCCACGGCTAGCGAGCCAGCACGGCTTAGCAGGCCTGGCTCCACGGCAGGCTTGCACGGCGACACGGCTTGGCGAGCCTAGCAGGCTTCGGCTTGCACGGCCTCGGCACGCTCAT  found out  2
Comp Primer Problem Here,  TTGC  found out  4  found in  GCAGGCTTGCACGGCGCGCCTCGCCTACACGGCGCGCCAGCACGGCCTAGCACGCTCCACGGCGCCGCTTCGGCTTGGCCTAGCTCGCTTGGCCTACACGGCTTCGGCTGCACGAT  found out  5
Comp Primer Problem Here,  TTGA  found out  5  found in  GCCGCCACGGCTTAGCAGGCCTAGCCTAGCTTAGCTCCACGGCGCCGCACGCAGGCTTAGCTAGCCGCGCTCGCTTGCACGGCCTCGGCAGGCTTAGCTTACACGGCTTCGGCTTGAT  found out  1
Comp Primer Problem Here,  TTCG  found out  6  found in

In [None]:
# Loop over Unique Addresses and Compare against all DNA fragments. 
# If number is smaller than DNA length then need to go to next level. 
# Build up a list of suitable address and primer codes 
def uniqueChecker(uniqueAddressList,DNAFragmentList):
    

## Stage II Output

In [89]:
for i,j in zip(fragmentDict,characterFragments):
    print(i, " - ", complimentaryPrimer(i), " - ", fragmentDict[i]," - ", j)

AAGA  -  TTCT  -  GTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGCGCTCGCGAGCTTCAT  -  I have a dream


In [59]:
# Do some testing here:

"TTCG" in "GCTCCACGGCTAGCGAGCCAGCACGGCTTAGCAGGCCTGGCTCCACGGCAGGCTTGCACGGCGACACGGCTTGGCGAGCCTAGCAGGCTTCGGCTTGCACGGCCTCGGCACGCTCAT"

True

In [60]:

print(binaryList)

NameError: name 'binaryList' is not defined

# Stage III
## Compute the parity check for the address codes
### Each address code is converted into its binary form, output is 1 if there are odd number of "1"s and 0 if there are even number of "1"s

In [90]:
parityCheck={}
parityList=[]
binaryList=[]

for testValue in uniqueAddress:
    binaryCode=binaryConverter(testValue)
    binaryList.append(binaryCode)
    numberOccurences=binaryCode.count("1")
    if numberOccurences % 2 == 0:
        parityCheck[testValue]=0
        parityList.append(0)
    else:
        parityCheck[testValue]=1
        parityList.append(1)
    
print (parityCheck)

{'AAGA': 1}


# Stage IV
## Determine the percentage of GC nucleotide bases 

In [91]:
percentageListUniqueAddress=[]
percentageListComplimentary=[]
percentageListFragment=[]
percentageListCombined=[]

for testValue1,testValue2, testValue3 in zip(uniqueAddress, uniqueCompAddress, nucleotideFragments) :
    print(testValue1,testValue2,testValue3)
    percentageValue1=gcPercentageSimple(testValue1)
    percentageValue2=gcPercentageSimple(testValue2)
    percentageValue3=gcPercentageSimple(testValue3)
    percentageValue4=gcPercentageSimple(testValue1+testValue3)
    
    percentageListUniqueAddress.append(percentageValue1)
    percentageListComplimentary.append(percentageValue2)
    percentageListFragment.append(percentageValue3)
    percentageListCombined.append(percentageValue4)

#     print(testValue1, " - ",testValue2, " - ",testValue3[0:4], " - ", "Percentages: ", percentageValue1, " - ", percentageValue2, " - ", percentageValue3, " - ",)

AAGA TTCT GTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGCGCTCGCGAGCTTCAT


In [92]:
terminationFragment=[]
totalCombined=[]
for i,j in zip(uniqueAddress,nucleotideFragments):
    terminationFragment.append("AT")
    print(i)
    print(j)
    print(len(i+j))
    totalCombined.append(i+j)

AAGA
GTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGCGCTCGCGAGCTTCAT
65


In [93]:
print(totalCombined[0])
print(len(totalCombined[0]))
print(gcPercentageSimple(totalCombined[0]))

AAGAGTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGCGCTCGCGAGCTTCAT
65
67.6923076923077


# Output

In [94]:
# # # uniqueAddress
# binaryList
# parityList
# percentageListUniqueAddress

# uniqueCompAddress
# percentageListComplimentary

#  nucleotideFragments,
# percentageListFragment.append(percentageValue3)

outputFrame=pd.DataFrame()

outputFrame.insert(0,"Address Code",uniqueAddress)
outputFrame.insert(1,"Address Code - Binary",binaryList)
outputFrame.insert(2,"Address Code - Parity Check",parityList)
outputFrame.insert(3,"Address Code - GC Percentage",percentageListUniqueAddress)
outputFrame.insert(4,"Complimentary Address Code",uniqueCompAddress)
outputFrame.insert(5,"Complimentary Address Code - GC Percentage",percentageListComplimentary)
outputFrame.insert(6,"Character Message",characterFragments)
outputFrame.insert(7,"Individual Codes", usedCodesList)
outputFrame.insert(8,"Information Fragment",nucleotideFragments)
outputFrame.insert(9,"Information Fragment - GC Percentage",percentageListFragment)
outputFrame.insert(10,"Total Fragment",totalCombined)
outputFrame.insert(11,"Total Fragment - GC Percentage",percentageListCombined)



display(outputFrame)

Unnamed: 0,Address Code,Address Code - Binary,Address Code - Parity Check,Address Code - GC Percentage,Complimentary Address Code,Complimentary Address Code - GC Percentage,Character Message,Individual Codes,Information Fragment,Information Fragment - GC Percentage,Total Fragment,Total Fragment - GC Percentage
0,AAGA,1000,1,25.0,TTCT,25.0,I have a dream,"[GTGA, CACG, GCAC, GCGA, GCCTG, GCTC, CACG, GC...",GTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGC...,70.491803,AAGAGTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAG...,67.692308


In [95]:
outputFrame=outputFrame.astype(str)
outputFrame.to_csv("OutputIHAD.bod",encoding="utf-8",index = True)

# Decoding

In [18]:
def decoding_Nucleotides(inputString,df):
    
    outputdictionary={}
    indexList=[]
    charList=[]
    
    # Codes can be 4, 5, 6, 7 or 8 digits long
    for i in range(4,9):
        value=inputString[0:i]
#         print(value)
        try:
            result = ASCII_List_Raw.isin([value])
            # Find the row where result is true
            row = list(result["QUAT NUCLEOTIDES"][result["QUAT NUCLEOTIDES"] == True].index)
            # Find the corresponding QUAT NUCLEOTIDE at the row
            nucleotideCode=ASCII_List_Raw.loc[row,"Char"].tolist()[0]

#             print(nucleotideCode)
                
            if len(nucleotideCode)>0:
                outputdictionary[i]=nucleotideCode
                charList.append(nucleotideCode)
                listVar=[i]
                indexList.append((listVar))
        except:
            pass
        
    return outputdictionary, charList, indexList

        
        

In [19]:
def get_CHARACTERS(df, value):
    ''' Find the QUAT NUCLEOTIDE Corresponding to the input character.'''
    
    # Get bool dataframe with True at positions where the given value exists
    result = df.isin([value])
    # Find the row where result is true
    row = list(result["QUAT NUCLEOTIDES"][result["QUAT NUCLEOTIDES"] == True].index)
    # Find the corresponding QUAT NUCLEOTIDE at the row
    character=df.loc[row,"Char"].tolist()[0]
    
    if character=="space":
        character=" "
        
    if character=="LF":
        character="\n"
    
    return character

In [20]:
a=get_CHARACTERS(ASCII_List_Raw,"GCTG")
print(a)

f


## Decode Single DNA Fragment

In [21]:
# input1="GACTTGCACGCGAGCTTAGCTTACACGGTGACACGGCGCCGCTTCGGCTTCGCCGTGCGAGCCGCGCTCCACGGCCTAGCACGCTCGCTCCACGGCCTAGCTTCGCACGGCGACACG"
# input1= "GCCGCCGCCTCGCTTCGCTTCGCTCGCCGCCAGCCGCCGCCCACGGCTAGCGAGCCAGCTTCGCCCGCAGAGTGCACGCTTCGGCCTCCACGGCGAGCCGCGCCTACACGGCTTC"
# input1= nucleotideFragmentsDict["GATG"]

# input1="GTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGCGCTCGCGAGCTTCCACGGCCTAGCACGCGAGCCTACACGGCTTCGCCAGCACGGCTGGCTTCGGCCTCAT"

input1="GCCGCGCTCCACGGCCTAGCACGCTCGCCAGCACGGCCTCGGCAGGCTTAGCTTACACGGCTTGGCTTCGGCCTACACGGCGCGCTCCACGGCATGCCTCGCTAGCTCGGCTCGCTA"



In [22]:
initialInput=input1[0:8]

# print (dict1,list1,"HA",list2)

dict1,list1,list2=decoding_Nucleotides(initialInput,ASCII_List_Raw)

for index, i in enumerate(list2):
    if index==20000: # Hardcoded top end to prevent crashing, will need to remove this in future
        break
    
    if type(i) is not int:
        counter=0
        for values in i:
            counter=counter+values
    
    entry1,i1,i2=decoding_Nucleotides(input1[counter:],ASCII_List_Raw)
    for key in entry1:
        l=i.copy()
        l.append(key)
        list2.append(l)

# print (dict1,list1,list2)
        
        

In [23]:
for i in list2:
#     print(i)
    if len(input1)==sum(i): #Now have termination sequence so need to subtract 2
        #print(sum((i)))
        phraseOut=""
        counter=0
        for j in i:
            a=get_CHARACTERS(ASCII_List_Raw,input1[counter:counter+j])
            phraseOut=phraseOut+a      
            counter=counter+j
        print(phraseOut)

re they u?ll nm? be jude?d
re they u?ll nm? be judged
re they u?ll nm? be jwDC1DC4ed
re they u?ll not be jude?d
re they u?ll not be judged
re they u?ll not be jwDC1DC4ed
re they will nm? be jude?d
re they will nm? be judged
re they will nm? be jwDC1DC4ed
re they will not be jude?d
re they will not be judged
re they will not be jwDC1DC4ed


In [25]:
for i in list2: 
    
    print(i)

[5]
[5, 4]
[5, 4, 4]
[5, 4, 4, 5]
[5, 4, 4, 5, 4]
[5, 4, 4, 5, 4, 4]
[5, 4, 4, 5, 4, 5]
[5, 4, 4, 5, 4, 4, 5]
[5, 4, 4, 5, 4, 4, 5, 4]
[5, 4, 4, 5, 4, 4, 5, 4, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 6]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 6]
[5, 4, 4, 5, 4, 4, 5, 4, 6, 4]
[5, 4, 4, 5, 4, 4, 5, 4, 6, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 5, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 6, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 6, 4, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 6, 5, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 5, 5, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 6, 5, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 6, 4, 5, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 6, 5, 5, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 5, 5, 5, 4]
[5, 4, 4, 5, 4, 4, 5, 4, 6, 4, 5, 5, 4]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 5, 5, 5, 4, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 6, 4, 5, 5, 4, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 5, 5, 5, 4, 5, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 5, 5, 5, 4, 5, 6]
[5, 4, 4, 5, 4, 4, 5, 4, 6, 4, 5, 5, 4, 5, 5]
[5, 4, 4, 5, 4, 4, 5, 4, 6, 4, 5, 5, 4, 5, 6]
[5, 4, 4, 5, 4, 4, 5, 4, 5, 5, 5, 5, 4, 5, 5, 6]
[5

In [61]:
for i in nucleotideFragments:
    print(i)
    
    

GTGACACGGCACGCGAGCCTGGCTCCACGGCGACACGGCTAGCCGCGCTCGCGAGCTTCCACGGCCTAGCACGCGAGCCTACACGGCTTCGCCAGCACGGCTGGCTTCGGCCTCAT
GCCGCCACGGCTTAGCAGGCCTAGCCTAGCTTAGCTCCACGGCGCCGCACGCAGGCTTAGCTAGCCGCGCTCGCTTGCACGGCCTCGGCAGGCTTAGCTTACACGGCTTCGGCTTGAT
GCTCCACGGCTAGCGAGCCAGCACGGCTTAGCAGGCCTGGCTCCACGGCAGGCTTGCACGGCGACACGGCTTGGCGAGCCTAGCAGGCTTCGGCTTGCACGGCCTCGGCACGCTCAT
GCCGCGCTCCACGGCCTAGCACGCTCGCCAGCACGGCCTCGGCAGGCTTAGCTTACACGGCTTGGCTTCGGCCTACACGGCGCGCTCCACGGCATGCCTCGCTAGCTCGGCTCGCTAAT
CACGGCGCGCCAGCACGGCCTAGCACGCTCCACGGCGCCGCTTCGGCTTAGCTTCGGCCGCCACGGCTTCGGCTGCACGGCCTAGCACGCTCGCAGGCCGCCACGGCCGCCGCAGGAT
GCAGGCTTGCACGGCGCGCCTCGCCTACACGGCGCGCCAGCACGGCCTAGCACGCTCCACGGCGCCGCTTCGGCTTGGCCTAGCTCGCTTGGCCTACACGGCTTCGGCTGCACGAT
GCCTAGCACGCTCGCAGGCCGCCACGGCGCCGCACGCGAGCCGCGCGAGCGCCGCCTAGCTCGCCGCCAGGCAT


In [49]:
nucleotideFragmentsDictLong["GATC"]
# "oo hot the eye of heaven s"
counter=0
for i in nucleotideFragmentsDictLong["GATC"]:
    counter=counter+len(i)
print(counter)

KeyError: 'GATC'

In [80]:
for i in nucleotideFragmentsDictLong["GATC"]:
    print(len(i))

6
6
4
4
6
5
4
5
4
4
4
4
5
4
4
6
4
4
4
4
4
5
4
5
4
6


# OLD CODE

In [None]:
# ORIGINAL BEFORE ADDITIONAL CHECK 23/10/2020. Makes up unique codes to each DNA framgnet. But currently allows unique Identifier to feature in any other information. 

DNAfragmentList=nucleotideFragments


uniqueAddressCounter=0
uniqueAddress=[]
uniqueCompAddress=[]
fragmentDict={}


# Try to produce the unique codes with the smallest nucleotides possible
try:
    uniqueAddressList=codingStrategy(len(DNAfragmentList))
#     print(uniqueAddressList)
    
    for i in DNAfragmentList:
        addressCode=uniqueAddressList[uniqueAddressCounter]
        compAddressCode=complimentaryPrimer(uniqueAddressList[uniqueAddressCounter])
        
        if (addressCode in i) or (compAddressCode in i) or (addressCode in uniqueAddress):
#             print("Issue Here", addressCode, compAddressCode)
            startingUAC=uniqueAddressCounter    
            while (addressCode in i) or (compAddressCode in i) or (addressCode in uniqueAddress):
#                 print("Found a case")
#                 print(addressCode,compAddressCode)
                uniqueAddressCounter+=1
                addressCode=uniqueAddressList[uniqueAddressCounter]
                compAddressCode=complimentaryPrimer(uniqueAddressList[uniqueAddressCounter])


            fragmentDict[addressCode]=i

            uniqueAddress.append(addressCode)
            uniqueCompAddress.append(compAddressCode)
            uniqueAddressCounter=startingUAC

        else:
#             print("No problem here")
            fragmentDict[addressCode]=i

            uniqueAddress.append(addressCode)
            uniqueCompAddress.append(compAddressCode)
            uniqueAddressCounter+=1

# If not, restart the process with longer unique address codes. This is needed if the uniqueAddress list values are found in the DNA information frequently            
except Exception as e:
    print(e)
    print("Error occured so went the second route")
    uniqueAddress=[]
    uniqueCompAddress=[]
    fragmentDict={}
    uniqueAddressCounter=0
    for x in range(0,21):
        if len(DNAfragmentList)<4**x:
            print(x)
            uniqueAddressList=codingStrategy((4**x))
            break
    print(uniqueAddressList)
    for i in DNAfragmentList:
        addressCode=uniqueAddressList[uniqueAddressCounter]
        compAddressCode=complimentaryPrimer(uniqueAddressList[uniqueAddressCounter])
#         print("Next Fragment")
        if (addressCode in i) or (compAddressCode in i) or (addressCode in uniqueAddress):
            startingUAC=uniqueAddressCounter    
            while (addressCode in i) or (compAddressCode in i) or (addressCode in uniqueAddress):
#                 print("Found a case")
#                 print(addressCode,compAddressCode)
                uniqueAddressCounter+=1
                addressCode=uniqueAddressList[uniqueAddressCounter]
                compAddressCode=complimentaryPrimer(uniqueAddressList[uniqueAddressCounter])


            fragmentDict[addressCode]=i

            uniqueAddress.append(addressCode)
            uniqueCompAddress.append(compAddressCode)
            uniqueAddressCounter=startingUAC

        else:
            fragmentDict[addressCode]=i

            uniqueAddress.append(addressCode)
            uniqueCompAddress.append(compAddressCode)
            uniqueAddressCounter+=1
