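# In [27]:  (Jupyter notebook cell prompt left over from export; kept as a comment so the file parses as Python)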
import argparse

def build_parser():
    """Return the argument parser for the windowed GC / CpG calculator."""
    parser = argparse.ArgumentParser(description='Looks at a fasta file on a sliding window and turns sequence into GC percentage, or CpG content (Default is GC percentage).')
    parser.add_argument('-f', '--file', metavar='Input file name/path', required=True, type=str, help='Input file name or path')
    parser.add_argument('-o', '--output', metavar='Output File Name', required=True, type=str, help='Name for Output File')
    parser.add_argument('-n', '--window', metavar='Sliding Window Size', type=int, default=10000, help='Desired Size of Sliding Window')
    parser.add_argument('-c', '--cpg', action='store_true', default=False, help='Designate if desired run mode is CpG density instead of GC content')
    return parser


def window_scores(sequence, window, cpg=False):
    """Score consecutive, non-overlapping windows of ``sequence``.

    Args:
        sequence: Sequence string for one FASTA record.
        window: Window length in characters; must be >= 1.
        cpg: When true, count occurrences of the dinucleotide 'CG';
            otherwise count 'G' plus 'C' characters.

    Returns:
        One float per window (count divided by the window's actual length,
        rounded to 4 decimals). The last window may be shorter than
        ``window``. An empty sequence yields an empty list.
    """
    scores = []
    for start in range(0, len(sequence), window):
        chunk = sequence[start:start + window]
        if cpg:
            # A 'CG' that straddles two windows is not counted in either.
            hits = chunk.count('CG')
        else:
            hits = chunk.count('G') + chunk.count('C')
        # NOTE(review): counting is case-sensitive, so lowercase
        # (soft-masked) bases are not counted -- confirm this is intended.
        scores.append(round(hits / len(chunk), 4))
    return scores


def iter_fasta_records(handle):
    """Yield ``(header, sequence)`` for each record read from ``handle``.

    Sequence lines belonging to one record are concatenated, so line-wrapped
    FASTA files are windowed over the whole record rather than per line.
    Blank lines are ignored.

    Raises:
        ValueError: If sequence data appears before the first '>' header.
    """
    header = None
    chunks = []
    for raw_line in handle:
        line = raw_line.rstrip()
        if line.startswith(">"):
            if header is not None:
                yield header, "".join(chunks)
            # Text between the leading '>' and any later '>' on the line.
            header = line.split(">")[1]
            chunks = []
        elif line:
            if header is None:
                raise ValueError("sequence data found before the first FASTA header")
            chunks.append(line)
    if header is not None:
        yield header, "".join(chunks)


def main(argv=None):
    """Run the command-line tool.

    Reads the FASTA file given by -f/--file and appends one line per record
    to the -o/--output file: the header followed by the space-separated
    window scores. A record without sequence produces its header alone.

    Args:
        argv: Argument list to parse; ``None`` means use ``sys.argv[1:]``.

    Raises:
        ValueError: If the input has sequence data before its first header.
    """
    arg_parser = build_parser()
    args = arg_parser.parse_args(argv)
    if args.window < 1:
        # range() rejects a zero step and a negative step yields no windows.
        arg_parser.error("window size (-n/--window) must be a positive integer")

    # The output is opened in append mode, so existing content is preserved.
    with open(args.file) as fasta, open(args.output, "a") as out:
        for header, sequence in iter_fasta_records(fasta):
            fields = [header]
            fields.extend(window_scores(sequence, args.window, args.cpg))
            out.write(' '.join(str(field) for field in fields) + "\n")


if __name__ == "__main__":
    main()