In [None]:
!pip install biopython

In [6]:
from Bio.Seq import Seq
from Bio.SeqUtils import gc_fraction
import pandas as pd

# Load the tab-separated dataset into a DataFrame; the analysis code reads
# its "sequence" and "class" columns.
df = pd.read_csv(
    r"/Users/sabhyalokhande/Downloads/human.txt",
    sep="\t",
)

# Search parameters do not change between rows, so they are defined once
# instead of being rebuilt on every loop iteration.
motif = "ATG"
stop_codons = ["TAA", "TAG", "TGA"]


def find_motif_positions(sequence, motif):
    """Return the 1-based start position of every occurrence of ``motif``.

    Overlapping occurrences are reported. The search runs on the plain text
    of the sequence so that no intermediate object is created per position.

    Args:
        sequence: The sequence to search; ``str(sequence)`` must yield its text.
        motif: Exact (case-sensitive) substring to look for.

    Returns:
        A list of 1-based positions in ascending order; empty if there is no
        occurrence.
    """
    text = str(sequence)
    positions = []
    hit = text.find(motif)
    while hit != -1:
        positions.append(hit + 1)
        # Resume one character later so overlapping matches are still found.
        hit = text.find(motif, hit + 1)
    return positions


def find_first_orf(sequence, stop_codons, start_codon="ATG"):
    """Return the first open reading frame of ``sequence`` as a string.

    The frame begins at the first occurrence of ``start_codon`` and is read
    in steps of three characters until the first in-frame stop codon, which
    is included in the result.

    Args:
        sequence: The sequence to scan; ``str(sequence)`` must yield its text.
        stop_codons: Collection of three-letter stop codons.
        start_codon: Codon that opens the reading frame.

    Returns:
        The text from the start codon through the stop codon, or ``""`` when
        there is no start codon or no in-frame stop codon after it.
    """
    text = str(sequence)
    start = text.find(start_codon)
    if start == -1:
        return ""
    for i in range(start + 3, len(text), 3):
        # A trailing partial codon (1-2 characters) can never match a stop.
        if text[i:i + 3] in stop_codons:
            return text[start:i + 3]
    return ""


results = []

# Iterate over just the needed columns; this avoids building a Series for
# every row the way DataFrame.iterrows() does.
for idx, raw_sequence, label in zip(df.index, df["sequence"], df["class"]):
    sequence = Seq(raw_sequence)   # convert to Biopython Seq object

    # 1. Finding motifs
    positions = find_motif_positions(sequence, motif)

    # 2. Calculating GC content (scaled by 100 to report a percentage)
    GC_content = gc_fraction(sequence) * 100

    # 3. Identifying coding region (first ORF only)
    coding_region = find_first_orf(sequence, stop_codons)

    # Store results; the string "None" marks a missing motif / coding region
    results.append({
        "Index": idx,
        "Class": label,
        "GC_Content(%)": round(GC_content, 2),
        "Motif_Positions(ATG)": positions if positions else "None",
        "Coding_Region": coding_region if coding_region else "None"
    })

# Assemble the per-sequence records into one table; leaving the frame as the
# final expression of the cell makes the notebook render it.
results_df = pd.DataFrame(data=results)
results_df

Unnamed: 0,Index,Class,GC_Content(%),Motif_Positions(ATG),Coding_Region
0,0,4,39.61,"[1, 24, 162]",ATGCCCCAACTAAATACTACCGTATGGCCCACCATAATTACCCCCA...
1,1,4,44.20,"[1, 141, 169, 310, 324, 368, 511]",ATGAACGAAAATCTGTTCGCTTCATTCATTGCCCCCACAATCCTAG...
2,2,3,43.12,"[1, 32, 64, 89, 107, 113, 175, 224, 250, 290, ...",ATGTGTGGCATTTGGGCGCTGTTTGGCAGTGATGATTGCCTTTCTG...
3,3,3,41.79,"[1, 32, 64, 89, 107, 113, 175, 224, 250, 290, ...",ATGTGTGGCATTTGGGCGCTGTTTGGCAGTGATGATTGCCTTTCTG...
4,4,3,42.73,"[1, 41, 65, 97, 104, 170, 196, 206, 222, 305, ...",ATGCAACAGCATTTTGAATTTGAATACCAGACCAAAGTGGATGGTG...
...,...,...,...,...,...
4375,4375,0,29.82,"[1, 50]",
4376,4376,6,52.10,"[1, 109, 205, 274, 353, 389, 404, 631, 779, 78...",ATGCAGTCCTTTCGGGAGCAAAGCAGTTACCACGGAAACCAGCAAA...
4377,4377,6,51.88,"[1, 109, 205, 274, 353, 389, 404, 631, 779, 78...",ATGCAGTCCTTTCGGGAGCAAAGCAGTTACCACGGAAACCAGCAAA...
4378,4378,6,55.11,"[1, 52, 89, 136, 176, 452, 467, 491, 521, 556,...",ATGGGGCACCTGGTTTGCTGTCTGTGTGGCAAGTGGGCCAGTTACC...
