In [2]:
import csv

def convert_ptt_to_csv(ptt_file, csv_file):
    # Open the PTT file for reading
    with open(ptt_file, 'r') as ptt_file:
        # Read all lines from the PTT file
        ptt_lines = ptt_file.readlines()

    # Open the CSV file for writing
    with open(csv_file, 'w', newline='') as csv_file:
        # Create a CSV writer object
        writer = csv.writer(csv_file, delimiter=',')

        # Write the header row
        writer.writerow(['Location', 'Strand', 'Length', 'PID', 'Gene', 'Synonym', 'Code', 'COG', 'Product'])

        # Iterate through each line in the PTT file
        for line in ptt_lines[3:]:  # Skip header lines
            # Split the line by tab (\t) to extract the gene information
            gene_info = line.strip().split('\t')
            
            # Write the extracted information to the CSV file
            writer.writerow(gene_info[:-1])  # Exclude the last empty field

    print(f"Conversion complete. CSV file saved as {csv_file}")

ptt_input_file = 'E_coli_K12_MG1655.ptt'  
csv_output_file = 'Ecoli_i.csv'  

convert_ptt_to_csv(ptt_input_file, csv_output_file)


Conversion complete. CSV file saved as <_io.TextIOWrapper name='Ecoli_i.csv' mode='w' encoding='utf-8'>


In [3]:
import pandas as pd 
df_1 = pd.read_csv('Ecoli_i.csv')
columns_to_drop = ['Gene', 'Synonym' , 'Code', 'COG', 'Product']  # Replace 'Column1' and 'Column2' with the actual column names you want to drop
df_1.drop(columns=columns_to_drop, inplace=True)

# Save the modified DataFrame to a new CSV file
df_1.to_csv('Ecoli.csv', index=False)

In [4]:
df_1

Unnamed: 0,Location,Strand,Length,PID
0,190..255,+,21,16127995
1,337..2799,+,820,16127996
2,2801..3733,+,310,16127997
3,3734..5020,+,428,16127998
4,5234..5530,+,98,16127999
...,...,...,...,...
4141,4634719..4636143,+,474,16132216
4142,4636201..4637553,+,450,16132217
4143,4637613..4638329,-,238,16132218
4144,4638425..4638565,+,46,16132219


In [5]:
element = df_1.iloc[0, 0]  # Row 0, Column 1

print(element)
split_parts = element.split('..')

first_part = split_parts[0]
second_part = split_parts[1]

print(first_part)  # Output: '190'
print(second_part) 

190..255
190
255


In [6]:
column_shape = df_1[['Location']].shape

print(f"The shape of column '{'Location'}' is {column_shape}")


The shape of column 'Location' is (4146, 1)


In [7]:
p1 = {}
p2 = {}

for i in range(len(df_1)):
    if '..' in df_1['Location'].loc[i]:
        p1[df_1['PID'].loc[i]] = df_1['Location'].loc[i].split('..')[0]
        p2[df_1['PID'].loc[i]] = df_1['Location'].loc[i].split('..')[1]

df_1['start'] = df_1['PID'].map(p1)
df_1['end'] = df_1['PID'].map(p2)

In [8]:
df_1

Unnamed: 0,Location,Strand,Length,PID,start,end
0,190..255,+,21,16127995,190,255
1,337..2799,+,820,16127996,337,2799
2,2801..3733,+,310,16127997,2801,3733
3,3734..5020,+,428,16127998,3734,5020
4,5234..5530,+,98,16127999,5234,5530
...,...,...,...,...,...,...
4141,4634719..4636143,+,474,16132216,4634719,4636143
4142,4636201..4637553,+,450,16132217,4636201,4637553
4143,4637613..4638329,-,238,16132218,4637613,4638329
4144,4638425..4638565,+,46,16132219,4638425,4638565


In [9]:
df_1.to_csv('Ecoli.csv', index=False)

In [10]:
df = pd.read_csv('Ecoli.csv')

In [11]:
operons = []
current_operon = []

# Iterate through the sorted DataFrame
for index, row in df.iterrows():
    if not current_operon:  # If current_operon is empty, add the gene to start a new operon
        current_operon.append(row)
    else:
        last_gene = current_operon[-1]  # Get the last gene in the current operon
        distance = row['start'] - last_gene['end']
        if row['Strand'] == last_gene['Strand'] and distance <= 50:
            current_operon.append(row)  # Add the gene to the current operon
        else:
            operons.append(current_operon)  # Finish the current operon and start a new one
            current_operon = [row]

# Add the last operon (if any) to the list of operons
if current_operon:
    operons.append(current_operon)

# Print the operons
for i, operon in enumerate(operons, start=1):
    print(f'Operon {i}:')
    for gene in operon:
        print(gene)
    print('----------------------')

Operon 1:
Location    190..255
Strand             +
Length            21
PID         16127995
start            190
end              255
Name: 0, dtype: object
----------------------
Operon 2:
Location    337..2799
Strand              +
Length            820
PID          16127996
start             337
end              2799
Name: 1, dtype: object
Location    2801..3733
Strand               +
Length             310
PID           16127997
start             2801
end               3733
Name: 2, dtype: object
Location    3734..5020
Strand               +
Length             428
PID           16127998
start             3734
end               5020
Name: 3, dtype: object
----------------------
Operon 3:
Location    5234..5530
Strand               +
Length              98
PID           16127999
start             5234
end               5530
Name: 4, dtype: object
----------------------
Operon 4:
Location    5683..6459
Strand               -
Length             258
PID           16128000
start       

In [12]:
with open('Ecoli_operons.txt', 'w') as file:
    for i, operon in enumerate(operons, start=1):
        file.write(f'Operon {i}:\n')
        for gene in operon:
            file.write(str(gene) + '\n')
        file.write('----------------------\n')

print('Operons written to operons_output.txt file.')

Operons written to operons_output.txt file.


Now let us predict operons for Bacillus Subtilis

In [13]:
ptt_input_file_bs = 'B_subtilis_168.ptt'  
csv_output_file_bs = 'B_subtillis_input.csv'  

convert_ptt_to_csv(ptt_input_file_bs, csv_output_file_bs)

Conversion complete. CSV file saved as <_io.TextIOWrapper name='B_subtillis_input.csv' mode='w' encoding='utf-8'>


In [14]:
df_2 = pd.read_csv('B_subtillis_input.csv')
columns_to_drop = ['Gene', 'Synonym' , 'Code', 'COG', 'Product']  # Replace 'Column1' and 'Column2' with the actual column names you want to drop
df_2.drop(columns=columns_to_drop, inplace=True)

# Save the modified DataFrame to a new CSV file
df_2.to_csv('B_subtillis.csv', index=False)
df_2

Unnamed: 0,Location,Strand,Length,PID
0,410..1750,+,446,16077069
1,1939..3075,+,378,16077070
2,3206..3421,+,71,16077071
3,3437..4549,+,370,16077072
4,4567..4812,+,81,255767014
...,...,...,...,...
4171,4211510..4212889,-,459,16081154
4172,4213200..4213826,-,208,16081155
4173,4213823..4214608,-,261,16081156
4174,4214753..4215103,-,116,16081157


In [15]:
element = df_2.iloc[0, 0]  

print(element)
split_parts = element.split('..')

first_part = split_parts[0]
second_part = split_parts[1]

print(first_part)  
print(second_part) 


410..1750
410
1750


In [16]:
column_shape = df_2[['Location']].shape

print(f"The shape of column '{'Location'}' is {column_shape}")


The shape of column 'Location' is (4176, 1)


In [17]:
q1 = []
q2 = []

for i in range(len(df_2)):
    if '..' in df_2['Location'].loc[i]:
        q1.append(df_2['Location'].loc[i].split('..')[0])
        q2.append(df_2['Location'].loc[i].split('..')[1])

df_2['start'] = q1
df_2['end'] = q2

df_2['start'] = pd.to_numeric(df_2['start'])
df_2['end'] = pd.to_numeric(df_2['end'])



In [18]:
df_2

Unnamed: 0,Location,Strand,Length,PID,start,end
0,410..1750,+,446,16077069,410,1750
1,1939..3075,+,378,16077070,1939,3075
2,3206..3421,+,71,16077071,3206,3421
3,3437..4549,+,370,16077072,3437,4549
4,4567..4812,+,81,255767014,4567,4812
...,...,...,...,...,...,...
4171,4211510..4212889,-,459,16081154,4211510,4212889
4172,4213200..4213826,-,208,16081155,4213200,4213826
4173,4213823..4214608,-,261,16081156,4213823,4214608
4174,4214753..4215103,-,116,16081157,4214753,4215103


In [19]:
df_2.to_csv('Bsubtillis.csv', index=False)
df_2

Unnamed: 0,Location,Strand,Length,PID,start,end
0,410..1750,+,446,16077069,410,1750
1,1939..3075,+,378,16077070,1939,3075
2,3206..3421,+,71,16077071,3206,3421
3,3437..4549,+,370,16077072,3437,4549
4,4567..4812,+,81,255767014,4567,4812
...,...,...,...,...,...,...
4171,4211510..4212889,-,459,16081154,4211510,4212889
4172,4213200..4213826,-,208,16081155,4213200,4213826
4173,4213823..4214608,-,261,16081156,4213823,4214608
4174,4214753..4215103,-,116,16081157,4214753,4215103


In [20]:
operons = []
current_operon = []

# Iterate through the sorted DataFrame
for index, row in df_2.iterrows():
    if not current_operon:  # If current_operon is empty, add the gene to start a new operon
        current_operon.append(row)
    else:
        last_gene = current_operon[-1]  # Get the last gene in the current operon
        distance = row['start'] - last_gene['end']
        if row['Strand'] == last_gene['Strand'] and distance <= 50:
            current_operon.append(row)  # Add the gene to the current operon
        else:
            operons.append(current_operon)  # Finish the current operon and start a new one
            current_operon = [row]

# Add the last operon (if any) to the list of operons
if current_operon:
    operons.append(current_operon)

# Print the operons
for i, operon in enumerate(operons, start=1):
    print(f'Operon {i}:')
    for gene in operon:
        print(gene)
    print('----------------------')

Operon 1:
Location    410..1750
Strand              +
Length            446
PID          16077069
start             410
end              1750
Name: 0, dtype: object
----------------------
Operon 2:
Location    1939..3075
Strand               +
Length             378
PID           16077070
start             1939
end               3075
Name: 1, dtype: object
----------------------
Operon 3:
Location    3206..3421
Strand               +
Length              71
PID           16077071
start             3206
end               3421
Name: 2, dtype: object
Location    3437..4549
Strand               +
Length             370
PID           16077072
start             3437
end               4549
Name: 3, dtype: object
Location    4567..4812
Strand               +
Length              81
PID          255767014
start             4567
end               4812
Name: 4, dtype: object
----------------------
Operon 4:
Location    4867..6783
Strand               +
Length             638
PID           16077074


In [21]:
with open('B_Subtilis_operons.txt', 'w') as file:
    for i, operon in enumerate(operons, start=1):
        file.write(f'Operon {i}:\n')
        for gene in operon:
            file.write(str(gene) + '\n')
        file.write('----------------------\n')

print('Operons written to operons_output.txt file.')

Operons written to operons_output.txt file.


Now for Halobacterium_NRC1

In [22]:
ptt_input_file_bs = 'Halobacterium_NRC1.ptt'  
csv_output_file_bs = 'Halobacterium_input.csv'  

convert_ptt_to_csv(ptt_input_file_bs, csv_output_file_bs)

Conversion complete. CSV file saved as <_io.TextIOWrapper name='Halobacterium_input.csv' mode='w' encoding='utf-8'>


In [23]:
df_3 = pd.read_csv('Halobacterium_input.csv')
columns_to_drop = ['Gene', 'Synonym' , 'Code', 'COG', 'Product']  # Replace 'Column1' and 'Column2' with the actual column names you want to drop
df_3.drop(columns=columns_to_drop, inplace=True)

# Save the modified DataFrame to a new CSV file
df_3.to_csv('Halobacterium.csv', index=False)
df_3

Unnamed: 0,Location,Strand,Length,PID
0,248..1453,+,401,15789341
1,1450..2115,+,221,15789342
2,2145..3254,+,369,15789343
3,3322..5643,+,773,15789344
4,5646..7451,-,601,15789345
...,...,...,...,...
2070,2006446..2006856,-,136,15791394
2071,2006920..2007438,-,172,15791395
2072,2007840..2009699,+,619,15791396
2073,2009709..2011541,-,610,15791397


In [24]:
q1 = []
q2 = []

for i in range(len(df_3)):
    if '..' in df_3['Location'].loc[i]:
        q1.append(df_3['Location'].loc[i].split('..')[0])
        q2.append(df_3['Location'].loc[i].split('..')[1])

df_3['start'] = q1
df_3['end'] = q2

df_3['start'] = pd.to_numeric(df_3['start'])
df_3['end'] = pd.to_numeric(df_3['end'])
df_3



Unnamed: 0,Location,Strand,Length,PID,start,end
0,248..1453,+,401,15789341,248,1453
1,1450..2115,+,221,15789342,1450,2115
2,2145..3254,+,369,15789343,2145,3254
3,3322..5643,+,773,15789344,3322,5643
4,5646..7451,-,601,15789345,5646,7451
...,...,...,...,...,...,...
2070,2006446..2006856,-,136,15791394,2006446,2006856
2071,2006920..2007438,-,172,15791395,2006920,2007438
2072,2007840..2009699,+,619,15791396,2007840,2009699
2073,2009709..2011541,-,610,15791397,2009709,2011541


In [25]:
df_3.to_csv('Halobact.csv', index=False)
df_3

Unnamed: 0,Location,Strand,Length,PID,start,end
0,248..1453,+,401,15789341,248,1453
1,1450..2115,+,221,15789342,1450,2115
2,2145..3254,+,369,15789343,2145,3254
3,3322..5643,+,773,15789344,3322,5643
4,5646..7451,-,601,15789345,5646,7451
...,...,...,...,...,...,...
2070,2006446..2006856,-,136,15791394,2006446,2006856
2071,2006920..2007438,-,172,15791395,2006920,2007438
2072,2007840..2009699,+,619,15791396,2007840,2009699
2073,2009709..2011541,-,610,15791397,2009709,2011541


In [26]:
operons = []
current_operon = []

# Iterate through the sorted DataFrame
for index, row in df_3.iterrows():
    if not current_operon:  # If current_operon is empty, add the gene to start a new operon
        current_operon.append(row)
    else:
        last_gene = current_operon[-1]  # Get the last gene in the current operon
        distance = row['start'] - last_gene['end']
        if row['Strand'] == last_gene['Strand'] and distance <= 50:
            current_operon.append(row)  # Add the gene to the current operon
        else:
            operons.append(current_operon)  # Finish the current operon and start a new one
            current_operon = [row]

# Add the last operon (if any) to the list of operons
if current_operon:
    operons.append(current_operon)

# Print the operons
for i, operon in enumerate(operons, start=1):
    print(f'Operon {i}:')
    for gene in operon:
        print(gene)
    print('----------------------')

Operon 1:
Location    248..1453
Strand              +
Length            401
PID          15789341
start             248
end              1453
Name: 0, dtype: object
Location    1450..2115
Strand               +
Length             221
PID           15789342
start             1450
end               2115
Name: 1, dtype: object
Location    2145..3254
Strand               +
Length             369
PID           15789343
start             2145
end               3254
Name: 2, dtype: object
----------------------
Operon 2:
Location    3322..5643
Strand               +
Length             773
PID           15789344
start             3322
end               5643
Name: 3, dtype: object
----------------------
Operon 3:
Location    5646..7451
Strand               -
Length             601
PID           15789345
start             5646
end               7451
Name: 4, dtype: object
Location    7454..8641
Strand               -
Length             395
PID           15789346
start             7454
end       

In [27]:
with open('Halobacterium_operons.txt', 'w') as file:
    for i, operon in enumerate(operons, start=1):
        file.write(f'Operon {i}:\n')
        for gene in operon:
            file.write(str(gene) + '\n')
        file.write('----------------------\n')

print('Operons written to Halobacterium_operons.txt file.')

Operons written to Halobacterium_operons.txt file.


Now for Synechocystis_PCC6803_uid159873

In [28]:
ptt_input_file_bs = 'Synechocystis_PCC6803_uid159873.ptt'  
csv_output_file_bs = 'Synechocystis.csv'  

convert_ptt_to_csv(ptt_input_file_bs, csv_output_file_bs)

Conversion complete. CSV file saved as <_io.TextIOWrapper name='Synechocystis.csv' mode='w' encoding='utf-8'>


In [29]:
df_4 = pd.read_csv('Synechocystis.csv')
columns_to_drop = ['Gene', 'Synonym' , 'Code', 'COG', 'Product']  # Replace 'Column1' and 'Column2' with the actual column names you want to drop
df_4.drop(columns=columns_to_drop, inplace=True)

# Save the modified DataFrame to a new CSV file
df_4.to_csv('B_subtillis.csv', index=False)
df_4

Unnamed: 0,Location,Strand,Length,PID
0,937..1494,+,185,384435231
1,1577..2098,+,173,384435232
2,2172..2873,-,233,384435233
3,3192..4268,-,358,384435234
4,4451..5389,-,312,384435235
...,...,...,...,...
3165,3566767..3567156,+,129,384438396
3166,3567305..3567952,+,215,384438397
3167,3568057..3569208,+,383,384438398
3168,3569344..3570036,+,230,384438399


In [30]:
q1 = []
q2 = []

for i in range(len(df_4)):
    if '..' in df_4['Location'].loc[i]:
        q1.append(df_4['Location'].loc[i].split('..')[0])
        q2.append(df_4['Location'].loc[i].split('..')[1])

df_4['start'] = q1
df_4['end'] = q2

df_4['start'] = pd.to_numeric(df_4['start'])
df_4['end'] = pd.to_numeric(df_4['end'])



In [31]:
df_4

Unnamed: 0,Location,Strand,Length,PID,start,end
0,937..1494,+,185,384435231,937,1494
1,1577..2098,+,173,384435232,1577,2098
2,2172..2873,-,233,384435233,2172,2873
3,3192..4268,-,358,384435234,3192,4268
4,4451..5389,-,312,384435235,4451,5389
...,...,...,...,...,...,...
3165,3566767..3567156,+,129,384438396,3566767,3567156
3166,3567305..3567952,+,215,384438397,3567305,3567952
3167,3568057..3569208,+,383,384438398,3568057,3569208
3168,3569344..3570036,+,230,384438399,3569344,3570036


In [32]:
df_4.to_csv('Synechocystis.csv', index=False)
df_4

Unnamed: 0,Location,Strand,Length,PID,start,end
0,937..1494,+,185,384435231,937,1494
1,1577..2098,+,173,384435232,1577,2098
2,2172..2873,-,233,384435233,2172,2873
3,3192..4268,-,358,384435234,3192,4268
4,4451..5389,-,312,384435235,4451,5389
...,...,...,...,...,...,...
3165,3566767..3567156,+,129,384438396,3566767,3567156
3166,3567305..3567952,+,215,384438397,3567305,3567952
3167,3568057..3569208,+,383,384438398,3568057,3569208
3168,3569344..3570036,+,230,384438399,3569344,3570036


In [33]:
operons = []
current_operon = []

# Iterate through the sorted DataFrame
for index, row in df_2.iterrows():
    if not current_operon:  # If current_operon is empty, add the gene to start a new operon
        current_operon.append(row)
    else:
        last_gene = current_operon[-1]  # Get the last gene in the current operon
        distance = row['start'] - last_gene['end']
        if row['Strand'] == last_gene['Strand'] and distance <= 50:
            current_operon.append(row)  # Add the gene to the current operon
        else:
            operons.append(current_operon)  # Finish the current operon and start a new one
            current_operon = [row]

# Add the last operon (if any) to the list of operons
if current_operon:
    operons.append(current_operon)

# Print the operons
for i, operon in enumerate(operons, start=1):
    print(f'Operon {i}:')
    for gene in operon:
        print(gene)
    print('----------------------')

Operon 1:
Location    410..1750
Strand              +
Length            446
PID          16077069
start             410
end              1750
Name: 0, dtype: object
----------------------
Operon 2:
Location    1939..3075
Strand               +
Length             378
PID           16077070
start             1939
end               3075
Name: 1, dtype: object
----------------------
Operon 3:
Location    3206..3421
Strand               +
Length              71
PID           16077071
start             3206
end               3421
Name: 2, dtype: object
Location    3437..4549
Strand               +
Length             370
PID           16077072
start             3437
end               4549
Name: 3, dtype: object
Location    4567..4812
Strand               +
Length              81
PID          255767014
start             4567
end               4812
Name: 4, dtype: object
----------------------
Operon 4:
Location    4867..6783
Strand               +
Length             638
PID           16077074


In [34]:
with open('Synechocystis.txt', 'w') as file:
    for i, operon in enumerate(operons, start=1):
        file.write(f'Operon {i}:\n')
        for gene in operon:
            file.write(str(gene) + '\n')
        file.write('----------------------\n')

print('Operons written to Synechocystis.txt file.')

Operons written to Synechocystis.txt file.


Crop microbiome

In [35]:
def convert_gff_to_csv(gff_file, csv_file):
    # Open the PTT file for reading
    with open(gff_file, 'r') as gff_file:
        # Read all lines from the PTT file
        gff_lines = gff_file.readlines()

    # Open the CSV file for writing
    with open(csv_file, 'w', newline='') as csv_file:
        # Create a CSV writer object
        writer = csv.writer(csv_file, delimiter=',')

        # Write the header row
        writer.writerow(['Contig', 'Img-mer', 'CDS', 'Start', 'End', '.', 'Strand', '0', 'ID'])

        # Iterate through each line in the PTT file
        for line in gff_lines[3:]:  # Skip header lines
            # Split the line by tab (\t) to extract the gene information
            gene_info = line.strip().split('\t')
            
            # Write the extracted information to the CSV file
            writer.writerow(gene_info[:-1])  # Exclude the last empty field

    print(f"Conversion complete. CSV file saved as {csv_file}")

In [36]:
gff_input_file_bs = '2088090036.gff'  
csv_output_file_bs = 'microbiome.csv'  

convert_gff_to_csv(gff_input_file_bs, csv_output_file_bs)

Conversion complete. CSV file saved as <_io.TextIOWrapper name='microbiome.csv' mode='w' encoding='utf-8'>


In [37]:
df_microbiome = pd.read_csv('microbiome.csv')

In [38]:
columns_to_drop = ['Img-mer', 'CDS' , '.', '0', 'ID']
df_microbiome.drop(columns=columns_to_drop, inplace=True)
df_microbiome.to_csv('Crop_microbiome.csv', index=False)
df_microbiome

Unnamed: 0,Contig,Start,End,Strand
0,HCP21_5716,41,313,-
1,HCP21_5491,232,309,-
2,HCP21_5089,81,530,+
3,HCP21_5196,1,312,+
4,HCP21_6312,89,592,-
...,...,...,...,...
23902,HCP21_2_152357,4,105,+
23903,HCP21_2_153857,3,110,-
23904,HCP21_2_156357,1,120,+
23905,HCP21_2_158857,2,118,+


In [39]:
type(df_microbiome['Start'])

pandas.core.series.Series

In [41]:
operons = []
current_operon = []

# Iterate through the sorted DataFrame
for index, row in df_microbiome.iterrows():
    if not current_operon:  # If current_operon is empty, add the gene to start a new operon
        current_operon.append(row)
    else:
        last_gene = current_operon[-1]  # Get the last gene in the current operon
        distance = row['Start'] - last_gene['End']
        if row['Strand'] == last_gene['Strand'] and distance <= 50:
            current_operon.append(row)  # Add the gene to the current operon
        else:
            operons.append(current_operon)  # Finish the current operon and start a new one
            current_operon = [row]

# Add the last operon (if any) to the list of operons
if current_operon:
    operons.append(current_operon)

# Print the operons
for i, operon in enumerate(operons, start=1):
    print(f'Operon {i}:')
    for gene in operon:
        print(gene)
    print('----------------------')

Operon 1:
Contig    HCP21_5716
Start             41
End              313
Strand             -
Name: 0, dtype: object
Contig    HCP21_5491
Start            232
End              309
Strand             -
Name: 1, dtype: object
----------------------
Operon 2:
Contig    HCP21_5089
Start             81
End              530
Strand             +
Name: 2, dtype: object
Contig    HCP21_5196
Start              1
End              312
Strand             +
Name: 3, dtype: object
----------------------
Operon 3:
Contig    HCP21_6312
Start             89
End              592
Strand             -
Name: 4, dtype: object
----------------------
Operon 4:
Contig    HCP21_10684
Start              77
End               361
Strand              +
Name: 5, dtype: object
Contig    HCP21_9072
Start              3
End              320
Strand             +
Name: 6, dtype: object
----------------------
Operon 5:
Contig    HCP21_3159
Start             58
End              879
Strand             -
Name: 7, dtype: objec

In [42]:
with open('Microbiome.txt', 'w') as file:
    for i, operon in enumerate(operons, start=1):
        file.write(f'Operon {i}:\n')
        for gene in operon:
            file.write(str(gene) + '\n')
        file.write('----------------------\n')

print('Operons written to Microbiome.txt file.')

Operons written to Microbiome.txt file.
