# Extract a specific column (and the rows that contain it) from a TSV (tab-separated values) file, then write the results to separate TXT files.

In [1]:
import csv

# Function to extract the "Orthogroup" column (first column) and save it to a TXT file
def extract_orthogroup(input_tsv, output_txt):
    """Write the first column of a TSV file to a text file, one value per line.

    Every non-empty row of ``input_tsv`` contributes its first field
    (index 0) to ``output_txt``. The header row, if the file has one, is
    not treated specially and is written like any other row.

    Args:
        input_tsv: Path of the tab-separated input file (read as UTF-8).
        output_txt: Path of the text file to create or overwrite (UTF-8).

    Raises:
        OSError: If either file cannot be opened.
    """
    with open(input_tsv, 'r', newline='', encoding='utf-8') as tsvfile, \
            open(output_txt, 'w', encoding='utf-8') as outfile:
        for row in csv.reader(tsvfile, delimiter='\t'):  # TSV uses tab as delimiter
            # csv.reader yields an empty list for a blank line; indexing it
            # would raise IndexError, so such lines are skipped.
            if not row:
                continue
            outfile.write(row[0] + '\n')

# Example usage
# Both paths are hard-coded absolute paths; adjust them before running on another machine.
# Note: input_tsv and output_txt are module-level names that later cells reassign.
input_tsv = "/usr2/people/shollyt22/shollyt22/JGIsordariomycete/orthofinder_outputs/for_filtering_for_2_or_3_copies_orthogroups/filtered_orthogroups_single_copy_per_species_original_2.tsv"
output_txt = "/usr2/people/shollyt22/shollyt22/JGIsordariomycete/orthofinder_outputs/for_filtering_for_2_or_3_copies_orthogroups/filtered_singlecopyorthogroups.txt"  # Output TXT file to save the Orthogroup column

# Writes the first column of input_tsv to output_txt, one value per line
extract_orthogroup(input_tsv, output_txt)


In [3]:
import csv

# Function to re-write a TSV file as a TXT file using a caller-chosen delimiter
def convert_tsv_to_txt(input_tsv, output_txt, delimiter='\t'):
    """Copy every row of a TSV file into a text file with a new delimiter.

    The input is parsed as tab-separated values and each row is re-emitted
    through ``csv.writer`` with its default dialect settings, so a field that
    contains the output delimiter is wrapped in double quotes.

    Args:
        input_tsv: Path of the tab-separated input file (read as UTF-8).
        output_txt: Path of the file to create or overwrite (UTF-8).
        delimiter: Single-character field separator for the output;
            defaults to a tab.
    """
    with open(input_tsv, 'r', newline='', encoding='utf-8') as source, \
            open(output_txt, 'w', newline='', encoding='utf-8') as target:
        rows = csv.reader(source, delimiter='\t')
        # writerows consumes the reader lazily, one row at a time
        csv.writer(target, delimiter=delimiter).writerows(rows)

# Example usage
# Both paths are hard-coded absolute paths; adjust them before running on another machine.
input_tsv = "/usr2/people/shollyt22/shollyt22/JGIsordariomycete/orthofinder_outputs/for_filtering_for_2_or_3_copies_orthogroups/filtered_orthogroups_single_copy_per_species_original_2.tsv"
output_txt = "/usr2/people/shollyt22/shollyt22/JGIsordariomycete/orthofinder_outputs/for_filtering_for_2_or_3_copies_orthogroups/filtered_orthogroups_single_copy_per_species_original_2.txt"

# Convert TSV to TXT with space-separated values (the delimiter passed here is a single space, not a comma).
# With csv.writer's default quoting, any field that itself contains a space is wrapped in double quotes.
convert_tsv_to_txt(input_tsv, output_txt, delimiter=' ')


In [None]:
# Function to clean the text file by deleting double quotes and turning commas into spaces
def clean_txt(input_txt, output_txt):
    """Strip double quotes from a text file and replace commas with spaces.

    The whole input is read into memory, transformed, and then written to
    ``output_txt``; the input is fully read and closed before the output is
    opened.

    Args:
        input_txt: Path of the text file to clean (read as UTF-8).
        output_txt: Path of the cleaned file to create or overwrite (UTF-8).
    """
    # A single translation table performs both substitutions:
    # '"' is deleted, ',' becomes one space.
    substitutions = str.maketrans({'"': None, ',': ' '})

    with open(input_txt, 'r', encoding='utf-8') as source:
        raw_text = source.read()

    tidy_text = raw_text.translate(substitutions)

    with open(output_txt, 'w', encoding='utf-8') as target:
        target.write(tidy_text)

# Example usage
# Both paths are hard-coded absolute paths; adjust them before running on another machine.
input_txt = "/usr2/people/shollyt22/shollyt22/JGIsordariomycete/orthofinder_outputs/for_filtering_for_2_or_3_copies_orthogroups/filtered_Orthogroups_31_species.txt"  # The TXT file to be cleaned
output_txt = "/usr2/people/shollyt22/shollyt22/JGIsordariomycete/orthofinder_outputs/for_filtering_for_2_or_3_copies_orthogroups/filtered_Orthogroups_31_species_cleaned.txt"  # Name of the cleaned output TXT file

# Deletes every double quote and replaces every comma with a space
clean_txt(input_txt, output_txt)


In [None]:
# Function to write each line's orthogroup (first token) followed by a colon
def add_colon(input_txt, output_txt):
    """Write the first whitespace-separated token of each line, suffixed with ':'.

    Lines that are empty or contain only whitespace are skipped. Only the
    first token of each remaining line is written; the rest of the line is
    not copied to the output.

    NOTE(review): an earlier comment here mentioned rejoining the rest of the
    line, but the code has only ever emitted the first token; confirm that
    ID-only output is what is wanted.

    Args:
        input_txt: Path of the text file to read (UTF-8).
        output_txt: Path of the text file to create or overwrite (UTF-8).
    """
    # The input is read completely and closed before the output is opened.
    with open(input_txt, 'r', encoding='utf-8') as source:
        raw_lines = source.readlines()

    # split() with no arguments ignores leading/trailing whitespace and
    # returns an empty list for blank lines, which are filtered out below.
    token_lists = (raw.split() for raw in raw_lines)
    labelled = [tokens[0] + ':' + '\n' for tokens in token_lists if tokens]

    with open(output_txt, 'w', encoding='utf-8') as target:
        target.writelines(labelled)

# Example usage
# Both paths are hard-coded absolute paths; adjust them before running on another machine.
# The input path here is the same file that the clean_txt example above writes.
input_txt = "/usr2/people/shollyt22/shollyt22/JGIsordariomycete/orthofinder_outputs/for_filtering_for_2_or_3_copies_orthogroups/filtered_Orthogroups_31_species_cleaned.txt"  # The TXT file to be modified
output_txt = "/usr2/people/shollyt22/shollyt22/JGIsordariomycete/orthofinder_outputs/for_filtering_for_2_or_3_copies_orthogroups/filtered_Orthogroups_31_species_cleaned_1.txt"  # Name of the new output TXT file

# Writes the first token of every non-blank line followed by ':'
add_colon(input_txt, output_txt)
