# Feature Preprocessing

## Append all Motifs Feature into Excel File

In [1]:
import pandas as pd
import os
import xml.etree.ElementTree as ET

### 1) Create a new empty excel file with the header

In [2]:
# Create a DataFrame with columns as sequence IDs ranging from sequence_0 to sequence_258
sequence_ids = [f"sequence_{i}" for i in range(258)]
df = pd.DataFrame(columns=sequence_ids)

# Insert a new column named "motifs" at the beginning of the DataFrame
df.insert(0, "motifs", '')

# Write the DataFrame to an Excel file
excel_file_path = "motifs.xlsx"
df.to_excel(excel_file_path, index=False)
print(f"Excel file with sequence headers created at: {excel_file_path}")

Excel file with sequence headers created at: motifs.xlsx


### 2) Loop through all XML files containing motif information, extract the motifs, and append them to a pre-created Excel file, applying a threshold of e-value < 0.05


In [3]:
def process_xml_file(file_path, sequence_headers):
    # Extract XML filename without extension
    xml_filename = os.path.splitext(os.path.basename(file_path))[0]

    # Load the XML file
    tree = ET.parse(file_path)
    root = tree.getroot()

    # Initialize a flag to check if any motif is found in the XML file
    motif_found = False

    # Loop through all motifs in the XML file
    for motif in root.findall('.//motif'):
        motif_id = motif.attrib.get('id')
        e_value = float(motif.attrib.get('e_value', 0))  # Convert e_value to float, default to 0 if not present

        # Check if e_value is smaller than 0.05
        if e_value < 0.05:
            # Find contributing sites under the current motif
            contributing_sites = motif.find('.//contributing_sites')
            if contributing_sites is not None:
                motif_found = True  # Set motif_found flag to True if at least one motif is found
                for site in contributing_sites.findall('.//contributing_site'):
                    sequence_id = site.attrib.get('sequence_id')
                    letter_ids = ''.join([letter.attrib.get('letter_id') for letter in site.findall('.//letter_ref')])

                    # Rename motif ID by adding XML filename prefix
                    motif_id_with_filename = f"{xml_filename}_{motif_id}"

                    # Append the letter IDs to the respective sequence ID in the DataFrame
                    if sequence_id in sequence_headers.columns:
                        sequence_headers.loc[motif_id_with_filename, sequence_id] = letter_ids

    # If no motif is found in the XML file, append "NO" under all sequence IDs
    if not motif_found:
        sequence_headers.loc[f"{xml_filename}_NO", :] = "NO"

# Load "sequence_headers.xlsx" file to get sequence IDs
sequence_headers = pd.read_excel("motifs.xlsx", index_col=0)

# Directory containing XML files
xml_directory = 'C:/Users/jingj/Desktop/MASTER/Paper/meme/xml'

# Loop through each XML file in the directory
for file_name in os.listdir(xml_directory):
    if file_name.endswith('.xml'):
        xml_file_path = os.path.join(xml_directory, file_name)
        # Process the XML file
        process_xml_file(xml_file_path, sequence_headers)

# Save the updated DataFrame to the same Excel file
sequence_headers.to_excel("motifs.xlsx")
print("Data appended to 'motifs.xlsx' successfully.")

Data appended to 'motifs.xlsx' successfully.


### 3) Transpose the dimensions and rename the headers accordingly to match the strain

In [5]:
# Header mapping dictionary
header_mapping = {
    'sequence_0': 'Agona_1',
    'sequence_1': 'Agona_10',
    'sequence_2': 'Agona_11',
    'sequence_3': 'Agona_12',
    'sequence_4': 'Agona_13',
    'sequence_5': 'Agona_14',
    'sequence_6': 'Agona_15',
    'sequence_7': 'Agona_2',
    'sequence_8': 'Agona_3',
    'sequence_9': 'Agona_4',
    'sequence_10': 'Agona_5',
    'sequence_11': 'Agona_6',
    'sequence_12': 'Agona_7',
    'sequence_13': 'Agona_8',
    'sequence_14': 'Agona_9',
    'sequence_15': 'Dublin_1',
    'sequence_16': 'Dublin_10',
    'sequence_17': 'Dublin_11',
    'sequence_18': 'Dublin_12',
    'sequence_19': 'Dublin_13',
    'sequence_20': 'Dublin_14',
    'sequence_21': 'Dublin_15',
    'sequence_22': 'Dublin_2',
    'sequence_23': 'Dublin_3',
    'sequence_24': 'Dublin_4',
    'sequence_25': 'Dublin_5',
    'sequence_26': 'Dublin_6',
    'sequence_27': 'Dublin_7',
    'sequence_28': 'Dublin_8',
    'sequence_29': 'Dublin_9',
    'sequence_30': 'Enteritidis_1',
    'sequence_31': 'Enteritidis_10',
    'sequence_32': 'Enteritidis_11',
    'sequence_33': 'Enteritidis_12',
    'sequence_34': 'Enteritidis_13',
    'sequence_35': 'Enteritidis_14',
    'sequence_36': 'Enteritidis_15',
    'sequence_37': 'Enteritidis_2',
    'sequence_38': 'Enteritidis_3',
    'sequence_39': 'Enteritidis_4',
    'sequence_40': 'Enteritidis_5',
    'sequence_41': 'Enteritidis_6',
    'sequence_42': 'Enteritidis_7',
    'sequence_43': 'Enteritidis_8',
    'sequence_44': 'Enteritidis_9',
    'sequence_45': 'Heidelberg_1',
    'sequence_46': 'Heidelberg_10',
    'sequence_47': 'Heidelberg_11',
    'sequence_48': 'Heidelberg_12',
    'sequence_49': 'Heidelberg_13',
    'sequence_50': 'Heidelberg_14',
    'sequence_51': 'Heidelberg_15',
    'sequence_52': 'Heidelberg_2',
    'sequence_53': 'Heidelberg_3',
    'sequence_54': 'Heidelberg_4',
    'sequence_55': 'Heidelberg_5',
    'sequence_56': 'Heidelberg_6',
    'sequence_57': 'Heidelberg_7',
    'sequence_58': 'Heidelberg_8',
    'sequence_59': 'Heidelberg_9',
    'sequence_60': 'Infantis_1',
    'sequence_61': 'Infantis_10',
    'sequence_62': 'Infantis_11',
    'sequence_63': 'Infantis_12',
    'sequence_64': 'Infantis_13',
    'sequence_65': 'Infantis_14',
    'sequence_66': 'Infantis_15',
    'sequence_67': 'Infantis_2',
    'sequence_68': 'Infantis_3',
    'sequence_69': 'Infantis_4',
    'sequence_70': 'Infantis_5',
    'sequence_71': 'Infantis_6',
    'sequence_72': 'Infantis_7',
    'sequence_73': 'Infantis_8',
    'sequence_74': 'Infantis_9',
    'sequence_75': 'Montevideo_1',
    'sequence_76': 'Montevideo_10',
    'sequence_77': 'Montevideo_11',
    'sequence_78': 'Montevideo_12',
    'sequence_79': 'Montevideo_13',
    'sequence_80': 'Montevideo_14',
    'sequence_81': 'Montevideo_15',
    'sequence_82': 'Montevideo_2',
    'sequence_83': 'Montevideo_3',
    'sequence_84': 'Montevideo_4',
    'sequence_85': 'Montevideo_5',
    'sequence_86': 'Montevideo_6',
    'sequence_87': 'Montevideo_7',
    'sequence_88': 'Montevideo_8',
    'sequence_89': 'Montevideo_9',
    'sequence_90': 'Newport_1',
    'sequence_91': 'Newport_10',
    'sequence_92': 'Newport_11',
    'sequence_93': 'Newport_12',
    'sequence_94': 'Newport_13',
    'sequence_95': 'Newport_14',
    'sequence_96': 'Newport_15',
    'sequence_97': 'Newport_2',
    'sequence_98': 'Newport_3',
    'sequence_99': 'Newport_4',
    'sequence_100': 'Newport_5',
    'sequence_101': 'Newport_6',
    'sequence_102': 'Newport_7',
    'sequence_103': 'Newport_8',
    'sequence_104': 'Newport_9',
    'sequence_105': 'Schwarzengrund_1',
    'sequence_106': 'Schwarzengrund_10',
    'sequence_107': 'Schwarzengrund_11',
    'sequence_108': 'Schwarzengrund_12',
    'sequence_109': 'Schwarzengrund_13',
    'sequence_110': 'Schwarzengrund_14',
    'sequence_111': 'Schwarzengrund_15',
    'sequence_112': 'Schwarzengrund_2',
    'sequence_113': 'Schwarzengrund_3',
    'sequence_114': 'Schwarzengrund_4',
    'sequence_115': 'Schwarzengrund_5',
    'sequence_116': 'Schwarzengrund_6',
    'sequence_117': 'Schwarzengrund_7',
    'sequence_118': 'Schwarzengrund_8',
    'sequence_119': 'Schwarzengrund_9',
    'sequence_120': 'Testing_10_Schwarzengrund',
    'sequence_121': 'Testing_11_enterica',
    'sequence_122': 'Testing_12_enterica',
    'sequence_123': 'Testing_13_enterica',
    'sequence_124': 'Testing_14_arizonae',
    'sequence_125': 'Testing_15_diarizonae',
    'sequence_126': 'Testing_16_houtenae',
    'sequence_127': 'Testing_17_indica',
    'sequence_128': 'Testing_18_salamae',
    'sequence_129': 'Testing_19_salamae',
    'sequence_130': 'Testing_1_Enteritidis',
    'sequence_131': 'Testing_2_Typhimurium',
    'sequence_132': 'Testing_3_Newport',
    'sequence_133': 'Testing_4_Typhi',
    'sequence_134': 'Testing_5_Infantis',
    'sequence_135': 'Testing_6_Agona',
    'sequence_136': 'Testing_7_Heidelberg',
    'sequence_137': 'Testing_8_Dublin',
    'sequence_138': 'Typhi_1',
    'sequence_139': 'Typhi_10',
    'sequence_140': 'Typhi_11',
    'sequence_141': 'Typhi_12',
    'sequence_142': 'Typhi_13',
    'sequence_143': 'Typhi_14',
    'sequence_144': 'Typhi_15',
    'sequence_145': 'Typhi_2',
    'sequence_146': 'Typhi_3',
    'sequence_147': 'Typhi_4',
    'sequence_148': 'Typhi_5',
    'sequence_149': 'Typhi_6',
    'sequence_150': 'Typhi_7',
    'sequence_151': 'Typhi_8',
    'sequence_152': 'Typhi_9',
    'sequence_153': 'Typhimurium_1',
    'sequence_154': 'Typhimurium_10',
    'sequence_155': 'Typhimurium_11',
    'sequence_156': 'Typhimurium_12',
    'sequence_157': 'Typhimurium_13',
    'sequence_158': 'Typhimurium_14',
    'sequence_159': 'Typhimurium_15',
    'sequence_160': 'Typhimurium_2',
    'sequence_161': 'Typhimurium_3',
    'sequence_162': 'Typhimurium_4',
    'sequence_163': 'Typhimurium_5',
    'sequence_164': 'Typhimurium_6',
    'sequence_165': 'Typhimurium_7',
    'sequence_166': 'Typhimurium_8',
    'sequence_167': 'Typhimurium_9',
    'sequence_168': 'arizonae_1',
    'sequence_169': 'arizonae_10',
    'sequence_170': 'arizonae_11',
    'sequence_171': 'arizonae_12',
    'sequence_172': 'arizonae_13',
    'sequence_173': 'arizonae_14',
    'sequence_174': 'arizonae_15',
    'sequence_175': 'arizonae_2',
    'sequence_176': 'arizonae_3',
    'sequence_177': 'arizonae_4',
    'sequence_178': 'arizonae_5',
    'sequence_179': 'arizonae_6',
    'sequence_180': 'arizonae_7',
    'sequence_181': 'arizonae_8',
    'sequence_182': 'arizonae_9',
    'sequence_183': 'diarizonae_1',
    'sequence_184': 'diarizonae_10',
    'sequence_185': 'diarizonae_11',
    'sequence_186': 'diarizonae_12',
    'sequence_187': 'diarizonae_13',
    'sequence_188': 'diarizonae_14',
    'sequence_189': 'diarizonae_15',
    'sequence_190': 'diarizonae_2',
    'sequence_191': 'diarizonae_3',
    'sequence_192': 'diarizonae_4',
    'sequence_193': 'diarizonae_5',
    'sequence_194': 'diarizonae_6',
    'sequence_195': 'diarizonae_7',
    'sequence_196': 'diarizonae_8',
    'sequence_197': 'diarizonae_9',
    'sequence_198': 'enterica_1',
    'sequence_199': 'enterica_10',
    'sequence_200': 'enterica_11',
    'sequence_201': 'enterica_12',
    'sequence_202': 'enterica_13',
    'sequence_203': 'enterica_14',
    'sequence_204': 'enterica_15',
    'sequence_205': 'enterica_2',
    'sequence_206': 'enterica_3',
    'sequence_207': 'enterica_4',
    'sequence_208': 'enterica_5',
    'sequence_209': 'enterica_6',
    'sequence_210': 'enterica_7',
    'sequence_211': 'enterica_8',
    'sequence_212': 'enterica_9',
    'sequence_213': 'houtenae_1',
    'sequence_214': 'houtenae_10',
    'sequence_215': 'houtenae_11',
    'sequence_216': 'houtenae_12',
    'sequence_217': 'houtenae_13',
    'sequence_218': 'houtenae_14',
    'sequence_219': 'houtenae_15',
    'sequence_220': 'houtenae_2',
    'sequence_221': 'houtenae_3',
    'sequence_222': 'houtenae_4',
    'sequence_223': 'houtenae_5',
    'sequence_224': 'houtenae_6',
    'sequence_225': 'houtenae_7',
    'sequence_226': 'houtenae_8',
    'sequence_227': 'houtenae_9',
    'sequence_228': 'indica_1',
    'sequence_229': 'indica_10',
    'sequence_230': 'indica_11',
    'sequence_231': 'indica_12',
    'sequence_232': 'indica_13',
    'sequence_233': 'indica_14',
    'sequence_234': 'indica_15',
    'sequence_235': 'indica_2',
    'sequence_236': 'indica_3',
    'sequence_237': 'indica_4',
    'sequence_238': 'indica_5',
    'sequence_239': 'indica_6',
    'sequence_240': 'indica_7',
    'sequence_241': 'indica_8',
    'sequence_242': 'indica_9',
    'sequence_243': 'salamae_1',
    'sequence_244': 'salamae_10',
    'sequence_245': 'salamae_11',
    'sequence_246': 'salamae_12',
    'sequence_247': 'salamae_13',
    'sequence_248': 'salamae_14',
    'sequence_249': 'salamae_15',
    'sequence_250': 'salamae_2',
    'sequence_251': 'salamae_3',
    'sequence_252': 'salamae_4',
    'sequence_253': 'salamae_5',
    'sequence_254': 'salamae_6',
    'sequence_255': 'salamae_7',
    'sequence_256': 'salamae_8',
    'sequence_257': 'salamae_9',
}


file_path = 'motifs.xlsx'

# Read the Excel file into a DataFrame
df = pd.read_excel(file_path)

# Renaming the column headers based on the header_mapping dictionary
df = df.rename(columns=header_mapping)

# Transpose the DataFrame to swap rows and columns
df = df.transpose()

# Reset index to set the first row as column headers
df.columns = df.iloc[0]

# Drop the first row since it's now the column headers
df = df.drop(df.index[0])

# Reset index to set the first column as row headers
df = df.reset_index()

# Rename the first column to 'Row Header'
df = df.rename(columns={'index': 'Row Header'})

# Display the DataFrame
print(df)

motifs Row Header                                  OG0001278_motif_1  \
0         Agona_1  ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
1        Agona_10  ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
2        Agona_11  ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
3        Agona_12  ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
4        Agona_13  ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
..            ...                                                ...   
253     salamae_5  ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
254     salamae_6  ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
255     salamae_7  ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
256     salamae_8  ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
257     salamae_9  ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   

motifs                                  OG0001278_motif_2  \
0       ATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAG...   
1       ATGTG

### 4) Add a 'Subspecies' column to serve as the class label

In [6]:
# Extract subspecies from row header
df.insert(1, 'Subspecies', df['Row Header'].str.split('_').str[0])

# Display the DataFrame with the new 'Subspecies' column
print(df)

motifs Row Header Subspecies  \
0         Agona_1      Agona   
1        Agona_10      Agona   
2        Agona_11      Agona   
3        Agona_12      Agona   
4        Agona_13      Agona   
..            ...        ...   
253     salamae_5    salamae   
254     salamae_6    salamae   
255     salamae_7    salamae   
256     salamae_8    salamae   
257     salamae_9    salamae   

motifs                                  OG0001278_motif_1  \
0       ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
1       ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
2       ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
3       ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
4       ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
..                                                    ...   
253     ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
254     ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
255     ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
256  

In [7]:
# View the DataFrame
print("DataFrame Header:")
print(df.head()) 

DataFrame Header:
motifs Row Header Subspecies  \
0         Agona_1      Agona   
1        Agona_10      Agona   
2        Agona_11      Agona   
3        Agona_12      Agona   
4        Agona_13      Agona   

motifs                                  OG0001278_motif_1  \
0       ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
1       ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
2       ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
3       ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   
4       ATGAAATTTACCGTTGAACGTGAACATTTATTAAAACCGCTTCAGC...   

motifs                                  OG0001278_motif_2  \
0       ATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAG...   
1       ATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAG...   
2       ATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAG...   
3       ATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAG...   
4       ATGTGCGCTACTACTTAAACGGTATGCTGTTTGAAACGGAAGGTAG...   

motifs                                  OG0001278_motif

In [9]:
print("DataFrame Dimensions:")
print(df.shape)   

DataFrame Dimensions:
(258, 7124)


In [11]:
print("Column Names:")
print(df.columns)

Column Names:
Index(['Row Header', 'Subspecies', 'OG0001278_motif_1', 'OG0001278_motif_2',
       'OG0001278_motif_3', 'OG0001278_motif_4', 'OG0001278_motif_5',
       'OG0001279_motif_1', 'OG0001279_motif_2', 'OG0001279_motif_3',
       ...
       'OG0002829_motif_1', 'OG0002829_motif_2', 'OG0002829_motif_3',
       'OG0002829_motif_4', 'OG0002829_motif_5', 'OG0002830_motif_1',
       'OG0002830_motif_2', 'OG0002830_motif_3', 'OG0002830_motif_4',
       'OG0002830_motif_5'],
      dtype='object', name='motifs', length=7124)


In [12]:
# Check for empty values in the entire DataFrame
empty_values = df.isnull().sum().sum()

if empty_values == 0:
    print("No empty values found in the DataFrame.")
else:
    print("There are {} empty values in the DataFrame.".format(empty_values))

There are 3722 empty values in the DataFrame.


In [13]:
df.to_excel("C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs2.xlsx", index=False)

### 5) Extract each motif feature from the column individually into separate files for motif alignment, ensuring that each file corresponds to a single motif column

In [14]:
output_directory = "C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs"

# Create the output directory if it doesn't exist
os.makedirs(output_directory, exist_ok=True)

# Iterate over each column (excluding the first two columns)
for column in df.columns[2:]:
    # Extract column header and sequences
    header = column
    sequences = df[column]

    # Create a list to store the sequences in FASTA format
    fasta_sequences = []

    # Iterate over rows and extract header and sequence
    for index, sequence in sequences.items():
        # Check if the sequence is empty or NaN
        if pd.isnull(sequence) or sequence == "":
            # If empty or NaN, set sequence to "N"
            sequence = "N"
        # Format the sequence as FASTA
        fasta_sequence = f">{df['Row Header'][index]}\n{sequence}"
        
        # Append to the list
        fasta_sequences.append(fasta_sequence)

    # Write the FASTA sequences to a file named after the column header in the output directory
    filename = os.path.join(output_directory, f"{header}.fasta")
    with open(filename, "w") as f:
        for fasta_sequence in fasta_sequences:
            f.write(fasta_sequence + "\n")

    print(f"Saved {filename}")

print("All sequences saved.")

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001278_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001278_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001278_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001278_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001278_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001279_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001279_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001279_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001279_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001308_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001308_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001309_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001309_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001309_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001309_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001309_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001310_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001310_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001333_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001333_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001334_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001334_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001334_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001334_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001334_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001335_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001335_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001358_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001358_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001358_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001358_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001359_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001359_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001359_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001359_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001359_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001380_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001381_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001381_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001381_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001381_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001381_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001382_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001382_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001382_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001404_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001405_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001405_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001405_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001405_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001405_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001406_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001406_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001406_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001429_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001430_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001430_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001430_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001430_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001430_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001431_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001431_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001431_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001452_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001452_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001453_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001453_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001453_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001453_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001453_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001454_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001454_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001477_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001478_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001478_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001478_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001478_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001478_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001479_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001479_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001479_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001502_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001503_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001503_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001503_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001503_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001503_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001504_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001504_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001504_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001526_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001526_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001526_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001526_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001526_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001527_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001527_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001527_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001527_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001550_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001551_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001551_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001551_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001551_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001551_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001552_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001552_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001552_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001572_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001572_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001573_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001573_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001573_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001573_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001573_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001574_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001574_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001595_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001595_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001595_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001595_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001595_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001596_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001596_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001596_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001596_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001619_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001620_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001620_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001620_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001620_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001620_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001621_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001621_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001621_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001642_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001642_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001642_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001643_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001643_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001643_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001644_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001644_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001644_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001668_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001668_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001669_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001669_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001669_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001669_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001669_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001670_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001670_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001691_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001691_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001691_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001691_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001692_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001692_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001692_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001692_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001692_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001716_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001716_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001716_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001716_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001717_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001717_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001717_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001717_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001717_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001739_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001739_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001740_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001740_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001740_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001740_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001740_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001741_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001741_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001762_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001762_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001762_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001762_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001763_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001763_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001763_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001763_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001763_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001784_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001784_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001784_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001784_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001785_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001785_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001785_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001785_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001785_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001809_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001809_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001809_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001809_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001809_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001810_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001810_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001810_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001810_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001831_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001832_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001832_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001832_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001832_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001832_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001833_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001833_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001833_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001855_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001855_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001855_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001855_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001856_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001856_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001856_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001856_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001856_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001880_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001880_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001880_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001880_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001881_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001881_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001881_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001881_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001881_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001904_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001904_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001904_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001904_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001905_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001905_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001905_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001905_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001905_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001930_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001930_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001930_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001930_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001930_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001931_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001931_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001931_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001931_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001951_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001951_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001951_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001951_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001951_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001952_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001952_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001952_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001952_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001975_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001975_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001975_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001975_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001976_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001976_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001976_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001976_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001976_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001997_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001997_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001997_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001997_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001998_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001998_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001998_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001998_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0001998_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002019_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002019_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002020_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002020_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002020_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002020_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002020_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002021_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002021_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002117_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002118_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002118_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002118_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002118_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002118_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002119_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002119_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002119_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002139_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002139_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002140_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002140_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002140_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002140_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002140_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002141_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002141_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002163_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002164_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002164_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002164_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002164_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002164_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002165_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002165_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002165_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002189_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002189_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002189_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002189_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002189_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002190_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002190_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002190_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002190_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002214_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002214_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002214_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002214_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002215_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002215_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002215_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002215_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002215_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002242_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002242_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002243_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002243_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002243_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002243_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002243_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002244_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002244_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002265_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002265_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002265_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002266_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002266_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002266_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002266_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002266_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002267_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002289_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002289_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002289_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002289_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002290_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002290_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002290_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002290_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002290_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002312_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002312_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002312_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002313_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002313_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002313_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002313_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002313_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002314_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002337_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002337_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002338_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002338_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002338_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002338_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002338_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002339_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002339_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002362_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002362_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002362_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002363_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002363_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002363_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002363_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002363_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002364_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002388_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002389_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002389_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002389_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002389_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002389_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002390_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002390_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002390_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002413_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002413_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002414_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002414_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002414_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002414_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002414_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002415_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002415_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002436_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002436_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002436_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002437_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002437_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002437_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002437_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002437_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002438_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002464_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002465_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002465_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002465_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002465_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002465_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002466_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002466_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002466_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002487_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002487_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002487_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002487_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002488_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002488_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002488_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002488_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002488_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002512_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002512_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002512_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002513_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002513_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002513_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002513_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002513_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002514_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002536_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002536_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002536_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002536_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002537_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002537_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002537_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002537_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002537_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002561_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002562_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002562_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002562_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002562_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002562_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002563_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002563_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002563_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002588_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002588_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002588_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002589_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002589_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002589_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002589_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002589_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002590_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002614_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002614_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002614_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002614_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002615_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002615_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002615_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002615_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002615_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002638_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002638_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002638_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002638_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002639_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002639_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002639_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002639_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002639_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002662_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002662_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002662_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002662_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002662_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002663_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002663_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002663_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002663_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002699_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002699_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002699_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002699_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002699_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002700_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002700_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002700_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002700_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002721_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002722_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002722_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002722_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002722_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002722_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002723_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002723_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002723_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002745_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002745_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002745_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002745_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002745_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002746_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002746_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002746_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002746_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002768_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002769_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002769_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002769_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002769_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002769_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002770_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002770_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002770_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002792_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002792_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002792_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002792_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002793_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002793_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002793_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002793_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002793_motif_

Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002819_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002819_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002819_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002820_motif_1.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002820_motif_2.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002820_motif_3.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002820_motif_4.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002820_motif_5.fasta
Saved C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs\OG0002821_motif_

### 6) Extract the list of motif column headers for the purpose of performing alignment

In [17]:
# Get all the column headers except the first two columns
headers = df.columns[2:]

# Write the headers to a text file
with open("alignment_motif_header.txt", "w") as f:
    for header in headers:
        f.write(header + "\n")

print("Header names extracted and stored in alignment_motif_header.txt.txt")

Header names extracted and stored in alignment_motif_header.txt.txt


### 7) Perform multiple sequence alignment (MSA) using the MUSCLE tool on the server

### 8)  Update the Excel file with the aligned motif FASTA files

In [19]:
# Define the directory storing aligned files
aligned_files_directory = "C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs_a"

# Iterate through each aligned file in the directory
for aligned_file in os.listdir(aligned_files_directory):
    if aligned_file.endswith(".afa"):
        file_path = os.path.join(aligned_files_directory, aligned_file)
        with open(file_path, 'r') as file:
            fasta_header = None
            aligned_sequence = None
            for line in file:
                if line.startswith(">"):
                    if aligned_sequence is not None:
                        # Find the corresponding row based on the fasta header
                        row_index = df[df['Row Header'] == fasta_header].index[0]

                        # Find the motif column based on the aligned file name
                        motif_column = aligned_file.replace('.afa', '')

                        # Update the DataFrame with aligned sequence
                        df.at[row_index, motif_column] = aligned_sequence

                    # Reset aligned_sequence for the new sequence
                    aligned_sequence = ""
                    fasta_header = line.strip()[1:]
                else:
                    aligned_sequence += line.strip()

            # Update the last aligned sequence in the file
            if aligned_sequence is not None:
                row_index = df[df['Row Header'] == fasta_header].index[0]
                motif_column = aligned_file.replace('.afa', '')
                df.at[row_index, motif_column] = aligned_sequence

# Write the updated DataFrame back to the Excel file
df.to_excel("C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/motifs_updated.xlsx", index=False)

### 9) In the Excel file, replace all '-' symbols with 'N' to represent missing values or gaps

### 10) Remove columns that have a row with an 'N' nucleotide frequency greater than 5%

In [20]:
df = pd.read_excel("motifs_updated.xlsx")

dimensions = df.shape
print("Dimensions of the DataFrame (rows, columns):", dimensions)

size = df.size
print("Size of the DataFrame (number of elements):", size)

Dimensions of the DataFrame (rows, columns): (258, 7124)
Size of the DataFrame (number of elements): 1837992


In [21]:
# Define the threshold for 'N' nucleotide frequency (5%)
threshold = 0.05

# Iterate over each column (excluding the first two columns)
columns_to_drop = []
for idx, column in enumerate(df.columns[2:]):
    drop_column = False
    for row_index, row in df.iterrows():
        # Calculate the percentage of 'N' nucleotides in each row within the column
        n_count = row[column].count("N")
        total_count = len(row[column])
        n_percentage = n_count / total_count

        # Check if the percentage of 'N' nucleotides in the row is greater than the threshold
        if n_percentage > threshold:
            drop_column = True
            break

    # Print progress for each column
    print(f"Progress: Column {idx + 1}/{len(df.columns[2:])}")

    # If any row has 'N' percentage greater than 5%, mark the column for dropping
    if drop_column:
        columns_to_drop.append(column)

# Drop the marked columns from the DataFrame
df.drop(columns=columns_to_drop, inplace=True)

Progress: Column 1/7122
Progress: Column 2/7122
Progress: Column 3/7122
Progress: Column 4/7122
Progress: Column 5/7122
Progress: Column 6/7122
Progress: Column 7/7122
Progress: Column 8/7122
Progress: Column 9/7122
Progress: Column 10/7122
Progress: Column 11/7122
Progress: Column 12/7122
Progress: Column 13/7122
Progress: Column 14/7122
Progress: Column 15/7122
Progress: Column 16/7122
Progress: Column 17/7122
Progress: Column 18/7122
Progress: Column 19/7122
Progress: Column 20/7122
Progress: Column 21/7122
Progress: Column 22/7122
Progress: Column 23/7122
Progress: Column 24/7122
Progress: Column 25/7122
Progress: Column 26/7122
Progress: Column 27/7122
Progress: Column 28/7122
Progress: Column 29/7122
Progress: Column 30/7122
Progress: Column 31/7122
Progress: Column 32/7122
Progress: Column 33/7122
Progress: Column 34/7122
Progress: Column 35/7122
Progress: Column 36/7122
Progress: Column 37/7122
Progress: Column 38/7122
Progress: Column 39/7122
Progress: Column 40/7122
Progress:

Progress: Column 322/7122
Progress: Column 323/7122
Progress: Column 324/7122
Progress: Column 325/7122
Progress: Column 326/7122
Progress: Column 327/7122
Progress: Column 328/7122
Progress: Column 329/7122
Progress: Column 330/7122
Progress: Column 331/7122
Progress: Column 332/7122
Progress: Column 333/7122
Progress: Column 334/7122
Progress: Column 335/7122
Progress: Column 336/7122
Progress: Column 337/7122
Progress: Column 338/7122
Progress: Column 339/7122
Progress: Column 340/7122
Progress: Column 341/7122
Progress: Column 342/7122
Progress: Column 343/7122
Progress: Column 344/7122
Progress: Column 345/7122
Progress: Column 346/7122
Progress: Column 347/7122
Progress: Column 348/7122
Progress: Column 349/7122
Progress: Column 350/7122
Progress: Column 351/7122
Progress: Column 352/7122
Progress: Column 353/7122
Progress: Column 354/7122
Progress: Column 355/7122
Progress: Column 356/7122
Progress: Column 357/7122
Progress: Column 358/7122
Progress: Column 359/7122
Progress: Co

Progress: Column 640/7122
Progress: Column 641/7122
Progress: Column 642/7122
Progress: Column 643/7122
Progress: Column 644/7122
Progress: Column 645/7122
Progress: Column 646/7122
Progress: Column 647/7122
Progress: Column 648/7122
Progress: Column 649/7122
Progress: Column 650/7122
Progress: Column 651/7122
Progress: Column 652/7122
Progress: Column 653/7122
Progress: Column 654/7122
Progress: Column 655/7122
Progress: Column 656/7122
Progress: Column 657/7122
Progress: Column 658/7122
Progress: Column 659/7122
Progress: Column 660/7122
Progress: Column 661/7122
Progress: Column 662/7122
Progress: Column 663/7122
Progress: Column 664/7122
Progress: Column 665/7122
Progress: Column 666/7122
Progress: Column 667/7122
Progress: Column 668/7122
Progress: Column 669/7122
Progress: Column 670/7122
Progress: Column 671/7122
Progress: Column 672/7122
Progress: Column 673/7122
Progress: Column 674/7122
Progress: Column 675/7122
Progress: Column 676/7122
Progress: Column 677/7122
Progress: Co

Progress: Column 959/7122
Progress: Column 960/7122
Progress: Column 961/7122
Progress: Column 962/7122
Progress: Column 963/7122
Progress: Column 964/7122
Progress: Column 965/7122
Progress: Column 966/7122
Progress: Column 967/7122
Progress: Column 968/7122
Progress: Column 969/7122
Progress: Column 970/7122
Progress: Column 971/7122
Progress: Column 972/7122
Progress: Column 973/7122
Progress: Column 974/7122
Progress: Column 975/7122
Progress: Column 976/7122
Progress: Column 977/7122
Progress: Column 978/7122
Progress: Column 979/7122
Progress: Column 980/7122
Progress: Column 981/7122
Progress: Column 982/7122
Progress: Column 983/7122
Progress: Column 984/7122
Progress: Column 985/7122
Progress: Column 986/7122
Progress: Column 987/7122
Progress: Column 988/7122
Progress: Column 989/7122
Progress: Column 990/7122
Progress: Column 991/7122
Progress: Column 992/7122
Progress: Column 993/7122
Progress: Column 994/7122
Progress: Column 995/7122
Progress: Column 996/7122
Progress: Co

Progress: Column 1265/7122
Progress: Column 1266/7122
Progress: Column 1267/7122
Progress: Column 1268/7122
Progress: Column 1269/7122
Progress: Column 1270/7122
Progress: Column 1271/7122
Progress: Column 1272/7122
Progress: Column 1273/7122
Progress: Column 1274/7122
Progress: Column 1275/7122
Progress: Column 1276/7122
Progress: Column 1277/7122
Progress: Column 1278/7122
Progress: Column 1279/7122
Progress: Column 1280/7122
Progress: Column 1281/7122
Progress: Column 1282/7122
Progress: Column 1283/7122
Progress: Column 1284/7122
Progress: Column 1285/7122
Progress: Column 1286/7122
Progress: Column 1287/7122
Progress: Column 1288/7122
Progress: Column 1289/7122
Progress: Column 1290/7122
Progress: Column 1291/7122
Progress: Column 1292/7122
Progress: Column 1293/7122
Progress: Column 1294/7122
Progress: Column 1295/7122
Progress: Column 1296/7122
Progress: Column 1297/7122
Progress: Column 1298/7122
Progress: Column 1299/7122
Progress: Column 1300/7122
Progress: Column 1301/7122
P

Progress: Column 1571/7122
Progress: Column 1572/7122
Progress: Column 1573/7122
Progress: Column 1574/7122
Progress: Column 1575/7122
Progress: Column 1576/7122
Progress: Column 1577/7122
Progress: Column 1578/7122
Progress: Column 1579/7122
Progress: Column 1580/7122
Progress: Column 1581/7122
Progress: Column 1582/7122
Progress: Column 1583/7122
Progress: Column 1584/7122
Progress: Column 1585/7122
Progress: Column 1586/7122
Progress: Column 1587/7122
Progress: Column 1588/7122
Progress: Column 1589/7122
Progress: Column 1590/7122
Progress: Column 1591/7122
Progress: Column 1592/7122
Progress: Column 1593/7122
Progress: Column 1594/7122
Progress: Column 1595/7122
Progress: Column 1596/7122
Progress: Column 1597/7122
Progress: Column 1598/7122
Progress: Column 1599/7122
Progress: Column 1600/7122
Progress: Column 1601/7122
Progress: Column 1602/7122
Progress: Column 1603/7122
Progress: Column 1604/7122
Progress: Column 1605/7122
Progress: Column 1606/7122
Progress: Column 1607/7122
P

Progress: Column 1876/7122
Progress: Column 1877/7122
Progress: Column 1878/7122
Progress: Column 1879/7122
Progress: Column 1880/7122
Progress: Column 1881/7122
Progress: Column 1882/7122
Progress: Column 1883/7122
Progress: Column 1884/7122
Progress: Column 1885/7122
Progress: Column 1886/7122
Progress: Column 1887/7122
Progress: Column 1888/7122
Progress: Column 1889/7122
Progress: Column 1890/7122
Progress: Column 1891/7122
Progress: Column 1892/7122
Progress: Column 1893/7122
Progress: Column 1894/7122
Progress: Column 1895/7122
Progress: Column 1896/7122
Progress: Column 1897/7122
Progress: Column 1898/7122
Progress: Column 1899/7122
Progress: Column 1900/7122
Progress: Column 1901/7122
Progress: Column 1902/7122
Progress: Column 1903/7122
Progress: Column 1904/7122
Progress: Column 1905/7122
Progress: Column 1906/7122
Progress: Column 1907/7122
Progress: Column 1908/7122
Progress: Column 1909/7122
Progress: Column 1910/7122
Progress: Column 1911/7122
Progress: Column 1912/7122
P

Progress: Column 2182/7122
Progress: Column 2183/7122
Progress: Column 2184/7122
Progress: Column 2185/7122
Progress: Column 2186/7122
Progress: Column 2187/7122
Progress: Column 2188/7122
Progress: Column 2189/7122
Progress: Column 2190/7122
Progress: Column 2191/7122
Progress: Column 2192/7122
Progress: Column 2193/7122
Progress: Column 2194/7122
Progress: Column 2195/7122
Progress: Column 2196/7122
Progress: Column 2197/7122
Progress: Column 2198/7122
Progress: Column 2199/7122
Progress: Column 2200/7122
Progress: Column 2201/7122
Progress: Column 2202/7122
Progress: Column 2203/7122
Progress: Column 2204/7122
Progress: Column 2205/7122
Progress: Column 2206/7122
Progress: Column 2207/7122
Progress: Column 2208/7122
Progress: Column 2209/7122
Progress: Column 2210/7122
Progress: Column 2211/7122
Progress: Column 2212/7122
Progress: Column 2213/7122
Progress: Column 2214/7122
Progress: Column 2215/7122
Progress: Column 2216/7122
Progress: Column 2217/7122
Progress: Column 2218/7122
P

Progress: Column 2487/7122
Progress: Column 2488/7122
Progress: Column 2489/7122
Progress: Column 2490/7122
Progress: Column 2491/7122
Progress: Column 2492/7122
Progress: Column 2493/7122
Progress: Column 2494/7122
Progress: Column 2495/7122
Progress: Column 2496/7122
Progress: Column 2497/7122
Progress: Column 2498/7122
Progress: Column 2499/7122
Progress: Column 2500/7122
Progress: Column 2501/7122
Progress: Column 2502/7122
Progress: Column 2503/7122
Progress: Column 2504/7122
Progress: Column 2505/7122
Progress: Column 2506/7122
Progress: Column 2507/7122
Progress: Column 2508/7122
Progress: Column 2509/7122
Progress: Column 2510/7122
Progress: Column 2511/7122
Progress: Column 2512/7122
Progress: Column 2513/7122
Progress: Column 2514/7122
Progress: Column 2515/7122
Progress: Column 2516/7122
Progress: Column 2517/7122
Progress: Column 2518/7122
Progress: Column 2519/7122
Progress: Column 2520/7122
Progress: Column 2521/7122
Progress: Column 2522/7122
Progress: Column 2523/7122
P

Progress: Column 2794/7122
Progress: Column 2795/7122
Progress: Column 2796/7122
Progress: Column 2797/7122
Progress: Column 2798/7122
Progress: Column 2799/7122
Progress: Column 2800/7122
Progress: Column 2801/7122
Progress: Column 2802/7122
Progress: Column 2803/7122
Progress: Column 2804/7122
Progress: Column 2805/7122
Progress: Column 2806/7122
Progress: Column 2807/7122
Progress: Column 2808/7122
Progress: Column 2809/7122
Progress: Column 2810/7122
Progress: Column 2811/7122
Progress: Column 2812/7122
Progress: Column 2813/7122
Progress: Column 2814/7122
Progress: Column 2815/7122
Progress: Column 2816/7122
Progress: Column 2817/7122
Progress: Column 2818/7122
Progress: Column 2819/7122
Progress: Column 2820/7122
Progress: Column 2821/7122
Progress: Column 2822/7122
Progress: Column 2823/7122
Progress: Column 2824/7122
Progress: Column 2825/7122
Progress: Column 2826/7122
Progress: Column 2827/7122
Progress: Column 2828/7122
Progress: Column 2829/7122
Progress: Column 2830/7122
P

Progress: Column 3101/7122
Progress: Column 3102/7122
Progress: Column 3103/7122
Progress: Column 3104/7122
Progress: Column 3105/7122
Progress: Column 3106/7122
Progress: Column 3107/7122
Progress: Column 3108/7122
Progress: Column 3109/7122
Progress: Column 3110/7122
Progress: Column 3111/7122
Progress: Column 3112/7122
Progress: Column 3113/7122
Progress: Column 3114/7122
Progress: Column 3115/7122
Progress: Column 3116/7122
Progress: Column 3117/7122
Progress: Column 3118/7122
Progress: Column 3119/7122
Progress: Column 3120/7122
Progress: Column 3121/7122
Progress: Column 3122/7122
Progress: Column 3123/7122
Progress: Column 3124/7122
Progress: Column 3125/7122
Progress: Column 3126/7122
Progress: Column 3127/7122
Progress: Column 3128/7122
Progress: Column 3129/7122
Progress: Column 3130/7122
Progress: Column 3131/7122
Progress: Column 3132/7122
Progress: Column 3133/7122
Progress: Column 3134/7122
Progress: Column 3135/7122
Progress: Column 3136/7122
Progress: Column 3137/7122
P

Progress: Column 3406/7122
Progress: Column 3407/7122
Progress: Column 3408/7122
Progress: Column 3409/7122
Progress: Column 3410/7122
Progress: Column 3411/7122
Progress: Column 3412/7122
Progress: Column 3413/7122
Progress: Column 3414/7122
Progress: Column 3415/7122
Progress: Column 3416/7122
Progress: Column 3417/7122
Progress: Column 3418/7122
Progress: Column 3419/7122
Progress: Column 3420/7122
Progress: Column 3421/7122
Progress: Column 3422/7122
Progress: Column 3423/7122
Progress: Column 3424/7122
Progress: Column 3425/7122
Progress: Column 3426/7122
Progress: Column 3427/7122
Progress: Column 3428/7122
Progress: Column 3429/7122
Progress: Column 3430/7122
Progress: Column 3431/7122
Progress: Column 3432/7122
Progress: Column 3433/7122
Progress: Column 3434/7122
Progress: Column 3435/7122
Progress: Column 3436/7122
Progress: Column 3437/7122
Progress: Column 3438/7122
Progress: Column 3439/7122
Progress: Column 3440/7122
Progress: Column 3441/7122
Progress: Column 3442/7122
P

Progress: Column 3712/7122
Progress: Column 3713/7122
Progress: Column 3714/7122
Progress: Column 3715/7122
Progress: Column 3716/7122
Progress: Column 3717/7122
Progress: Column 3718/7122
Progress: Column 3719/7122
Progress: Column 3720/7122
Progress: Column 3721/7122
Progress: Column 3722/7122
Progress: Column 3723/7122
Progress: Column 3724/7122
Progress: Column 3725/7122
Progress: Column 3726/7122
Progress: Column 3727/7122
Progress: Column 3728/7122
Progress: Column 3729/7122
Progress: Column 3730/7122
Progress: Column 3731/7122
Progress: Column 3732/7122
Progress: Column 3733/7122
Progress: Column 3734/7122
Progress: Column 3735/7122
Progress: Column 3736/7122
Progress: Column 3737/7122
Progress: Column 3738/7122
Progress: Column 3739/7122
Progress: Column 3740/7122
Progress: Column 3741/7122
Progress: Column 3742/7122
Progress: Column 3743/7122
Progress: Column 3744/7122
Progress: Column 3745/7122
Progress: Column 3746/7122
Progress: Column 3747/7122
Progress: Column 3748/7122
P

Progress: Column 4017/7122
Progress: Column 4018/7122
Progress: Column 4019/7122
Progress: Column 4020/7122
Progress: Column 4021/7122
Progress: Column 4022/7122
Progress: Column 4023/7122
Progress: Column 4024/7122
Progress: Column 4025/7122
Progress: Column 4026/7122
Progress: Column 4027/7122
Progress: Column 4028/7122
Progress: Column 4029/7122
Progress: Column 4030/7122
Progress: Column 4031/7122
Progress: Column 4032/7122
Progress: Column 4033/7122
Progress: Column 4034/7122
Progress: Column 4035/7122
Progress: Column 4036/7122
Progress: Column 4037/7122
Progress: Column 4038/7122
Progress: Column 4039/7122
Progress: Column 4040/7122
Progress: Column 4041/7122
Progress: Column 4042/7122
Progress: Column 4043/7122
Progress: Column 4044/7122
Progress: Column 4045/7122
Progress: Column 4046/7122
Progress: Column 4047/7122
Progress: Column 4048/7122
Progress: Column 4049/7122
Progress: Column 4050/7122
Progress: Column 4051/7122
Progress: Column 4052/7122
Progress: Column 4053/7122
P

Progress: Column 4323/7122
Progress: Column 4324/7122
Progress: Column 4325/7122
Progress: Column 4326/7122
Progress: Column 4327/7122
Progress: Column 4328/7122
Progress: Column 4329/7122
Progress: Column 4330/7122
Progress: Column 4331/7122
Progress: Column 4332/7122
Progress: Column 4333/7122
Progress: Column 4334/7122
Progress: Column 4335/7122
Progress: Column 4336/7122
Progress: Column 4337/7122
Progress: Column 4338/7122
Progress: Column 4339/7122
Progress: Column 4340/7122
Progress: Column 4341/7122
Progress: Column 4342/7122
Progress: Column 4343/7122
Progress: Column 4344/7122
Progress: Column 4345/7122
Progress: Column 4346/7122
Progress: Column 4347/7122
Progress: Column 4348/7122
Progress: Column 4349/7122
Progress: Column 4350/7122
Progress: Column 4351/7122
Progress: Column 4352/7122
Progress: Column 4353/7122
Progress: Column 4354/7122
Progress: Column 4355/7122
Progress: Column 4356/7122
Progress: Column 4357/7122
Progress: Column 4358/7122
Progress: Column 4359/7122
P

Progress: Column 4630/7122
Progress: Column 4631/7122
Progress: Column 4632/7122
Progress: Column 4633/7122
Progress: Column 4634/7122
Progress: Column 4635/7122
Progress: Column 4636/7122
Progress: Column 4637/7122
Progress: Column 4638/7122
Progress: Column 4639/7122
Progress: Column 4640/7122
Progress: Column 4641/7122
Progress: Column 4642/7122
Progress: Column 4643/7122
Progress: Column 4644/7122
Progress: Column 4645/7122
Progress: Column 4646/7122
Progress: Column 4647/7122
Progress: Column 4648/7122
Progress: Column 4649/7122
Progress: Column 4650/7122
Progress: Column 4651/7122
Progress: Column 4652/7122
Progress: Column 4653/7122
Progress: Column 4654/7122
Progress: Column 4655/7122
Progress: Column 4656/7122
Progress: Column 4657/7122
Progress: Column 4658/7122
Progress: Column 4659/7122
Progress: Column 4660/7122
Progress: Column 4661/7122
Progress: Column 4662/7122
Progress: Column 4663/7122
Progress: Column 4664/7122
Progress: Column 4665/7122
Progress: Column 4666/7122
P

Progress: Column 4938/7122
Progress: Column 4939/7122
Progress: Column 4940/7122
Progress: Column 4941/7122
Progress: Column 4942/7122
Progress: Column 4943/7122
Progress: Column 4944/7122
Progress: Column 4945/7122
Progress: Column 4946/7122
Progress: Column 4947/7122
Progress: Column 4948/7122
Progress: Column 4949/7122
Progress: Column 4950/7122
Progress: Column 4951/7122
Progress: Column 4952/7122
Progress: Column 4953/7122
Progress: Column 4954/7122
Progress: Column 4955/7122
Progress: Column 4956/7122
Progress: Column 4957/7122
Progress: Column 4958/7122
Progress: Column 4959/7122
Progress: Column 4960/7122
Progress: Column 4961/7122
Progress: Column 4962/7122
Progress: Column 4963/7122
Progress: Column 4964/7122
Progress: Column 4965/7122
Progress: Column 4966/7122
Progress: Column 4967/7122
Progress: Column 4968/7122
Progress: Column 4969/7122
Progress: Column 4970/7122
Progress: Column 4971/7122
Progress: Column 4972/7122
Progress: Column 4973/7122
Progress: Column 4974/7122
P

Progress: Column 5242/7122
Progress: Column 5243/7122
Progress: Column 5244/7122
Progress: Column 5245/7122
Progress: Column 5246/7122
Progress: Column 5247/7122
Progress: Column 5248/7122
Progress: Column 5249/7122
Progress: Column 5250/7122
Progress: Column 5251/7122
Progress: Column 5252/7122
Progress: Column 5253/7122
Progress: Column 5254/7122
Progress: Column 5255/7122
Progress: Column 5256/7122
Progress: Column 5257/7122
Progress: Column 5258/7122
Progress: Column 5259/7122
Progress: Column 5260/7122
Progress: Column 5261/7122
Progress: Column 5262/7122
Progress: Column 5263/7122
Progress: Column 5264/7122
Progress: Column 5265/7122
Progress: Column 5266/7122
Progress: Column 5267/7122
Progress: Column 5268/7122
Progress: Column 5269/7122
Progress: Column 5270/7122
Progress: Column 5271/7122
Progress: Column 5272/7122
Progress: Column 5273/7122
Progress: Column 5274/7122
Progress: Column 5275/7122
Progress: Column 5276/7122
Progress: Column 5277/7122
Progress: Column 5278/7122
P

Progress: Column 5549/7122
Progress: Column 5550/7122
Progress: Column 5551/7122
Progress: Column 5552/7122
Progress: Column 5553/7122
Progress: Column 5554/7122
Progress: Column 5555/7122
Progress: Column 5556/7122
Progress: Column 5557/7122
Progress: Column 5558/7122
Progress: Column 5559/7122
Progress: Column 5560/7122
Progress: Column 5561/7122
Progress: Column 5562/7122
Progress: Column 5563/7122
Progress: Column 5564/7122
Progress: Column 5565/7122
Progress: Column 5566/7122
Progress: Column 5567/7122
Progress: Column 5568/7122
Progress: Column 5569/7122
Progress: Column 5570/7122
Progress: Column 5571/7122
Progress: Column 5572/7122
Progress: Column 5573/7122
Progress: Column 5574/7122
Progress: Column 5575/7122
Progress: Column 5576/7122
Progress: Column 5577/7122
Progress: Column 5578/7122
Progress: Column 5579/7122
Progress: Column 5580/7122
Progress: Column 5581/7122
Progress: Column 5582/7122
Progress: Column 5583/7122
Progress: Column 5584/7122
Progress: Column 5585/7122
P

Progress: Column 5858/7122
Progress: Column 5859/7122
Progress: Column 5860/7122
Progress: Column 5861/7122
Progress: Column 5862/7122
Progress: Column 5863/7122
Progress: Column 5864/7122
Progress: Column 5865/7122
Progress: Column 5866/7122
Progress: Column 5867/7122
Progress: Column 5868/7122
Progress: Column 5869/7122
Progress: Column 5870/7122
Progress: Column 5871/7122
Progress: Column 5872/7122
Progress: Column 5873/7122
Progress: Column 5874/7122
Progress: Column 5875/7122
Progress: Column 5876/7122
Progress: Column 5877/7122
Progress: Column 5878/7122
Progress: Column 5879/7122
Progress: Column 5880/7122
Progress: Column 5881/7122
Progress: Column 5882/7122
Progress: Column 5883/7122
Progress: Column 5884/7122
Progress: Column 5885/7122
Progress: Column 5886/7122
Progress: Column 5887/7122
Progress: Column 5888/7122
Progress: Column 5889/7122
Progress: Column 5890/7122
Progress: Column 5891/7122
Progress: Column 5892/7122
Progress: Column 5893/7122
Progress: Column 5894/7122
P

Progress: Column 6164/7122
Progress: Column 6165/7122
Progress: Column 6166/7122
Progress: Column 6167/7122
Progress: Column 6168/7122
Progress: Column 6169/7122
Progress: Column 6170/7122
Progress: Column 6171/7122
Progress: Column 6172/7122
Progress: Column 6173/7122
Progress: Column 6174/7122
Progress: Column 6175/7122
Progress: Column 6176/7122
Progress: Column 6177/7122
Progress: Column 6178/7122
Progress: Column 6179/7122
Progress: Column 6180/7122
Progress: Column 6181/7122
Progress: Column 6182/7122
Progress: Column 6183/7122
Progress: Column 6184/7122
Progress: Column 6185/7122
Progress: Column 6186/7122
Progress: Column 6187/7122
Progress: Column 6188/7122
Progress: Column 6189/7122
Progress: Column 6190/7122
Progress: Column 6191/7122
Progress: Column 6192/7122
Progress: Column 6193/7122
Progress: Column 6194/7122
Progress: Column 6195/7122
Progress: Column 6196/7122
Progress: Column 6197/7122
Progress: Column 6198/7122
Progress: Column 6199/7122
Progress: Column 6200/7122
P

Progress: Column 6468/7122
Progress: Column 6469/7122
Progress: Column 6470/7122
Progress: Column 6471/7122
Progress: Column 6472/7122
Progress: Column 6473/7122
Progress: Column 6474/7122
Progress: Column 6475/7122
Progress: Column 6476/7122
Progress: Column 6477/7122
Progress: Column 6478/7122
Progress: Column 6479/7122
Progress: Column 6480/7122
Progress: Column 6481/7122
Progress: Column 6482/7122
Progress: Column 6483/7122
Progress: Column 6484/7122
Progress: Column 6485/7122
Progress: Column 6486/7122
Progress: Column 6487/7122
Progress: Column 6488/7122
Progress: Column 6489/7122
Progress: Column 6490/7122
Progress: Column 6491/7122
Progress: Column 6492/7122
Progress: Column 6493/7122
Progress: Column 6494/7122
Progress: Column 6495/7122
Progress: Column 6496/7122
Progress: Column 6497/7122
Progress: Column 6498/7122
Progress: Column 6499/7122
Progress: Column 6500/7122
Progress: Column 6501/7122
Progress: Column 6502/7122
Progress: Column 6503/7122
Progress: Column 6504/7122
P

Progress: Column 6776/7122
Progress: Column 6777/7122
Progress: Column 6778/7122
Progress: Column 6779/7122
Progress: Column 6780/7122
Progress: Column 6781/7122
Progress: Column 6782/7122
Progress: Column 6783/7122
Progress: Column 6784/7122
Progress: Column 6785/7122
Progress: Column 6786/7122
Progress: Column 6787/7122
Progress: Column 6788/7122
Progress: Column 6789/7122
Progress: Column 6790/7122
Progress: Column 6791/7122
Progress: Column 6792/7122
Progress: Column 6793/7122
Progress: Column 6794/7122
Progress: Column 6795/7122
Progress: Column 6796/7122
Progress: Column 6797/7122
Progress: Column 6798/7122
Progress: Column 6799/7122
Progress: Column 6800/7122
Progress: Column 6801/7122
Progress: Column 6802/7122
Progress: Column 6803/7122
Progress: Column 6804/7122
Progress: Column 6805/7122
Progress: Column 6806/7122
Progress: Column 6807/7122
Progress: Column 6808/7122
Progress: Column 6809/7122
Progress: Column 6810/7122
Progress: Column 6811/7122
Progress: Column 6812/7122
P

Progress: Column 7081/7122
Progress: Column 7082/7122
Progress: Column 7083/7122
Progress: Column 7084/7122
Progress: Column 7085/7122
Progress: Column 7086/7122
Progress: Column 7087/7122
Progress: Column 7088/7122
Progress: Column 7089/7122
Progress: Column 7090/7122
Progress: Column 7091/7122
Progress: Column 7092/7122
Progress: Column 7093/7122
Progress: Column 7094/7122
Progress: Column 7095/7122
Progress: Column 7096/7122
Progress: Column 7097/7122
Progress: Column 7098/7122
Progress: Column 7099/7122
Progress: Column 7100/7122
Progress: Column 7101/7122
Progress: Column 7102/7122
Progress: Column 7103/7122
Progress: Column 7104/7122
Progress: Column 7105/7122
Progress: Column 7106/7122
Progress: Column 7107/7122
Progress: Column 7108/7122
Progress: Column 7109/7122
Progress: Column 7110/7122
Progress: Column 7111/7122
Progress: Column 7112/7122
Progress: Column 7113/7122
Progress: Column 7114/7122
Progress: Column 7115/7122
Progress: Column 7116/7122
Progress: Column 7117/7122
P

In [22]:
columns_to_drop

['OG0001279_motif_2',
 'OG0001279_motif_4',
 'OG0001279_motif_5',
 'OG0001281_motif_1',
 'OG0001281_motif_5',
 'OG0001282_motif_1',
 'OG0001282_motif_2',
 'OG0001282_motif_4',
 'OG0001283_motif_2',
 'OG0001283_motif_3',
 'OG0001285_motif_2',
 'OG0001285_motif_5',
 'OG0001289_motif_3',
 'OG0001290_motif_1',
 'OG0001290_motif_2',
 'OG0001290_motif_3',
 'OG0001291_motif_1',
 'OG0001291_motif_2',
 'OG0001293_motif_3',
 'OG0001294_motif_2',
 'OG0001294_motif_4',
 'OG0001294_motif_5',
 'OG0001295_motif_2',
 'OG0001295_motif_4',
 'OG0001296_motif_2',
 'OG0001297_motif_3',
 'OG0001297_motif_4',
 'OG0001299_motif_4',
 'OG0001300_motif_3',
 'OG0001304_motif_3',
 'OG0001304_motif_4',
 'OG0001305_motif_1',
 'OG0001305_motif_2',
 'OG0001305_motif_3',
 'OG0001305_motif_4',
 'OG0001305_motif_5',
 'OG0001306_motif_1',
 'OG0001306_motif_2',
 'OG0001306_motif_4',
 'OG0001307_motif_1',
 'OG0001307_motif_3',
 'OG0001307_motif_4',
 'OG0001308_motif_3',
 'OG0001309_motif_1',
 'OG0001309_motif_2',
 'OG000130

In [23]:
# Print the number of columns marked for dropping
print(f"Number of columns marked for dropping: {len(columns_to_drop)}")

Number of columns marked for dropping: 1667


In [24]:
dimensions2 = df.shape
print("Dimensions of the DataFrame (rows, columns) after dropped:", dimensions2)

size2 = df.size
print("Size of the DataFrame (number of elements) after dropped:", size2)

Dimensions of the DataFrame (rows, columns) after dropped: (258, 5457)
Size of the DataFrame (number of elements) after dropped: 1407906


In [25]:
# Path to the text file where you want to store the column names
output_file = "feature_preprocessing_columns_to_drop_1.txt"

# Write the column names to the text file
with open(output_file, "w") as file:
    for column_name in columns_to_drop:
        file.write(f"{column_name}\n")

print("Column names marked for dropping have been saved to 'feature_preprocessing_columns_to_drop_1.txt' successfully.")

Column names marked for dropping have been saved to 'feature_preprocessing_columns_to_drop_1.txt' successfully.


### 11) Drop columns that contain identical motif features for all rows throughout the same column

In [26]:
# Find columns that start with 'OG' and contain '_motif_'
desired_columns = [column for column in df.columns if column.startswith('OG') and '_motif_' in column]

# Find feature columns that have the same values for all rows among the desired columns
same_value_columns = []
for column in desired_columns:
    # Convert the list values to tuples for checking uniqueness
    unique_values = df[column].apply(tuple)
    if unique_values.nunique() == 1:
        same_value_columns.append(column)

# Display the feature columns with the same values for all rows among the desired columns
print("Feature columns with no changes in 'OG' columns with '_motif_':", same_value_columns)

Feature columns with no changes in 'OG' columns with '_motif_': ['OG0001278_motif_1', 'OG0001315_motif_5', 'OG0001322_motif_1', 'OG0001323_motif_2', 'OG0001327_motif_2', 'OG0001328_motif_2', 'OG0001329_motif_1', 'OG0001329_motif_2', 'OG0001329_motif_3', 'OG0001330_motif_2', 'OG0001330_motif_4', 'OG0001332_motif_1', 'OG0001332_motif_2', 'OG0001333_motif_3', 'OG0001334_motif_1', 'OG0001334_motif_2', 'OG0001335_motif_3', 'OG0001335_motif_5', 'OG0001336_motif_2', 'OG0001336_motif_3', 'OG0001337_motif_1', 'OG0001338_motif_4', 'OG0001339_motif_1', 'OG0001339_motif_3', 'OG0001339_motif_5', 'OG0001340_motif_5', 'OG0001341_motif_1', 'OG0001342_motif_2', 'OG0001345_motif_4', 'OG0001345_motif_5', 'OG0001347_motif_4', 'OG0001348_motif_1', 'OG0001348_motif_3', 'OG0001351_motif_4', 'OG0001368_motif_2', 'OG0001368_motif_4', 'OG0001374_motif_4', 'OG0001385_motif_1', 'OG0001387_motif_3', 'OG0001387_motif_4', 'OG0001414_motif_2', 'OG0001417_motif_1', 'OG0001417_motif_2', 'OG0001422_motif_1', 'OG0001428_

In [27]:
# Print the number of columns marked for dropping
print(f"Number of columns marked for dropping: {len(same_value_columns)}")

Number of columns marked for dropping: 201


In [29]:
# Define the file name for the text file
output_file = 'feature_preprocessing_columns_to_drop_2.txt'

# Write the feature columns with no changes to the text file
with open(output_file, 'w') as file:
    for column in same_value_columns:
        file.write(column + '\n')
        
print("Column names marked for dropping have been saved to 'feature_preprocessing_columns_to_drop_2.txt' successfully.")

Column names marked for dropping have been saved to 'feature_preprocessing_columns_to_drop_2.txt' successfully.


In [30]:
# Drop the columns with no changes from the DataFrame
df.drop(columns=same_value_columns, inplace=True)

# Verify the columns have been dropped
print("Columns that contain identical motif features throughout the same column have been dropped from the DataFrame.")

Columns that contain identical motif features throughout the same column have been dropped from the DataFrame.


In [32]:
dimension3 = df.shape
print("Dimensions of the DataFrame (rows, columns) after dropped:", dimension3)

size3 = df.size
print("Size of the DataFrame (number of elements) after dropped:", size3)

Dimensions of the DataFrame (rows, columns) after dropped: (258, 5256)
Size of the DataFrame (number of elements) after dropped: 1356048


In [35]:
print("There are total of", len(columns_to_drop) + len(same_value_columns) , "columns been dropped." )

There are total of 1868 columns been dropped.


In [36]:
# Save the updated DataFrame to a new Excel file
new_output_excel = "C:/Users/jingj/Desktop/MASTER/Paper/Machine Learning/Feature Preprocessing/final_motifs.xlsx"
df.to_excel(new_output_excel, index=False)