In [1]:
%%capture
!pip install mljar-supervised;
!pip install gdown
!pip install imblearn



In [3]:

# Download both interpro and rast annotations for genomes for this to work. Stored in data/ Both files are in .gitignore
import os



# Check and download allgenomes.RAST.txt.ps.tsv file
if not os.path.exists("data/rast_features.tsv"):
    print("Downloading File data/rast_features.tsv")
    !gdown --no-check-certificate 1dDLqkjN0YGKa7mNDKhTmGSeZZWt1T3cl -O data/rast_features.tsv
else:
    print("File data/rast_features.tsv already exists")

# Check and download allgenomes.interpro.txt.ps.tsv file
if not os.path.exists("data/interpro_features.tsv"):
    print("Downloading File data/interpro_features.tsv")
    !gdown --no-check-certificate 13I7lBj02KEsA4OowW89h0zW8tLsmQrrb -O data/interpro_features.tsv
else:
    print("File data/interpro_features.tsv already exists")
    
    

File data/rast_features.tsv already exists
File data/interpro_features.tsv already exists


In [63]:
import uuid
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTEENN
from sklearn.metrics import classification_report, accuracy_score
import shap
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.feature_selection import VarianceThreshold

from sklearn.model_selection import train_test_split
from supervised.automl import AutoML
from IPython.display import display, HTML
import joblib



def get_X_y(feature_df, pheno_full_df, phenotype_name, variance_threshold):

   
    # Read phenotype data
    phenotype_df = pheno_full_df[phenotype].dropna()

    # Merge genotype and phenotype df 
    merged_df = feature_df.merge(phenotype_df, left_index=True, right_index=True, how='inner')

    # Remove all features small variance as they don't have much discriminatory power
    constant_filter = VarianceThreshold(threshold=variance_threshold)
    constant_filter.fit(merged_df)
    selected_columns_indices = constant_filter.get_support()
    selected_column_names = merged_df.columns[selected_columns_indices]
    df_filtered = merged_df[selected_column_names]


    # Get X and Y
    X = df_filtered.drop(columns=[phenotype])
    y = df_filtered[phenotype]
    
    return [X,y]


def generate_html_table(df: pd.DataFrame):
        """Display a pandas.DataFrame as jQuery DataTables"""

        # Generate random container name
        id_container = uuid.uuid1()
        output = """
    <div id="datatable-container-{id_container}">
      <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.7.0/jquery.min.js"></script>
      <script type="text/javascript" src="https://cdn.datatables.net/1.13.5/js/jquery.dataTables.min.js"></script>
      <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.13.5/css/jquery.dataTables.min.css"/>
      <script type="text/javascript">
        $(document).ready( function () {{
            $('#BGCtable').DataTable();
        }});
      </script>
      <!-- Insert table below -->
      {table}
    </div>
        """.format(
            id_container=id_container,
            table=df.to_html(
                index=False,
                table_id="classification",
                classes="display"
            ),
        )
        return output




def get_shap_plot(model, X_test, X, title, result_dir):
    # Initialize the SHAP explainer
    explainer = shap.TreeExplainer(model)

        # Calculate SHAP values for the test set
    shap_values = explainer.shap_values(X_test)

        # Get feature importance based on SHAP values
    mean_abs_shap = np.mean(np.abs(shap_values[1]), axis=0)
    feature_importance = pd.DataFrame({'feature': X.columns, 'importance': mean_abs_shap})
    feature_importance = feature_importance.sort_values(by='importance', ascending=False)

    # Select the top 100 features 
    top_features = feature_importance.head(20)[['feature', 'importance']]
    
    tsv_file =  result_dir + "/" + title + ".tsv"
    # Save the top_features DataFrame to a TSV file
    top_features.to_csv(tsv_file , sep='\t', index=False)

    # Concatenate the feature and importance columns into a single string separated by '#'
    top_features['combined'] = top_features.apply(lambda row: f"{row['feature']} // {row['importance']}", axis=1)

     # Extract the 'combined' column as a list of strings
    concatenated_features = top_features['combined'].tolist()

     # If you want a single string with all features separated by a space or newline:
    formatted_top_features = ' # '.join(concatenated_features)
  


    plt.switch_backend('Agg')
    
    # Plot SHAP summary plot for the top 20 features
    plt.figure(figsize=(20, 10))  # Adjust width (20) and height (10) as desired
    plt.title(title)
    shap.summary_plot(shap_values[1], X_test, max_display=20, plot_size=None)
    img_path = result_dir + "/" + title + ".png"
    plt.savefig(img_path)
    plt.close()

    return [formatted_top_features, img_path]



def get_shap_plot_catboost(model, X_test, X, title, result_dir):
    # Create explainer object
    explainer = shap.Explainer(model)
    
    # Calculate SHAP values for all instances in your dataset
    shap_values = explainer.shap_values(X)

    # Get feature importance based on SHAP values
    mean_abs_shap = np.mean(np.abs(shap_values), axis=0)
    feature_importance = pd.DataFrame({'feature': X.columns, 'importance': mean_abs_shap})
    feature_importance = feature_importance.sort_values(by='importance', ascending=False)

    # Select the top 100 features 
    top_features = feature_importance.head(20)[['feature', 'importance']]
    
    tsv_file =  result_dir + "/" + title + ".tsv"
    # Save the top_features DataFrame to a TSV file
    top_features.to_csv(tsv_file , sep='\t', index=False)
    
    # Concatenate the feature and importance columns into a single string separated by '#'
    top_features['combined'] = top_features.apply(lambda row: f"{row['feature']} // {row['importance'] }", axis=1)

     # Extract the 'combined' column as a list of strings
    concatenated_features = top_features['combined'].tolist()

     # If you want a single string with all features separated by a space or newline:
    formatted_top_features = ' # '.join(concatenated_features)
  


    plt.switch_backend('Agg')
    plt.figure(figsize=(20, 10))  # Adjust width (20) and height (10) as desired
    plt.title(title)
    shap.summary_plot(shap_values, X, max_display=20, plot_size=None)
    img_path = result_dir + "/" + title + ".png"
    plt.savefig(img_path)
    plt.close()

    return formatted_top_features, img_path

    
    
def get_classifier_report(X, y, phenotype_name, result_dir):

    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    import xgboost as xgb
    import lightgbm as lgb
    import catboost as cb
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, balanced_accuracy_score, confusion_matrix
    from sklearn.feature_selection import SelectKBest, chi2
    import pandas as pd



    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1176)
    
    # Handle class imbalance using a combination of over-sampling and under-sampling techniques

    # Over-sampling the minority class
    ros = RandomOverSampler(sampling_strategy=0.75, random_state=1176)
    X_train_resampled, y_train_resampled = ros.fit_resample(X_train, y_train)

    # Under-sampling the majority class
    rus = RandomUnderSampler(sampling_strategy=1.0, random_state=1176)
    X_train_resampled, y_train_resampled = rus.fit_resample(X_train_resampled, y_train_resampled)


    # Dictionary to store the classifiers and their parameters
    classifiers = {
        'Decision Tree': {
            'model': DecisionTreeClassifier(),
            'params': {}
        },
        'Random Forest': {
            'model': RandomForestClassifier(),
            'params': {'n_estimators': 1000, 'max_depth': None, 'random_state': 100}
        }, 

        'CatBoost': {
            'model': cb.CatBoostClassifier(),
            'params': {'iterations': 1000, 'depth': 6, 'learning_rate': 0.1, 'random_state': 42, 'verbose': False}
        }

    }

    html_table_rows = []

    img_paths = []

    # Train and evaluate each classifier
    for clf_name, clf_data in classifiers.items():
        print ("Running " + clf_name + " for phenotype " + phenotype_name)
        model = clf_data['model']
        params = clf_data['params']

        # Train the classifier
        model.set_params(**params)
        model.fit(X_train_resampled, y_train_resampled)

        # Make predictions on the test set
        y_pred = model.predict(X_test)



        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
        
        confusion_matrix_info = confusion_matrix(y_test, y_pred)
        
        # Extracting values
        TP = confusion_matrix_info[0, 0]
        FP = confusion_matrix_info[0, 1]
        FN = confusion_matrix_info[1, 0]
        TN = confusion_matrix_info[1, 1]
        
        confusion_matrix_data = "TP=" + str(TP) + " TN=" + str(TN) + " FP=" + str(FP) + "FN=" + str(FN)
   
        title = clf_name + "___" + phenotype_name
        
        # Print shap
        if clf_name == "CatBoost":
            top_features, img_path = get_shap_plot_catboost(model, X_test, X, title, result_dir)
            img_paths.append(img_path)
        else:
            top_features, img_path = get_shap_plot (model, X_test, X, title, result_dir)
            img_paths.append(img_path)

        # Append row to the HTML table
        html_table_rows.append([clf_name, accuracy, precision, recall, f1, balanced_accuracy, confusion_matrix_data, top_features])
        
        

            
            

    # Create a DataFrame to display the results
    headers = ["Model", "Accuracy", "Precision", "Recall", "F1-score", "Balanced Accuracy", "Confusion Matrix", "Top features"]
    report_df = pd.DataFrame(html_table_rows, columns=headers)
    fname = result_dir + "/" + phenotype_name + "_complete.tsv"
    report_df.to_csv(fname , sep='\t', index=False)
    report_html_content = generate_html_table (report_df)
    print ("######### Combined report for " + phenotype_name + "#####################")
    display(HTML(report_html_content))
    html_img = ""
    for img_path in img_paths:
        html_img += "<img src='" + img_path + "'</></br>"
        
    display(HTML(html_img))

    



In [None]:

result_dir = "ml_results/metabolic_phenotypes"

VARIANCE_THRESHOLD = 0.0001

# Read rast features
rast_annotated_df = pd.read_csv("data/rast_features.tsv", sep = "\t", index_col=0)
# Print some stats about data
# Cleaning function
#def clean_column_name(col_name):
#    return ''.join(filter(str.isalnum, col_name.replace("[", "").replace("]", "").replace("<", "")))

# Cleaning function
def clean_column_name(col_name):
    for char in [",", "[", "]", "<"]:
        col_name = col_name.replace(char, "")
    return col_name


# Apply the cleaning function to each column name
rast_annotated_df.columns = [clean_column_name(col) for col in rast_annotated_df.columns]




metabolic_phenotypes_df = pd.read_csv("data/metabolic_phenotype_data_bacdive.tsv", sep = "\t", index_col=0)  


for phenotype in metabolic_phenotypes_df:
    # All phenotype names have -- in them
    if "--" not in phenotype:
        continue
    

    try:
        X, y = get_X_y(rast_annotated_df, metabolic_phenotypes_df, phenotype,VARIANCE_THRESHOLD )

        num_genome, num_features = X.shape
        display(HTML("<H2> Phenotype: " + phenotype + "</h2>"))
        print ("Numer of genomes:" + str(num_genome) + "\n" + "Number of genomic features :" + str(num_features)) 
        print ("Shape of y:" + str(y.shape))
        count_1 = sum(y==1)
        count_0 = sum(y==0)
        print ("Count of 1: " + str(count_1) + "\n" + "Count of 0: " + str(count_0))
    
        get_classifier_report(X,y, phenotype, result_dir)
        

    except Exception as e:
        print (e)


Numer of genomes:1147
Number of genomic features :4307
Shape of y:(1147,)
Count of 1: 345
Count of 0: 802
Running Decision Tree for phenotype raffinose--builds_acid_from




Running Random Forest for phenotype raffinose--builds_acid_from




Running CatBoost for phenotype raffinose--builds_acid_from




######### Combined report for raffinose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.773913,0.775745,0.773913,0.774768,0.739038,TP=132 TN=46 FP=27FN=25,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.23192756389063623 # SSO:000025220__Sucrose phosphorylase (EC 2.4.1.7) // 0.08728228711266396 # SSO:000000046__1-deoxy-D-xylulose 5-phosphate synthase (EC 2.2.1.7) // 0.05364121217942982 # SSO:000000457__6-phosphogluconolactonase (EC 3.1.1.31) // 0.04481701268865823 # SSO:000020865__Outer membrane beta-barrel protein // 0.042374181345145 # SSO:000020331__NADH-dependent flavin oxidoreductase // 0.024599103135447838 # SSO:000021899__Polysaccharide deacetylase family protein // 0.023610976508424246 # SSO:000016824__Flavin reductase (EC 1.5.1.30) // 0.023333185446932955 # SSO:000011913__Capsule biosynthesis protein // 0.01504326497890673 # SSO:000039438__TetR family transcriptional regulator // 0.014003318672627295 # SSO:000001873__Cytidine deaminase (EC 3.5.4.5) // 0.013869383911697026 # SSO:000006532__Protein-L-isoaspartate O-methyltransferase (EC 2.1.1.77) // 0.013445226999222206 # SSO:000012338__Cold shock protein CspA // 0.012943915072613608 # SSO:000004053__Isocitrate lyase (EC 4.1.3.1) // 0.012599559140843389 # SSO:000018641__Isochorismatase family protein // 0.011481355982792563 # SSO:000012915__Transcriptional regulator // 0.010621550090748246 # SSO:000007904__Tagatose-6-phosphate kinase (EC 2.7.1.144) // 0.010580104413325735 # SSO:000009323__tRNA (Adenine-N(1)-) -methyltransferase (EC 2.1.1.36) // 0.009846420423327765 # SSO:000035492__Dihydromonapterin reductase (EC 1.5.1.50) // 0.009778692431629528 # SSO:000005809__Peptidyl-prolyl cis-trans isomerase (EC 5.2.1.8) // 0.009588276417140702
Random Forest,0.847826,0.844731,0.847826,0.843981,0.800292,TP=147 TN=48 FP=12FN=23,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.025175984732724574 # SSO:000025220__Sucrose phosphorylase (EC 2.4.1.7) // 0.013308884581556803 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.010194701313652398 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.00808525131558455 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.006326663591750418 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.00553342949256372 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.00514280631848541 # SSO:000003484__Homoserine O-succinyltransferase (EC 2.3.1.46) // 0.004721972777709712 # SSO:000029063__alpha-L-arabinofuranosidase (EC 3.2.1.55) // 0.004083805252744551 # SSO:000029073__Xylose isomerase // 0.0035566900676903767 # SSO:000006902__RNA polymerase sigma factor RpoD // 0.003493120110567231 # SSO:000035962__Formate-dependent phosphoribosylglycinamide formyltransferase // 0.0034342098665392806 # SSO:000021269__Patatin family protein // 0.0033988628021751725 # SSO:000007796__Sucrose-6-phosphate hydrolase (EC 3.2.1.26) // 0.0031599333548712077 # SSO:000029506__carbohydrate kinase // 0.0031461479931170393 # SSO:000013154__Dipeptidase // 0.002826500756943634 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.0027299665234155274 # SSO:000002280__Diaminopimelate decarboxylase (EC 4.1.1.20) // 0.002707857305489908 # SSO:000043843__glycine---tRNA ligase (EC 6.1.1.14) // 0.0026780048712095215 # SSO:000001260__Branched-chain amino acid aminotransferase (EC 2.6.1.42) // 0.002628977558453431
CatBoost,0.852174,0.853432,0.852174,0.852733,0.83072,TP=141 TN=55 FP=18FN=16,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 1.8241506877498335 # SSO:000025220__Sucrose phosphorylase (EC 2.4.1.7) // 0.8025861207958099 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.27849329870631495 # SSO:000016742__Fic family protein // 0.19883796684043614 # SSO:000001260__Branched-chain amino acid aminotransferase (EC 2.6.1.42) // 0.19494264209227186 # SSO:000005953__Phosphoenolpyruvate carboxykinase ATP (EC 4.1.1.49) // 0.18934806849616875 # SSO:000021269__Patatin family protein // 0.18650522630058244 # SSO:000021499__Phage capsid protein // 0.1691708634843609 # SSO:000013400__Endonuclease/exonuclease/phosphatase family protein // 0.14472860028467038 # SSO:000001940__Cytochrome d ubiquinol oxidase subunit II (EC 1.10.3.-) // 0.13904568315851773 # SSO:000012746__DNA binding protein // 0.1163756341194608 # SSO:000011933__Carbohydrate-binding domain containing protein // 0.10838202023156553 # SSO:000012966__DUF1275 domain-containing protein // 0.09088952846453587 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.08892623436856902 # SSO:000003007__Galactonate dehydratase (EC 4.2.1.6) // 0.07553721909357757 # SSO:000012533__Cyclase family protein // 0.07450360293161502 # SSO:000010953__BCCT family transporter // 0.0722318893427168 # SSO:000003484__Homoserine O-succinyltransferase (EC 2.3.1.46) // 0.07078404392692529 # SSO:000009095__dTDP-glucose 46-dehydratase (EC 4.2.1.46) // 0.07029868874122282 # SSO:000002280__Diaminopimelate decarboxylase (EC 4.1.1.20) // 0.0637100061102421


Numer of genomes:1026
Number of genomic features :4123
Shape of y:(1026,)
Count of 1: 302
Count of 0: 724
Running Decision Tree for phenotype melibiose--builds_acid_from




Running Random Forest for phenotype melibiose--builds_acid_from




Running CatBoost for phenotype melibiose--builds_acid_from




######### Combined report for melibiose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.73301,0.747778,0.73301,0.739005,0.684949,TP=119 TN=32 FP=32FN=23,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.25234465168625403 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.06206604179030562 # SSO:000020878__Outer membrane lipoprotein carrier protein LolA // 0.05182863157415873 # SSO:000035962__Formate-dependent phosphoribosylglycinamide formyltransferase // 0.039800211476887 # SSO:000019412__Membrane dipeptidase // 0.03261781514313794 # SSO:000000046__1-deoxy-D-xylulose 5-phosphate synthase (EC 2.2.1.7) // 0.02762279099428287 # SSO:000002702__Ferredoxin (EC 1.18.1.3 ) // 0.026552368203881165 # SSO:000004381__Lactate oxidase (EC 1.13.12.-) // 0.024981494931420323 # SSO:000012248__Citrate transporter // 0.023550904640881748 # SSO:000012742__DNA adenine methylase (EC 2.1.1.72) // 0.019609387488753804 # SSO:000000819__Adenylate cyclase (EC 4.6.1.1) // 0.019505955157617848 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.019157791420758474 # SSO:000006048__Phosphoribulokinase (EC 2.7.1.19) // 0.01773147522826388 # SSO:000020655__Nucleoside deoxyribosyltransferase (EC 2.4.2.6) // 0.01610396655618724 # SSO:000021274__Patatin-like phospholipase family protein // 0.01264558181710175 # SSO:000041594__Undecaprenyl-phosphate glucose phosphotransferase (EC 2.7.8.31) // 0.012471083417525156 # SSO:000000167__2-iminoacetate synthase (ThiH) (EC 4.1.99.19) // 0.011699681033792084 # SSO:000006808__Pyridoxal kinase (EC 2.7.1.35) // 0.010786015947724917 # SSO:000025520__TerC family protein // 0.009881946031665086 # SSO:000021669__Phospholipase // 0.009434502910280628
Random Forest,0.830097,0.823115,0.830097,0.823644,0.751174,TP=139 TN=32 FP=12FN=23,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.023129385386354735 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.009545664488483799 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.007958958515425987 # SSO:000025220__Sucrose phosphorylase (EC 2.4.1.7) // 0.0075499543263786726 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.0073056428050014815 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.00696839292716641 # SSO:000029073__Xylose isomerase // 0.0050814258029185716 # SSO:000000046__1-deoxy-D-xylulose 5-phosphate synthase (EC 2.2.1.7) // 0.004954602390555219 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.0038796333910038213 # SSO:000033834__xylulokinase // 0.0037262177818022015 # SSO:000013618__FAD-binding protein // 0.0036743468628307047 # SSO:000003484__Homoserine O-succinyltransferase (EC 2.3.1.46) // 0.00331689038550657 # SSO:000033511__thiolase family protein // 0.0030702400535635465 # SSO:000000457__6-phosphogluconolactonase (EC 3.1.1.31) // 0.0027662250159246862 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.0026057092508219313 # SSO:000043945__histidinol-phosphate transaminase (EC 2.6.1.9) // 0.002541755483113068 # SSO:000009961__ADP-ribosylglycohydrolase family protein // 0.0024940582430788135 # SSO:000001398__Carbamate kinase (EC 2.7.2.2) // 0.0024755393414311526 # SSO:000001193__Beta-phosphoglucomutase (EC 5.4.2.6) // 0.0023011922913169875 # SSO:000025627__Thioredoxin domain-containing protein // 0.0022927275991106166
CatBoost,0.805825,0.813392,0.805825,0.808879,0.769296,TP=128 TN=38 FP=23FN=17,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 2.0914469270592226 # SSO:000025220__Sucrose phosphorylase (EC 2.4.1.7) // 0.5577537906683763 # SSO:000029373__anion permease // 0.27845314299473223 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.2557259510486698 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.15197756395513892 # SSO:000000442__6-carboxytetrahydropterin synthase (EC 4.1.2.50) // 0.1518646672664491 # SSO:000010176__Acetyltransferase // 0.1490831295018361 # SSO:000012948__DNA/RNA helicase // 0.14232131283198254 # SSO:000000046__1-deoxy-D-xylulose 5-phosphate synthase (EC 2.2.1.7) // 0.14178620063892236 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.13004157498029156 # SSO:000009095__dTDP-glucose 46-dehydratase (EC 4.2.1.46) // 0.12678952406635904 # SSO:000021770__Phytoene desaturase (EC 1.14.99.-) // 0.12433208208688609 # SSO:000019412__Membrane dipeptidase // 0.1236775060975028 # SSO:000016774__Fis family transcriptional regulator // 0.09826190256672894 # SSO:000029307__aldolase // 0.09470627824077543 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.09231191028486424 # SSO:000021510__Phage holin // 0.08374103542316093 # SSO:000009126__glutamine amidotransferase (EC 4.1.3.27 ) // 0.08051141136956962 # SSO:000019509__Metal-dependent hydrolase // 0.07986650714969883 # SSO:000004381__Lactate oxidase (EC 1.13.12.-) // 0.07799300652017542


Numer of genomes:157
Number of genomic features :3198
Shape of y:(157,)
Count of 1: 76
Count of 0: 81
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:1527
Number of genomic features :4802
Shape of y:(1527,)
Count of 1: 1390
Count of 0: 137
Running Decision Tree for phenotype maltose--carbon_source




Running Random Forest for phenotype maltose--carbon_source




Running CatBoost for phenotype maltose--carbon_source




######### Combined report for maltose--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.826797,0.848377,0.826797,0.837,0.564733,TP=7 TN=246 FP=22FN=31,SSO:000029792__cytochrome c-1 // 0.09832954953225805 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.07076165328322352 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.04552718021588602 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.04196489336904673 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.02970687995506402 # SSO:000013181__Disulfide oxidoreductase // 0.028450756391825795 # SSO:000003439__Histidine ammonia-lyase (EC 4.3.1.3) // 0.027021865495407527 # SSO:000043066__hypoxanthine phosphoribosyltransferase (EC 2.4.2.8) // 0.025626347323592997 # SSO:000017356__Glycogen synthase (EC 2.4.1.11) // 0.02170391815388504 # SSO:000001044__Arginine decarboxylase (EC 4.1.1.19) // 0.019262779106383573 # SSO:000017833__Histone H1 // 0.019128342103022833 # SSO:000025241__Sugar kinase // 0.018437923849067853 # SSO:000010798__Arginine repressor // 0.01704297993097442 # SSO:000020884__Outer membrane lipoprotein-sorting protein // 0.01690373296088096 # SSO:000003262__Glycerophosphoryl diester phosphodiesterase (EC 3.1.4.46) // 0.01626833857657743 # SSO:000024292__SAF domain-containing protein // 0.014293397528051302 # SSO:000020339__NADH/Ubiquinone/plastoquinone (complex I) // 0.013130529661950755 # SSO:000044149__5-guanidino-2-oxopentanoate decarboxylase (EC 4.1.1.75) // 0.013125037727326016 # SSO:000011951__Carboxylate-amine ligase // 0.012152475962843945 # SSO:000043015__UDP-N-acetylmuramate dehydrogenase (EC 1.1.1.158) // 0.011486852552336916
Random Forest,0.898693,0.870821,0.898693,0.879198,0.573572,TP=5 TN=270 FP=24FN=7,SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.006606802590379239 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.0055479669631457494 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.004564668190637888 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.004481998384087697 # SSO:000029792__cytochrome c-1 // 0.004372939675760656 # SSO:000043066__hypoxanthine phosphoribosyltransferase (EC 2.4.2.8) // 0.003937394270378021 # SSO:000017841__Histone deacetylase family protein // 0.0038402313977424223 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.003756916268672425 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.003726825346584374 # SSO:000009756__Membrane protein // 0.00360460066679448 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.003579319599212273 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.0033877982983257326 # SSO:000010833__Arylesterase // 0.003384793813735012 # SSO:000004190__LPS export ABC transporter permease LptG // 0.002737529544145875 # SSO:000000563__ATP-dependent Clp protease ATP-binding subunit ClpA // 0.002670712520996505 # SSO:000019560__Methionine biosynthesis protein MetW // 0.002666308076992092 # SSO:000022980__PspC domain-containing protein // 0.0026658559488477846 # SSO:000043061__asparagine---tRNA ligase (EC 6.1.1.22) // 0.0025863003793387468 # SSO:000003114__Glucosamine-6-phosphate deaminase (EC 3.5.99.6) // 0.0025251140592336984 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.002522845706183684
CatBoost,0.905229,0.889751,0.905229,0.895061,0.638927,TP=9 TN=268 FP=20FN=9,SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.40160371993008703 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.3839723944741685 # SSO:000009584__2OG-Fe(II) oxygenase // 0.2788670030444492 # SSO:000010141__AbrB family transcriptional regulator // 0.27452749820999744 # SSO:000009756__Membrane protein // 0.2687562229828318 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.26667843492160304 # SSO:000005809__Peptidyl-prolyl cis-trans isomerase (EC 5.2.1.8) // 0.26652511436109305 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.23485755822365026 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.19514066199102723 # SSO:000010149__Acetamidase/formamidase family protein // 0.1898143174105281 # SSO:000043066__hypoxanthine phosphoribosyltransferase (EC 2.4.2.8) // 0.1796926189087046 # SSO:000007177__Rod shape-determining protein MreB // 0.17881443166831268 # SSO:000007179__Rod shape-determining protein MreD // 0.1695118055349852 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.15630208430537432 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.13801014815789595 # SSO:000025086__Squalene---hopene cyclase (EC 5.4.99.17) // 0.1314022402078655 # SSO:000039286__Sugar O-acetyltransferase // 0.12580080154458825 # SSO:000000824__Adenylosuccinate synthetase (EC 6.3.4.4) // 0.1239742141871131 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.12157193921304969 # SSO:000025475__Tail-specific protease (EC 3.4.21.-) // 0.11548198807836987


Numer of genomes:320
Number of genomic features :3569
Shape of y:(320,)
Count of 1: 207
Count of 0: 113
Running Decision Tree for phenotype D-galactose--carbon_source




Running Random Forest for phenotype D-galactose--carbon_source




Running CatBoost for phenotype D-galactose--carbon_source




######### Combined report for D-galactose--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.640625,0.654297,0.640625,0.646109,0.610742,TP=11 TN=30 FP=10FN=13,SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.18306695389412697 # SSO:000006841__Pyruvate carboxylase (EC 6.4.1.1) // 0.12433188938832229 # SSO:000009749__Adenosine kinase (EC 2.7.1.20) // 0.05876471556400863 # SSO:000012878__P-loop NTPase // 0.03945672634840171 # SSO:000023742__Pyridoxal phosphate-dependent aminotransferase // 0.03720586128577919 # SSO:000005516__Ornithine decarboxylase (EC 4.1.1.17) // 0.036722539901067885 # SSO:000008674__UTP--glucose-1-phosphate uridylyltransferase (EC 2.7.7.9) // 0.035368124450098 # SSO:000005916__Phosphate acetyltransferase (EC 2.3.1.8) // 0.03158096144257241 # SSO:000031442__phosphotriesterase // 0.02897941977184328 # SSO:000033351__sodium:proline symporter // 0.023554814378402116 # SSO:000009609__3-dehydroquinate dehydratase // 0.020730012536408427 # SSO:000012783__DNA methylase // 0.02046429222272486 # SSO:000023784__Quercetin 23-dioxygenase (EC 1.13.11.24) // 0.018930015041353315 # SSO:000024897__Small acid-soluble spore protein K // 0.018163047552757453 # SSO:000009528__2-dehydro-3-deoxygalactonokinase (EC 2.7.1.58) // 0.017283663505517737 # SSO:000012461__Copper chaperone // 0.01702147969696653 # SSO:000043419__Cobalamin biosynthesis protein CbiG // 0.013336712897508946 # SSO:000000720__Acetylglutamate kinase (EC 2.7.2.8) // 0.010471135724035751 # SSO:000017237__Glucosylglycerol-phosphate synthase (EC 2.4.1.213) // 0.010070994739830239 # SSO:000002067__DNA gyrase subunit A (EC 5.99.1.3) // 0.009531825708525805
Random Forest,0.6875,0.6875,0.6875,0.6875,0.645626,TP=11 TN=33 FP=10FN=10,SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.008461878131408971 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.007157958568300432 # SSO:000004706__Mandelate racemase (EC 5.1.2.2) // 0.005129198202670674 # SSO:000033834__xylulokinase // 0.00461437230138257 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.0041383176263958665 # SSO:000029073__Xylose isomerase // 0.004108094115436707 # SSO:000025241__Sugar kinase // 0.0035256781734778935 # SSO:000003124__Glucose-1-phosphate adenylyltransferase (EC 2.7.7.27) // 0.0033737912829999352 # SSO:000001873__Cytidine deaminase (EC 3.5.4.5) // 0.003121657632149694 # SSO:000009126__glutamine amidotransferase (EC 4.1.3.27 ) // 0.002984266266337561 # SSO:000029506__carbohydrate kinase // 0.0028475611466105507 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.0027901359517089374 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.0026113135468018964 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.0023459130812460744 # SSO:000021298__Penicillin-binding protein // 0.0022428145030251507 # SSO:000034571__Aldose 1-epimerase family protein // 0.0021494542500556193 # SSO:000000811__Adenosine deaminase (EC 3.5.4.4) // 0.002142798712786275 # SSO:000005945__Phosphatidylserine decarboxylase (EC 4.1.1.65) // 0.0021402964101388516 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.0020563299387872914 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.002054106035633243
CatBoost,0.765625,0.776172,0.765625,0.769202,0.752492,TP=15 TN=34 FP=6FN=9,SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.5220421930373813 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.4052680567402239 # SSO:000001823__Cyanophycinase (EC 3.4.15.6) // 0.39185301288600727 # SSO:000024083__Restriction endonuclease // 0.24964163312113893 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.2032490339052734 # SSO:000004706__Mandelate racemase (EC 5.1.2.2) // 0.19270665028627432 # SSO:000033834__xylulokinase // 0.16544000682885218 # SSO:000010483__Alpha-amylase family protein // 0.1582210771447607 # SSO:000009126__glutamine amidotransferase (EC 4.1.3.27 ) // 0.14234731417274005 # SSO:000005432__Nucleotidyltransferase (EC 2.7.7.-) // 0.13432266650946864 # SSO:000011951__Carboxylate-amine ligase // 0.1254641160083229 # SSO:000000811__Adenosine deaminase (EC 3.5.4.4) // 0.11978524926015861 # SSO:000020878__Outer membrane lipoprotein carrier protein LolA // 0.10990871006347849 # SSO:000012533__Cyclase family protein // 0.10535282402484096 # SSO:000033631__transglutaminase domain-containing protein // 0.10263570645262983 # SSO:000022788__Protein kinase // 0.10116287686048131 # SSO:000024925__Sodium-dependent transporter // 0.10023683664552119 # SSO:000025239__Sugar isomerase // 0.09251896210166699 # SSO:000016969__GGDEF domain containing protein // 0.09119667698513743 # SSO:000000886__Alkaline phosphatase (EC 3.1.3.1) // 0.09022933674630845


Numer of genomes:342
Number of genomic features :3591
Shape of y:(342,)
Count of 1: 232
Count of 0: 110
Running Decision Tree for phenotype D-mannose--carbon_source




Running Random Forest for phenotype D-mannose--carbon_source




Running CatBoost for phenotype D-mannose--carbon_source




######### Combined report for D-mannose--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.608696,0.681259,0.608696,0.630836,0.591503,TP=10 TN=32 FP=8FN=19,SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.1949372458840754 # SSO:000012031__Cell division protein // 0.058721737626343605 # SSO:000025231__Sugar binding protein // 0.05763803570808403 # SSO:000023536__Putative porin // 0.049988342473070706 # SSO:000000212__2-phosphosulfolactate phosphatase (EC 3.1.3.71 ) // 0.0486695098636426 # SSO:000021090__PKD domain-containing protein // 0.04652376644832912 # SSO:000029894__ethanolamine utilization protein EutH // 0.042166800151573265 # SSO:000012222__Chromosome partitioning protein ParA // 0.029450528705840535 # SSO:000002283__Diaminopropionate ammonia-lyase (EC 4.3.1.15) // 0.02899722053596862 # SSO:000008399__Tryptophanase (EC 4.1.99.1) // 0.02664325774606851 # SSO:000044172__hydroxyisourate hydrolase (EC 3.5.2.17) // 0.025280764680023958 # SSO:000029974__gamma-glutamyltransferase( EC:2.3.2.2 ) // 0.024097147615366975 # SSO:000010090__ATPase // 0.02257439677427601 # SSO:000010855__Asparaginase // 0.022450726551115373 # SSO:000001419__Carbonic anhydrase (EC 4.2.1.1) // 0.021219914630786166 # SSO:000037085__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit D // 0.02060451920802564 # SSO:000004605__Lysophospholipase (EC 3.1.1.5) // 0.02031885780679812 # SSO:000010272__Acyl-CoA thioesterase // 0.019644396668531867 # SSO:000023840__RNA polymerase sigma factor SigX // 0.016875283050498117 # SSO:000042475__5-methyltetrahydropteroyltriglutamate---homocysteine S-methyltransferase (EC 2.1.1.14) // 0.014833016265515298
Random Forest,0.724638,0.729657,0.724638,0.726996,0.651961,TP=9 TN=41 FP=9FN=10,SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.009679018038871256 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.009330216507566656 # SSO:000029073__Xylose isomerase // 0.006577081832132325 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.006535702019824102 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.005840173606801866 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.005808898203692958 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.005518288542088626 # SSO:000000811__Adenosine deaminase (EC 3.5.4.4) // 0.005390130764725088 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.00538731761254914 # SSO:000013154__Dipeptidase // 0.004998997200510795 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.00496838695035628 # SSO:000025241__Sugar kinase // 0.004905824553815252 # SSO:000027090__beta-glucosidase (EC 3.2.1.21) // 0.004733370688972535 # SSO:000033834__xylulokinase // 0.003948401657805844 # SSO:000007416__Septum formation protein Maf // 0.0037086695272739952 # SSO:000006580__Purine nucleoside phosphorylase (EC 2.4.2.1) // 0.0036737588734537733 # SSO:000005313__Nicotinate phosphoribosyltransferase (EC 6.3.4.21) // 0.0036056578479478165 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.003574014924466225 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.003535655333291825 # SSO:000019560__Methionine biosynthesis protein MetW // 0.003476735464383149
CatBoost,0.695652,0.701144,0.695652,0.698258,0.614379,TP=8 TN=40 FP=10FN=11,SSO:000013154__Dipeptidase // 0.7882256330482238 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.3944998239747775 # SSO:000000811__Adenosine deaminase (EC 3.5.4.4) // 0.35837382799739975 # SSO:000017750__Heparinase II/III family protein // 0.2974722398530305 # SSO:000030106__heme-binding protein // 0.23756891390589682 # SSO:000005992__Phosphomannomutase (EC 5.4.2.8) // 0.22827672257907952 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.22723806700194998 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.2019872900612036 # SSO:000003007__Galactonate dehydratase (EC 4.2.1.6) // 0.1804650213257335 # SSO:000029995__gluconate transporter // 0.16545938877089875 # SSO:000003523__Hydroxyethylthiazole kinase (EC 2.7.1.50) // 0.16408921203264357 # SSO:000033834__xylulokinase // 0.16310993491744188 # SSO:000007633__Spheroidene monooxygenase (EC 1.14.15.9) // 0.15707755737502632 # SSO:000007416__Septum formation protein Maf // 0.15648533125300068 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.15515485055851122 # SSO:000016590__FMN-binding negative transcriptional regulator // 0.14827096617411806 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.14685922236895707 # SSO:000003484__Homoserine O-succinyltransferase (EC 2.3.1.46) // 0.13743204812937296 # SSO:000010749__Aquaporin // 0.13542284141168986 # SSO:000036062__Gluconate 2-dehydrogenase subunit 3 family protein // 0.13470506820989342


Numer of genomes:737
Number of genomic features :4258
Shape of y:(737,)
Count of 1: 602
Count of 0: 135
Running Decision Tree for phenotype raffinose--carbon_source




Running Random Forest for phenotype raffinose--carbon_source




Running CatBoost for phenotype raffinose--carbon_source




######### Combined report for raffinose--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.736486,0.753309,0.736486,0.744116,0.61446,TP=12 TN=97 FP=17FN=22,SSO:000029792__cytochrome c-1 // 0.16839003571494696 # SSO:000004053__Isocitrate lyase (EC 4.1.3.1) // 0.0896683398312787 # SSO:000017750__Heparinase II/III family protein // 0.043261484639049504 # SSO:000004712__Mannose-6-phosphate isomerase (EC 5.3.1.8) // 0.043190615816183726 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.04182530774981894 # SSO:000018403__Inner membrane protein YhjD // 0.03582571430097295 # SSO:000039286__Sugar O-acetyltransferase // 0.03489366173256458 # SSO:000019235__MOSC domain containing protein // 0.03148792121482404 # SSO:000024227__Rod shape-determining protein // 0.027455701394140115 # SSO:000021615__Phosphate uptake regulator PhoU // 0.027342094264866904 # SSO:000031016__membrane or secreted protein // 0.023424156676164567 # SSO:000003256__Glycerol-3-phosphate acyltransferase (EC 2.3.1.15) // 0.020196795224005505 # SSO:000000824__Adenylosuccinate synthetase (EC 6.3.4.4) // 0.01981516282166471 # SSO:000016682__Ferredoxin reductase // 0.01809022748604939 # SSO:000010856__Asparagine synthase // 0.01730315606004825 # SSO:000029508__carbohydrate-binding protein // 0.016529505052169758 # SSO:000029834__dihydrodipicolinate reductase // 0.013872523493366906 # SSO:000029417__aspartoacylase // 0.011039125055568423 # SSO:000010586__Amino acid transporter // 0.010977414885460628 # SSO:000025611__Thiol:disulfide interchange protein // 0.010947659533105095
Random Forest,0.790541,0.793285,0.790541,0.791874,0.674152,TP=14 TN=103 FP=15FN=16,SSO:000006904__RNA polymerase sigma factor RpoH // 0.004926810767893163 # SSO:000029792__cytochrome c-1 // 0.004635128780658985 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.004006097474939568 # SSO:000003160__Glutamate--cysteine ligase (EC 6.3.2.2) // 0.003800219512530381 # SSO:000012809__DNA polymerase III subunit chi // 0.0037996484230114953 # SSO:000000298__3-hydroxyisobutyrate dehydrogenase (EC 1.1.1.31) // 0.0037663039399251025 # SSO:000013154__Dipeptidase // 0.0037646507726603494 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.0037257098958658743 # SSO:000000563__ATP-dependent Clp protease ATP-binding subunit ClpA // 0.0035915526945512507 # SSO:000004390__Lactoylglutathione lyase (EC 4.4.1.5) // 0.003403869456851153 # SSO:000007780__Succinate dehydrogenase hydrophobic membrane anchor protein // 0.0033864663008416184 # SSO:000009756__Membrane protein // 0.0033141582204947663 # SSO:000017613__HTTM domain-containing protein // 0.0032617067255460157 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.0032255764052668318 # SSO:000004053__Isocitrate lyase (EC 4.1.3.1) // 0.0031765186577574133 # SSO:000003519__Hydroxyacylglutathione hydrolase (EC 3.1.2.6) // 0.00309209475407102 # SSO:000007776__Succinate dehydrogenase cytochrome b-556 subunit // 0.0030875526332837925 # SSO:000003212__Glutathione S-transferase family protein // 0.0030263242261621233 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.0029966186558706294 # SSO:000007096__Ribonuclease Y // 0.002964478137532075
CatBoost,0.790541,0.78245,0.790541,0.786132,0.648073,TP=12 TN=105 FP=17FN=14,SSO:000009756__Membrane protein // 0.4499724445814779 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.3761585202280304 # SSO:000013154__Dipeptidase // 0.265785755989981 # SSO:000037230__Nitronate monooxygenase (EC 1.13.12.16) // 0.25182919077555765 # SSO:000017613__HTTM domain-containing protein // 0.23586483371300754 # SSO:000003525__Hydroxymethylglutaryl-CoA lyase (EC 4.1.3.4) // 0.20830888613174672 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.2011784585423578 # SSO:000012746__DNA binding protein // 0.20115278514173923 # SSO:000004706__Mandelate racemase (EC 5.1.2.2) // 0.1735694253433213 # SSO:000000824__Adenylosuccinate synthetase (EC 6.3.4.4) // 0.1716254555990267 # SSO:000039286__Sugar O-acetyltransferase // 0.17039686003616233 # SSO:000012992__DUF350 domain-containing protein // 0.16887247032732247 # SSO:000042933__glycerophosphodiester phosphodiesterase (EC 3.1.4.46) // 0.16275747234750462 # SSO:000025241__Sugar kinase // 0.16165290432293503 # SSO:000021669__Phospholipase // 0.15971899668056164 # SSO:000007384__Selenocysteine-specific translation elongation factor // 0.15963264957236026 # SSO:000024959__Sodium:solute symporter family protein // 0.15630170092857923 # SSO:000029792__cytochrome c-1 // 0.1498866663525578 # SSO:000006904__RNA polymerase sigma factor RpoH // 0.1361244796433884 # SSO:000002148__DNA-cytosine methyltransferase (EC 2.1.1.37) // 0.136012321742116


Numer of genomes:1047
Number of genomic features :4606
Shape of y:(1047,)
Count of 1: 923
Count of 0: 124
Running Decision Tree for phenotype cellobiose--carbon_source




Running Random Forest for phenotype cellobiose--carbon_source




Running CatBoost for phenotype cellobiose--carbon_source




######### Combined report for cellobiose--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.795238,0.822912,0.795238,0.807777,0.602425,TP=9 TN=158 FP=17FN=26,SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.10786253800108601 # SSO:000007801__Sulfatase (EC 3.1.6.-) // 0.07161373812903686 # SSO:000002791__Flagellar assembly protein FliH // 0.06868703833973569 # SSO:000012978__DUF1934 domain-containing protein // 0.06106859125233643 # SSO:000009756__Membrane protein // 0.034809392387234045 # SSO:000025998__Transglutaminase-like cysteine peptidase // 0.031602473586834226 # SSO:000008865__Urease accessory protein UreF // 0.030940050132550242 # SSO:000009507__24-dihydroxyhept-2-ene-17-dioic acid aldolase (EC 4.1.2.52) // 0.02763576086763172 # SSO:000012160__Chemotaxis response regulator protein-glutamate methylesterase (EC 3.1.1.61) // 0.02733893471037439 # SSO:000004631__Magnesium chelatase (EC 6.6.1.1 ) // 0.026504880734885335 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.025662244211532195 # SSO:000007627__Spermidine synthase (EC 2.5.1.16) // 0.02158460712872652 # SSO:000011951__Carboxylate-amine ligase // 0.020983316755995247 # SSO:000029178__Zinc carboxypeptidase // 0.020357658853369962 # SSO:000039420__Tagaturonate reductase (EC 1.1.1.58) // 0.018751730709346378 # SSO:000033404__sulfite oxidase( EC:1.8.3.1 ) // 0.016101155231745237 # SSO:000029459__beta-phosphoglucomutase family hydrolase // 0.015042893768307744 # SSO:000001854__Cystathionine gamma-lyase (EC 4.4.1.1) // 0.014511361945672367 # SSO:000013391__Endonuclease // 0.014227168063007745 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.014091687948997119
Random Forest,0.885714,0.862857,0.885714,0.862698,0.604515,TP=6 TN=180 FP=20FN=4,SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.006187798727281711 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.004245146046972913 # SSO:000003525__Hydroxymethylglutaryl-CoA lyase (EC 4.1.3.4) // 0.0040807235078925575 # SSO:000000563__ATP-dependent Clp protease ATP-binding subunit ClpA // 0.003885613573920226 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.003835351692800424 # SSO:000007156__Ribulokinase (EC 2.7.1.16) // 0.003820111788487117 # SSO:000012978__DUF1934 domain-containing protein // 0.0036668901834848305 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.0034895806264395855 # SSO:000000925__Alpha-mannosidase (EC 3.2.1.24) // 0.0033074656146896224 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.003185483116068701 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.0029914147494874256 # SSO:000007780__Succinate dehydrogenase hydrophobic membrane anchor protein // 0.0029737117914675386 # SSO:000004883__Methylisocitrate lyase (EC 4.1.3.30) // 0.0029384166749256055 # SSO:000006904__RNA polymerase sigma factor RpoH // 0.0026636805284442396 # SSO:000000338__3-oxoadipyl-CoA thiolase (EC 2.3.1.174) // 0.0026336236269681682 # SSO:000012809__DNA polymerase III subunit chi // 0.0026300986579964624 # SSO:000019083__Lycopene cyclase // 0.0026222406949375204 # SSO:000010516__Endo-14-beta-xylanase (EC 3.2.1.8) // 0.0026070117210338956 # SSO:000043994__3-deoxy-8-phosphooctulonate synthase (EC 2.5.1.55) // 0.0025566819180998903 # SSO:000004390__Lactoylglutathione lyase (EC 4.4.1.5) // 0.0025375197297392784
CatBoost,0.880952,0.858351,0.880952,0.863142,0.618311,TP=7 TN=178 FP=19FN=6,SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.41301018974332754 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.3221322157597711 # SSO:000012746__DNA binding protein // 0.2953719673070257 # SSO:000009756__Membrane protein // 0.28391505724706273 # SSO:000019083__Lycopene cyclase // 0.2830242606645475 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.2555705067382978 # SSO:000007156__Ribulokinase (EC 2.7.1.16) // 0.2465469681410637 # SSO:000003525__Hydroxymethylglutaryl-CoA lyase (EC 4.1.3.4) // 0.22899738199158332 # SSO:000004449__Lipase (EC 3.1.1.3) // 0.20744208928458602 # SSO:000000824__Adenylosuccinate synthetase (EC 6.3.4.4) // 0.17364506271778052 # SSO:000018657__Isoprenylcysteine carboxyl methyltransferase family protein // 0.17027337983447774 # SSO:000000925__Alpha-mannosidase (EC 3.2.1.24) // 0.1642372873260086 # SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.15422342168822 # SSO:000034571__Aldose 1-epimerase family protein // 0.14559307078902856 # SSO:000009095__dTDP-glucose 46-dehydratase (EC 4.2.1.46) // 0.1394453453534669 # SSO:000001856__Cysteine desulfurase (EC 2.8.1.7) // 0.13179782697622514 # SSO:000017756__HesA/MoeB/ThiF family protein // 0.1166712598694276 # SSO:000028910__VanZ family protein // 0.11502174282959081 # SSO:000035923__Flagellar export protein FliJ // 0.10785878788683766 # SSO:000004883__Methylisocitrate lyase (EC 4.1.3.30) // 0.1067385275186164


Numer of genomes:837
Number of genomic features :4457
Shape of y:(837,)
Count of 1: 731
Count of 0: 106
Running Decision Tree for phenotype lactose--carbon_source




Running Random Forest for phenotype lactose--carbon_source




Running CatBoost for phenotype lactose--carbon_source




######### Combined report for lactose--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.797619,0.790429,0.797619,0.793917,0.569444,TP=6 TN=128 FP=18FN=16,SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.1549343894227647 # SSO:000004390__Lactoylglutathione lyase (EC 4.4.1.5) // 0.10651692513527208 # SSO:000021770__Phytoene desaturase (EC 1.14.99.-) // 0.04990590138769346 # SSO:000000272__3-carboxy-ciscis-muconate cycloisomerase (EC 5.5.1.2) // 0.04954375995289273 # SSO:000037083__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit B // 0.03298598079658136 # SSO:000009370__tRNA(1)(Val) (adenine(37)-N(6))-methyltransferase (EC 2.1.1.223) // 0.031847193729703 # SSO:000008630__UDP-N-acetylmuramoyl-tripeptide--D-alanyl-D-alanine ligase (EC 6.3.2.10) // 0.027065992741845728 # SSO:000005809__Peptidyl-prolyl cis-trans isomerase (EC 5.2.1.8) // 0.02688831050335615 # SSO:000011029__Bacteriorhodopsin // 0.02259750833203642 # SSO:000012746__DNA binding protein // 0.018625465710931398 # SSO:000029974__gamma-glutamyltransferase( EC:2.3.2.2 ) // 0.015374468149604052 # SSO:000029803__dUTP diphosphatase (EC 3.6.1.23) // 0.015136268360325137 # SSO:000002256__Deoxyuridine 5'-triphosphate nucleotidohydrolase (EC 3.6.1.23) // 0.015026371660401276 # SSO:000029784__cytochrome c oxidase subunit III // 0.014861205243787111 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.014401323244561367 # SSO:000029417__aspartoacylase // 0.013332485132200261 # SSO:000012963__DUF1176 domain-containing protein // 0.012293449815892114 # SSO:000010315__histone protein // 0.011914664714291923 # SSO:000021299__Penicillin-binding protein 1A // 0.011434244182336895 # SSO:000018613__Iron-containing alcohol dehydrogenase // 0.011321201269962675
Random Forest,0.85119,0.818347,0.85119,0.826768,0.583333,TP=5 TN=138 FP=19FN=6,SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.008175225935181019 # SSO:000004390__Lactoylglutathione lyase (EC 4.4.1.5) // 0.005813935889803413 # SSO:000000298__3-hydroxyisobutyrate dehydrogenase (EC 1.1.1.31) // 0.005651391560838338 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.005244498453357354 # SSO:000042222__Arginyltransferase( EC:2.3.2.8 ) // 0.004888291401594065 # SSO:000000563__ATP-dependent Clp protease ATP-binding subunit ClpA // 0.004025414537930462 # SSO:000017841__Histone deacetylase family protein // 0.003786944541066547 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.0034241204446604587 # SSO:000039286__Sugar O-acetyltransferase // 0.0031462386242735794 # SSO:000000272__3-carboxy-ciscis-muconate cycloisomerase (EC 5.5.1.2) // 0.003124335898730492 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.0030225635632989767 # SSO:000008678__Ubiquinol-cytochrome C reductase iron-sulfur subunit (EC 1.10.2.2) // 0.0030115736818827382 # SSO:000028900__VacJ family lipoprotein // 0.0029657468485375683 # SSO:000042934__methylcrotonoyl-CoA carboxylase (EC 6.4.1.4) // 0.002940148890119016 # SSO:000007801__Sulfatase (EC 3.1.6.-) // 0.0029259077683834248 # SSO:000007096__Ribonuclease Y // 0.002925189806077597 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.002799872294896686 # SSO:000003525__Hydroxymethylglutaryl-CoA lyase (EC 4.1.3.4) // 0.0027643886193756354 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.002621760633896902 # SSO:000003212__Glutathione S-transferase family protein // 0.0026199016760569418
CatBoost,0.892857,0.881868,0.892857,0.877143,0.677083,TP=9 TN=141 FP=15FN=3,SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.6974195505209914 # SSO:000000272__3-carboxy-ciscis-muconate cycloisomerase (EC 5.5.1.2) // 0.3776457403572421 # SSO:000012746__DNA binding protein // 0.349640318675889 # SSO:000018283__IclR-family transcriptional regulator // 0.24847117353025405 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.237410829282366 # SSO:000002060__DNA (cytosine-5-)-methyltransferase (EC 2.1.1.37 ) // 0.22391763790676644 # SSO:000039286__Sugar O-acetyltransferase // 0.22295373001993496 # SSO:000022972__Pseudouridine synthase // 0.22270459087343575 # SSO:000000298__3-hydroxyisobutyrate dehydrogenase (EC 1.1.1.31) // 0.19493057946467593 # SSO:000013628__FAD:protein FMN transferase (EC 2.7.1.180) // 0.191607911028085 # SSO:000010516__Endo-14-beta-xylanase (EC 3.2.1.8) // 0.18520470799346875 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.18447414871662887 # SSO:000017573__HIT family protein // 0.16904629257622733 # SSO:000008066__Thymidine phosphorylase (EC 2.4.2.4) // 0.16293760817833927 # SSO:000029307__aldolase // 0.16279941581905646 # SSO:000002811__Flagellar hook protein FlgE // 0.15640394599505622 # SSO:000025362__TM2 domain containing protein // 0.14102705890377984 # SSO:000002255__Deoxyribose-phosphate aldolase (EC 4.1.2.4) // 0.1323620491515847 # SSO:000003525__Hydroxymethylglutaryl-CoA lyase (EC 4.1.3.4) // 0.13136793687558965 # SSO:000017940__Hydrogenase maturation protease // 0.13070638785190508


Numer of genomes:638
Number of genomic features :4131
Shape of y:(638,)
Count of 1: 534
Count of 0: 104
Running Decision Tree for phenotype melibiose--carbon_source




Running Random Forest for phenotype melibiose--carbon_source




Running CatBoost for phenotype melibiose--carbon_source




######### Combined report for melibiose--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.820312,0.831951,0.820312,0.825633,0.677209,TP=9 TN=96 FP=10FN=13,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.16649002850379518 # SSO:000010350__AhpC/TSA family protein // 0.07318945032660552 # SSO:000021899__Polysaccharide deacetylase family protein // 0.050680349283558045 # SSO:000024103__Rhamnogalacturonan acetylesterase // 0.048444781722988196 # SSO:000042733__carotenoid 12-hydratase (EC 4.2.1.131) // 0.04667845849345685 # SSO:000036187__Polysaccharide pyruvyl transferase CsaB // 0.03377193001658273 # SSO:000002619__Exodeoxyribonuclease III (EC 3.1.11.2) // 0.027642143367228884 # SSO:000003486__Homoserine kinase (EC 2.7.1.39) // 0.02564581541559295 # SSO:000023924__Redoxin domain-containing protein // 0.023901445039840298 # SSO:000007803__Sulfate adenylyltransferase (EC 2.7.7.4) // 0.02304402118882727 # SSO:000000273__3-dehydro-L-gulonate 2-dehydrogenase (EC 1.1.1.130) // 0.02072660211216448 # SSO:000030951__lipopolysaccharide kinase // 0.019669941755738708 # SSO:000008105__TonB-dependent siderophore receptor // 0.017035193474022763 # SSO:000005809__Peptidyl-prolyl cis-trans isomerase (EC 5.2.1.8) // 0.01690034168959268 # SSO:000043940__dihydrolipoyllysine-residue acetyltransferase (EC 2.3.1.12) // 0.016713933271551983 # SSO:000034018__2-polyprenylphenol hydroxylase (EC 1.14.13.240) // 0.014691265474290956 # SSO:000019421__Membrane metalloprotease // 0.013581082069426851 # SSO:000020858__OstA family protein // 0.013551540843236187 # SSO:000001398__Carbamate kinase (EC 2.7.2.2) // 0.013136265791568555 # SSO:000021606__Phosphate permease // 0.01121031815386743
Random Forest,0.84375,0.823809,0.84375,0.831194,0.625785,TP=6 TN=102 FP=13FN=7,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.009500677646266437 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.007348493498493271 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.006402295602799185 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.005967310469389121 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.004780252856467997 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.004687200422923562 # SSO:000013628__FAD:protein FMN transferase (EC 2.7.1.180) // 0.004461010678932044 # SSO:000004390__Lactoylglutathione lyase (EC 4.4.1.5) // 0.004256453116074831 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.004089652766840406 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.003931222540315912 # SSO:000010848__AsmA family protein // 0.0033454293925878707 # SSO:000029337__alpha-xylosidase (EC 3.2.1.177) // 0.0030734634006919235 # SSO:000003519__Hydroxyacylglutathione hydrolase (EC 3.1.2.6) // 0.002831535565560435 # SSO:000006904__RNA polymerase sigma factor RpoH // 0.0028064572528059754 # SSO:000004190__LPS export ABC transporter permease LptG // 0.00275649677749496 # SSO:000012809__DNA polymerase III subunit chi // 0.0026851015255354013 # SSO:000007096__Ribonuclease Y // 0.0026503925974269284 # SSO:000022980__PspC domain-containing protein // 0.0026314489674533767 # SSO:000003212__Glutathione S-transferase family protein // 0.0025868541762706244 # SSO:000007780__Succinate dehydrogenase hydrophobic membrane anchor protein // 0.002574759368642581
CatBoost,0.8125,0.80422,0.8125,0.808144,0.607436,TP=6 TN=98 FP=13FN=11,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 1.1462413359605816 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.5562744392830793 # SSO:000013628__FAD:protein FMN transferase (EC 2.7.1.180) // 0.42968403876670336 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.29880938489618664 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.23194667235810318 # SSO:000021409__Peptidoglycan-binding protein // 0.23158004790438075 # SSO:000031925__proteasome-type protease // 0.1926729968694922 # SSO:000010848__AsmA family protein // 0.18966570118686743 # SSO:000009710__5'(3')-deoxyribonucleotidase // 0.18310473595091498 # SSO:000026732__UbiD family decarboxylase // 0.17203977596439327 # SSO:000012951__DNA/RNA non-specific endonuclease // 0.1641402434108183 # SSO:000000568__ATP-dependent Clp protease proteolytic subunit (EC 3.4.21.92) // 0.1567964380852072 # SSO:000006811__Pyridoxamine 5'-phosphate oxidase (EC 1.4.3.5) // 0.15335985705356597 # SSO:000021899__Polysaccharide deacetylase family protein // 0.137315971715428 # SSO:000016592__FMN-dependent NADH-azoreductase (EC 1.7.1.6) // 0.1353322086399852 # SSO:000036062__Gluconate 2-dehydrogenase subunit 3 family protein // 0.1349781709482848 # SSO:000010350__AhpC/TSA family protein // 0.13328047160703282 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.12590982665235864 # SSO:000024959__Sodium:solute symporter family protein // 0.11340872086685838 # SSO:000034018__2-polyprenylphenol hydroxylase (EC 1.14.13.240) // 0.10669129925965612


Numer of genomes:2401
Number of genomic features :5655
Shape of y:(2401,)
Count of 1: 259
Count of 0: 2142
Running Decision Tree for phenotype tryptophan--energy_source




Running Random Forest for phenotype tryptophan--energy_source




Running CatBoost for phenotype tryptophan--energy_source




######### Combined report for tryptophan--energy_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.920998,0.920998,0.920998,0.920998,0.816471,TP=403 TN=40 FP=19FN=19,SSO:000008399__Tryptophanase (EC 4.1.99.1) // 0.2990835707104393 # SSO:000013395__Endonuclease NucS // 0.056360176446975094 # SSO:000011926__Carbamoyltransferase // 0.025595744945516495 # SSO:000001074__Arsenite methyltransferase (EC 2.1.1.137) // 0.01908756058498763 # SSO:000012994__DUF433 domain-containing protein // 0.0162807711118864 # SSO:000020300__NAD(P)H-quinone oxidoreductase subunit 3 (EC 1.6.5.2) // 0.010213925991910391 # SSO:000001185__Beta-glucuronidase (EC 3.2.1.31) // 0.009739010046149233 # SSO:000009370__tRNA(1)(Val) (adenine(37)-N(6))-methyltransferase (EC 2.1.1.223) // 0.008843433397802105 # SSO:000009662__Amylo-alpha-16-glucosidase (EC 3.2.1.33) // 0.008353867997918957 # SSO:000010638__Amylosucrase (EC 2.4.1.4) // 0.008007581129529524 # SSO:000043878__quinoprotein glucose dehydrogenase (EC 1.1.5.2) // 0.00749528598708756 # SSO:000021510__Phage holin // 0.006622292555987123 # SSO:000017237__Glucosylglycerol-phosphate synthase (EC 2.4.1.213) // 0.006621344403344119 # SSO:000043166__tryptophan 2-C-methyltransferase (EC 2.1.1.106) // 0.006227493465386026 # SSO:000037083__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit B // 0.006130774767588078 # SSO:000005779__Pentalenene synthase (EC 4.2.3.7) // 0.005924203783984938 # SSO:000007084__Ribonuclease III (EC 3.1.26.3) // 0.005799194289563735 # SSO:000030905__isomerase // 0.005446208450479932 # SSO:000010214__AcrB/AcrD/AcrF family protein // 0.005389323150720609 # SSO:000021027__PAP2 family protein // 0.004868730207281985
Random Forest,0.941788,0.938691,0.941788,0.938901,0.82103,TP=414 TN=39 FP=8FN=20,SSO:000008399__Tryptophanase (EC 4.1.99.1) // 0.029360097805257186 # SSO:000003485__Homoserine dehydrogenase (EC 1.1.1.3) // 0.004514710738643622 # SSO:000001569__Chromosome partition protein MukF // 0.004007960962265882 # SSO:000035996__Fumarate reductase (quinol) flavoprotein subunit (EC 1.3.5.4) // 0.0038567804121371297 # SSO:000001095__Aspartate carbamoyltransferase (EC 2.1.3.2) // 0.0037368482288153435 # SSO:000021359__Peptidase T (EC 3.4.11.-) // 0.0033553334375958197 # SSO:000043061__asparagine---tRNA ligase (EC 6.1.1.22) // 0.0032752909692911314 # SSO:000001568__Chromosome partition protein MukE // 0.0032467703789376802 # SSO:000005953__Phosphoenolpyruvate carboxykinase ATP (EC 4.1.1.49) // 0.003190660743851451 # SSO:000036827__Methionine ABC transporter ATP-binding protein MetN // 0.0030711268704966765 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.002704129943337586 # SSO:000044330__glutamate formimidoyltransferase (EC 2.1.2.5) // 0.002688295951428794 # SSO:000001567__Chromosome partition protein MukB // 0.002644709214933283 # SSO:000001476__Cell division protein ZapC // 0.0024978255516012564 # SSO:000006033__Phosphoribosyl-AMP cyclohydrolase (EC 3.5.4.19) // 0.00244769406993062 # SSO:000020286__NAD(P)H nitroreductase // 0.0024088017405649144 # SSO:000005355__Nitrogen regulatory protein P-II // 0.0023573528511655496 # SSO:000000167__2-iminoacetate synthase (ThiH) (EC 4.1.99.19) // 0.002282767604834728 # SSO:000000564__ATP-dependent Clp protease ATP-binding subunit ClpX // 0.002230490087367186 # SSO:000038896__RNA chaperone ProQ // 0.0021969607939592954
CatBoost,0.939709,0.941125,0.939709,0.940349,0.870873,TP=406 TN=46 FP=16FN=13,SSO:000008399__Tryptophanase (EC 4.1.99.1) // 3.8673020987594717 # SSO:000007212__SAM-dependent methyltransferase (EC 2.1.1.-) // 0.20688204936010593 # SSO:000042937__maltose alpha-D-glucosyltransferase (EC 5.4.99.16) // 0.13526469751476344 # SSO:000034554__Aldehyde dehydrogenase (NAD(P)(+)) (EC 1.2.1.5) // 0.12951925390714167 # SSO:000024083__Restriction endonuclease // 0.12599203921524585 # SSO:000006147__Polyketide synthase // 0.09529261116198966 # SSO:000001094__Aspartate ammonia-lyase (EC 4.3.1.1) // 0.09104687088764045 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.08782997008195878 # SSO:000029974__gamma-glutamyltransferase( EC:2.3.2.2 ) // 0.07831895026954003 # SSO:000003696__Imidazolonepropionase (EC 3.5.2.7) // 0.07813081790987765 # SSO:000019936__MmgE/PrpD family protein // 0.07226342464161126 # SSO:000018202__IS21 family transposase // 0.07198603906241965 # SSO:000000886__Alkaline phosphatase (EC 3.1.3.1) // 0.06914664382953022 # SSO:000021359__Peptidase T (EC 3.4.11.-) // 0.06733849048688183 # SSO:000013194__DnaJ domain-containing protein // 0.06537944934777379 # SSO:000001768__Copper resistance protein CopC // 0.060848048592116226 # SSO:000009310__sortase // 0.059840422392077694 # SSO:000011926__Carbamoyltransferase // 0.05960117226933551 # SSO:000010255__Acyl-CoA dehydrogenase family protein // 0.05925864520554242 # SSO:000020771__Oligopeptide transporter OPT family // 0.05638926615899339


Numer of genomes:3813
Number of genomic features :6065
Shape of y:(3813,)
Count of 1: 1711
Count of 0: 2102
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:591
Number of genomic features :3441
Shape of y:(591,)
Count of 1: 103
Count of 0: 488
Running Decision Tree for phenotype raffinose--fermentation




Running Random Forest for phenotype raffinose--fermentation




Running CatBoost for phenotype raffinose--fermentation




######### Combined report for raffinose--fermentation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.831933,0.846669,0.831933,0.838221,0.729474,TP=88 TN=11 FP=12FN=8,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.27748486005425815 # SSO:000001419__Carbonic anhydrase (EC 4.2.1.1) // 0.12339773241017465 # SSO:000021634__Phosphodiesterase // 0.04571951650032719 # SSO:000022980__PspC domain-containing protein // 0.037386075898318744 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.02570761479244152 # SSO:000031325__penicillin-binding protein 2 // 0.02483982000778553 # SSO:000000366__4-hydroxy-tetrahydrodipicolinate synthase (EC 4.3.3.7) // 0.018004984232712188 # SSO:000005119__N-acetylornithine carbamoyltransferase (EC 2.1.3.9) // 0.01710691190579233 # SSO:000002978__GMP synthase (EC 6.3.5.2) // 0.013822960400011608 # SSO:000006808__Pyridoxal kinase (EC 2.7.1.35) // 0.012610079978817348 # SSO:000007517__Signal peptidase I (EC 3.4.21.89) // 0.012245271746984141 # SSO:000012973__DUF1850 domain-containing protein // 0.011069544922902262 # SSO:000020581__Nitrogenase( EC:1.18.6.1 ) // 0.010561538673217458 # SSO:000007113__Ribose-phosphate pyrophosphokinase (EC 2.7.6.1) // 0.010529518083664641 # SSO:000010307__Adenine specific DNA methyltransferase // 0.010024489760375045 # SSO:000002448__Electron transport complex protein RnfA // 0.009546354570129353 # SSO:000033667__transposase family protein // 0.009489404642426848 # SSO:000021805__Plasmid recombination enzyme // 0.009242726412592678 # SSO:000020989__Dihydropteridine reductase (EC 1.5.1.34) // 0.009024168777103284 # SSO:000026732__UbiD family decarboxylase // 0.009019891732477395
Random Forest,0.865546,0.865546,0.865546,0.865546,0.749474,TP=92 TN=11 FP=8FN=8,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.017296924847109273 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.011526407999638345 # SSO:000013154__Dipeptidase // 0.009249255458920832 # SSO:000006902__RNA polymerase sigma factor RpoD // 0.00879433688368602 # SSO:000018718__LD-transpeptidase // 0.008115396444302533 # SSO:000003352__Heat-inducible transcription repressor HrcA // 0.007628272404333787 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.00724492915229134 # SSO:000035962__Formate-dependent phosphoribosylglycinamide formyltransferase // 0.006812068817348041 # SSO:000043427__L-serine ammonia-lyase (EC 4.3.1.17) // 0.006478656776832928 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.006007388823781199 # SSO:000025220__Sucrose phosphorylase (EC 2.4.1.7) // 0.005736696528389075 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.005395004795617183 # SSO:000034571__Aldose 1-epimerase family protein // 0.004989940737515807 # SSO:000042577__NAD+ diphosphatase (EC 3.6.1.22) // 0.004969257768904064 # SSO:000005159__NAD kinase (EC 2.7.1.23) // 0.004882651099303028 # SSO:000029073__Xylose isomerase // 0.0045750999054277024 # SSO:000024110__Rhamnulokinase (EC 2.7.1.5) // 0.004563521286422039 # SSO:000001419__Carbonic anhydrase (EC 4.2.1.1) // 0.004307995720146552 # SSO:000022980__PspC domain-containing protein // 0.00429184555554003 # SSO:000013400__Endonuclease/exonuclease/phosphatase family protein // 0.004244338856940986
CatBoost,0.882353,0.882353,0.882353,0.882353,0.780789,TP=93 TN=12 FP=7FN=7,SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 2.0362102599992014 # SSO:000001419__Carbonic anhydrase (EC 4.2.1.1) // 0.38441938218495586 # SSO:000018718__LD-transpeptidase // 0.34742730238536007 # SSO:000013154__Dipeptidase // 0.34552491216930786 # SSO:000025220__Sucrose phosphorylase (EC 2.4.1.7) // 0.30255530367545114 # SSO:000024110__Rhamnulokinase (EC 2.7.1.5) // 0.29283628651620724 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.26817633935251933 # SSO:000034571__Aldose 1-epimerase family protein // 0.2627674321877335 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.2582477624666601 # SSO:000029838__dihydrolipoyl dehydrogenase // 0.2381399457243508 # SSO:000033667__transposase family protein // 0.21998382458117258 # SSO:000003352__Heat-inducible transcription repressor HrcA // 0.1815484790596479 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.17179987804123337 # SSO:000010639__Anaerobic C4-dicarboxylate transporter // 0.1623797202298137 # SSO:000008961__Xanthine phosphoribosyltransferase (EC 2.4.2.22) // 0.14832375336206507 # SSO:000033137__rubrerythrin family protein // 0.12417760414540198 # SSO:000031325__penicillin-binding protein 2 // 0.109672081272519 # SSO:000017578__HNH endonuclease // 0.1022410585629626 # SSO:000006902__RNA polymerase sigma factor RpoD // 0.09868556002467813 # SSO:000013194__DnaJ domain-containing protein // 0.0977728936981489


Numer of genomes:587
Number of genomic features :3584
Shape of y:(587,)
Count of 1: 213
Count of 0: 374
Running Decision Tree for phenotype D-mannose--fermentation




Running Random Forest for phenotype D-mannose--fermentation




Running CatBoost for phenotype D-mannose--fermentation




######### Combined report for D-mannose--fermentation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.720339,0.73086,0.720339,0.723081,0.719807,TP=52 TN=33 FP=20FN=13,SSO:000007588__Site-specific tyrosine recombinase XerD // 0.15477368097557828 # SSO:000000975__Ammonium transporter // 0.11564184661902825 # SSO:000006523__Protein translocase subunit SecD // 0.04675527927935726 # SSO:000002288__Dihydrofolate reductase (EC 1.5.1.3) // 0.04608232292592883 # SSO:000009468__14-dihydroxy-2-naphthoate octaprenyltransferase // 0.0435575191464767 # SSO:000032555__putative glycoside hydrolase // 0.03302926072811097 # SSO:000001635__Cobalt-precorrin-6A reductase (EC 1.3.1.54) // 0.03087389637491157 # SSO:000023719__Response regulator // 0.027793801803819136 # SSO:000001469__Cell division protein FtsQ // 0.027443102347622945 # SSO:000037085__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit D // 0.025342687309642835 # SSO:000012182__Chloride channel protein // 0.024211511760335664 # SSO:000025220__Sucrose phosphorylase (EC 2.4.1.7) // 0.02209329812289199 # SSO:000043675__4-phosphoerythronate dehydrogenase (EC 1.1.1.290) // 0.01929601021155083 # SSO:000025054__Sporulation integral membrane protein YtvI // 0.017510220274576906 # SSO:000020584__Nitroreductase family protein // 0.017106757558927804 # SSO:000011121__Beta-xylosidase // 0.016516879120531073 # SSO:000005432__Nucleotidyltransferase (EC 2.7.7.-) // 0.016248487933093036 # SSO:000012969__DUF1541 domain-containing protein // 0.015326217182847594 # SSO:000003224__Glutathione peroxidase (EC 1.11.1.9) // 0.01497751247859384 # SSO:000021071__PEP phosphonomutase // 0.012674322074728924
Random Forest,0.872881,0.87534,0.872881,0.873536,0.872283,TP=63 TN=40 FP=9FN=6,SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.00756454106088243 # SSO:000007588__Site-specific tyrosine recombinase XerD // 0.007193913208569627 # SSO:000030003__glucose-6-phosphate dehydrogenase // 0.005727601217441648 # SSO:000021634__Phosphodiesterase // 0.0051251827250393305 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.0050749902593406025 # SSO:000010702__Antibiotic biosynthesis monooxygenase // 0.004644000857744545 # SSO:000005994__Phosphomevalonate kinase (EC 2.7.4.2) // 0.004554077067951603 # SSO:000000995__Anthranilate phosphoribosyltransferase (EC 2.4.2.18) // 0.004307122322214759 # SSO:000039286__Sugar O-acetyltransferase // 0.004306328125309471 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.004170205613215567 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.004117240306067253 # SSO:000000035__14-dihydroxy-2-naphthoate polyprenyltransferase (EC 2.5.1.74) // 0.004113831858220654 # SSO:000018718__LD-transpeptidase // 0.003909208892458666 # SSO:000013106__Diguanylate cyclase // 0.0038090236180057632 # SSO:000000847__Alanine dehydrogenase (EC 1.4.1.1) // 0.0036484225687604547 # SSO:000004907__Mevalonate kinase (EC 2.7.1.36) // 0.003524186086059385 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.0034319728289729217 # SSO:000002350__Diphosphomevalonate decarboxylase (EC 4.1.1.33) // 0.003411167860745879 # SSO:000007384__Selenocysteine-specific translation elongation factor // 0.003347743029931151 # SSO:000008068__Thymidylate synthase (EC 2.1.1.45) // 0.0032281090221883673
CatBoost,0.855932,0.858542,0.855932,0.856674,0.854469,TP=62 TN=39 FP=10FN=7,SSO:000018718__LD-transpeptidase // 0.42717946014715286 # SSO:000034571__Aldose 1-epimerase family protein // 0.3785070224438052 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.3148915031323437 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.31338971990207487 # SSO:000002270__Diacylglycerol kinase (EC 2.7.1.107) // 0.2648932567990381 # SSO:000024110__Rhamnulokinase (EC 2.7.1.5) // 0.24483930628307 # SSO:000000075__23-bisphosphoglycerate-independent phosphoglycerate mutase (EC 5.4.2.12) // 0.22169164811005315 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.21919155504899648 # SSO:000021897__Polysaccharide deacetylase // 0.17633171294830052 # SSO:000039286__Sugar O-acetyltransferase // 0.16034529578287512 # SSO:000007588__Site-specific tyrosine recombinase XerD // 0.14094465625784372 # SSO:000021634__Phosphodiesterase // 0.13476433929883805 # SSO:000005188__NAD-dependent malic enzyme (EC 1.1.1.38) // 0.12083424235740771 # SSO:000026421__Tyrosine recombinase XerC // 0.11957904905148664 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.11796124198816428 # SSO:000002992__GTP-binding protein // 0.11485802906002056 # SSO:000007384__Selenocysteine-specific translation elongation factor // 0.11456858297443942 # SSO:000023750__Pyridoxamine 5'-phosphate oxidase family protein // 0.11101622572958814 # SSO:000000864__Aldehyde dehydrogenase (EC 1.2.1.3) // 0.10872479404371911 # SSO:000000847__Alanine dehydrogenase (EC 1.4.1.1) // 0.10718542488061522


Numer of genomes:3146
Number of genomic features :5826
Shape of y:(3146,)
Count of 1: 711
Count of 0: 2435
Running Decision Tree for phenotype urea--hydrolysis




Running Random Forest for phenotype urea--hydrolysis




Running CatBoost for phenotype urea--hydrolysis




######### Combined report for urea--hydrolysis#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.809524,0.81346,0.809524,0.811346,0.737243,TP=424 TN=86 FP=64FN=56,SSO:000008866__Urease accessory protein UreG // 0.20839660917164476 # SSO:000035277__Cytochrome c oxidase subunit 4 (EC 1.9.3.1) // 0.05844197054183102 # SSO:000003687__Hypoxanthine-guanine phosphoribosyltransferase (EC 2.4.2.8) // 0.04642682396848987 # SSO:000001851__Cystathionine beta-lyase (EC 4.4.1.8) // 0.028541667750801268 # SSO:000024558__Serine protease // 0.026859639267374162 # SSO:000000065__16S rRNA processing protein RimM // 0.02478365445959815 # SSO:000008864__Urease accessory protein UreE // 0.019487756498141523 # SSO:000002974__GDP-mannose mannosyl hydrolase (EC 3.6.1.-) // 0.018568520216986278 # SSO:000043675__4-phosphoerythronate dehydrogenase (EC 1.1.1.290) // 0.015392183233684548 # SSO:000042813__amino-acid N-acetyltransferase (EC 2.3.1.1) // 0.01514278490400324 # SSO:000005313__Nicotinate phosphoribosyltransferase (EC 6.3.4.21) // 0.014923631596448234 # SSO:000002338__Dimethylsulfoniopropionate demethylase (EC 2.1.1.269) // 0.011552302018028279 # SSO:000002128__DNA translocase FtsK // 0.010260688806878809 # SSO:000002815__Flagellar hook-length control protein FliK // 0.009999480924843696 # SSO:000002475__Endonuclease V (EC 3.1.21.7) // 0.00972745774119766 # SSO:000039887__UDP-23-diacylglucosamine diphosphatase LpxI (EC 3.6.1.54) // 0.008275751013342856 # SSO:000002249__Deoxynucleoside kinase (EC 2.7.1.113 ) // 0.00795823189246551 # SSO:000025241__Sugar kinase // 0.007923571256345394 # SSO:000002702__Ferredoxin (EC 1.18.1.3 ) // 0.007819781510121478 # SSO:000030940__lipid A biosynthesis acyltransferase // 0.006927840466094779
Random Forest,0.84127,0.846436,0.84127,0.843499,0.787693,TP=432 TN=98 FP=56FN=44,SSO:000008866__Urease accessory protein UreG // 0.017387860106949277 # SSO:000008865__Urease accessory protein UreF // 0.01709674484254767 # SSO:000008863__Urease accessory protein UreD // 0.01483891709845021 # SSO:000008864__Urease accessory protein UreE // 0.011279997989325054 # SSO:000044064__phosphoserine transaminase (EC 2.6.1.52) // 0.004816497038676926 # SSO:000035277__Cytochrome c oxidase subunit 4 (EC 1.9.3.1) // 0.004697829090230391 # SSO:000021661__Phosphoglyceromutase // 0.0036849388843789095 # SSO:000006841__Pyruvate carboxylase (EC 6.4.1.1) // 0.0036819429085118956 # SSO:000013395__Endonuclease NucS // 0.003524944473218807 # SSO:000009218__phosphoribosylformylglycinamidine synthase (EC 6.3.5.3 ) // 0.0032899424532359845 # SSO:000043620__mycothiol synthase (EC 2.3.1.189) // 0.002992532053105829 # SSO:000029759__coproporphyrinogen III oxidase // 0.002929512962164338 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.0028365729166632213 # SSO:000008953__WhiB family transcriptional regulator // 0.0028174610607907802 # SSO:000024157__Ribose 5-phosphate isomerase (EC 5.3.1.6) // 0.0027838277888978043 # SSO:000029992__globin // 0.0026461030291123505 # SSO:000031624__prepilin-type N-terminal cleavage/methylation domain-containing protein // 0.002639775446934119 # SSO:000028881__Uroporphyrinogen decarboxylase // 0.0025419732021236974 # SSO:000002076__DNA mismatch repair protein MutS // 0.0024724774130141496 # SSO:000001853__Cystathionine beta-synthase (EC 4.2.1.22) // 0.0024719109229830462
CatBoost,0.838095,0.843345,0.838095,0.840369,0.783148,TP=431 TN=97 FP=57FN=45,SSO:000008866__Urease accessory protein UreG // 0.8466042117697495 # SSO:000008865__Urease accessory protein UreF // 0.7269384707785509 # SSO:000008863__Urease accessory protein UreD // 0.36620545845150043 # SSO:000008864__Urease accessory protein UreE // 0.36023140133806436 # SSO:000001444__Catalase (EC 1.11.1.6) // 0.12594658547359286 # SSO:000035277__Cytochrome c oxidase subunit 4 (EC 1.9.3.1) // 0.10899723281205023 # SSO:000013395__Endonuclease NucS // 0.10682636189233788 # SSO:000031624__prepilin-type N-terminal cleavage/methylation domain-containing protein // 0.10460105940593625 # SSO:000016627__Fatty acid cis/trans isomerase // 0.10248081956014385 # SSO:000024243__Rubredoxin // 0.10190579025309664 # SSO:000002972__GDP-mannose 46-dehydratase (EC 4.2.1.47) // 0.09440529459237688 # SSO:000017273__Glutaminase A // 0.09354402743468865 # SSO:000013106__Diguanylate cyclase // 0.08710822902858771 # SSO:000009089__dTDP-4-dehydrorhamnose 35-epimerase (EC 5.1.3.13) // 0.0812326579892465 # SSO:000005483__Oleate hydratase (EC 4.2.1.53) // 0.08113804092183272 # SSO:000021734__Phosphotriesterase related protein // 0.07428465186906148 # SSO:000039114__SapC family protein // 0.07185805432187126 # SSO:000018768__L-lactate permease // 0.07046989300546956 # SSO:000002128__DNA translocase FtsK // 0.06896528885812901 # SSO:000006977__Regulatory protein RecX // 0.06686404462767725


Numer of genomes:495
Number of genomic features :3225
Shape of y:(495,)
Count of 1: 58
Count of 0: 437
Running Decision Tree for phenotype L-glutamate--degradation




Running Random Forest for phenotype L-glutamate--degradation




Running CatBoost for phenotype L-glutamate--degradation




######### Combined report for L-glutamate--degradation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.929293,0.938032,0.929293,0.932323,0.894007,TP=81 TN=11 FP=5FN=2,SSO:000003148__Glutamate decarboxylase (EC 4.1.1.15) // 0.20258284938387255 # SSO:000021054__PASTA domain containing protein // 0.08154637396100274 # SSO:000038542__Phosphomethylpyrimidine synthase ThiC (EC 4.1.99.17) // 0.07646545663421081 # SSO:000017389__Glycosyl hydrolase family 5 // 0.04665894592534029 # SSO:000042254__pyruvate synthase (EC 1.2.7.1) // 0.04032570193783608 # SSO:000024474__Sensor histidine kinase // 0.03229182557663682 # SSO:000033686__trehalose-phosphatase // 0.029111385748475636 # SSO:000013134__Dihydroorotate dehydrogenase // 0.02814689140448511 # SSO:000001470__Cell division protein FtsW // 0.019005801831361295 # SSO:000024332__SH3 domain-containing protein // 0.016380379268617275 # SSO:000012612__Cytochrome b561 // 0.009864143432932881 # SSO:000018657__Isoprenylcysteine carboxyl methyltransferase family protein // 0.009114555406795215 # SSO:000009681__4-hydroxybenzoate 3-monooxygenase( EC:1.14.13.2 ) // 0.004999284495125526 # SSO:000011117__Beta-mannosidase // 0.0032497833041195416 # SSO:000026007__Transglutaminase-like protein // 0.0023890689967537605 # SSO:000004056__Isoleucyl-tRNA synthetase (EC 6.1.1.5) // 0.002134052252409172 # SSO:000025455__TRNA and rRNA cytosine-C5-methylase // 0.0 # SSO:000012875__DNA sulfur modification protein DndB // 0.0 # SSO:000033047__pyruvate carboxyltransferase // 0.0 # SSO:000013192__DnaD domain protein // 0.0
Random Forest,0.959596,0.963396,0.959596,0.960801,0.944097,TP=83 TN=12 FP=3FN=1,SSO:000003148__Glutamate decarboxylase (EC 4.1.1.15) // 0.015181564858980534 # SSO:000020341__NADH:flavin oxidoreductase // 0.007640926884202476 # SSO:000011068__Phenylacetate CoA-ligase (EC 6.2.1.30) // 0.0075763858417310535 # SSO:000021837__Polyamine ABC transporter ATP-binding protein // 0.0072795027404373115 # SSO:000009749__Adenosine kinase (EC 2.7.1.20) // 0.00655658386741254 # SSO:000038542__Phosphomethylpyrimidine synthase ThiC (EC 4.1.99.17) // 0.005999816910843031 # SSO:000012966__DUF1275 domain-containing protein // 0.0054472249831275295 # SSO:000000536__AMP nucleosidase (EC 3.2.2.4) // 0.0053910545568473245 # SSO:000020652__Nucleoside Triphosphate Pyrophosphohydrolase (EC 3.6.1.8) // 0.005091680145731767 # SSO:000002535__Energy-dependent translational throttle protein EttA // 0.004964734239510821 # SSO:000000826__Adenylylsulfate kinase (EC 2.7.1.25) // 0.004918297467477066 # SSO:000026225__Tryptophan-rich sensory protein // 0.004828655587754529 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.004801296817344541 # SSO:000002246__Deoxyguanosinetriphosphate triphosphohydrolase (EC 3.1.5.1) // 0.004727086728914033 # SSO:000017273__Glutaminase A // 0.004703686075418625 # SSO:000007088__Ribonuclease M5 (EC 3.1.26.8) // 0.00459907174884061 # SSO:000018454__Inositol monophosphatase // 0.004587138100375325 # SSO:000021054__PASTA domain containing protein // 0.00426223768707629 # SSO:000000817__Adenosylhomocysteinase (EC 3.3.1.1) // 0.004204588227804195 # SSO:000025277__Sulfotransferase // 0.0039805850118531366
CatBoost,0.939394,0.950785,0.939394,0.942713,0.932469,TP=81 TN=12 FP=5FN=1,SSO:000003148__Glutamate decarboxylase (EC 4.1.1.15) // 1.8099051387174896 # SSO:000011068__Phenylacetate CoA-ligase (EC 6.2.1.30) // 0.543744735123276 # SSO:000012966__DUF1275 domain-containing protein // 0.35326617321986375 # SSO:000021269__Patatin family protein // 0.31179228054006325 # SSO:000002246__Deoxyguanosinetriphosphate triphosphohydrolase (EC 3.1.5.1) // 0.30784441191341205 # SSO:000021837__Polyamine ABC transporter ATP-binding protein // 0.27501301107089254 # SSO:000012739__DJ-1/PfpI family protein // 0.2581845881175211 # SSO:000026225__Tryptophan-rich sensory protein // 0.21692245193231727 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.21002203328617228 # SSO:000007416__Septum formation protein Maf // 0.19169830736978136 # SSO:000042344__threonine ammonia-lyase (EC 4.3.1.19) // 0.1632492512007778 # SSO:000005468__O-methyltransferase (EC 2.1.1.-) // 0.15985003128227218 # SSO:000042254__pyruvate synthase (EC 1.2.7.1) // 0.14476986302726008 # SSO:000018905__Leucine-rich repeat protein // 0.14062119803476023 # SSO:000038542__Phosphomethylpyrimidine synthase ThiC (EC 4.1.99.17) // 0.1335607372429069 # SSO:000031065__methionine synthase // 0.13008506100091535 # SSO:000009749__Adenosine kinase (EC 2.7.1.20) // 0.12111554127954902 # SSO:000017273__Glutaminase A // 0.11424410425034778 # SSO:000021054__PASTA domain containing protein // 0.10806268081738471 # SSO:000000735__Aconitate hydratase (EC 4.2.1.3) // 0.10666633029106218


Numer of genomes:2433
Number of genomic features :5577
Shape of y:(2433,)
Count of 1: 638
Count of 0: 1795
Running Decision Tree for phenotype arginine--hydrolysis




Running Random Forest for phenotype arginine--hydrolysis




Running CatBoost for phenotype arginine--hydrolysis




######### Combined report for arginine--hydrolysis#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.823409,0.843435,0.823409,0.829684,0.807829,TP=307 TN=94 FP=59FN=27,SSO:000001046__Arginine deiminase (EC 3.5.3.6) // 0.2390967828175424 # SSO:000001398__Carbamate kinase (EC 2.7.2.2) // 0.07280429937191853 # SSO:000021848__Polyhydroxyalkanoate synthesis repressor PhaR // 0.041646556189641595 # SSO:000016592__FMN-dependent NADH-azoreductase (EC 1.7.1.6) // 0.040917446462971525 # SSO:000012985__DUF309 domain-containing protein // 0.032219304947393626 # SSO:000039072__S-(hydroxymethyl)mycothiol dehydrogenase (EC 1.1.1.306) // 0.025835207082415938 # SSO:000000263__34-dihydroxy-2-butanone 4-phosphate synthase (EC 4.1.99.12) // 0.013618367752609026 # SSO:000006147__Polyketide synthase // 0.012732206222377546 # SSO:000025487__Taurine dioxygenase (EC 1.14.11.17) // 0.011557421606388275 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.01110428754220523 # SSO:000033632__transglutaminase family protein // 0.009749655678192939 # SSO:000036033__Galactose/methyl galactoside ABC transporter ATP-binding protein MglA (EC 3.6.3.17) // 0.009721559477309495 # SSO:000023826__RNA helicase // 0.008884811982962466 # SSO:000009528__2-dehydro-3-deoxygalactonokinase (EC 2.7.1.58) // 0.008799691592952682 # SSO:000024883__Small multidrug export protein // 0.008459750748423889 # SSO:000025595__Thioesterase // 0.008384061929394062 # SSO:000017940__Hydrogenase maturation protease // 0.00745744184281251 # SSO:000018641__Isochorismatase family protein // 0.007448083261933902 # SSO:000029781__cytochrome b6 // 0.006948216949860203 # SSO:000003227__Glutathione synthetase (EC 6.3.2.3) // 0.006630070082899222
Random Forest,0.848049,0.859798,0.848049,0.852026,0.826988,TP=318 TN=95 FP=48FN=26,SSO:000001046__Arginine deiminase (EC 3.5.3.6) // 0.023082270074420697 # SSO:000001398__Carbamate kinase (EC 2.7.2.2) // 0.015372317441065018 # SSO:000002230__DegV family protein // 0.004478428718934606 # SSO:000010798__Arginine repressor // 0.004307142090786574 # SSO:000043994__3-deoxy-8-phosphooctulonate synthase (EC 2.5.1.55) // 0.0039050134056655963 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.0038534313253526936 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.003764483362624171 # SSO:000012169__Chitinase (EC 3.2.1.14) // 0.003755334336730309 # SSO:000034830__D-ribose pyranase (EC 5.4.99.62) // 0.003650493508761969 # SSO:000012216__Chromate transporter // 0.0035342190418853415 # SSO:000000281__3-deoxy-manno-octulosonate cytidylyltransferase (EC 2.7.7.38) // 0.0032454571441960195 # SSO:000009323__tRNA (Adenine-N(1)-) -methyltransferase (EC 2.1.1.36) // 0.003131252345779944 # SSO:000008104__TonB-dependent receptor // 0.0031180228145373293 # SSO:000036573__LPS-assembly protein LptD // 0.003102515554871919 # SSO:000008604__UDP-3-O-3-hydroxymyristoyl glucosamine N-acyltransferase (EC 2.3.1.-) // 0.002960750528232024 # SSO:000023973__TonB family protein // 0.0029562115996574338 # SSO:000042897__acetoacetate---CoA ligase (EC 6.2.1.16) // 0.0029270709937975602 # SSO:000007416__Septum formation protein Maf // 0.0029175150832949736 # SSO:000001076__Arylamine N-acetyltransferase (EC 2.3.1.5) // 0.0025892239415678435 # SSO:000006532__Protein-L-isoaspartate O-methyltransferase (EC 2.1.1.77) // 0.0025446607287615015
CatBoost,0.852156,0.869112,0.852156,0.85715,0.843551,TP=315 TN=100 FP=51FN=21,SSO:000001046__Arginine deiminase (EC 3.5.3.6) // 1.9568854637373156 # SSO:000001398__Carbamate kinase (EC 2.7.2.2) // 0.7791569184278662 # SSO:000012216__Chromate transporter // 0.1864765188710795 # SSO:000017665__Heavy metal translocating P-type ATPase // 0.18294543702262844 # SSO:000030137__histone deacetylase // 0.16449577383294314 # SSO:000025595__Thioesterase // 0.1264409702242406 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.12577599560051655 # SSO:000000536__AMP nucleosidase (EC 3.2.2.4) // 0.12470073539164353 # SSO:000034830__D-ribose pyranase (EC 5.4.99.62) // 0.11495195249387212 # SSO:000025627__Thioredoxin domain-containing protein // 0.0996654970726757 # SSO:000018768__L-lactate permease // 0.09908515580188526 # SSO:000016742__Fic family protein // 0.09467481328117217 # SSO:000006147__Polyketide synthase // 0.0936198713711654 # SSO:000023973__TonB family protein // 0.08836623810408273 # SSO:000024633__Siderophore-interacting protein // 0.08340015348639118 # SSO:000017027__GTPase // 0.08248545416788544 # SSO:000000308__3-isopropylmalate dehydrogenase (EC 1.1.1.85) // 0.07949378204372974 # SSO:000018202__IS21 family transposase // 0.0778058150345662 # SSO:000007828__Sulfoacetaldehyde acetyltransferase (EC 2.3.3.15) // 0.07448684143964664 # SSO:000012169__Chitinase (EC 3.2.1.14) // 0.07107276560148998


Numer of genomes:616
Number of genomic features :4084
Shape of y:(616,)
Count of 1: 85
Count of 0: 531
Running Decision Tree for phenotype sorbitol--builds_acid_from




Running Random Forest for phenotype sorbitol--builds_acid_from




Running CatBoost for phenotype sorbitol--builds_acid_from




######### Combined report for sorbitol--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.814516,0.846861,0.814516,0.828056,0.680556,TP=93 TN=8 FP=15FN=8,SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 0.19909585384568 # SSO:000012290__CoA-binding protein // 0.08338163322998596 # SSO:000005916__Phosphate acetyltransferase (EC 2.3.1.8) // 0.07325079172598596 # SSO:000030895__iron-sulfur cluster assembly accessory protein // 0.05021052503188143 # SSO:000016806__Flagellar protein // 0.04741383286250956 # SSO:000012512__Cupin domain-containing protein // 0.04098245255761611 # SSO:000001067__Arsenate reductase (EC 1.20.4.1) // 0.03179809156165196 # SSO:000016902__Fructoselysine 3-epimerase // 0.024686544675211263 # SSO:000006915__RNA polymerase sigma-70 factor (EC 2.7.7.6 ) // 0.02371515100539208 # SSO:000002086__DNA polymerase IV (EC 2.7.7.7) // 0.015353039753254291 # SSO:000000685__Acetolactate synthase large subunit (EC 2.2.1.6) // 0.01258874080849142 # SSO:000025785__Transcriptional antiterminator // 0.012135476776464055 # SSO:000013541__Exopolysaccharide biosynthesis protein // 0.011917002259283167 # SSO:000003297__Glycolate oxidase (EC 1.1.3.15) // 0.011033666765493439 # SSO:000041675__WD40 repeat domain-containing protein // 0.009819173715726723 # SSO:000019681__Phage minor capsid protein // 0.009245409523590462 # SSO:000025693__TolA family protein // 0.00792548237072879 # SSO:000009074__cytidine/deoxycytidylate deaminase family protein (EC 3.5.4.3 ) // 0.007750187970555313 # SSO:000029929__fimbrial biogenesis outer membrane usher protein // 0.007685916784101904 # SSO:000036600__Lantibiotic ABC transporter permease // 0.007327515959150352
Random Forest,0.879032,0.869828,0.879032,0.873604,0.690972,TP=102 TN=7 FP=6FN=9,SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 0.011357221489948825 # SSO:000025597__Thioesterase family protein // 0.00921859253653908 # SSO:000020835__OsmC family protein // 0.007099671741357823 # SSO:000012290__CoA-binding protein // 0.006882672646702169 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.005802498559641264 # SSO:000035974__Fructose-6-phosphate aldolase // 0.0057335495034919306 # SSO:000036322__S-methyl-5-thioribose-1-phosphate isomerase (EC 5.3.1.23) // 0.0055217543258183795 # SSO:000012248__Citrate transporter // 0.004849912097780194 # SSO:000010141__AbrB family transcriptional regulator // 0.004536462697441685 # SSO:000001582__Citrate lyase holo-acyl-carrier-protein synthase (EC 2.7.7.61) // 0.0033670575006249323 # SSO:000007779__Succinate dehydrogenase flavoprotein subunit (EC 1.3.99.1) // 0.003094921534486448 # SSO:000005953__Phosphoenolpyruvate carboxykinase ATP (EC 4.1.1.49) // 0.0030271695982539426 # SSO:000007796__Sucrose-6-phosphate hydrolase (EC 3.2.1.26) // 0.003022177256321536 # SSO:000004903__Methylthioribulose-1-phosphate dehydratase (EC 4.2.1.109) // 0.0029269264062875117 # SSO:000044172__hydroxyisourate hydrolase (EC 3.5.2.17) // 0.0028999634094184087 # SSO:000024332__SH3 domain-containing protein // 0.002834163383248171 # SSO:000012972__DUF1801 domain-containing protein // 0.002805860934692398 # SSO:000043098__myo-inosose-2 dehydratase (EC 4.2.1.44) // 0.002693690711999254 # SSO:000033847__zinc-binding dehydrogenase // 0.002691065464141036 # SSO:000000450__6-phospho-beta-galactosidase (EC 3.2.1.85) // 0.002558949811840242
CatBoost,0.870968,0.864223,0.870968,0.867259,0.686343,TP=101 TN=7 FP=7FN=9,SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 1.2730700199472105 # SSO:000025597__Thioesterase family protein // 1.0726278560628908 # SSO:000012290__CoA-binding protein // 0.7169375955343725 # SSO:000006853__Pyruvate oxidase (EC 1.2.3.3) // 0.3173975018452806 # SSO:000012248__Citrate transporter // 0.2944268571482644 # SSO:000005916__Phosphate acetyltransferase (EC 2.3.1.8) // 0.2854523625238517 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.2421862923674282 # SSO:000007796__Sucrose-6-phosphate hydrolase (EC 3.2.1.26) // 0.19988372043088665 # SSO:000010090__ATPase // 0.17716723399425208 # SSO:000035974__Fructose-6-phosphate aldolase // 0.15675944481677784 # SSO:000033847__zinc-binding dehydrogenase // 0.1466278486040665 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.1435728786910579 # SSO:000013242__DoxX family protein // 0.1405594601980362 # SSO:000012389__Conjugal transfer protein TraA // 0.13206627862420006 # SSO:000033834__xylulokinase // 0.12940698050789076 # SSO:000016611__FTR1 family protein // 0.11642053229941705 # SSO:000024925__Sodium-dependent transporter // 0.10158573844280087 # SSO:000021359__Peptidase T (EC 3.4.11.-) // 0.0976323782613874 # SSO:000009809__ABC transporter // 0.09477052047977089 # SSO:000002765__Ferrous iron transport protein A // 0.09446448608006552


Numer of genomes:1024
Number of genomic features :4395
Shape of y:(1024,)
Count of 1: 466
Count of 0: 558
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:877
Number of genomic features :4207
Shape of y:(877,)
Count of 1: 106
Count of 0: 771
Running Decision Tree for phenotype myo-inositol--builds_acid_from




Running Random Forest for phenotype myo-inositol--builds_acid_from




Running CatBoost for phenotype myo-inositol--builds_acid_from




######### Combined report for myo-inositol--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.880682,0.882912,0.880682,0.881756,0.746661,TP=142 TN=13 FP=11FN=10,SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.2981399655549341 # SSO:000030039__glycoside hydrolase family protein // 0.05226463900997849 # SSO:000010153__Acetolactate synthase // 0.025075865080874758 # SSO:000002472__Endonuclease III (EC 4.2.99.18) // 0.02462673170362834 # SSO:000002765__Ferrous iron transport protein A // 0.021702159776117946 # SSO:000024258__Class II aldolase // 0.016604160434806677 # SSO:000021037__PAS domain-containing protein // 0.015330097174670194 # SSO:000011942__Carbon-nitrogen hydrolase family protein // 0.014329868940958012 # SSO:000005967__Phosphoglucomutase (EC 5.4.2.2) // 0.014315225425827706 # SSO:000018905__Leucine-rich repeat protein // 0.01417576593426575 # SSO:000031351__peptidase S24 // 0.0141186534518214 # SSO:000030067__glycosyltransferase family 2 protein // 0.013287248094429223 # SSO:000003526__Hydroxymethylglutaryl-CoA reductase (EC 1.1.1.34) // 0.013078229154920821 # SSO:000012567__Cysteine ABC transporter substrate-binding protein // 0.012311730972782785 # SSO:000018191__IS110 family transposase // 0.011886432016310865 # SSO:000003169__Glutamine synthetase (EC 6.3.1.2) // 0.008970688318257784 # SSO:000000670__Acetate kinase (EC 2.7.2.1) // 0.008755408995990534 # SSO:000043564__precorrin-2 dehydrogenase (EC 1.3.1.76) // 0.008100412379708384 # SSO:000042350__L-lysine 6-transaminase (EC 2.6.1.36) // 0.007650683711974419 # SSO:000023757__Pyrimidine nucleoside transporter // 0.007359445343744302
Random Forest,0.909091,0.903361,0.909091,0.905348,0.763001,TP=147 TN=13 FP=6FN=10,SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.01888677850832584 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.018389755735837936 # SSO:000043098__myo-inosose-2 dehydratase (EC 4.2.1.44) // 0.013931731157163086 # SSO:000043034__inositol 2-dehydrogenase (EC 1.1.1.18) // 0.009256127805218286 # SSO:000010813__Leucyl aminopeptidase (EC 3.4.11.10) // 0.006772153814540092 # SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.0064406216198254675 # SSO:000000843__Agmatinase (EC 3.5.3.11) // 0.005239894950440196 # SSO:000007776__Succinate dehydrogenase cytochrome b-556 subunit // 0.004252702910634996 # SSO:000007779__Succinate dehydrogenase flavoprotein subunit (EC 1.3.99.1) // 0.0041893637756151205 # SSO:000018283__IclR-family transcriptional regulator // 0.0038662848288043674 # SSO:000012759__DNA gyrase inhibitor YacG // 0.0038509308156769785 # SSO:000004504__Lipoyl synthase (EC 2.8.1.8) // 0.0038366706407347655 # SSO:000005945__Phosphatidylserine decarboxylase (EC 4.1.1.65) // 0.0037096883662112684 # SSO:000001541__Choline dehydrogenase (EC 1.1.99.1) // 0.0034594126677249693 # SSO:000044172__hydroxyisourate hydrolase (EC 3.5.2.17) // 0.0033779708016460856 # SSO:000006879__Quinone oxidoreductase (EC 1.6.5.5) // 0.00323420275629657 # SSO:000001635__Cobalt-precorrin-6A reductase (EC 1.3.1.54) // 0.00319770182689401 # SSO:000006208__Precorrin-3B C(17)-methyltransferase (EC 2.1.1.131) // 0.003167183607866802 # SSO:000006206__Precorrin-2 C(20)-methyltransferase (EC 2.1.1.130) // 0.003153794244297893 # SSO:000042364__uroporphyrinogen-III C-methyltransferase (EC 2.1.1.107) // 0.0030961945244723068
CatBoost,0.909091,0.909091,0.909091,0.909091,0.799943,TP=145 TN=15 FP=8FN=8,SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 1.7159896782441177 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 1.4074101680092939 # SSO:000002315__Dihydropyrimidinase (EC 3.5.2.2) // 0.19914648359556744 # SSO:000043098__myo-inosose-2 dehydratase (EC 4.2.1.44) // 0.1925545540938052 # SSO:000043034__inositol 2-dehydrogenase (EC 1.1.1.18) // 0.154925063240817 # SSO:000027090__beta-glucosidase (EC 3.2.1.21) // 0.14984815387710038 # SSO:000025362__TM2 domain containing protein // 0.13915099411317244 # SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.13183603835636507 # SSO:000022980__PspC domain-containing protein // 0.12647227265746555 # SSO:000007627__Spermidine synthase (EC 2.5.1.16) // 0.11564377490672002 # SSO:000019538__Metallophosphoesterase // 0.10919024823198352 # SSO:000002765__Ferrous iron transport protein A // 0.10845573387799244 # SSO:000030039__glycoside hydrolase family protein // 0.10097033845752028 # SSO:000002472__Endonuclease III (EC 4.2.99.18) // 0.09635550495531826 # SSO:000006208__Precorrin-3B C(17)-methyltransferase (EC 2.1.1.131) // 0.09408920995475557 # SSO:000013035__DedA family protein // 0.09391249974801058 # SSO:000031196__nucleotidyltransferase family protein // 0.08994656748746131 # SSO:000042404__cobyrinate ac-diamide synthase (EC 6.3.5.11) // 0.08874429518032555 # SSO:000007180__Rod shape-determining protein RodA // 0.08840717135944733 # SSO:000008678__Ubiquinol-cytochrome C reductase iron-sulfur subunit (EC 1.10.2.2) // 0.08445905227275705


Numer of genomes:908
Number of genomic features :4334
Shape of y:(908,)
Count of 1: 198
Count of 0: 710
Running Decision Tree for phenotype L-rhamnose--builds_acid_from




Running Random Forest for phenotype L-rhamnose--builds_acid_from




Running CatBoost for phenotype L-rhamnose--builds_acid_from




######### Combined report for L-rhamnose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.774725,0.802627,0.774725,0.784695,0.733524,TP=114 TN=27 FP=27FN=14,SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.24402284137791436 # SSO:000037094__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit M // 0.07999843344604333 # SSO:000018777__L-rhamnose isomerase // 0.07851608983773062 # SSO:000000624__ATP-dependent protease subunit HslV (EC 3.4.25.2) // 0.04075129485165623 # SSO:000000035__14-dihydroxy-2-naphthoate polyprenyltransferase (EC 2.5.1.74) // 0.03996036538934529 # SSO:000000045__1-deoxy-D-xylulose 5-phosphate reductoisomerase (EC 1.1.1.267) // 0.031176849066823494 # SSO:000009334__tRNA (cytidine(34)-2'-O)-methyltransferase (EC 2.1.1.207) // 0.022358222590020554 # SSO:000012978__DUF1934 domain-containing protein // 0.022110551219229186 # SSO:000002316__Dihydroxy-acid dehydratase (EC 4.2.1.9) // 0.021469067287977945 # SSO:000007180__Rod shape-determining protein RodA // 0.017492376276925452 # SSO:000012386__Coniferyl aldehyde dehydrogenase (EC 1.2.1.68) // 0.016864673097196557 # SSO:000025554__Tetratricopeptide repeat protein // 0.014275244296193638 # SSO:000043513__adenosylmethionine decarboxylase (EC 4.1.1.50) // 0.014226272657147996 # SSO:000000428__5-methyltetrahydrofolate--homocysteine methyltransferase (EC 2.1.1.13) // 0.01381511119388719 # SSO:000025119__Sterol binding protein // 0.01068127338615349 # SSO:000000449__6-phospho-3-hexuloisomerase (EC 5.3.1.27) // 0.009645962826038721 # SSO:000012328__Colanic acid biosynthesis acetyltransferase WcaF (EC 2.3.1.-) // 0.007964986289222256 # SSO:000000379__4-hydroxyphenylacetate decarboxylase small subunit (EC 4.1.1.83) // 0.007735959508066978 # SSO:000000270__3-aminobutyryl-CoA ammonia-lyase (EC 4.3.1.14) // 0.007690344395706669 # SSO:000012982__DUF2383 domain-containing protein // 0.007667545791754962
Random Forest,0.802198,0.809399,0.802198,0.805375,0.733956,TP=121 TN=25 FP=20FN=16,SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.020469306490675177 # SSO:000024110__Rhamnulokinase (EC 2.7.1.5) // 0.015789821293240427 # SSO:000018777__L-rhamnose isomerase // 0.015137643084834621 # SSO:000007040__Rhamnulose-1-phosphate aldolase (EC 4.1.2.19) // 0.010444611451132587 # SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.0049626296739517925 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.004191406543817616 # SSO:000037093__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit L // 0.00401412290759152 # SSO:000005727__Pantoate--beta-alanine ligase (EC 6.3.2.1) // 0.0040102898585080565 # SSO:000000936__Altronate dehydratase (EC 4.2.1.7) // 0.003716892868197699 # SSO:000004083__Ketol-acid reductoisomerase (EC 1.1.1.86) // 0.0035628953995242804 # SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.0034990858116137305 # SSO:000006304__Prephenate dehydratase (EC 4.2.1.51) // 0.003361930211810461 # SSO:000037094__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit M // 0.0032830638360019063 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.003259617385068022 # SSO:000004670__Malate dehydrogenase (EC 1.1.1.37) // 0.0031288718484146793 # SSO:000000308__3-isopropylmalate dehydrogenase (EC 1.1.1.85) // 0.002834468171199685 # SSO:000000115__2-C-methyl-D-erythritol 24-cyclodiphosphate synthase (EC 4.6.1.12) // 0.002764517771940835 # SSO:000031065__methionine synthase // 0.002693430038463616 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.0026877835106891186 # SSO:000044172__hydroxyisourate hydrolase (EC 3.5.2.17) // 0.0026659605964814196
CatBoost,0.840659,0.845101,0.840659,0.842615,0.784726,TP=125 TN=28 FP=16FN=13,SSO:000018777__L-rhamnose isomerase // 1.4262763354472008 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 1.3850023168992824 # SSO:000024110__Rhamnulokinase (EC 2.7.1.5) // 0.5624911571527762 # SSO:000002988__GTP pyrophosphokinase (EC 2.7.6.5) // 0.25366031057934296 # SSO:000000387__4-oxalomesaconate tautomerase (EC 5.3.2.8) // 0.23156026858857034 # SSO:000043940__dihydrolipoyllysine-residue acetyltransferase (EC 2.3.1.12) // 0.20788905532555724 # SSO:000007801__Sulfatase (EC 3.1.6.-) // 0.14602007281597237 # SSO:000037094__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit M // 0.12376118603663529 # SSO:000006304__Prephenate dehydratase (EC 4.2.1.51) // 0.11447198728532829 # SSO:000003153__Glutamate racemase (EC 5.1.1.3) // 0.11206484800651716 # SSO:000042727__UDP-glucose---hexose-1-phosphate uridylyltransferase (EC 2.7.7.12) // 0.10312785417577364 # SSO:000025554__Tetratricopeptide repeat protein // 0.0978959553841962 # SSO:000010324__Adhesin // 0.09252726834614562 # SSO:000006855__Pyruvatephosphate dikinase (EC 2.7.9.1) // 0.09189501229692139 # SSO:000033632__transglutaminase family protein // 0.088255135238309 # SSO:000017946__Hydrolase // 0.08687105565114606 # SSO:000000847__Alanine dehydrogenase (EC 1.4.1.1) // 0.081357682381148 # SSO:000004699__Malto-oligosyltrehalose trehalohydrolase (EC 3.2.1.141) // 0.07911982881565677 # SSO:000018973__Lipopolysaccharide biosynthesis protein // 0.0775991421456274 # SSO:000002359__Diphthine--ammonia ligase (EC 6.3.1.14) // 0.068093081217638


Numer of genomes:1231
Number of genomic features :4469
Shape of y:(1231,)
Count of 1: 631
Count of 0: 600
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:961
Number of genomic features :4132
Shape of y:(961,)
Count of 1: 99
Count of 0: 862
Running Decision Tree for phenotype D-arabitol--builds_acid_from




Running Random Forest for phenotype D-arabitol--builds_acid_from




Running CatBoost for phenotype D-arabitol--builds_acid_from




######### Combined report for D-arabitol--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.906736,0.898956,0.906736,0.902087,0.704769,TP=166 TN=9 FP=7FN=11,SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.11982454962928252 # SSO:000000864__Aldehyde dehydrogenase (EC 1.2.1.3) // 0.07668225186050821 # SSO:000017037__Galactitol-1-phosphate 5-dehydrogenase (EC 1.1.1.251) // 0.049364520100272244 # SSO:000010243__Acyl-ACP thioesterase // 0.04478098662338088 # SSO:000010553__Amidohydrolase // 0.04144615170408087 # SSO:000011933__Carbohydrate-binding domain containing protein // 0.04040384864261364 # SSO:000039286__Sugar O-acetyltransferase // 0.02946275734558701 # SSO:000013134__Dihydroorotate dehydrogenase // 0.02831753939828973 # SSO:000019395__Mechanosensitive ion channel family protein // 0.028126983684286046 # SSO:000025747__Transcription antiterminator // 0.02478133215979231 # SSO:000024157__Ribose 5-phosphate isomerase (EC 5.3.1.6) // 0.02426880950466272 # SSO:000003277__Glycine oxidase ThiO (EC 1.4.3.19) // 0.023784031541097667 # SSO:000003240__Glycerate kinase (EC 2.7.1.31) // 0.02101670809289501 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.01972245388499662 # SSO:000000406__5-(carboxyamino)imidazole ribonucleotide synthase (EC 6.3.4.18) // 0.015155037265474588 # SSO:000009416__transcriptional regulator NanR // 0.014501725668148932 # SSO:000000587__ATP-dependent DNA ligase (EC 6.5.1.1) // 0.013234267368466796 # SSO:000007177__Rod shape-determining protein MreB // 0.012390744923245838 # SSO:000010617__Aminotransferase // 0.011825932925672758 # SSO:000016903__Fructoselysine 6-kinase // 0.010276950180617925
Random Forest,0.948187,0.951018,0.948187,0.940279,0.75,TP=173 TN=10 FP=0FN=10,SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.00528010682091244 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.005278177331976824 # SSO:000017037__Galactitol-1-phosphate 5-dehydrogenase (EC 1.1.1.251) // 0.005177337381026972 # SSO:000043973__adenosylcobinamide-GDP ribazoletransferase (EC 2.7.8.26) // 0.004852866204588223 # SSO:000001968__Cytosine permease // 0.004659753302303336 # SSO:000006208__Precorrin-3B C(17)-methyltransferase (EC 2.1.1.131) // 0.004507214010286379 # SSO:000033834__xylulokinase // 0.004448684210042324 # SSO:000000864__Aldehyde dehydrogenase (EC 1.2.1.3) // 0.00429739307369378 # SSO:000042364__uroporphyrinogen-III C-methyltransferase (EC 2.1.1.107) // 0.003622415674340904 # SSO:000002255__Deoxyribose-phosphate aldolase (EC 4.1.2.4) // 0.003552661687168771 # SSO:000006176__Porphobilinogen synthase (EC 4.2.1.24) // 0.003473660941266649 # SSO:000000584__ATP-dependent DNA helicase recQ (EC 3.6.1.- ) // 0.003448077887716808 # SSO:000011135__Bile acid:sodium symporter // 0.003375185386680237 # SSO:000042404__cobyrinate ac-diamide synthase (EC 6.3.5.11) // 0.0032563657658554995 # SSO:000005188__NAD-dependent malic enzyme (EC 1.1.1.38) // 0.0032186914139429117 # SSO:000018792__L-threonine 3-dehydrogenase // 0.0031385676147711664 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.0030236542660207377 # SSO:000000896__Allantoate amidohydrolase (EC 3.5.3.9) // 0.0028271022253641377 # SSO:000018454__Inositol monophosphatase // 0.002815493624935897 # SSO:000008887__Uroporphyrinogen-III synthase (EC 4.2.1.75) // 0.002792839082808153
CatBoost,0.943005,0.939239,0.943005,0.937525,0.76922,TP=171 TN=11 FP=2FN=9,SSO:000017037__Galactitol-1-phosphate 5-dehydrogenase (EC 1.1.1.251) // 0.7052443695120687 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.5522417080668015 # SSO:000002255__Deoxyribose-phosphate aldolase (EC 4.1.2.4) // 0.32993630199973206 # SSO:000005188__NAD-dependent malic enzyme (EC 1.1.1.38) // 0.28156238207889756 # SSO:000010300__Adenine deaminase (EC 3.5.4.2) // 0.2714927250988376 # SSO:000005171__NAD(P)H-hydrate epimerase (EC 5.1.99.6) // 0.2660459623503621 # SSO:000001968__Cytosine permease // 0.25076688861361945 # SSO:000033834__xylulokinase // 0.23246142558428642 # SSO:000000864__Aldehyde dehydrogenase (EC 1.2.1.3) // 0.1727345233375241 # SSO:000010245__Acyl-CoA dehydrogenase // 0.1513736515359185 # SSO:000020765__Oligopeptide ABC transporter substrate-binding protein // 0.1426511969586861 # SSO:000009045__bacteriocin immunity protein // 0.13949526090838427 # SSO:000010553__Amidohydrolase // 0.13661824533888414 # SSO:000016827__Flavocytochrome c // 0.13254463481393358 # SSO:000000355__4-carboxymuconolactone decarboxylase (EC 4.1.1.44) // 0.12132474414204299 # SSO:000024007__Replication protein // 0.11991350993940797 # SSO:000010855__Asparaginase // 0.1179098042131718 # SSO:000002619__Exodeoxyribonuclease III (EC 3.1.11.2) // 0.11373360706309454 # SSO:000042957__acetyl-CoA carboxylase (EC 6.4.1.2) // 0.11369437435631469 # SSO:000018454__Inositol monophosphatase // 0.10985319442660886


Numer of genomes:1239
Number of genomic features :4514
Shape of y:(1239,)
Count of 1: 417
Count of 0: 822
Running Decision Tree for phenotype L-arabinose--builds_acid_from




Running Random Forest for phenotype L-arabinose--builds_acid_from




Running CatBoost for phenotype L-arabinose--builds_acid_from




######### Combined report for L-arabinose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.810484,0.822317,0.810484,0.814116,0.801891,TP=141 TN=60 FP=30FN=17,SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.3146854576419568 # SSO:000000817__Adenosylhomocysteinase (EC 3.3.1.1) // 0.08949608846256263 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.0426119222323393 # SSO:000000142__2-dehydro-3-deoxy-6-phosphogalactonate aldolase (EC 4.1.2.21) // 0.02365136889087136 # SSO:000042934__methylcrotonoyl-CoA carboxylase (EC 6.4.1.4) // 0.023530768680487747 # SSO:000020835__OsmC family protein // 0.020562007594361213 # SSO:000010516__Endo-14-beta-xylanase (EC 3.2.1.8) // 0.01905183596810514 # SSO:000011397__Bmp family protein // 0.018434927533379438 # SSO:000018283__IclR-family transcriptional regulator // 0.015350318330587339 # SSO:000031462__plasmid stabilization protein // 0.013471411225120987 # SSO:000002250__Deoxyribodipyrimidine photolyase (EC 4.1.99.3) // 0.012390149512171248 # SSO:000000870__Aldose 1-epimerase (EC 5.1.3.3) // 0.01045476481981831 # SSO:000008105__TonB-dependent siderophore receptor // 0.009707063043344209 # SSO:000023738__Pyridine nucleotide-disulfide oxidoreductase // 0.007501731375516743 # SSO:000010058__ATP-dependent helicase HrpB // 0.006897632212300135 # SSO:000002031__D-lactate dehydrogenase (EC 1.1.1.28) // 0.006505863757192412 # SSO:000016590__FMN-binding negative transcriptional regulator // 0.006075325078693472 # SSO:000037083__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit B // 0.005901842051932948 # SSO:000019294__Maleate cis-trans isomerase // 0.005847930770676376 # SSO:000043274__dimethylargininase (EC 3.5.3.18) // 0.005751517173389408
Random Forest,0.782258,0.801223,0.782258,0.787603,0.777854,TP=135 TN=59 FP=36FN=18,SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.028555759319523203 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.018112359896129953 # SSO:000029073__Xylose isomerase // 0.015644008058508196 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.012479380006666872 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.00864023188344567 # SSO:000033834__xylulokinase // 0.007923749600724518 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.007507414530637535 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.006147791240673109 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.006030112938707643 # SSO:000016921__Fumarylacetoacetate hydrolase family protein // 0.004789482386187652 # SSO:000010516__Endo-14-beta-xylanase (EC 3.2.1.8) // 0.0040630897829309395 # SSO:000013098__Dienelactone hydrolase family protein // 0.00396441182559511 # SSO:000029337__alpha-xylosidase (EC 3.2.1.177) // 0.0038628709885485256 # SSO:000039420__Tagaturonate reductase (EC 1.1.1.58) // 0.003785656147853647 # SSO:000000817__Adenosylhomocysteinase (EC 3.3.1.1) // 0.0034642684767306926 # SSO:000000936__Altronate dehydratase (EC 4.2.1.7) // 0.0034321033687320754 # SSO:000003118__Glucose 1-dehydrogenase (EC 1.1.1.47) // 0.0032789487929192574 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.003074036161825214 # SSO:000008019__Thiazole synthase (EC 2.8.1.10) // 0.003002485904819007 # SSO:000007156__Ribulokinase (EC 2.7.1.16) // 0.002895547574921695
CatBoost,0.850806,0.859299,0.850806,0.853301,0.845409,TP=147 TN=64 FP=24FN=13,SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 3.402245833326693 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.5099410105157787 # SSO:000029073__Xylose isomerase // 0.20188121777228246 # SSO:000020835__OsmC family protein // 0.1570278613242039 # SSO:000043061__asparagine---tRNA ligase (EC 6.1.1.22) // 0.13891201733173658 # SSO:000016921__Fumarylacetoacetate hydrolase family protein // 0.11349421930438708 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.1003189363569518 # SSO:000011397__Bmp family protein // 0.09840389860510491 # SSO:000003095__Glucarate dehydratase (EC 4.2.1.40) // 0.09349607355468502 # SSO:000033686__trehalose-phosphatase // 0.08490507366484855 # SSO:000006900__RNA polymerase sigma factor // 0.0827179334380049 # SSO:000008020__Thiazole tautomerase TenI (EC 5.3.99.10) // 0.08210308455311612 # SSO:000017027__GTPase // 0.07982626909794244 # SSO:000012628__Cytochrome c oxidase subunit I // 0.07208539884873723 # SSO:000004554__Low-specificity L-threonine aldolase (EC 4.1.2.48) // 0.06830703906560423 # SSO:000004128__L-aspartate oxidase (EC 1.4.3.16) // 0.06470982736979347 # SSO:000011951__Carboxylate-amine ligase // 0.06420313086651905 # SSO:000013541__Exopolysaccharide biosynthesis protein // 0.0584279203887509 # SSO:000037088__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit G // 0.05804705969215505 # SSO:000035439__DUF402 domain-containing protein // 0.05651466092245543


Numer of genomes:1446
Number of genomic features :4653
Shape of y:(1446,)
Count of 1: 780
Count of 0: 666
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:1199
Number of genomic features :4542
Shape of y:(1199,)
Count of 1: 932
Count of 0: 267
Running Decision Tree for phenotype D-glucose--builds_acid_from




Running Random Forest for phenotype D-glucose--builds_acid_from




Running CatBoost for phenotype D-glucose--builds_acid_from




######### Combined report for D-glucose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.7375,0.738849,0.7375,0.738162,0.663573,TP=32 TN=145 FP=31FN=32,SSO:000030003__glucose-6-phosphate dehydrogenase // 0.16133290980641743 # SSO:000000701__Acetyl-CoA C-acyltransferase (EC 2.3.1.16) // 0.10420177415277102 # SSO:000000864__Aldehyde dehydrogenase (EC 1.2.1.3) // 0.047282817586902774 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.043717183686514775 # SSO:000012290__CoA-binding protein // 0.03164939433858394 # SSO:000017884__HoxN/HupN/NixA family nickel/cobalt transporter // 0.02342527359608842 # SSO:000010272__Acyl-CoA thioesterase // 0.021588583881530007 # SSO:000017665__Heavy metal translocating P-type ATPase // 0.02121872598967995 # SSO:000002977__GMP reductase (EC 1.7.1.7) // 0.018981007932287266 # SSO:000000975__Ammonium transporter // 0.018815597002473018 # SSO:000018921__Pullulanase (EC 3.2.1.41) // 0.018217194348634242 # SSO:000029193__Zinc transporter ZupT // 0.014835063243804878 # SSO:000004144__L-fucose isomerase (EC 5.3.1.25) // 0.014693077528457794 # SSO:000006028__Phosphopentomutase (EC 5.4.2.7) // 0.013707734151090462 # SSO:000044159__glycine C-acetyltransferase (EC 2.3.1.29) // 0.013021240060049826 # SSO:000042612__aspartate kinase (EC 2.7.2.4) // 0.01250561756960735 # SSO:000010295__Acyltransferase family protein // 0.010580707930792611 # SSO:000010281__Acyl-acyl carrier protein thioesterase // 0.010151814752623094 # SSO:000036190__Glycosyltransferase family 1 // 0.009061428021477885 # SSO:000000982__Anaerobic glycerol-3-phosphate dehydrogenase subunit C (EC 1.1.5.3) // 0.008684348106592233
Random Forest,0.791667,0.789598,0.791667,0.790572,0.725854,TP=37 TN=153 FP=26FN=24,SSO:000030003__glucose-6-phosphate dehydrogenase // 0.006133574553050175 # SSO:000000864__Aldehyde dehydrogenase (EC 1.2.1.3) // 0.005612688258027594 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.005327394475497746 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.004930286838525893 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.004580244023745694 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.00417159365086671 # SSO:000000075__23-bisphosphoglycerate-independent phosphoglycerate mutase (EC 5.4.2.12) // 0.003970522032514525 # SSO:000034571__Aldose 1-epimerase family protein // 0.0038216117584873323 # SSO:000005727__Pantoate--beta-alanine ligase (EC 6.3.2.1) // 0.0037921719936829315 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.0036203647452339375 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.0034693582282248002 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.003292884438253521 # SSO:000003114__Glucosamine-6-phosphate deaminase (EC 3.5.99.6) // 0.0032383858795902594 # SSO:000006176__Porphobilinogen synthase (EC 4.2.1.24) // 0.003160835114171524 # SSO:000034025__2Fe-2S iron-sulfur cluster binding domain-containing protein // 0.0031486733428328154 # SSO:000028881__Uroporphyrinogen decarboxylase // 0.00295140129271116 # SSO:000021631__Phosphocarrier protein HPr // 0.0029472315667214556 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.002932977595324395 # SSO:000004958__MotA/TolQ/ExbB proton channel family protein // 0.0028997603672334307 # SSO:000006816__Pyridoxine 5'-phosphate synthase (EC 2.6.99.2) // 0.0028259714751235057
CatBoost,0.791667,0.793857,0.791667,0.792702,0.736077,TP=39 TN=151 FP=24FN=26,SSO:000030003__glucose-6-phosphate dehydrogenase // 0.5264207807487163 # SSO:000000864__Aldehyde dehydrogenase (EC 1.2.1.3) // 0.39572249658137104 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.3674496029744014 # SSO:000009045__bacteriocin immunity protein // 0.19925764146672287 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.17358603866951272 # SSO:000029128__YeeE/YedE family protein // 0.1706464692863292 # SSO:000034571__Aldose 1-epimerase family protein // 0.16512765533393328 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.15808796376339482 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.13774374993036764 # SSO:000029073__Xylose isomerase // 0.13312422633872695 # SSO:000009809__ABC transporter // 0.121534831421005 # SSO:000021641__Phosphoenolpyruvate-protein phosphotransferase // 0.1105408529051135 # SSO:000023917__Recombinase // 0.10693143261913986 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.10412946254079977 # SSO:000017481__GtrA family protein // 0.09450220350870094 # SSO:000003478__Homocysteine S-methyltransferase (EC 2.1.1.10) // 0.0911685390312136 # SSO:000005727__Pantoate--beta-alanine ligase (EC 6.3.2.1) // 0.08729480495964188 # SSO:000008287__Transketolase (EC 2.2.1.1) // 0.08448645542326833 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.08356151649375129 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.08311673175525437


Numer of genomes:148
Number of genomic features :3199
Shape of y:(148,)
Count of 1: 39
Count of 0: 109
Running Decision Tree for phenotype malonate--assimilation




Running Random Forest for phenotype malonate--assimilation




Running CatBoost for phenotype malonate--assimilation




######### Combined report for malonate--assimilation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.7,0.8,0.7,0.712,0.753968,TP=13 TN=8 FP=8FN=1,SSO:000008385__Triphosphoribosyl-dephospho-CoA synthase (EC 2.4.2.52) // 0.27589743580880793 # SSO:000003114__Glucosamine-6-phosphate deaminase (EC 3.5.99.6) // 0.1778609194775826 # SSO:000035123__Class A beta-lactamase (EC 3.5.2.6) // 0.07558582972064169 # SSO:000008954__Xaa-Pro aminopeptidase (EC 3.4.11.9) // 0.06871042898486686 # SSO:000007801__Sulfatase (EC 3.1.6.-) // 0.04764595085733296 # SSO:000018237__ISNCY family transposase // 0.034965280834075164 # SSO:000012267__Clostripain // 0.03463514454893764 # SSO:000042599__protocatechuate 34-dioxygenase (EC 1.13.11.3) // 0.029382045031149107 # SSO:000003447__Histidinol-phosphatase (EC 3.1.3.15) // 0.027091079652055264 # SSO:000031351__peptidase S24 // 0.019405015339974702 # SSO:000008429__Twitching motility protein PilT // 0.006893032190008511 # SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.0 # SSO:000013192__DnaD domain protein // 0.0 # SSO:000029941__flagellin domain-containing protein // 0.0 # SSO:000031204__opacity associated protein A // 0.0 # SSO:000001489__Cell filamentation protein fic // 0.0 # SSO:000009606__3-carboxyethylcatechol 23-dioxygenase (EC 1.13.11.16) // 0.0 # SSO:000000376__4-hydroxyphenylacetate 3-monooxygenase (EC 1.14.14.9) // 0.0 # SSO:000002791__Flagellar assembly protein FliH // 0.0 # SSO:000029327__alpha-L-rhamnosidase // 0.0
Random Forest,0.766667,0.760227,0.766667,0.762517,0.706349,TP=18 TN=5 FP=3FN=4,SSO:000008385__Triphosphoribosyl-dephospho-CoA synthase (EC 2.4.2.52) // 0.009879721621387972 # SSO:000000159__2-hydroxy-3-oxopropionate reductase (EC 1.1.1.60) // 0.009519877139666117 # SSO:000035123__Class A beta-lactamase (EC 3.5.2.6) // 0.006377160355267171 # SSO:000018973__Lipopolysaccharide biosynthesis protein // 0.006369886316957338 # SSO:000010350__AhpC/TSA family protein // 0.005297949070053076 # SSO:000017510__HAD family hydrolase // 0.004482391638497111 # SSO:000018454__Inositol monophosphatase // 0.0041044549391818985 # SSO:000023737__PyrBI operon leader peptide // 0.0036249077904018548 # SSO:000009621__3-hydroxybutyrate dehydrogenase (EC 1.1.1.30) // 0.003541958588240491 # SSO:000042143__Lipopolysaccharide heptosyltransferase I // 0.003411484755010983 # SSO:000018447__Inorganic phosphate transporter // 0.0033137269326093703 # SSO:000012992__DUF350 domain-containing protein // 0.0032581765060023167 # SSO:000008131__Trans-aconitate 2-methyltransferase (EC 2.1.1.144) // 0.0032128636107663197 # SSO:000025473__Tail fiber protein // 0.003163686673812566 # SSO:000012629__Cytochrome c peroxidase (EC 1.11.1.5) // 0.0030330263439811117 # SSO:000005895__Phenylacetic acid degradation protein PaaY // 0.0030017461664996726 # SSO:000042174__acetylornithine transaminase // 0.002829843336306625 # SSO:000042815__indolepyruvate decarboxylase (EC 4.1.1.74) // 0.002740384979569529 # SSO:000024959__Sodium:solute symporter family protein // 0.0027131715917519394 # SSO:000029830__diguanylate phosphodiesterase // 0.0026827734886802758
CatBoost,0.8,0.8,0.8,0.8,0.761905,TP=18 TN=6 FP=3FN=3,SSO:000008385__Triphosphoribosyl-dephospho-CoA synthase (EC 2.4.2.52) // 1.1618116956525406 # SSO:000000159__2-hydroxy-3-oxopropionate reductase (EC 1.1.1.60) // 0.7208233711785673 # SSO:000018973__Lipopolysaccharide biosynthesis protein // 0.4849150258771607 # SSO:000035123__Class A beta-lactamase (EC 3.5.2.6) // 0.35147822052691413 # SSO:000004605__Lysophospholipase (EC 3.1.1.5) // 0.2764042660494936 # SSO:000003114__Glucosamine-6-phosphate deaminase (EC 3.5.99.6) // 0.18770417064255557 # SSO:000008378__Trimethylamine-N-oxide reductase TorA (EC 1.7.2.3) // 0.17156340397880465 # SSO:000018454__Inositol monophosphatase // 0.17038014003975296 # SSO:000004694__Malonyl-CoA decarboxylase (EC 4.1.1.9) // 0.16099733456289653 # SSO:000012147__Chemotaxis protein CheA (EC 2.7.3.-) // 0.14856233756602888 # SSO:000017510__HAD family hydrolase // 0.14459414460831752 # SSO:000020269__NA+/H+ ANTIPORTER NHAC // 0.1444672102516601 # SSO:000033163__sel1-like repeat protein // 0.130971614402556 # SSO:000001940__Cytochrome d ubiquinol oxidase subunit II (EC 1.10.3.-) // 0.11132131095884333 # SSO:000010350__AhpC/TSA family protein // 0.10436343087612912 # SSO:000000817__Adenosylhomocysteinase (EC 3.3.1.1) // 0.10126668673577766 # SSO:000029232__Zn-finger protein // 0.10112312955715892 # SSO:000016871__Formate dehydrogenase( EC:1.2.1.2 ) // 0.09983507144468548 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.09728505183643235 # SSO:000042174__acetylornithine transaminase // 0.09177413780087082


Numer of genomes:84
Number of genomic features :2688
Shape of y:(84,)
Count of 1: 17
Count of 0: 67
Running Decision Tree for phenotype palatinose--builds_acid_from




Running Random Forest for phenotype palatinose--builds_acid_from




Running CatBoost for phenotype palatinose--builds_acid_from




######### Combined report for palatinose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.941176,0.945098,0.941176,0.936308,0.833333,TP=14 TN=2 FP=0FN=1,SSO:000023737__PyrBI operon leader peptide // 0.17237823510720826 # SSO:000005134__N-hydroxyarylamine O-acetyltransferase (EC 2.3.1.118) // 0.15592544710191766 # SSO:000005992__Phosphomannomutase (EC 5.4.2.8) // 0.10892141794489578 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.05572523881347408 # SSO:000002607__Excinuclease ABC subunit A // 0.04603329092170681 # SSO:000020876__Outer membrane lipoprotein Blc // 0.03265998707175177 # SSO:000043841__acyl-homoserine-lactone acylase (EC 3.5.1.97) // 0.016719929274753836 # SSO:000029941__flagellin domain-containing protein // 0.0 # SSO:000005945__Phosphatidylserine decarboxylase (EC 4.1.1.65) // 0.0 # SSO:000018213__IS630 family transposase // 0.0 # SSO:000023999__RepA protein // 0.0 # SSO:000012882__DNA topoisomerase IV subunit B // 0.0 # SSO:000009632__3-methyladenine DNA glycosylase // 0.0 # SSO:000001489__Cell filamentation protein fic // 0.0 # SSO:000031204__opacity associated protein A // 0.0 # SSO:000021269__Patatin family protein // 0.0 # SSO:000009606__3-carboxyethylcatechol 23-dioxygenase (EC 1.13.11.16) // 0.0 # SSO:000002791__Flagellar assembly protein FliH // 0.0 # SSO:000029327__alpha-L-rhamnosidase // 0.0 # SSO:000002915__Formylmethanofuran dehydrogenase subunit B (EC 1.2.99.5) // 0.0
Random Forest,0.882353,0.882353,0.882353,0.882353,0.797619,TP=13 TN=2 FP=1FN=1,SSO:000019394__Mechanosensitive ion channel // 0.008220657099251724 # SSO:000016827__Flavocytochrome c // 0.007951499871575613 # SSO:000025121__Sterol desaturase family protein // 0.006712158279887397 # SSO:000005271__Na(+)-translocating NADH-quinone reductase subunit A (EC 1.6.5.-) // 0.0066158390364888355 # SSO:000037107__NADH:ubiquinone reductase (Na(+)-transporting) subunit F (EC 7.2.1.1) // 0.006495226995005201 # SSO:000011987__Catechol 12-dioxygenase (EC 1.13.11.1) // 0.006342573636253848 # SSO:000021578__Phenolic acid decarboxylase (EC 4.1.1.-) // 0.006298501254322318 # SSO:000044184__tRNAMet cytidine acetyltransferase (EC 2.3.1.193) // 0.006124507424199845 # SSO:000008385__Triphosphoribosyl-dephospho-CoA synthase (EC 2.4.2.52) // 0.006005144774937187 # SSO:000020559__Nitrite transporter NirC // 0.005798251199708693 # SSO:000012629__Cytochrome c peroxidase (EC 1.11.1.5) // 0.0056307567971237635 # SSO:000037105__NADH:ubiquinone reductase (Na(+)-transporting) subunit D (EC 7.2.1.1) // 0.00540221967787383 # SSO:000002278__Diaminobutyrate--2-oxoglutarate transaminase (EC 2.6.1.76) // 0.0051425765002038985 # SSO:000037103__NADH:ubiquinone reductase (Na(+)-transporting) subunit B (EC 7.2.1.1) // 0.0050527148348110925 # SSO:000005273__Na(+)-translocating NADH-quinone reductase subunit C (EC 1.6.5.-) // 0.0050484835209797865 # SSO:000037083__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit B // 0.004918977478734349 # SSO:000023737__PyrBI operon leader peptide // 0.004874722877765002 # SSO:000029597__class II aldolase/adducin family protein // 0.004866816630819411 # SSO:000005134__N-hydroxyarylamine O-acetyltransferase (EC 2.3.1.118) // 0.004487580666537575 # SSO:000038656__Primary-amine oxidase (EC 1.4.3.21) // 0.004257876188705855
CatBoost,0.941176,0.945098,0.941176,0.936308,0.833333,TP=14 TN=2 FP=0FN=1,SSO:000011987__Catechol 12-dioxygenase (EC 1.13.11.1) // 0.5964946599804548 # SSO:000012016__Cd(II)/Pb(II)-responsive transcriptional regulator // 0.5262072298114369 # SSO:000025121__Sterol desaturase family protein // 0.45750868663486355 # SSO:000029597__class II aldolase/adducin family protein // 0.4552925043886871 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.32566958038211996 # SSO:000016827__Flavocytochrome c // 0.2806476490726803 # SSO:000019394__Mechanosensitive ion channel // 0.25302945593069603 # SSO:000033054__radical SAM protein // 0.19041988490531825 # SSO:000020559__Nitrite transporter NirC // 0.15710177047361223 # SSO:000029953__formate/nitrite transporter family protein // 0.14308993937232875 # SSO:000001103__Aspartate/tyrosine/aromatic aminotransferase (EC 2.6.1.1 ) // 0.14083548029527201 # SSO:000037083__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit B // 0.14073566799285858 # SSO:000017756__HesA/MoeB/ThiF family protein // 0.11733131468666988 # SSO:000037103__NADH:ubiquinone reductase (Na(+)-transporting) subunit B (EC 7.2.1.1) // 0.10851249691777243 # SSO:000005273__Na(+)-translocating NADH-quinone reductase subunit C (EC 1.6.5.-) // 0.10287317806253794 # SSO:000000272__3-carboxy-ciscis-muconate cycloisomerase (EC 5.5.1.2) // 0.10249591609174237 # SSO:000011997__Cation transporter // 0.1021677149175972 # SSO:000005919__Phosphate propanoyltransferase (EC 2.3.1.222) // 0.10200261561975088 # SSO:000044077__Magnesium transporter // 0.09860757467279925 # SSO:000009109__flavodoxin // 0.09258515300225453


Numer of genomes:801
Number of genomic features :4123
Shape of y:(801,)
Count of 1: 58
Count of 0: 743
Running Decision Tree for phenotype ribitol--builds_acid_from




Running Random Forest for phenotype ribitol--builds_acid_from




Running CatBoost for phenotype ribitol--builds_acid_from




######### Combined report for ribitol--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.888199,0.944468,0.888199,0.914298,0.555128,TP=142 TN=1 FP=14FN=4,SSO:000003257__Glycerol-3-phosphate dehydrogenase (EC 1.1.5.3) // 0.214391710746391 # SSO:000025241__Sugar kinase // 0.054403528139952924 # SSO:000029128__YeeE/YedE family protein // 0.04828022652723752 # SSO:000023930__Regulator // 0.048273320993063445 # SSO:000009089__dTDP-4-dehydrorhamnose 35-epimerase (EC 5.1.3.13) // 0.03513442298709002 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.031317063987608675 # SSO:000001304__CDP-glucose 46-dehydratase (EC 4.2.1.45) // 0.029207860265097554 # SSO:000001635__Cobalt-precorrin-6A reductase (EC 1.3.1.54) // 0.02312166342981265 # SSO:000000212__2-phosphosulfolactate phosphatase (EC 3.1.3.71 ) // 0.020927281665613282 # SSO:000024103__Rhamnogalacturonan acetylesterase // 0.020586515522688174 # SSO:000002934__Fructose-bisphosphate aldolase class I (EC 4.1.2.13) // 0.019504323643360997 # SSO:000011529__CDP-alcohol phosphatidyltransferase family protein // 0.019236181261235642 # SSO:000043641__N4-(beta-N-acetylglucosaminyl)-L-asparaginase (EC 3.5.1.26) // 0.01766352932872041 # SSO:000001017__Antitoxin HicB // 0.016881759667834985 # SSO:000022951__Protein-glutamate O-methyltransferase( EC:2.1.1.80 ) // 0.016383778099536464 # SSO:000013192__DnaD domain protein // 0.0146440685952991 # SSO:000010301__Adenine permease AdeP // 0.014287380823089077 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.011508258306205407 # SSO:000021299__Penicillin-binding protein 1A // 0.010626427869619667 # SSO:000013560__Exported protein // 0.0075307477967730225
Random Forest,0.956522,0.952022,0.956522,0.954176,0.590385,TP=153 TN=1 FP=3FN=4,SSO:000003257__Glycerol-3-phosphate dehydrogenase (EC 1.1.5.3) // 0.0063629272546184045 # SSO:000029307__aldolase // 0.00595545181364056 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.005925149186146834 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.005506238461120682 # SSO:000036573__LPS-assembly protein LptD // 0.004635333222452839 # SSO:000001479__Cell division topological specificity factor MinE // 0.004373640747174613 # SSO:000004426__Leucyl/phenylalanyl-tRNA--protein transferase (EC 2.3.2.6) // 0.004056478943575003 # SSO:000001877__Cytochrome O ubiquinol oxidase subunit I (EC 1.10.3.-) // 0.0038174935213617814 # SSO:000001022__Apolipoprotein N-acyltransferase (EC 2.3.1.-) // 0.003726889378924342 # SSO:000035873__FMNH2-dependent alkanesulfonate monooxygenase (EC 1.14.14.5) // 0.003670620735612006 # SSO:000036484__L-arabinose ABC transporter ATP-binding protein AraG // 0.003609342773173582 # SSO:000003212__Glutathione S-transferase family protein // 0.0035170776536156993 # SSO:000043098__myo-inosose-2 dehydratase (EC 4.2.1.44) // 0.0034914467731424884 # SSO:000037094__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit M // 0.003476760336030694 # SSO:000033999__2-oxo-4-hydroxy-4-carboxy-5-ureidoimidazoline decarboxylase (EC 4.1.1.97) // 0.0034666607168433953 # SSO:000008083__Tol-Pal system-associated acyl-CoA thioesterase // 0.0032190487615584855 # SSO:000020342__NADH:flavin oxidoreductase/NADH oxidase // 0.003218466749116804 # SSO:000044172__hydroxyisourate hydrolase (EC 3.5.2.17) // 0.003125954339259118 # SSO:000038753__Protein-methionine-sulfoxide reductase catalytic subunit MsrP // 0.003041464082082376 # SSO:000033834__xylulokinase // 0.002990445109039581
CatBoost,0.944099,0.949115,0.944099,0.94655,0.583974,TP=151 TN=1 FP=5FN=4,SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.5137805253275468 # SSO:000029307__aldolase // 0.44335001085957504 # SSO:000017550__HD-GYP domain containing protein // 0.278860031345238 # SSO:000000073__2345-tetrahydropyridine-26-dicarboxylate N-acetyltransferase (EC 2.3.1.89) // 0.26270516735585403 # SSO:000029128__YeeE/YedE family protein // 0.2379020289516895 # SSO:000011529__CDP-alcohol phosphatidyltransferase family protein // 0.22968955365884158 # SSO:000025479__Tartrate dehydrogenase (EC 1.1.1.93) // 0.22493771268066481 # SSO:000020342__NADH:flavin oxidoreductase/NADH oxidase // 0.21871244993129463 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.1882275875843726 # SSO:000013372__Endoglucanase // 0.18489529379039346 # SSO:000003125__Glucose-1-phosphate cytidylyltransferase (EC 2.7.7.33) // 0.1754020106890314 # SSO:000009087__dTDP-4-amino-46-dideoxygalactose transaminase (EC 2.6.1.59) // 0.16889697714897822 # SSO:000002315__Dihydropyrimidinase (EC 3.5.2.2) // 0.16812440461952918 # SSO:000037230__Nitronate monooxygenase (EC 1.13.12.16) // 0.16570989114790866 # SSO:000011545__CHAP domain containing protein // 0.12610275674626234 # SSO:000025241__Sugar kinase // 0.11780109537377881 # SSO:000006053__Phosphosulfolactate synthase (EC 4.4.1.19) // 0.11632322314031597 # SSO:000005522__Orotidine 5'-phosphate decarboxylase (EC 4.1.1.23) // 0.11089891338366849 # SSO:000007156__Ribulokinase (EC 2.7.1.16) // 0.10849677890002764 # SSO:000000381__4-hydroxyproline epimerase (EC 5.1.1.8) // 0.10691054625704147


Numer of genomes:1453
Number of genomic features :4680
Shape of y:(1453,)
Count of 1: 984
Count of 0: 469
Running Decision Tree for phenotype maltose--builds_acid_from




Running Random Forest for phenotype maltose--builds_acid_from




Running CatBoost for phenotype maltose--builds_acid_from




######### Combined report for maltose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.694158,0.701768,0.694158,0.697306,0.667526,TP=57 TN=145 FP=40FN=49,SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.1217001061589302 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.08513282189146812 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.08500593569674453 # SSO:000044113__UDP-N-acetylmuramoyl-L-alanyl-D-glutamate---L-lysine ligase (EC 6.3.2.7) // 0.04262920929198359 # SSO:000011964__Carboxypeptidase // 0.03493812145818072 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.02670803062658617 # SSO:000019052__Low molecular weight phosphotyrosine protein phosphatase // 0.01895797321286642 # SSO:000010324__Adhesin // 0.018527060453225025 # SSO:000007415__Septation ring formation regulator EzrA // 0.016556567068372018 # SSO:000029216__Zn-dependent hydrolase // 0.0164336371150141 # SSO:000018777__L-rhamnose isomerase // 0.016327151200487826 # SSO:000018206__IS3 family transposase // 0.015864350311674727 # SSO:000039882__Tyrosine recombinase XerS // 0.015225891822876614 # SSO:000017607__HTH-type transcriptional regulator malT // 0.013394778978273134 # SSO:000007082__Ribonuclease HIII (EC 3.1.26.4) // 0.013312651132514172 # SSO:000005972__Phosphoglycerate mutase (EC 5.4.2.1) // 0.013132211378231338 # SSO:000041594__Undecaprenyl-phosphate glucose phosphotransferase (EC 2.7.8.31) // 0.01277104666126644 # SSO:000003163__Glutamate-1-semialdehyde 21-aminomutase (EC 5.4.3.8) // 0.011006141153180325 # SSO:000001052__Argininosuccinate synthase (EC 6.3.4.5) // 0.010654950799878332 # SSO:000000864__Aldehyde dehydrogenase (EC 1.2.1.3) // 0.010192780295516474
Random Forest,0.766323,0.758889,0.766323,0.757648,0.708763,TP=52 TN=171 FP=45FN=23,SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.007789122488326931 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.007660074680154426 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.0068450184653665175 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.006051259130641411 # SSO:000042727__UDP-glucose---hexose-1-phosphate uridylyltransferase (EC 2.7.7.12) // 0.005646798316437958 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.005178280840427928 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.004472802233740958 # SSO:000008877__Uridine kinase (EC 2.7.1.48) // 0.004345871867338057 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.0037281312937308053 # SSO:000007113__Ribose-phosphate pyrophosphokinase (EC 2.7.6.1) // 0.003593991739273404 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.003375161817128212 # SSO:000007212__SAM-dependent methyltransferase (EC 2.1.1.-) // 0.0033664990641618145 # SSO:000001193__Beta-phosphoglucomutase (EC 5.4.2.6) // 0.003275824549092333 # SSO:000007415__Septation ring formation regulator EzrA // 0.0032529620130918725 # SSO:000003124__Glucose-1-phosphate adenylyltransferase (EC 2.7.7.27) // 0.0031688012718933374 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.0031655011564086233 # SSO:000003114__Glucosamine-6-phosphate deaminase (EC 3.5.99.6) // 0.003075303463323108 # SSO:000043918__DNA-formamidopyrimidine glycosylase (EC 3.2.2.23) // 0.0030224426789073806 # SSO:000009045__bacteriocin immunity protein // 0.0029137509599763845 # SSO:000002977__GMP reductase (EC 1.7.1.7) // 0.0028898099599201066
CatBoost,0.769759,0.764644,0.769759,0.766013,0.726804,TP=58 TN=166 FP=39FN=28,SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.5908117259092471 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.5158121135959799 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.33409432146715395 # SSO:000001193__Beta-phosphoglucomutase (EC 5.4.2.6) // 0.29282418740454375 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.2228062894185145 # SSO:000007212__SAM-dependent methyltransferase (EC 2.1.1.-) // 0.19171179703726257 # SSO:000042727__UDP-glucose---hexose-1-phosphate uridylyltransferase (EC 2.7.7.12) // 0.17901949026385977 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.1681148771954235 # SSO:000009045__bacteriocin immunity protein // 0.15112779336923873 # SSO:000008877__Uridine kinase (EC 2.7.1.48) // 0.1365680743131652 # SSO:000004378__Lactaldehyde reductase (EC 1.1.1.77) // 0.12444845847768063 # SSO:000016939__GAF domain-containing protein // 0.1222865122791998 # SSO:000009961__ADP-ribosylglycohydrolase family protein // 0.10906535593352258 # SSO:000012210__Chorismate mutase // 0.09802744985424837 # SSO:000003124__Glucose-1-phosphate adenylyltransferase (EC 2.7.7.27) // 0.09317552253838075 # SSO:000004043__Isochorismate synthase (EC 5.4.4.2) // 0.09234570004070985 # SSO:000024948__Sodium/proline symporter // 0.09072314579994159 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.0824082270734356 # SSO:000003478__Homocysteine S-methyltransferase (EC 2.1.1.10) // 0.08204904962568377 # SSO:000036105__Glutamate-5-semialdehyde dehydrogenase (EC 1.2.1.41) // 0.08038478045736126


Numer of genomes:1158
Number of genomic features :4485
Shape of y:(1158,)
Count of 1: 380
Count of 0: 778
Running Decision Tree for phenotype D-mannitol--builds_acid_from




Running Random Forest for phenotype D-mannitol--builds_acid_from




Running CatBoost for phenotype D-mannitol--builds_acid_from




######### Combined report for D-mannitol--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.737069,0.741206,0.737069,0.738792,0.717889,TP=118 TN=53 FP=33FN=28,SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 0.2649075135756276 # SSO:000042518__dCTP deaminase (EC 3.5.4.13) // 0.06486892379716619 # SSO:000005188__NAD-dependent malic enzyme (EC 1.1.1.38) // 0.04135557361422158 # SSO:000001635__Cobalt-precorrin-6A reductase (EC 1.3.1.54) // 0.025737227969321482 # SSO:000008865__Urease accessory protein UreF // 0.018548263195068727 # SSO:000000980__Anaerobic glycerol-3-phosphate dehydrogenase subunit A (EC 1.1.5.3) // 0.017929027187105175 # SSO:000001541__Choline dehydrogenase (EC 1.1.99.1) // 0.017648092653692764 # SSO:000002985__GTP cyclohydrolase II (EC 3.5.4.25) // 0.017254903606425512 # SSO:000033632__transglutaminase family protein // 0.01657352087801455 # SSO:000000749__Acyl carrier protein // 0.016206127787092153 # SSO:000022669__Protease HtpX // 0.01493055680106571 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.012688663547709993 # SSO:000043984__thioredoxin-disulfide reductase (EC 1.8.1.9) // 0.012335000937178861 # SSO:000021315__PepSY-associated TM helix domain-containing protein // 0.00974915208726301 # SSO:000017940__Hydrogenase maturation protease // 0.009667434592236103 # SSO:000001517__Chloramphenicol acetyltransferase (EC 2.3.1.28) // 0.00928806146278712 # SSO:000025785__Transcriptional antiterminator // 0.009283726938869885 # SSO:000006040__Phosphoribosylanthranilate isomerase (EC 5.3.1.24) // 0.009260687364718618 # SSO:000006808__Pyridoxal kinase (EC 2.7.1.35) // 0.008730084442146043 # SSO:000000449__6-phospho-3-hexuloisomerase (EC 5.3.1.27) // 0.008644686326712064
Random Forest,0.767241,0.770192,0.767241,0.768484,0.749653,TP=122 TN=56 FP=29FN=25,SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 0.029911612865858295 # SSO:000000035__14-dihydroxy-2-naphthoate polyprenyltransferase (EC 2.5.1.74) // 0.005476954938128041 # SSO:000029108__YaiI/YqxD family protein // 0.004644069597668039 # SSO:000018889__LemA family protein // 0.0038942451576662426 # SSO:000000542__ATP phosphoribosyltransferase (EC 2.4.2.17) // 0.0038566955722429733 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.003815576945334522 # SSO:000043945__histidinol-phosphate transaminase (EC 2.6.1.9) // 0.0036243069208967723 # SSO:000003446__Histidinol dehydrogenase (EC 1.1.1.23) // 0.003575999920842908 # SSO:000043034__inositol 2-dehydrogenase (EC 1.1.1.18) // 0.003538225367829705 # SSO:000020822__Organic hydroperoxide resistance protein // 0.0034803852524114554 # SSO:000024550__Serine O-acetyltransferase // 0.00344977759947122 # SSO:000017597__HTH domain-containing protein // 0.003308632044651569 # SSO:000016592__FMN-dependent NADH-azoreductase (EC 1.7.1.6) // 0.0031908112343705893 # SSO:000006841__Pyruvate carboxylase (EC 6.4.1.1) // 0.0031854096544391987 # SSO:000043071__D-amino-acid transaminase (EC 2.6.1.21) // 0.003127597117819901 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.003057173924388048 # SSO:000000543__ATP phosphoribosyltransferase regulatory subunit (EC 2.4.2.17) // 0.002944975688763688 # SSO:000019235__MOSC domain containing protein // 0.002943999843400476 # SSO:000007779__Succinate dehydrogenase flavoprotein subunit (EC 1.3.99.1) // 0.0028833218170948387 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.0027387141863111954
CatBoost,0.849138,0.84744,0.849138,0.84744,0.824013,TP=137 TN=60 FP=14FN=21,SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 3.0968049605089254 # SSO:000010559__Amidohydrolase family protein // 0.19900815566249422 # SSO:000012966__DUF1275 domain-containing protein // 0.12934052638315482 # SSO:000019235__MOSC domain containing protein // 0.10528067456845817 # SSO:000010090__ATPase // 0.10273909908044442 # SSO:000033847__zinc-binding dehydrogenase // 0.09690468452913392 # SSO:000002060__DNA (cytosine-5-)-methyltransferase (EC 2.1.1.37 ) // 0.09507544360196366 # SSO:000018763__L-iditol 2-dehydrogenase // 0.09463268450618323 # SSO:000021529__Phage portal protein // 0.08678323162714296 # SSO:000004554__Low-specificity L-threonine aldolase (EC 4.1.2.48) // 0.0824129836034546 # SSO:000010295__Acyltransferase family protein // 0.07965698184746346 # SSO:000005188__NAD-dependent malic enzyme (EC 1.1.1.38) // 0.0748381717287644 # SSO:000011127__Bifunctional DNA primase/polymerase // 0.07114025577938503 # SSO:000000272__3-carboxy-ciscis-muconate cycloisomerase (EC 5.5.1.2) // 0.07037442991608521 # SSO:000010855__Asparaginase // 0.06797929243744351 # SSO:000001855__Cystathionine gamma-synthase (EC 2.5.1.48) // 0.06724663615233821 # SSO:000000440__67-dimethyl-8-ribityllumazine synthase (EC 2.5.1.78) // 0.06546923302052424 # SSO:000025785__Transcriptional antiterminator // 0.065356477978228 # SSO:000018889__LemA family protein // 0.06411696147310081 # SSO:000029525__cation diffusion facilitator family transporter // 0.06211106870582038


Numer of genomes:399
Number of genomic features :3395
Shape of y:(399,)
Count of 1: 32
Count of 0: 367
Running Decision Tree for phenotype Potassium_5-ketogluconate--builds_acid_from




Running Random Forest for phenotype Potassium_5-ketogluconate--builds_acid_from




Running CatBoost for phenotype Potassium_5-ketogluconate--builds_acid_from




######### Combined report for Potassium_5-ketogluconate--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.8875,0.944547,0.8875,0.907908,0.846667,TP=67 TN=4 FP=8FN=1,SSO:000017206__Gluconate 5-dehydrogenase (EC 1.1.1.69) // 0.1900609732581689 # SSO:000012210__Chorismate mutase // 0.11991570603482274 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.1032687838914332 # SSO:000017300__Glutaredoxin-like protein nrdH // 0.08925117797745773 # SSO:000029525__cation diffusion facilitator family transporter // 0.05401965977077452 # SSO:000042475__5-methyltetrahydropteroyltriglutamate---homocysteine S-methyltransferase (EC 2.1.1.14) // 0.03472659200139376 # SSO:000009028__Protein-PII uridylyltransferase (EC 2.7.7.59) // 0.030805461488371766 # SSO:000005620__Oxalyl-CoA decarboxylase (EC 4.1.1.8) // 0.024836786304362764 # SSO:000024142__Ribonuclease // 0.022291812711927414 # SSO:000018206__IS3 family transposase // 0.020359439761498697 # SSO:000017631__Haloacid dehalogenase type II (EC 3.8.1.2) // 0.00933073764121264 # SSO:000043116__Shikimate 5-dehydrogenase (EC 1.1.1.25) // 0.0084557925934816 # SSO:000006910__RNA polymerase sigma factor SigV // 0.0018379887072122077 # SSO:000031204__opacity associated protein A // 0.0 # SSO:000021611__Phosphate starvation-inducible protein PsiF // 0.0 # SSO:000021269__Patatin family protein // 0.0 # SSO:000004933__Molybdenum ABC transporter ATP-binding protein ModC // 0.0 # SSO:000005945__Phosphatidylserine decarboxylase (EC 4.1.1.65) // 0.0 # SSO:000018213__IS630 family transposase // 0.0 # SSO:000023999__RepA protein // 0.0
Random Forest,0.925,0.909632,0.925,0.916118,0.586667,TP=73 TN=1 FP=2FN=4,SSO:000017206__Gluconate 5-dehydrogenase (EC 1.1.1.69) // 0.01314963443480691 # SSO:000016921__Fumarylacetoacetate hydrolase family protein // 0.006372588721004329 # SSO:000021890__Polysaccharide biosynthesis protein // 0.006361171489904565 # SSO:000000722__Acetylornithine deacetylase (EC 3.5.1.16) // 0.006085763254222464 # SSO:000029073__Xylose isomerase // 0.005707696759044116 # SSO:000005097__N-acetyl-gamma-glutamyl-phosphate reductase (EC 1.2.1.38) // 0.005353750010323061 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.005141953095638255 # SSO:000000720__Acetylglutamate kinase (EC 2.7.2.8) // 0.005052399311213892 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.0048163254327367256 # SSO:000007623__Spermidine N1-acetyltransferase (EC 2.3.1.57) // 0.004793587726673989 # SSO:000044131__UDP-4-amino-4-deoxy-L-arabinose aminotransferase (EC 2.6.1.87) // 0.0047287243434940155 # SSO:000007094__Ribonuclease PH (EC 2.7.7.56) // 0.004041081471555955 # SSO:000042174__acetylornithine transaminase // 0.004006220836689373 # SSO:000013528__Exodeoxyribonuclease X // 0.0038407781408310417 # SSO:000000306__3-isopropylmalate dehydratase large subunit (EC 4.2.1.33) // 0.0038362375682983083 # SSO:000000308__3-isopropylmalate dehydrogenase (EC 1.1.1.85) // 0.003561965630954281 # SSO:000043945__histidinol-phosphate transaminase (EC 2.6.1.9) // 0.0035195014155325654 # SSO:000001878__Cytochrome O ubiquinol oxidase subunit II (EC 1.10.3.-) // 0.003492186256813483 # SSO:000004083__Ketol-acid reductoisomerase (EC 1.1.1.86) // 0.003404697880379779 # SSO:000006028__Phosphopentomutase (EC 5.4.2.7) // 0.0032212600221197805
CatBoost,0.925,0.909632,0.925,0.916118,0.586667,TP=73 TN=1 FP=2FN=4,SSO:000017206__Gluconate 5-dehydrogenase (EC 1.1.1.69) // 2.768680870417422 # SSO:000012210__Chorismate mutase // 0.4215392795907585 # SSO:000011942__Carbon-nitrogen hydrolase family protein // 0.31340271324566804 # SSO:000029073__Xylose isomerase // 0.25517133696104005 # SSO:000013098__Dienelactone hydrolase family protein // 0.1819744326597221 # SSO:000000720__Acetylglutamate kinase (EC 2.7.2.8) // 0.1788474522519926 # SSO:000012746__DNA binding protein // 0.1742578960593403 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.1723157710550844 # SSO:000007623__Spermidine N1-acetyltransferase (EC 2.3.1.57) // 0.16352113482772943 # SSO:000009045__bacteriocin immunity protein // 0.15459895384654804 # SSO:000025747__Transcription antiterminator // 0.1477401603564047 # SSO:000029028__Xaa-Pro dipeptidyl-peptidase // 0.1360379244697244 # SSO:000017597__HTH domain-containing protein // 0.12855985946716125 # SSO:000016921__Fumarylacetoacetate hydrolase family protein // 0.12362444095627496 # SSO:000009809__ABC transporter // 0.11287612974493157 # SSO:000022348__Prenyltransferase // 0.10965231735144497 # SSO:000010857__Asparagine synthase (glutamine-hydrolyzing) // 0.10949943448978972 # SSO:000010264__Acyl-CoA reductase (EC 1.2.1.50) // 0.10567255265997916 # SSO:000033631__transglutaminase domain-containing protein // 0.1027419199649526 # SSO:000020217__N-acetyltransferase family protein // 0.09438429440794785


Numer of genomes:84
Number of genomic features :2711
Shape of y:(84,)
Count of 1: 35
Count of 0: 49
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:686
Number of genomic features :3947
Shape of y:(686,)
Count of 1: 19
Count of 0: 667
Running Decision Tree for phenotype L-arabitol--builds_acid_from




Running Random Forest for phenotype L-arabitol--builds_acid_from




Running CatBoost for phenotype L-arabitol--builds_acid_from




######### Combined report for L-arabitol--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.956522,0.970798,0.956522,0.963607,0.485294,TP=132 TN=0 FP=4FN=2,SSO:000001479__Cell division topological specificity factor MinE // 0.1380427600554532 # SSO:000029506__carbohydrate kinase // 0.11203782519902103 # SSO:000007384__Selenocysteine-specific translation elongation factor // 0.07978171642720318 # SSO:000021899__Polysaccharide deacetylase family protein // 0.061031621627438734 # SSO:000022956__Proteinase inhibitor I4 serpin // 0.05859302361173054 # SSO:000005035__Muramoyltetrapeptide carboxypeptidase (EC 3.4.17.13) // 0.05777584019275657 # SSO:000013134__Dihydroorotate dehydrogenase // 0.0492644208211624 # SSO:000025307__Superoxide dismutase // 0.030407160887729122 # SSO:000016732__Fibronectin type III domain-containing protein // 0.025917552524525118 # SSO:000020436__Na+/H+ antiporter family protein // 0.024124918233922342 # SSO:000043641__N4-(beta-N-acetylglucosaminyl)-L-asparaginase (EC 3.5.1.26) // 0.017949206605113125 # SSO:000000345__4'-phosphopantetheinyl transferase (EC 2.7.8.-) // 0.005421874991161536 # SSO:000020125__Murein DD-endopeptidase MepM // 0.0 # SSO:000021345__Peptidase M61 // 0.0 # SSO:000005945__Phosphatidylserine decarboxylase (EC 4.1.1.65) // 0.0 # SSO:000023999__RepA protein // 0.0 # SSO:000018213__IS630 family transposase // 0.0 # SSO:000004933__Molybdenum ABC transporter ATP-binding protein ModC // 0.0 # SSO:000021269__Patatin family protein // 0.0 # SSO:000021611__Phosphate starvation-inducible protein PsiF // 0.0
Random Forest,0.985507,0.971225,0.985507,0.978314,0.5,TP=136 TN=0 FP=0FN=2,SSO:000001479__Cell division topological specificity factor MinE // 0.007540685836961763 # SSO:000029506__carbohydrate kinase // 0.00686693229487398 # SSO:000029307__aldolase // 0.005612189131146382 # SSO:000001022__Apolipoprotein N-acyltransferase (EC 2.3.1.-) // 0.004938008546201535 # SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.004904125938424481 # SSO:000005945__Phosphatidylserine decarboxylase (EC 4.1.1.65) // 0.004788433895499641 # SSO:000012812__DNA polymerase III subunit epsilon // 0.003990738969438459 # SSO:000036573__LPS-assembly protein LptD // 0.003703186149124016 # SSO:000025241__Sugar kinase // 0.003489766607589767 # SSO:000008954__Xaa-Pro aminopeptidase (EC 3.4.11.9) // 0.0033578237936610056 # SSO:000017481__GtrA family protein // 0.003222758085830435 # SSO:000044033__glutathione-disulfide reductase (EC 1.8.1.7) // 0.0031506794802054977 # SSO:000030144__hsp20/alpha crystallin family protein // 0.0031468455165003244 # SSO:000008604__UDP-3-O-3-hydroxymyristoyl glucosamine N-acyltransferase (EC 2.3.1.-) // 0.0031153165475351213 # SSO:000012759__DNA gyrase inhibitor YacG // 0.003032105007853093 # SSO:000007071__Ribonuclease D (EC 3.1.26.3) // 0.002833892712186519 # SSO:000005468__O-methyltransferase (EC 2.1.1.-) // 0.002824973626264375 # SSO:000009944__ACT domain-containing protein // 0.0028218706280430652 # SSO:000007937__Tetraacyldisaccharide 4'-kinase (EC 2.7.1.130) // 0.0028144324655784586 # SSO:000021665__Phosphohydrolase // 0.0027448094297662868
CatBoost,0.978261,0.97112,0.978261,0.974677,0.496324,TP=135 TN=0 FP=1FN=2,SSO:000029307__aldolase // 0.7947251623566591 # SSO:000001479__Cell division topological specificity factor MinE // 0.683364755245482 # SSO:000017481__GtrA family protein // 0.46453839912119116 # SSO:000016732__Fibronectin type III domain-containing protein // 0.292859470586041 # SSO:000005432__Nucleotidyltransferase (EC 2.7.7.-) // 0.2784241141260847 # SSO:000012812__DNA polymerase III subunit epsilon // 0.23196579451803886 # SSO:000019052__Low molecular weight phosphotyrosine protein phosphatase // 0.16891407316193005 # SSO:000010266__Acyl-CoA synthetase // 0.15949876111616354 # SSO:000010058__ATP-dependent helicase HrpB // 0.14448499660708258 # SSO:000017037__Galactitol-1-phosphate 5-dehydrogenase (EC 1.1.1.251) // 0.13346477214056116 # SSO:000022956__Proteinase inhibitor I4 serpin // 0.12170052312723005 # SSO:000005468__O-methyltransferase (EC 2.1.1.-) // 0.11888820778153823 # SSO:000000315__3-methyl-2-oxobutanoate hydroxymethyltransferase (EC 2.1.2.11) // 0.1177690749583117 # SSO:000037082__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit A // 0.11696930767950067 # SSO:000007384__Selenocysteine-specific translation elongation factor // 0.11637010678527131 # SSO:000005954__Phosphoenolpyruvate carboxykinase GTP (EC 4.1.1.32) // 0.1160526698916437 # SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 0.10346363332366984 # SSO:000011135__Bile acid:sodium symporter // 0.10048000265796517 # SSO:000000381__4-hydroxyproline epimerase (EC 5.1.1.8) // 0.09731422295363422 # SSO:000029128__YeeE/YedE family protein // 0.09481079772392144


Numer of genomes:739
Number of genomic features :4222
Shape of y:(739,)
Count of 1: 271
Count of 0: 468
Running Decision Tree for phenotype lysine--degradation




Running Random Forest for phenotype lysine--degradation




Running CatBoost for phenotype lysine--degradation




######### Combined report for lysine--degradation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.722973,0.728721,0.722973,0.725023,0.714286,TP=69 TN=38 FP=23FN=18,SSO:000004108__L-2-hydroxyglutarate oxidase (EC 1.1.3.15) // 0.14140859712390766 # SSO:000035074__Cellulose biosynthesis protein BcsF // 0.09093033516191093 # SSO:000011127__Bifunctional DNA primase/polymerase // 0.06984570588742908 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.05446876999938833 # SSO:000021269__Patatin family protein // 0.044375372218454584 # SSO:000038785__Putative aminohydrolase SsnA // 0.04104496477929114 # SSO:000028997__WGR domain-containing protein // 0.03788741546225858 # SSO:000004053__Isocitrate lyase (EC 4.1.3.1) // 0.03501297922680269 # SSO:000006147__Polyketide synthase // 0.028378930716542284 # SSO:000030895__iron-sulfur cluster assembly accessory protein // 0.026704055137589545 # SSO:000025239__Sugar isomerase // 0.022384341933603103 # SSO:000013044__Dehydrogenase // 0.018194003052164554 # SSO:000010176__Acetyltransferase // 0.01722418530331119 # SSO:000000401__510-methylenetetrahydrofolate reductase (EC 1.5.1.20) // 0.016271194127700915 # SSO:000001155__Bacteriocin production protein // 0.016047340717436416 # SSO:000000327__3-oxoacyl-ACP synthase (EC 2.3.1.41) // 0.016041395594133014 # SSO:000011068__Phenylacetate CoA-ligase (EC 6.2.1.30) // 0.015518977775994305 # SSO:000009703__Nitroreductase // 0.015163557239486907 # SSO:000010570__Amino acid carrier protein // 0.014799643786823611 # SSO:000020105__Multidrug transporter // 0.014015315738587558
Random Forest,0.736486,0.763697,0.736486,0.740437,0.749612,TP=64 TN=45 FP=28FN=11,SSO:000004108__L-2-hydroxyglutarate oxidase (EC 1.1.3.15) // 0.006187499519478704 # SSO:000001635__Cobalt-precorrin-6A reductase (EC 1.3.1.54) // 0.005728391242113105 # SSO:000043973__adenosylcobinamide-GDP ribazoletransferase (EC 2.7.8.26) // 0.005072969073530652 # SSO:000009524__2-aminoethylphosphonate ABC transporter substrate-binding protein // 0.005061248765716841 # SSO:000001640__Cobyric acid synthase (EC 6.3.5.10) // 0.004622463260889605 # SSO:000042404__cobyrinate ac-diamide synthase (EC 6.3.5.11) // 0.004247798311446268 # SSO:000005317__Nicotinate-nucleotide--dimethylbenzimidazole phosphoribosyltransferase (EC 2.4.2.21) // 0.00424515621601526 # SSO:000043554__adenosylcobalamin/alpha-ribazole phosphatase (EC 3.1.3.73) // 0.004068014678491066 # SSO:000043198__threonine-phosphate decarboxylase (EC 4.1.1.81) // 0.0032217549507032915 # SSO:000001864__Cysteine synthase (EC 2.5.1.47) // 0.003142001373508767 # SSO:000011127__Bifunctional DNA primase/polymerase // 0.0030688568235310324 # SSO:000033225__sigma-54-dependent transcriptional regulator // 0.002818983249970384 # SSO:000002577__Ethanolamine utilization protein EutJ // 0.002691609957211501 # SSO:000028997__WGR domain-containing protein // 0.0026895090110961115 # SSO:000005814__Peptidyl-tRNA hydrolase (EC 3.1.1.29) // 0.002631313718977876 # SSO:000034428__Acetoacetate decarboxylase family protein // 0.0023314126290997677 # SSO:000018210__IS5 family transposase // 0.002288337935559198 # SSO:000037089__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit H // 0.0021991263803275134 # SSO:000001046__Arginine deiminase (EC 3.5.3.6) // 0.0021672256278508725 # SSO:000024558__Serine protease // 0.0021610839981609942
CatBoost,0.72973,0.746542,0.72973,0.733379,0.733696,TP=66 TN=42 FP=26FN=14,SSO:000009524__2-aminoethylphosphonate ABC transporter substrate-binding protein // 0.3467048987765434 # SSO:000004108__L-2-hydroxyglutarate oxidase (EC 1.1.3.15) // 0.3271298725610896 # SSO:000005432__Nucleotidyltransferase (EC 2.7.7.-) // 0.26694945435931067 # SSO:000043973__adenosylcobinamide-GDP ribazoletransferase (EC 2.7.8.26) // 0.2534386205764284 # SSO:000006147__Polyketide synthase // 0.19209371927494664 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.17697047091346177 # SSO:000035189__Conjugal transfer protein TraF // 0.17200186151219185 # SSO:000033225__sigma-54-dependent transcriptional regulator // 0.15962007611721774 # SSO:000025362__TM2 domain containing protein // 0.15901426917639 # SSO:000000363__4-hydroxy-2-oxovalerate aldolase (EC 4.1.3.39) // 0.1556199267767681 # SSO:000017510__HAD family hydrolase // 0.14557212390247373 # SSO:000028997__WGR domain-containing protein // 0.14323514842164328 # SSO:000013265__EAL domain containing protein // 0.13994796256059838 # SSO:000033493__thermostable hemolysin // 0.12419264777096405 # SSO:000033834__xylulokinase // 0.11945426886057203 # SSO:000000898__Allantoin permease // 0.1055412843557259 # SSO:000007803__Sulfate adenylyltransferase (EC 2.7.7.4) // 0.10267201502291143 # SSO:000001968__Cytosine permease // 0.10108528059509628 # SSO:000020220__N-acylneuraminate cytidylyltransferase (EC 2.7.7.43) // 0.09075820297789357 # SSO:000019607__Methylated-DNA-(protein)-cysteine S-methyltransferase // 0.09032965715609138


Numer of genomes:972
Number of genomic features :4466
Shape of y:(972,)
Count of 1: 263
Count of 0: 709
Running Decision Tree for phenotype ornithine--degradation




Running Random Forest for phenotype ornithine--degradation




Running CatBoost for phenotype ornithine--degradation




######### Combined report for ornithine--degradation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.74359,0.760692,0.74359,0.750191,0.702797,TP=113 TN=32 FP=30FN=20,SSO:000008066__Thymidine phosphorylase (EC 2.4.2.4) // 0.11153599717104226 # SSO:000005968__Phosphogluconate dehydratase (EC 4.2.1.12) // 0.10890474620351892 # SSO:000011942__Carbon-nitrogen hydrolase family protein // 0.08378268943752355 # SSO:000039072__S-(hydroxymethyl)mycothiol dehydrogenase (EC 1.1.1.306) // 0.05753514841484173 # SSO:000009287__putrescine-ornithine antiporter // 0.04050840342219354 # SSO:000042733__carotenoid 12-hydratase (EC 4.2.1.131) // 0.02911964769450711 # SSO:000009598__34-dihydroxyphenylacetate 23-dioxygenase (EC 1.13.11.15) // 0.028546580428606687 # SSO:000003483__Homoserine O-acetyltransferase (EC 2.3.1.31) // 0.027058051235383674 # SSO:000003857__Indole-3-glycerol phosphate synthase (EC 4.1.1.48) // 0.02130086753625824 # SSO:000000457__6-phosphogluconolactonase (EC 3.1.1.31) // 0.020490538633317058 # SSO:000009606__3-carboxyethylcatechol 23-dioxygenase (EC 1.13.11.16) // 0.019961910565764622 # SSO:000011533__CDP-glycerol:glycerophosphate glycerophosphotransferase // 0.018297929183887094 # SSO:000021576__Phe operon leader peptide // 0.017733154393466687 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.014402795521534674 # SSO:000010419__Alkane-1 monooxygenase (EC 1.14.15.3) // 0.01425384482169254 # SSO:000020220__N-acylneuraminate cytidylyltransferase (EC 2.7.7.43) // 0.013104205941651854 # SSO:000024157__Ribose 5-phosphate isomerase (EC 5.3.1.6) // 0.012782100830401428 # SSO:000041789__ribosomal protein S18-alanine N-acetyltransferase (EC 2.3.1.266) // 0.012306724330629085 # SSO:000011951__Carboxylate-amine ligase // 0.012113389389367941 # SSO:000003125__Glucose-1-phosphate cytidylyltransferase (EC 2.7.7.33) // 0.011796948575319927
Random Forest,0.815385,0.8486,0.815385,0.823333,0.825175,TP=115 TN=44 FP=28FN=8,SSO:000009287__putrescine-ornithine antiporter // 0.012136181123506062 # SSO:000011942__Carbon-nitrogen hydrolase family protein // 0.006718814506847664 # SSO:000001855__Cystathionine gamma-synthase (EC 2.5.1.48) // 0.004328835267883195 # SSO:000001635__Cobalt-precorrin-6A reductase (EC 1.3.1.54) // 0.004137701387038239 # SSO:000011127__Bifunctional DNA primase/polymerase // 0.00390418610284328 # SSO:000042404__cobyrinate ac-diamide synthase (EC 6.3.5.11) // 0.0035664312331885333 # SSO:000001640__Cobyric acid synthase (EC 6.3.5.10) // 0.003530470160036826 # SSO:000008066__Thymidine phosphorylase (EC 2.4.2.4) // 0.003403414389871596 # SSO:000005968__Phosphogluconate dehydratase (EC 4.2.1.12) // 0.0032052982720525017 # SSO:000001864__Cysteine synthase (EC 2.5.1.47) // 0.003204243105224744 # SSO:000043973__adenosylcobinamide-GDP ribazoletransferase (EC 2.7.8.26) // 0.00312080700624152 # SSO:000001853__Cystathionine beta-synthase (EC 4.2.1.22) // 0.0030724796931358763 # SSO:000003012__Galactose-1-phosphate uridylyltransferase (EC 2.7.7.10) // 0.002964387198031962 # SSO:000043812__N-acetyl-1-D-myo-inositol-2-amino-2-deoxy-alpha-D-glucopyranoside deacetylase (EC 3.5.1.103) // 0.0029114710812163094 # SSO:000033686__trehalose-phosphatase // 0.0026628409978719137 # SSO:000004155__L-lactate dehydrogenase (EC 1.1.1.27) // 0.002649504824482239 # SSO:000043620__mycothiol synthase (EC 2.3.1.189) // 0.0025792012635605845 # SSO:000000685__Acetolactate synthase large subunit (EC 2.2.1.6) // 0.0025576444654093547 # SSO:000009261__proteasome subunit alpha (EC 3.4.25.1) // 0.0025362272305051 # SSO:000001046__Arginine deiminase (EC 3.5.3.6) // 0.0025350610954078165
CatBoost,0.815385,0.832824,0.815385,0.820891,0.800699,TP=119 TN=40 FP=24FN=12,SSO:000009287__putrescine-ornithine antiporter // 1.7494152746405855 # SSO:000011942__Carbon-nitrogen hydrolase family protein // 0.23228243653742717 # SSO:000006147__Polyketide synthase // 0.17740724483320744 # SSO:000011920__CarD family transcriptional regulator // 0.15714622029912836 # SSO:000025362__TM2 domain containing protein // 0.1282599733578223 # SSO:000010642__Anaerobic C4-dicarboxylate transporter DcuC // 0.12807513701769505 # SSO:000007071__Ribonuclease D (EC 3.1.26.3) // 0.12627221676926426 # SSO:000010266__Acyl-CoA synthetase // 0.11450638371164941 # SSO:000001864__Cysteine synthase (EC 2.5.1.47) // 0.10949649983802212 # SSO:000005968__Phosphogluconate dehydratase (EC 4.2.1.12) // 0.10682592480928474 # SSO:000000272__3-carboxy-ciscis-muconate cycloisomerase (EC 5.5.1.2) // 0.10627548269791537 # SSO:000035277__Cytochrome c oxidase subunit 4 (EC 1.9.3.1) // 0.10472167248131692 # SSO:000000295__3-hydroxybutyryl-CoA dehydrogenase (EC 1.1.1.157) // 0.10254305308333429 # SSO:000010054__ATP-dependent helicase // 0.09870037926935339 # SSO:000020518__NifU family protein // 0.09652231698731996 # SSO:000018206__IS3 family transposase // 0.09132945330456958 # SSO:000043973__adenosylcobinamide-GDP ribazoletransferase (EC 2.7.8.26) // 0.08748566834772852 # SSO:000030137__histone deacetylase // 0.08726815545668822 # SSO:000024558__Serine protease // 0.08329636369044907 # SSO:000018207__IS30 family transposase // 0.08310267166726115


Numer of genomes:946
Number of genomic features :4059
Shape of y:(946,)
Count of 1: 469
Count of 0: 477
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:1197
Number of genomic features :4396
Shape of y:(1197,)
Count of 1: 479
Count of 0: 718
Running Decision Tree for phenotype lactose--builds_acid_from




Running Random Forest for phenotype lactose--builds_acid_from




Running CatBoost for phenotype lactose--builds_acid_from




######### Combined report for lactose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.658333,0.661471,0.658333,0.659493,0.653608,TP=95 TN=63 FP=44FN=38,SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.14776151891149086 # SSO:000013628__FAD:protein FMN transferase (EC 2.7.1.180) // 0.06818508241009794 # SSO:000000925__Alpha-mannosidase (EC 3.2.1.24) // 0.05281293039102903 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.045755803061044174 # SSO:000000450__6-phospho-beta-galactosidase (EC 3.2.1.85) // 0.0413294131938624 # SSO:000042518__dCTP deaminase (EC 3.5.4.13) // 0.03398164894325126 # SSO:000012845__DNA repair exonuclease // 0.0307691558809657 # SSO:000018207__IS30 family transposase // 0.029527408465987927 # SSO:000007156__Ribulokinase (EC 2.7.1.16) // 0.029505454339017047 # SSO:000001558__Chorismate synthase (EC 4.2.3.5) // 0.0161018585686303 # SSO:000012150__Chemotaxis protein CheX // 0.015979438511146492 # SSO:000044269__UDP-N-acetylmuramoyl-L-alanyl-D-glutamate---26-diaminopimelate ligase (EC 6.3.2.13) // 0.01587364902681798 # SSO:000002972__GDP-mannose 46-dehydratase (EC 4.2.1.47) // 0.013184863775604013 # SSO:000000870__Aldose 1-epimerase (EC 5.1.3.3) // 0.012512781850128158 # SSO:000024474__Sensor histidine kinase // 0.011428298106550905 # SSO:000037086__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit E // 0.011260643937401588 # SSO:000033834__xylulokinase // 0.011227512567621084 # SSO:000022669__Protease HtpX // 0.011093753677277442 # SSO:000007517__Signal peptidase I (EC 3.4.21.89) // 0.010912077330397478 # SSO:000038747__Protein-ADP-ribose hydrolase // 0.010720983327498877
Random Forest,0.745833,0.745506,0.745833,0.745657,0.738621,TP=109 TN=70 FP=30FN=31,SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.010261846350153574 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.009343187718065001 # SSO:000000450__6-phospho-beta-galactosidase (EC 3.2.1.85) // 0.008960711826087686 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.008609810743780609 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.00550281365636401 # SSO:000000925__Alpha-mannosidase (EC 3.2.1.24) // 0.0053073166623973924 # SSO:000013628__FAD:protein FMN transferase (EC 2.7.1.180) // 0.004726424439014341 # SSO:000029073__Xylose isomerase // 0.004422270483220978 # SSO:000042727__UDP-glucose---hexose-1-phosphate uridylyltransferase (EC 2.7.7.12) // 0.0036393838271465443 # SSO:000025471__Tagatose-bisphosphate aldolase (EC 4.1.2.40) // 0.0032661210132586185 # SSO:000033209__sialate O-acetylesterase // 0.0030824698347465475 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.003032963324132775 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.002894822209370526 # SSO:000024110__Rhamnulokinase (EC 2.7.1.5) // 0.0028480438284914367 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.002838279152228553 # SSO:000009944__ACT domain-containing protein // 0.0027275245414128987 # SSO:000017381__Glycosyl hydrolase // 0.0025756685694229373 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.0025400227167687167 # SSO:000016591__FMN-binding protein // 0.002455637133105965 # SSO:000025220__Sucrose phosphorylase (EC 2.4.1.7) // 0.002374114094070896
CatBoost,0.791667,0.790862,0.791667,0.79104,0.783603,TP=116 TN=74 FP=23FN=27,SSO:000000450__6-phospho-beta-galactosidase (EC 3.2.1.85) // 0.741058372781735 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.4687348727701891 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.41383444792622975 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.35379131399274516 # SSO:000013628__FAD:protein FMN transferase (EC 2.7.1.180) // 0.25090791565932435 # SSO:000009334__tRNA (cytidine(34)-2'-O)-methyltransferase (EC 2.1.1.207) // 0.24466284023534057 # SSO:000029073__Xylose isomerase // 0.18526341727501838 # SSO:000028963__Voltage-gated chloride channel family protein // 0.16811436542669075 # SSO:000024558__Serine protease // 0.12648041314146835 # SSO:000018641__Isochorismatase family protein // 0.12590431724305173 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.12360131744812765 # SSO:000006853__Pyruvate oxidase (EC 1.2.3.3) // 0.11392296191925401 # SSO:000012787__DNA methyltransferase // 0.10724332216802498 # SSO:000009464__13-beta-galactosyl-N-acetylhexosamine phosphorylase (EC 2.4.1.211) // 0.10644809310763458 # SSO:000002129__DNA-3-methyladenine glycosylase (EC 3.2.2.20) // 0.10394880697975917 # SSO:000008287__Transketolase (EC 2.2.1.1) // 0.10020668356249236 # SSO:000005120__N-acetyltransferase // 0.09688319714097789 # SSO:000029525__cation diffusion facilitator family transporter // 0.09290067254147087 # SSO:000018210__IS5 family transposase // 0.09197543528651334 # SSO:000016827__Flavocytochrome c // 0.08680133771554345


Numer of genomes:270
Number of genomic features :2493
Shape of y:(270,)
Count of 1: 40
Count of 0: 230
Running Decision Tree for phenotype alpha-cyclodextrin--builds_acid_from




Running Random Forest for phenotype alpha-cyclodextrin--builds_acid_from




Running CatBoost for phenotype alpha-cyclodextrin--builds_acid_from




######### Combined report for alpha-cyclodextrin--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.814815,0.852734,0.814815,0.829125,0.736413,TP=39 TN=5 FP=7FN=3,SSO:000025721__Toxic anion resistance protein // 0.19570424828071933 # SSO:000010533__Amidase // 0.18119973659057742 # SSO:000010823__Arsenate reductase family protein // 0.06987421148309368 # SSO:000029128__YeeE/YedE family protein // 0.03458271298628223 # SSO:000043918__DNA-formamidopyrimidine glycosylase (EC 3.2.2.23) // 0.03200182431750153 # SSO:000002822__Flagellar protein FlaG // 0.02351414777405553 # SSO:000013593__Extracellular solute-binding protein // 0.018251560362008513 # SSO:000033829__winged helix family transcriptional regulator // 0.015618407985556255 # SSO:000005317__Nicotinate-nucleotide--dimethylbenzimidazole phosphoribosyltransferase (EC 2.4.2.21) // 0.012516092234269293 # SSO:000020637__Nuclease // 0.011459102947389152 # SSO:000010324__Adhesin // 0.011098664709998703 # SSO:000001260__Branched-chain amino acid aminotransferase (EC 2.6.1.42) // 0.008903114531574313 # SSO:000029584__chromosome partitioning protein ParB // 0.0074201964196925424 # SSO:000021521__Phage major tail protein // 0.0058084492415736225 # SSO:000039723__beta-ketoacyl-acyl-carrier-protein synthase II (EC 2.3.1.179) // 0.004201847752818245 # SSO:000011964__Carboxypeptidase // 0.002228008552531204 # SSO:000012938__DNA-directed DNA polymerase // 0.002150880096844246 # SSO:000004933__Molybdenum ABC transporter ATP-binding protein ModC // 0.0 # SSO:000009632__3-methyladenine DNA glycosylase // 0.0 # SSO:000018820__LacX protein // 0.0
Random Forest,0.925926,0.936195,0.925926,0.929218,0.904891,TP=43 TN=7 FP=3FN=1,SSO:000025721__Toxic anion resistance protein // 0.014089741052857182 # SSO:000010533__Amidase // 0.01356234443704152 # SSO:000000035__14-dihydroxy-2-naphthoate polyprenyltransferase (EC 2.5.1.74) // 0.012387748973310393 # SSO:000000925__Alpha-mannosidase (EC 3.2.1.24) // 0.011628461529299495 # SSO:000006841__Pyruvate carboxylase (EC 6.4.1.1) // 0.010829675290499031 # SSO:000016827__Flavocytochrome c // 0.010390099287654466 # SSO:000010266__Acyl-CoA synthetase // 0.009250385294267693 # SSO:000001193__Beta-phosphoglucomutase (EC 5.4.2.6) // 0.00872067803120789 # SSO:000033225__sigma-54-dependent transcriptional regulator // 0.008086033400904137 # SSO:000009089__dTDP-4-dehydrorhamnose 35-epimerase (EC 5.1.3.13) // 0.006265366646600389 # SSO:000029108__YaiI/YqxD family protein // 0.00550759085517711 # SSO:000043918__DNA-formamidopyrimidine glycosylase (EC 3.2.2.23) // 0.005366934633168991 # SSO:000000219__2-succinyl-6-hydroxy-24-cyclohexadiene-1-carboxylate synthase (EC 4.2.99.20) // 0.005287141764924835 # SSO:000002702__Ferredoxin (EC 1.18.1.3 ) // 0.0050500214270256605 # SSO:000021467__Peroxiredoxin // 0.004990801267436872 # SSO:000029506__carbohydrate kinase // 0.004920095451499024 # SSO:000029953__formate/nitrite transporter family protein // 0.004875461623128514 # SSO:000043940__dihydrolipoyllysine-residue acetyltransferase (EC 2.3.1.12) // 0.004843335918666722 # SSO:000025538__Tetracycline resistance MFS efflux pump // 0.004599110288029968 # SSO:000035652__Peptide-methionine (S)-S-oxide reductase MsrA (EC 1.8.4.11) // 0.004597639797869258
CatBoost,0.907407,0.912757,0.907407,0.909622,0.842391,TP=43 TN=6 FP=3FN=2,SSO:000010533__Amidase // 0.5396676700868192 # SSO:000034380__ABC-F type ribosomal protection protein // 0.47470008807244257 # SSO:000000035__14-dihydroxy-2-naphthoate polyprenyltransferase (EC 2.5.1.74) // 0.4725472882450585 # SSO:000010266__Acyl-CoA synthetase // 0.4674514828719574 # SSO:000000925__Alpha-mannosidase (EC 3.2.1.24) // 0.4007330101518577 # SSO:000025538__Tetracycline resistance MFS efflux pump // 0.3432928293599118 # SSO:000010090__ATPase // 0.2983323218770652 # SSO:000029769__cyclic nucleotide-binding domain-containing protein // 0.28061547148112814 # SSO:000033225__sigma-54-dependent transcriptional regulator // 0.23676563631004843 # SSO:000013593__Extracellular solute-binding protein // 0.22867242513327424 # SSO:000016827__Flavocytochrome c // 0.21727108525842379 # SSO:000001193__Beta-phosphoglucomutase (EC 5.4.2.6) // 0.21345308558942294 # SSO:000018641__Isochorismatase family protein // 0.21169570177543012 # SSO:000025721__Toxic anion resistance protein // 0.19914588742933553 # SSO:000021467__Peroxiredoxin // 0.19636509689680468 # SSO:000008848__Uracil permease // 0.1918835908863119 # SSO:000024271__S-layer protein // 0.1655177267175938 # SSO:000021338__Peptidase M23 // 0.16220159729857542 # SSO:000010889__Autolysin (EC 3.5.1.28) // 0.14309783038223842 # SSO:000000219__2-succinyl-6-hydroxy-24-cyclohexadiene-1-carboxylate synthase (EC 4.2.99.20) // 0.13868236968202965


Numer of genomes:389
Number of genomic features :2949
Shape of y:(389,)
Count of 1: 112
Count of 0: 277
Running Decision Tree for phenotype hippurate--hydrolysis




Running Random Forest for phenotype hippurate--hydrolysis




Running CatBoost for phenotype hippurate--hydrolysis




######### Combined report for hippurate--hydrolysis#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.74359,0.764391,0.74359,0.749634,0.737358,TP=40 TN=18 FP=13FN=7,SSO:000010553__Amidohydrolase // 0.15178136965644462 # SSO:000012182__Chloride channel protein // 0.10223537343543715 # SSO:000021616__Phosphate-binding protein // 0.09316558117271614 # SSO:000037530__Undecaprenyldiphospho-muramoylpentapeptide beta-N-acetylglucosaminyltransferase (EC 2.4.1.227) // 0.08556471387906385 # SSO:000025747__Transcription antiterminator // 0.0716351830257605 # SSO:000010242__Acyl transferase // 0.02431325132195963 # SSO:000030106__heme-binding protein // 0.021480515656289213 # SSO:000001460__Cell division inhibitor // 0.019993390134918267 # SSO:000006560__Pseudouridine 5'-phosphate glycosidase (EC 4.2.1.70) // 0.019963936187985126 # SSO:000033963__Peptide-methionine (R)-S-oxide reductase MsrB (EC 1.8.4.12) // 0.017251767023486613 # SSO:000023742__Pyridoxal phosphate-dependent aminotransferase // 0.016933394056753138 # SSO:000007576__Sirohydrochlorin cobaltochelatase (EC 4.99.1.3) // 0.016345368929063984 # SSO:000021523__Phage major tail tube protein // 0.015140496392566307 # SSO:000018491__Integrase // 0.01499126306328035 # SSO:000000405__5-(carboxyamino)imidazole ribonucleotide mutase (EC 5.4.99.18) // 0.013481851989346323 # SSO:000029834__dihydrodipicolinate reductase // 0.013186794596364518 # SSO:000033520__topology modulation protein // 0.01207201947374947 # SSO:000043984__thioredoxin-disulfide reductase (EC 1.8.1.9) // 0.012050855036295225 # SSO:000003095__Glucarate dehydratase (EC 4.2.1.40) // 0.011692946048910299 # SSO:000023860__RNA-directed DNA polymerase // 0.011574281032688408
Random Forest,0.794872,0.813984,0.794872,0.799707,0.796226,TP=42 TN=20 FP=11FN=5,SSO:000010553__Amidohydrolase // 0.009785919205284032 # SSO:000020331__NADH-dependent flavin oxidoreductase // 0.008892959819580997 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.00750259930925429 # SSO:000001185__Beta-glucuronidase (EC 3.2.1.31) // 0.006665658158363919 # SSO:000010953__BCCT family transporter // 0.005447105830044393 # SSO:000003247__Glycerol dehydrogenase (EC 1.1.1.6) // 0.005086673262091134 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.005053999184917111 # SSO:000020210__N-acetylneuraminate synthase // 0.0050430274194956754 # SSO:000020957__Oxidoreductase // 0.0047175931596164615 # SSO:000016736__Fibronectin/fibrinogen-binding protein // 0.004538471419931313 # SSO:000010533__Amidase // 0.004468025439721962 # SSO:000001940__Cytochrome d ubiquinol oxidase subunit II (EC 1.10.3.-) // 0.004354313655185918 # SSO:000002097__DNA recombination protein RmuC // 0.00397315165500573 # SSO:000003485__Homoserine dehydrogenase (EC 1.1.1.3) // 0.0038872319559007185 # SSO:000022348__Prenyltransferase // 0.0037085219105375366 # SSO:000003212__Glutathione S-transferase family protein // 0.0036818184248118865 # SSO:000000847__Alanine dehydrogenase (EC 1.4.1.1) // 0.0035470969644072845 # SSO:000007796__Sucrose-6-phosphate hydrolase (EC 3.2.1.26) // 0.0035416693894095887 # SSO:000035652__Peptide-methionine (S)-S-oxide reductase MsrA (EC 1.8.4.11) // 0.003433659600101733 # SSO:000000584__ATP-dependent DNA helicase recQ (EC 3.6.1.- ) // 0.0033628637317897545
CatBoost,0.782051,0.80595,0.782051,0.787729,0.786792,TP=41 TN=20 FP=12FN=5,SSO:000010553__Amidohydrolase // 0.6170660223852124 # SSO:000000847__Alanine dehydrogenase (EC 1.4.1.1) // 0.39343530033949575 # SSO:000020957__Oxidoreductase // 0.36526686406252573 # SSO:000020331__NADH-dependent flavin oxidoreductase // 0.33708315664123284 # SSO:000017805__Histidine kinase // 0.32608134017056994 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.30801096551172324 # SSO:000029834__dihydrodipicolinate reductase // 0.23956857921087116 # SSO:000001185__Beta-glucuronidase (EC 3.2.1.31) // 0.22591095180484497 # SSO:000020835__OsmC family protein // 0.22254230426777932 # SSO:000010953__BCCT family transporter // 0.19650882448895915 # SSO:000016736__Fibronectin/fibrinogen-binding protein // 0.1911786048953462 # SSO:000003447__Histidinol-phosphatase (EC 3.1.3.15) // 0.19083828262829544 # SSO:000021467__Peroxiredoxin // 0.18955667936605114 # SSO:000003486__Homoserine kinase (EC 2.7.1.39) // 0.18501792289709945 # SSO:000006843__Pyruvate carboxylase subunit B (EC 6.4.1.1) // 0.16960379871569656 # SSO:000035652__Peptide-methionine (S)-S-oxide reductase MsrA (EC 1.8.4.11) // 0.14894036173765413 # SSO:000022969__Protoporphyrinogen oxidase // 0.1384959014038093 # SSO:000043998__3-deoxy-7-phosphoheptulonate synthase (EC 2.5.1.54) // 0.13046753288279894 # SSO:000012182__Chloride channel protein // 0.13045024257874774 # SSO:000018491__Integrase // 0.12358532642596766


Numer of genomes:919
Number of genomic features :3986
Shape of y:(919,)
Count of 1: 169
Count of 0: 750
Running Decision Tree for phenotype glycogen--builds_acid_from




Running Random Forest for phenotype glycogen--builds_acid_from




Running CatBoost for phenotype glycogen--builds_acid_from




######### Combined report for glycogen--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.73913,0.810753,0.73913,0.763523,0.694079,TP=116 TN=20 FP=36FN=12,SSO:000005994__Phosphomevalonate kinase (EC 2.7.4.2) // 0.19613263007178403 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.10696946441108411 # SSO:000002270__Diacylglycerol kinase (EC 2.7.1.107) // 0.04546025219418815 # SSO:000004710__Mannose-1-phosphate guanylyltransferase (EC 2.7.7.13) // 0.03996523951907911 # SSO:000002280__Diaminopimelate decarboxylase (EC 4.1.1.20) // 0.039390594414562614 # SSO:000035189__Conjugal transfer protein TraF // 0.03224718870352778 # SSO:000012290__CoA-binding protein // 0.030905387589066295 # SSO:000021723__Phosphotransferase // 0.030266626721993876 # SSO:000024194__Rieske (2Fe-2S) domain-containing protein // 0.02253016459504453 # SSO:000018921__Pullulanase (EC 3.2.1.41) // 0.01990653632155431 # SSO:000017356__Glycogen synthase (EC 2.4.1.11) // 0.01791742030866168 # SSO:000013194__DnaJ domain-containing protein // 0.01387445226230068 # SSO:000021669__Phospholipase // 0.013526452988257684 # SSO:000002612__Excinuclease ABC subunit B // 0.012366078359310712 # SSO:000016736__Fibronectin/fibrinogen-binding protein // 0.012111647234948926 # SSO:000025517__Tellurium resistance protein terC // 0.011777267254805218 # SSO:000006213__Precorrin-6A synthase (deacetylating) (EC 2.1.1.152) // 0.011435131611305691 # SSO:000010477__Alpha-L-Rha alpha-13-L-rhamnosyltransferase (EC 2.4.1.-) // 0.010403358061838562 # SSO:000020394__NTPase // 0.009857375151134566 # SSO:000035873__FMNH2-dependent alkanesulfonate monooxygenase (EC 1.14.14.5) // 0.008708830742885743
Random Forest,0.831522,0.837909,0.831522,0.834454,0.725329,TP=135 TN=18 FP=17FN=14,SSO:000005994__Phosphomevalonate kinase (EC 2.7.4.2) // 0.008156234949632333 # SSO:000004907__Mevalonate kinase (EC 2.7.1.36) // 0.007384606992095407 # SSO:000004128__L-aspartate oxidase (EC 1.4.3.16) // 0.007092299633233205 # SSO:000000045__1-deoxy-D-xylulose 5-phosphate reductoisomerase (EC 1.1.1.267) // 0.0065776293259810084 # SSO:000003528__Hydroxymethylglutaryl-CoA synthase (EC 2.3.3.10) // 0.005623084575908864 # SSO:000043018__acetolactate decarboxylase (EC 4.1.1.5) // 0.0055929229227425 # SSO:000000315__3-methyl-2-oxobutanoate hydroxymethyltransferase (EC 2.1.2.11) // 0.005076132797370987 # SSO:000002350__Diphosphomevalonate decarboxylase (EC 4.1.1.33) // 0.0050313984706366055 # SSO:000001091__Aspartate 1-decarboxylase (EC 4.1.1.11) // 0.004593661392624475 # SSO:000004710__Mannose-1-phosphate guanylyltransferase (EC 2.7.7.13) // 0.004320480296349204 # SSO:000029148__YibE/F family protein // 0.00419990474250911 # SSO:000000364__4-hydroxy-3-methylbut-2-enyl diphosphate reductase (EC 1.17.1.2) // 0.00417433556671509 # SSO:000001419__Carbonic anhydrase (EC 4.2.1.1) // 0.004170094326208657 # SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.004092877925176501 # SSO:000003103__Gluconate permease // 0.003927268007233627 # SSO:000006853__Pyruvate oxidase (EC 1.2.3.3) // 0.00384508420661918 # SSO:000013154__Dipeptidase // 0.003806321643759694 # SSO:000021723__Phosphotransferase // 0.0037975430472093645 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.003789827880115899 # SSO:000010090__ATPase // 0.0037722677671259515
CatBoost,0.836957,0.845313,0.836957,0.840665,0.740954,TP=135 TN=19 FP=17FN=13,SSO:000005994__Phosphomevalonate kinase (EC 2.7.4.2) // 0.516620434908972 # SSO:000004710__Mannose-1-phosphate guanylyltransferase (EC 2.7.7.13) // 0.3816575426215928 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.31198189010194094 # SSO:000003103__Gluconate permease // 0.2922848090484999 # SSO:000027090__beta-glucosidase (EC 3.2.1.21) // 0.23795061960656627 # SSO:000043018__acetolactate decarboxylase (EC 4.1.1.5) // 0.21786583586864125 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.2057732917592809 # SSO:000007212__SAM-dependent methyltransferase (EC 2.1.1.-) // 0.20252435738944094 # SSO:000004128__L-aspartate oxidase (EC 1.4.3.16) // 0.19980978939682104 # SSO:000001091__Aspartate 1-decarboxylase (EC 4.1.1.11) // 0.18380287144798865 # SSO:000002148__DNA-cytosine methyltransferase (EC 2.1.1.37) // 0.17426769174003678 # SSO:000035189__Conjugal transfer protein TraF // 0.13859569149734538 # SSO:000012146__Chemotaxis protein // 0.13553709695313024 # SSO:000018921__Pullulanase (EC 3.2.1.41) // 0.13447463339220572 # SSO:000013144__DinB family protein // 0.13169786420554713 # SSO:000021723__Phosphotransferase // 0.13034868795776436 # SSO:000009661__4-alpha-glucanotransferase // 0.12849977167385077 # SSO:000010090__ATPase // 0.12672462560196895 # SSO:000029148__YibE/F family protein // 0.11788209968388175 # SSO:000012290__CoA-binding protein // 0.11737824528937917


Numer of genomes:290
Number of genomic features :2516
Shape of y:(290,)
Count of 1: 70
Count of 0: 220
Running Decision Tree for phenotype pullulan--builds_acid_from




Running Random Forest for phenotype pullulan--builds_acid_from




Running CatBoost for phenotype pullulan--builds_acid_from




######### Combined report for pullulan--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.931034,0.927277,0.931034,0.926785,0.8025,TP=49 TN=5 FP=1FN=3,SSO:000018921__Pullulanase (EC 3.2.1.41) // 0.23540563582097646 # SSO:000012207__Choloylglycine hydrolase family protein // 0.0888649725551544 # SSO:000017411__Glycosyl transferase family 8 // 0.050993778302732724 # SSO:000012951__DNA/RNA non-specific endonuclease // 0.03144720728258883 # SSO:000002311__Dihydroorotate dehydrogenase (quinone) (EC 1.3.5.2) // 0.02276830376637796 # SSO:000024584__Serine/threonine protein kinase // 0.02211590039739052 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.021898118942623352 # SSO:000036119__Glutamine synthetase type III (EC 6.3.1.2) // 0.0213352685537988 # SSO:000002129__DNA-3-methyladenine glycosylase (EC 3.2.2.20) // 0.01678393945163278 # SSO:000017037__Galactitol-1-phosphate 5-dehydrogenase (EC 1.1.1.251) // 0.016563474970395693 # SSO:000018207__IS30 family transposase // 0.012660973517494898 # SSO:000033137__rubrerythrin family protein // 0.011718639685259652 # SSO:000011942__Carbon-nitrogen hydrolase family protein // 0.00885373696194558 # SSO:000039268__Succinate dehydrogenase (quinone) flavoprotein subunit (EC 1.3.5.1) // 0.005660715847578992 # SSO:000018727__L-2-amino-thiazoline-4-carboxylic acid hydrolase (EC 3.5.2.-) // 0.00564614035890497 # SSO:000026255__Two component system sensor histidine kinase CiaH (EC 2.7.3.-) // 0.005609044838809118 # SSO:000003470__Holo-acyl-carrier protein synthase (EC 2.7.8.7) // 0.005558308609472535 # SSO:000042727__UDP-glucose---hexose-1-phosphate uridylyltransferase (EC 2.7.7.12) // 0.002917281250580985 # SSO:000026007__Transglutaminase-like protein // 0.0018681281146344234 # SSO:000001489__Cell filamentation protein fic // 0.0
Random Forest,0.844828,0.852999,0.844828,0.848608,0.7,TP=45 TN=4 FP=5FN=4,SSO:000018921__Pullulanase (EC 3.2.1.41) // 0.0162170279903264 # SSO:000037530__Undecaprenyldiphospho-muramoylpentapeptide beta-N-acetylglucosaminyltransferase (EC 2.4.1.227) // 0.011009514539430238 # SSO:000042259__nucleoside-triphosphate diphosphatase (EC 3.6.1.19) // 0.010949108182033393 # SSO:000026421__Tyrosine recombinase XerC // 0.010924947962852791 # SSO:000008622__UDP-N-acetylglucosamine--N-acetylmuramyl-(pentapeptide) pyrophosphoryl-undecaprenol N-acetylglucosamine transferase (EC 2.4.1.227) // 0.010709915047051611 # SSO:000001098__Aspartate--ammonia ligase (EC 6.3.1.1) // 0.009624128967538037 # SSO:000005972__Phosphoglycerate mutase (EC 5.4.2.1) // 0.008411590133476516 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.008265875187284416 # SSO:000000618__ATP-dependent nuclease subunit B // 0.008193986526860523 # SSO:000009057__biotin--acetyl-CoA-carboxylase ligase (EC 6.3.4.15 ) // 0.007783072322261846 # SSO:000002612__Excinuclease ABC subunit B // 0.007294971164972697 # SSO:000018657__Isoprenylcysteine carboxyl methyltransferase family protein // 0.007175849276471093 # SSO:000006841__Pyruvate carboxylase (EC 6.4.1.1) // 0.007017414650207907 # SSO:000002097__DNA recombination protein RmuC // 0.006883614883557213 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.006563667626440448 # SSO:000012884__DNA topology modulation protein // 0.00605624720635934 # SSO:000005955__Phosphoenolpyruvate carboxylase (EC 4.1.1.31) // 0.005917604129111739 # SSO:000043945__histidinol-phosphate transaminase (EC 2.6.1.9) // 0.005813961245210752 # SSO:000012375__Competence protein ComGF // 0.0056763192731643895 # SSO:000000542__ATP phosphoribosyltransferase (EC 2.4.2.17) // 0.005558229051502146
CatBoost,0.896552,0.88771,0.896552,0.890177,0.73,TP=48 TN=4 FP=2FN=4,SSO:000018921__Pullulanase (EC 3.2.1.41) // 1.4682247394254486 # SSO:000003212__Glutathione S-transferase family protein // 0.5053877150177457 # SSO:000006048__Phosphoribulokinase (EC 2.7.1.19) // 0.4878847692279922 # SSO:000018657__Isoprenylcysteine carboxyl methyltransferase family protein // 0.45446626213336466 # SSO:000012375__Competence protein ComGF // 0.26811719200338946 # SSO:000003247__Glycerol dehydrogenase (EC 1.1.1.6) // 0.24691315639008393 # SSO:000001098__Aspartate--ammonia ligase (EC 6.3.1.1) // 0.24336473811261347 # SSO:000010724__Antirestriction protein ArdA // 0.238179954198414 # SSO:000009590__3'-5' exonuclease // 0.2320372030907317 # SSO:000006782__Putrescine carbamoyltransferase (EC 2.1.3.6) // 0.23007125087524627 # SSO:000042259__nucleoside-triphosphate diphosphatase (EC 3.6.1.19) // 0.2250397144124441 # SSO:000005972__Phosphoglycerate mutase (EC 5.4.2.1) // 0.21065958457942782 # SSO:000012884__DNA topology modulation protein // 0.20289895964174876 # SSO:000002988__GTP pyrophosphokinase (EC 2.7.6.5) // 0.17472459997976145 # SSO:000005188__NAD-dependent malic enzyme (EC 1.1.1.38) // 0.16333017977650202 # SSO:000024584__Serine/threonine protein kinase // 0.14890410851829697 # SSO:000002612__Excinuclease ABC subunit B // 0.14879421087660918 # SSO:000002129__DNA-3-methyladenine glycosylase (EC 3.2.2.20) // 0.13326989590063507 # SSO:000011933__Carbohydrate-binding domain containing protein // 0.11800144571333933 # SSO:000018210__IS5 family transposase // 0.11707993193474321


Numer of genomes:1047
Number of genomic features :4109
Shape of y:(1047,)
Count of 1: 144
Count of 0: 903
Running Decision Tree for phenotype melezitose--builds_acid_from




Running Random Forest for phenotype melezitose--builds_acid_from




Running CatBoost for phenotype melezitose--builds_acid_from




######### Combined report for melezitose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.8,0.805478,0.8,0.802667,0.605556,TP=158 TN=10 FP=22FN=20,SSO:000012972__DUF1801 domain-containing protein // 0.111920031690834 # SSO:000016921__Fumarylacetoacetate hydrolase family protein // 0.08053260724673882 # SSO:000012004__Cation:proton antiporter // 0.0452494349163733 # SSO:000002805__Flagellar biosynthesis protein FliQ // 0.044983989906594045 # SSO:000005809__Peptidyl-prolyl cis-trans isomerase (EC 5.2.1.8) // 0.03522351251718968 # SSO:000001558__Chorismate synthase (EC 4.2.3.5) // 0.03008421987108974 # SSO:000000925__Alpha-mannosidase (EC 3.2.1.24) // 0.030035209052953517 # SSO:000004189__LOG family protein // 0.023981514514273772 # SSO:000001077__Arylsulfatase (EC 3.1.6.1) // 0.021091145004791638 # SSO:000017734__Hemolysin III // 0.0196449327017513 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.018718334097456067 # SSO:000012375__Competence protein ComGF // 0.01696263535118191 # SSO:000024332__SH3 domain-containing protein // 0.015090849368822587 # SSO:000025517__Tellurium resistance protein terC // 0.013585192389051766 # SSO:000002607__Excinuclease ABC subunit A // 0.011826317514920942 # SSO:000033847__zinc-binding dehydrogenase // 0.011798399003530203 # SSO:000000080__23-dihydro-23-dihydroxybenzoate dehydrogenase (EC 1.3.1.28) // 0.01153411708593377 # SSO:000001378__CTP synthase (EC 6.3.4.2) // 0.011523884114928454 # SSO:000023761__Pyrimidine-specific ribonucleoside hydrolase RihA // 0.011254846348542886 # SSO:000044330__glutamate formimidoyltransferase (EC 2.1.2.5) // 0.0109468396444003
Random Forest,0.857143,0.83137,0.857143,0.838078,0.611111,TP=172 TN=8 FP=8FN=22,SSO:000012972__DUF1801 domain-containing protein // 0.008913998312905338 # SSO:000000925__Alpha-mannosidase (EC 3.2.1.24) // 0.006070388719977574 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.005368018457311489 # SSO:000008887__Uroporphyrinogen-III synthase (EC 4.2.1.75) // 0.0052730585301965635 # SSO:000016921__Fumarylacetoacetate hydrolase family protein // 0.004923045704864584 # SSO:000000035__14-dihydroxy-2-naphthoate polyprenyltransferase (EC 2.5.1.74) // 0.0046506828196907135 # SSO:000002281__Diaminopimelate epimerase (EC 5.1.1.7) // 0.004421000565362473 # SSO:000007094__Ribonuclease PH (EC 2.7.7.56) // 0.004187591012300536 # SSO:000008176__Transcription termination factor Rho // 0.004116365959166806 # SSO:000012290__CoA-binding protein // 0.004085442089968629 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.004035183471086789 # SSO:000000826__Adenylylsulfate kinase (EC 2.7.1.25) // 0.003773060404546284 # SSO:000029028__Xaa-Pro dipeptidyl-peptidase // 0.003754295931254693 # SSO:000044172__hydroxyisourate hydrolase (EC 3.5.2.17) // 0.0036780622214562264 # SSO:000025241__Sugar kinase // 0.0036204535040432508 # SSO:000001940__Cytochrome d ubiquinol oxidase subunit II (EC 1.10.3.-) // 0.0033198388147995827 # SSO:000021723__Phosphotransferase // 0.0032074758110580276 # SSO:000033999__2-oxo-4-hydroxy-4-carboxy-5-ureidoimidazoline decarboxylase (EC 4.1.1.97) // 0.0030284253849822786 # SSO:000009661__4-alpha-glucanotransferase // 0.0030025645482910095 # SSO:000003007__Galactonate dehydratase (EC 4.2.1.6) // 0.0028463211191569514
CatBoost,0.828571,0.81394,0.828571,0.820453,0.608333,TP=165 TN=9 FP=15FN=21,SSO:000012972__DUF1801 domain-containing protein // 0.43896057321291515 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.43233975359795485 # SSO:000020231__N-formylglutamate amidohydrolase // 0.3146518455097132 # SSO:000005809__Peptidyl-prolyl cis-trans isomerase (EC 5.2.1.8) // 0.31200471233874605 # SSO:000000925__Alpha-mannosidase (EC 3.2.1.24) // 0.25926980461416366 # SSO:000003007__Galactonate dehydratase (EC 4.2.1.6) // 0.24302681372709856 # SSO:000017367__Glycosidase // 0.2185209722518882 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.21643449954270588 # SSO:000016921__Fumarylacetoacetate hydrolase family protein // 0.21632350862633998 # SSO:000023930__Regulator // 0.18391247258379909 # SSO:000008887__Uroporphyrinogen-III synthase (EC 4.2.1.75) // 0.16318636975098735 # SSO:000036869__Methyltransferase // 0.15261141417509438 # SSO:000004449__Lipase (EC 3.1.1.3) // 0.134292268270339 # SSO:000021037__PAS domain-containing protein // 0.13424356166651477 # SSO:000012004__Cation:proton antiporter // 0.12211786288827445 # SSO:000030058__glycosyl transferase family 8 protein // 0.12180404110779633 # SSO:000029834__dihydrodipicolinate reductase // 0.12119738338808739 # SSO:000009661__4-alpha-glucanotransferase // 0.11895329874956102 # SSO:000042933__glycerophosphodiester phosphodiesterase (EC 3.1.4.46) // 0.11054900986525634 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.10772326645787779


Numer of genomes:284
Number of genomic features :2637
Shape of y:(284,)
Count of 1: 134
Count of 0: 150
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:885
Number of genomic features :3932
Shape of y:(885,)
Count of 1: 136
Count of 0: 749
Running Decision Tree for phenotype D-tagatose--builds_acid_from




Running Random Forest for phenotype D-tagatose--builds_acid_from




Running CatBoost for phenotype D-tagatose--builds_acid_from




######### Combined report for D-tagatose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.80226,0.786465,0.80226,0.792876,0.633207,TP=130 TN=12 FP=14FN=21,SSO:000044077__Magnesium transporter // 0.1285017444589277 # SSO:000001094__Aspartate ammonia-lyase (EC 4.3.1.1) // 0.06681814585761513 # SSO:000009334__tRNA (cytidine(34)-2'-O)-methyltransferase (EC 2.1.1.207) // 0.06127905333344379 # SSO:000007178__Rod shape-determining protein MreC // 0.04427720452396088 # SSO:000012473__Cytochrome c // 0.042632952069643446 # SSO:000000826__Adenylylsulfate kinase (EC 2.7.1.25) // 0.03969711562803339 # SSO:000000450__6-phospho-beta-galactosidase (EC 3.2.1.85) // 0.03134899551999154 # SSO:000029838__dihydrolipoyl dehydrogenase // 0.029514773166917793 # SSO:000000176__2-keto-4-pentenoate hydratase (EC 4.2.1.80) // 0.029387256334699647 # SSO:000011127__Bifunctional DNA primase/polymerase // 0.028700492892026877 # SSO:000013144__DinB family protein // 0.028418819826818785 # SSO:000020511__Nicotinamide mononucleotide transporter // 0.021679508566736956 # SSO:000013151__Dioxygenase // 0.021591574076171054 # SSO:000002802__Flagellar biosynthesis protein FlhF // 0.018657198458067518 # SSO:000000975__Ammonium transporter // 0.016616833650857483 # SSO:000029784__cytochrome c oxidase subunit III // 0.016312693259100675 # SSO:000006834__Pyrroline-5-carboxylate reductase (EC 1.5.1.2) // 0.016172513344604574 # SSO:000011951__Carboxylate-amine ligase // 0.013967780847616906 # SSO:000007698__Stage III sporulation protein AB // 0.013124625967865344 # SSO:000030043__glycoside hydrolase family 28 // 0.012272225146317832
Random Forest,0.858757,0.845876,0.858757,0.841482,0.679609,TP=139 TN=13 FP=5FN=20,SSO:000044077__Magnesium transporter // 0.00706254764093084 # SSO:000043641__N4-(beta-N-acetylglucosaminyl)-L-asparaginase (EC 3.5.1.26) // 0.006776737292318337 # SSO:000025471__Tagatose-bisphosphate aldolase (EC 4.1.2.40) // 0.006694878275835393 # SSO:000010272__Acyl-CoA thioesterase // 0.005424030815455411 # SSO:000000450__6-phospho-beta-galactosidase (EC 3.2.1.85) // 0.005366951402416237 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.0047397921102624 # SSO:000009334__tRNA (cytidine(34)-2'-O)-methyltransferase (EC 2.1.1.207) // 0.00471136494375816 # SSO:000000847__Alanine dehydrogenase (EC 1.4.1.1) // 0.0044456851964164035 # SSO:000019509__Metal-dependent hydrolase // 0.004407746157548572 # SSO:000006782__Putrescine carbamoyltransferase (EC 2.1.3.6) // 0.004246610998557832 # SSO:000007416__Septum formation protein Maf // 0.004215367650128937 # SSO:000001094__Aspartate ammonia-lyase (EC 4.3.1.1) // 0.004098056071132845 # SSO:000012290__CoA-binding protein // 0.004046156541310179 # SSO:000020985__Oxygen-independent coproporphyrinogen III oxidase // 0.004032220673282864 # SSO:000029108__YaiI/YqxD family protein // 0.00391403746931495 # SSO:000026225__Tryptophan-rich sensory protein // 0.003508854959704381 # SSO:000018921__Pullulanase (EC 3.2.1.41) // 0.0032497217231077058 # SSO:000018613__Iron-containing alcohol dehydrogenase // 0.003178200057521238 # SSO:000008902__V-type ATP synthase subunit K (EC 3.6.3.14) // 0.0031584477096181056 # SSO:000010559__Amidohydrolase family protein // 0.0030041505626480828
CatBoost,0.881356,0.873647,0.881356,0.871674,0.740215,TP=139 TN=17 FP=5FN=16,SSO:000025471__Tagatose-bisphosphate aldolase (EC 4.1.2.40) // 0.6773965908807702 # SSO:000043641__N4-(beta-N-acetylglucosaminyl)-L-asparaginase (EC 3.5.1.26) // 0.4863147589342313 # SSO:000018613__Iron-containing alcohol dehydrogenase // 0.35447289804641774 # SSO:000000450__6-phospho-beta-galactosidase (EC 3.2.1.85) // 0.2896426790253424 # SSO:000020342__NADH:flavin oxidoreductase/NADH oxidase // 0.24802736883088872 # SSO:000044077__Magnesium transporter // 0.236251751267322 # SSO:000000176__2-keto-4-pentenoate hydratase (EC 4.2.1.80) // 0.2235460911522993 # SSO:000018921__Pullulanase (EC 3.2.1.41) // 0.20931076105490592 # SSO:000002972__GDP-mannose 46-dehydratase (EC 4.2.1.47) // 0.19881048150272068 # SSO:000006782__Putrescine carbamoyltransferase (EC 2.1.3.6) // 0.18208687352860312 # SSO:000001094__Aspartate ammonia-lyase (EC 4.3.1.1) // 0.17788723066030346 # SSO:000019153__Lysozyme // 0.17039008493184046 # SSO:000012973__DUF1850 domain-containing protein // 0.15383240815728874 # SSO:000010272__Acyl-CoA thioesterase // 0.14867635333645382 # SSO:000006855__Pyruvatephosphate dikinase (EC 2.7.9.1) // 0.13837556253275127 # SSO:000007416__Septum formation protein Maf // 0.1382600086213592 # SSO:000013618__FAD-binding protein // 0.13730898817464737 # SSO:000025277__Sulfotransferase // 0.1278304638068753 # SSO:000022945__Protein-L-isoaspartate(D-aspartate) O-methyltransferase( EC:2.1.1.77 ) // 0.11738413748563187 # SSO:000013473__Esterase // 0.11439609965331052


Numer of genomes:1359
Number of genomic features :4725
Shape of y:(1359,)
Count of 1: 792
Count of 0: 567
Running Decision Tree for phenotype malate--assimilation




Running Random Forest for phenotype malate--assimilation




Running CatBoost for phenotype malate--assimilation




######### Combined report for malate--assimilation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.75,0.758237,0.75,0.752137,0.749589,TP=80 TN=124 FP=27FN=41,SSO:000042813__amino-acid N-acetyltransferase (EC 2.3.1.1) // 0.18349257099379873 # SSO:000033044__pyrimidine 5'-nucleotidase // 0.08349675565428188 # SSO:000003240__Glycerate kinase (EC 2.7.1.31) // 0.05746040122676675 # SSO:000000380__4-hydroxyphenylpyruvate dioxygenase (EC 1.13.11.27) // 0.04066568623870409 # SSO:000000115__2-C-methyl-D-erythritol 24-cyclodiphosphate synthase (EC 4.6.1.12) // 0.03686907324756931 # SSO:000012584__Cytochrome C biogenesis protein // 0.02489853972834325 # SSO:000002338__Dimethylsulfoniopropionate demethylase (EC 2.1.1.269) // 0.023144527830320446 # SSO:000006398__Propionate catabolism operon regulatory protein PrpR // 0.02086930117139704 # SSO:000021918__Porin // 0.019102015584949533 # SSO:000025628__Thioredoxin family protein // 0.018617769974413396 # SSO:000005791__Peptide chain release factor 3 (EC 4.1.1.68 ) // 0.01772551121595784 # SSO:000005132__N-formylglutamate deformylase (EC 3.5.1.68) // 0.016795433049455473 # SSO:000024288__S1/P1 nuclease // 0.015363346415685071 # SSO:000030144__hsp20/alpha crystallin family protein // 0.013730586107433445 # SSO:000001940__Cytochrome d ubiquinol oxidase subunit II (EC 1.10.3.-) // 0.012285694411426327 # SSO:000044308__adenosylmethionine---8-amino-7-oxononanoate transaminase (EC 2.6.1.62) // 0.011977572912500989 # SSO:000043998__3-deoxy-7-phosphoheptulonate synthase (EC 2.5.1.54) // 0.011955892355408906 # SSO:000012216__Chromate transporter // 0.011010857407847323 # SSO:000001604__CoA-disulfide reductase (EC 1.8.1.14) // 0.01081240945511834 # SSO:000017006__GPW/gp25 family protein // 0.010526236369975909
Random Forest,0.772059,0.783331,0.772059,0.774326,0.775984,TP=85 TN=125 FP=22FN=40,SSO:000042813__amino-acid N-acetyltransferase (EC 2.3.1.1) // 0.006256441113532751 # SSO:000008463__Two-component system response regulator OmpR // 0.005373532651670427 # SSO:000002003__D-amino acid dehydrogenase (EC 1.4.99.1) // 0.004521801564219486 # SSO:000037110__NADPH dependent aldo-keto reductase // 0.004073531903026762 # SSO:000003445__Histidine utilization repressor // 0.0038702225361480574 # SSO:000012016__Cd(II)/Pb(II)-responsive transcriptional regulator // 0.00383316652398142 # SSO:000002791__Flagellar assembly protein FliH // 0.0037973030327865948 # SSO:000043392__chorismate lyase (EC 4.1.3.40) // 0.0037475610599100057 # SSO:000011524__CDP-6-deoxy-delta-34-glucoseen reductase // 0.0037417880696530605 # SSO:000021084__PHP domain-containing protein // 0.0035876087352099723 # SSO:000036741__Malate dehydrogenase (quinone) (EC 1.1.5.4) // 0.0035495176602708214 # SSO:000042832__succinyldiaminopimelate transaminase (EC 2.6.1.17) // 0.003415823232977598 # SSO:000035324__D-alanyl-D-alanine endopeptidase // 0.003306992352552492 # SSO:000009637__3-oxoadipate enol-lactonase // 0.0032560987294109243 # SSO:000003160__Glutamate--cysteine ligase (EC 6.3.2.2) // 0.003229465085909475 # SSO:000004390__Lactoylglutathione lyase (EC 4.4.1.5) // 0.003197126975925858 # SSO:000001968__Cytosine permease // 0.0031826989530406614 # SSO:000001479__Cell division topological specificity factor MinE // 0.0031458070454514126 # SSO:000002935__Fructose-bisphosphate aldolase class II (EC 4.1.2.13) // 0.0030134924164279173 # SSO:000002797__Flagellar basal-body rod protein FlgF // 0.002916536237824372
CatBoost,0.775735,0.786102,0.775735,0.777897,0.779014,TP=85 TN=126 FP=22FN=39,SSO:000036741__Malate dehydrogenase (quinone) (EC 1.1.5.4) // 0.3145167693275298 # SSO:000042813__amino-acid N-acetyltransferase (EC 2.3.1.1) // 0.231643077716221 # SSO:000018641__Isochorismatase family protein // 0.22808159475716633 # SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.21479932346166652 # SSO:000012584__Cytochrome C biogenesis protein // 0.2083981503059712 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.20584445660357728 # SSO:000044216__diaminobutyrate acetyltransferase (EC 2.3.1.178) // 0.17365242643750695 # SSO:000024633__Siderophore-interacting protein // 0.15319062291935678 # SSO:000012973__DUF1850 domain-containing protein // 0.13654150614489907 # SSO:000011987__Catechol 12-dioxygenase (EC 1.13.11.1) // 0.1296134693574781 # SSO:000004128__L-aspartate oxidase (EC 1.4.3.16) // 0.1256618628363672 # SSO:000003248__Glycerol kinase (EC 2.7.1.30) // 0.12447594068109234 # SSO:000018283__IclR-family transcriptional regulator // 0.12394755720660103 # SSO:000010141__AbrB family transcriptional regulator // 0.10322322296248307 # SSO:000003445__Histidine utilization repressor // 0.09518355823057856 # SSO:000020656__Nucleoside diphosphate kinase regulator // 0.09509699883148623 # SSO:000029128__YeeE/YedE family protein // 0.09498862445069535 # SSO:000002791__Flagellar assembly protein FliH // 0.09365932890155076 # SSO:000012811__DNA polymerase III subunit delta' // 0.0909656858306518 # SSO:000029094__YHS domain protein // 0.08942654439712627


Numer of genomes:1247
Number of genomic features :4422
Shape of y:(1247,)
Count of 1: 240
Count of 0: 1007
Running Decision Tree for phenotype adipate--assimilation




Running Random Forest for phenotype adipate--assimilation




Running CatBoost for phenotype adipate--assimilation




######### Combined report for adipate--assimilation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.816,0.823169,0.816,0.819064,0.759878,TP=166 TN=38 FP=26FN=20,SSO:000043056__arylformamidase (EC 3.5.1.9) // 0.10926652476741308 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.08755272519303654 # SSO:000029945__flavodoxin family protein // 0.06444575345914778 # SSO:000000272__3-carboxy-ciscis-muconate cycloisomerase (EC 5.5.1.2) // 0.05202464228354462 # SSO:000003097__Glucokinase (EC 2.7.1.2) // 0.04065834009450151 # SSO:000021761__Phytase // 0.03839072738650805 # SSO:000008131__Trans-aconitate 2-methyltransferase (EC 2.1.1.144) // 0.030223681687617222 # SSO:000036741__Malate dehydrogenase (quinone) (EC 1.1.5.4) // 0.029092058357364142 # SSO:000037083__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit B // 0.027532449138660764 # SSO:000038753__Protein-methionine-sulfoxide reductase catalytic subunit MsrP // 0.025531885140457827 # SSO:000013622__FAD-dependent monooxygenase // 0.025414360179915517 # SSO:000012503__Cu(I)-responsive transcriptional regulator // 0.021293757794011943 # SSO:000001479__Cell division topological specificity factor MinE // 0.01912217357180878 # SSO:000009637__3-oxoadipate enol-lactonase // 0.0162377673802008 # SSO:000031065__methionine synthase // 0.015173881871167968 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.014822486464842338 # SSO:000043198__threonine-phosphate decarboxylase (EC 4.1.1.81) // 0.01455035927196388 # SSO:000008042__Thioredoxin (EC 1.8.1.8) // 0.012116176143170893 # SSO:000009528__2-dehydro-3-deoxygalactonokinase (EC 2.7.1.58) // 0.01191114897027476 # SSO:000010823__Arsenate reductase family protein // 0.01171588424100557
Random Forest,0.872,0.87315,0.872,0.859069,0.748204,TP=188 TN=30 FP=4FN=28,SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.005583791923507036 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.005451839192118749 # SSO:000042942__threo-3-hydroxy-L-aspartate ammonia-lyase (EC 4.3.1.16) // 0.00512397259344994 # SSO:000043056__arylformamidase (EC 3.5.1.9) // 0.00494622985283511 # SSO:000012016__Cd(II)/Pb(II)-responsive transcriptional regulator // 0.0045389926244501656 # SSO:000016918__Fumarylacetoacetase (EC 3.7.1.2) // 0.004388375010473642 # SSO:000043848__aspartate dehydrogenase (EC 1.4.1.21) // 0.004305744142427305 # SSO:000042832__succinyldiaminopimelate transaminase (EC 2.6.1.17) // 0.00423241274774712 # SSO:000000028__12-phenylacetyl-CoA epoxidase subunit B (EC 1.14.13.149) // 0.003952641053549976 # SSO:000025479__Tartrate dehydrogenase (EC 1.1.1.93) // 0.003938398547750206 # SSO:000025615__Thiolase // 0.003901610073925345 # SSO:000010806__Arginine/lysine/ornithine decarboxylase // 0.0038775404518657164 # SSO:000019560__Methionine biosynthesis protein MetW // 0.0038504036234503817 # SSO:000039426__Taurine ABC transporter substrate-binding protein // 0.0038440160883711646 # SSO:000021678__Phospholipase D family protein // 0.0036078020255829054 # SSO:000000027__12-phenylacetyl-CoA epoxidase subunit A (EC 1.14.13.149) // 0.003551740841347947 # SSO:000003484__Homoserine O-succinyltransferase (EC 2.3.1.46) // 0.0035386857809762947 # SSO:000029597__class II aldolase/adducin family protein // 0.003516056410231186 # SSO:000002935__Fructose-bisphosphate aldolase class II (EC 4.1.2.13) // 0.0034772320957799545 # SSO:000024194__Rieske (2Fe-2S) domain-containing protein // 0.0032625386075280944
CatBoost,0.844,0.834866,0.844,0.833913,0.729975,TP=181 TN=30 FP=11FN=28,SSO:000025615__Thiolase // 0.47133329095163506 # SSO:000021642__Phosphoesterase // 0.356535501623902 # SSO:000008391__Tryptophan 23-dioxygenase (EC 1.13.11.11) // 0.3114132329376957 # SSO:000010472__Alpha-2-macroglobulin // 0.266196949781535 # SSO:000000028__12-phenylacetyl-CoA epoxidase subunit B (EC 1.14.13.149) // 0.23921545496632673 # SSO:000010419__Alkane-1 monooxygenase (EC 1.14.15.3) // 0.22716082880821073 # SSO:000029597__class II aldolase/adducin family protein // 0.21561319932858194 # SSO:000002043__D-serine ammonia-lyase (EC 4.3.1.18) // 0.21459136130651973 # SSO:000013622__FAD-dependent monooxygenase // 0.17141966000649295 # SSO:000030039__glycoside hydrolase family protein // 0.16542851543683212 # SSO:000029945__flavodoxin family protein // 0.15794728584677228 # SSO:000029073__Xylose isomerase // 0.15555658274837372 # SSO:000000298__3-hydroxyisobutyrate dehydrogenase (EC 1.1.1.31) // 0.15471789681141346 # SSO:000043427__L-serine ammonia-lyase (EC 4.3.1.17) // 0.1335881758162894 # SSO:000043848__aspartate dehydrogenase (EC 1.4.1.21) // 0.13219609100215515 # SSO:000033631__transglutaminase domain-containing protein // 0.126098577517953 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.1220195019977126 # SSO:000011856__Calcium binding protein // 0.12151681839237531 # SSO:000012992__DUF350 domain-containing protein // 0.1171691913809783 # SSO:000027090__beta-glucosidase (EC 3.2.1.21) // 0.11559967175713148


Numer of genomes:1250
Number of genomic features :4542
Shape of y:(1250,)
Count of 1: 354
Count of 0: 896
Running Decision Tree for phenotype decanoate--assimilation




Running Random Forest for phenotype decanoate--assimilation




Running CatBoost for phenotype decanoate--assimilation




######### Combined report for decanoate--assimilation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.832,0.829394,0.832,0.830452,0.782937,TP=161 TN=47 FP=19FN=23,SSO:000008463__Two-component system response regulator OmpR // 0.18763100460547377 # SSO:000013151__Dioxygenase // 0.07217563470308133 # SSO:000000195__2-methylcitrate synthase (EC 2.3.3.5) // 0.06507242050743427 # SSO:000035962__Formate-dependent phosphoribosylglycinamide formyltransferase // 0.06108355333995091 # SSO:000005475__O-succinylhomoserine sulfhydrylase (EC 2.5.1.48) // 0.036411525570971805 # SSO:000012151__Chemotaxis protein CheY // 0.032360259436741715 # SSO:000025706__TonB-dependent copper receptor // 0.017950031465698183 # SSO:000009126__glutamine amidotransferase (EC 4.1.3.27 ) // 0.01611925272926196 # SSO:000002043__D-serine ammonia-lyase (EC 4.3.1.18) // 0.014141506884429457 # SSO:000012155__Chemotaxis protein motC // 0.012262655077543637 # SSO:000041594__Undecaprenyl-phosphate glucose phosphotransferase (EC 2.7.8.31) // 0.01134783077955567 # SSO:000012386__Coniferyl aldehyde dehydrogenase (EC 1.2.1.68) // 0.010257295265944993 # SSO:000007884__TDP-N-acetylfucosamine:lipid II N-acetylfucosaminyltransferase (EC 2.4.1.325) // 0.009770201715118566 # SSO:000033521__toprim domain-containing protein // 0.009380410940098479 # SSO:000019307__Maltodextrin glucosidase (EC 3.2.1.20) // 0.008999942300376968 # SSO:000009559__2-phospho-L-lactate guanylyltransferase (EC 2.7.7.68) // 0.008513819575341442 # SSO:000001996__D-alanyl-D-alanine dipeptidase (EC 3.4.13.22) // 0.008467297239696645 # SSO:000006816__Pyridoxine 5'-phosphate synthase (EC 2.6.99.2) // 0.007554641009869198 # SSO:000012533__Cyclase family protein // 0.007358763241153534 # SSO:000043039__acetone carboxylase (EC 6.4.1.6) // 0.007080090699924799
Random Forest,0.9,0.898948,0.9,0.899318,0.869444,TP=169 TN=56 FP=11FN=14,SSO:000000195__2-methylcitrate synthase (EC 2.3.3.5) // 0.010479151142409618 # SSO:000017664__Heavy metal sensor histidine kinase // 0.0077253155033408065 # SSO:000008463__Two-component system response regulator OmpR // 0.007461026355317865 # SSO:000035962__Formate-dependent phosphoribosylglycinamide formyltransferase // 0.007255257613968591 # SSO:000043392__chorismate lyase (EC 4.1.3.40) // 0.006811415398232365 # SSO:000012016__Cd(II)/Pb(II)-responsive transcriptional regulator // 0.006660861669190563 # SSO:000009443__uracil-xanthine permease // 0.005766696667428374 # SSO:000035324__D-alanyl-D-alanine endopeptidase // 0.005629287282042061 # SSO:000029105__YaeQ family protein // 0.005496419038058876 # SSO:000018959__Lipid A export permease/ATP-binding protein MsbA // 0.005495773636567959 # SSO:000037110__NADPH dependent aldo-keto reductase // 0.0054074165700776335 # SSO:000013151__Dioxygenase // 0.0049730831417157855 # SSO:000042832__succinyldiaminopimelate transaminase (EC 2.6.1.17) // 0.004940351427961453 # SSO:000012386__Coniferyl aldehyde dehydrogenase (EC 1.2.1.68) // 0.004794387141720141 # SSO:000019560__Methionine biosynthesis protein MetW // 0.004595342658486192 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.004429622378254635 # SSO:000037190__Nif3-like dinuclear metal center hexameric protein // 0.004146770968843702 # SSO:000039426__Taurine ABC transporter substrate-binding protein // 0.004114059823268906 # SSO:000021837__Polyamine ABC transporter ATP-binding protein // 0.004045345155948897 # SSO:000002791__Flagellar assembly protein FliH // 0.004019003650611359
CatBoost,0.892,0.893652,0.892,0.892679,0.872619,TP=165 TN=58 FP=15FN=12,SSO:000000195__2-methylcitrate synthase (EC 2.3.3.5) // 0.5470666443079557 # SSO:000035962__Formate-dependent phosphoribosylglycinamide formyltransferase // 0.3090298902429219 # SSO:000010160__Acetyl-CoA hydrolase/transferase family protein // 0.26616004368912194 # SSO:000011135__Bile acid:sodium symporter // 0.2438003201970718 # SSO:000020656__Nucleoside diphosphate kinase regulator // 0.2368917354961811 # SSO:000012386__Coniferyl aldehyde dehydrogenase (EC 1.2.1.68) // 0.22162103703390307 # SSO:000009756__Membrane protein // 0.19182762211699694 # SSO:000024633__Siderophore-interacting protein // 0.18575846472224292 # SSO:000013151__Dioxygenase // 0.1691901141518522 # SSO:000013261__Dyp-type peroxidase // 0.16511339463347038 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.15740026841749064 # SSO:000001968__Cytosine permease // 0.1571567266675777 # SSO:000009443__uracil-xanthine permease // 0.13511978500397362 # SSO:000012465__Copper resistance protein B // 0.12355386820063734 # SSO:000002043__D-serine ammonia-lyase (EC 4.3.1.18) // 0.11414827089635406 # SSO:000001879__Cytochrome O ubiquinol oxidase subunit III (EC 1.10.3.-) // 0.11275837067825478 # SSO:000030895__iron-sulfur cluster assembly accessory protein // 0.11206189774954364 # SSO:000012963__DUF1176 domain-containing protein // 0.10869468370242057 # SSO:000008463__Two-component system response regulator OmpR // 0.1073208818460894 # SSO:000043392__chorismate lyase (EC 4.1.3.40) // 0.10437381835672933


Numer of genomes:996
Number of genomic features :4352
Shape of y:(996,)
Count of 1: 451
Count of 0: 545
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:1387
Number of genomic features :4736
Shape of y:(1387,)
Count of 1: 549
Count of 0: 838
Running Decision Tree for phenotype maltose--assimilation




Running Random Forest for phenotype maltose--assimilation




Running CatBoost for phenotype maltose--assimilation




######### Combined report for maltose--assimilation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.719424,0.725286,0.719424,0.72102,0.718485,TP=118 TN=82 FP=45FN=33,SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.1869111188908814 # SSO:000042475__5-methyltetrahydropteroyltriglutamate---homocysteine S-methyltransferase (EC 2.1.1.14) // 0.06912266892837272 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.05584350785893729 # SSO:000001880__Cytochrome O ubiquinol oxidase subunit IV (EC 1.10.3.-) // 0.04272969180650241 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.029990221182015737 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.02703126988775443 # SSO:000003486__Homoserine kinase (EC 2.7.1.39) // 0.024257306454355178 # SSO:000012160__Chemotaxis response regulator protein-glutamate methylesterase (EC 3.1.1.61) // 0.02418674087910433 # SSO:000002008__D-aminopeptidase (EC 3.4.11.19) // 0.020879723789896262 # SSO:000005853__Peroxidase (EC 1.11.1.7) // 0.019436557515099064 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.01801343462494238 # SSO:000017864__Homogentisate 12-dioxygenase (EC 1.13.11.5) // 0.01581615875150327 # SSO:000019607__Methylated-DNA-(protein)-cysteine S-methyltransferase // 0.015485012633519774 # SSO:000002619__Exodeoxyribonuclease III (EC 3.1.11.2) // 0.014152644957121825 # SSO:000036105__Glutamate-5-semialdehyde dehydrogenase (EC 1.2.1.41) // 0.0136541988940187 # SSO:000010272__Acyl-CoA thioesterase // 0.012591046107550748 # SSO:000009200__peptidase (EC 3.4.24.64 ) // 0.01094087948307129 # SSO:000005313__Nicotinate phosphoribosyltransferase (EC 6.3.4.21) // 0.010735890769554314 # SSO:000017607__HTH-type transcriptional regulator malT // 0.010672468631060502 # SSO:000038540__Phosphoketolase // 0.010217090909559851
Random Forest,0.791367,0.796688,0.791367,0.792553,0.792638,TP=128 TN=92 FP=35FN=23,SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.009547585896884784 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.008559594703655945 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.007206705770895711 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.006752636874096631 # SSO:000027090__beta-glucosidase (EC 3.2.1.21) // 0.004614602594133216 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.004342120575588103 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.004286473412066163 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.004191580376792649 # SSO:000029073__Xylose isomerase // 0.0041366652746736895 # SSO:000019560__Methionine biosynthesis protein MetW // 0.004132591890462842 # SSO:000042832__succinyldiaminopimelate transaminase (EC 2.6.1.17) // 0.0038620953484828032 # SSO:000039286__Sugar O-acetyltransferase // 0.003617626818128221 # SSO:000003124__Glucose-1-phosphate adenylyltransferase (EC 2.7.7.27) // 0.0034726556521831476 # SSO:000000326__3-oxoacyl-ACP reductase (EC 1.1.1.100) // 0.003062189164862908 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.002948076147326808 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.0028834738688803788 # SSO:000010359__Alanine transaminase (EC 2.6.1.2) // 0.002831391840892496 # SSO:000000543__ATP phosphoribosyltransferase regulatory subunit (EC 2.4.2.17) // 0.002708488434695368 # SSO:000042613__N-acetylglucosamine kinase (EC 2.7.1.59) // 0.0026184219174820835 # SSO:000018959__Lipid A export permease/ATP-binding protein MsbA // 0.0025956353285937293
CatBoost,0.816547,0.823681,0.816547,0.817739,0.820512,TP=130 TN=97 FP=33FN=18,SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.689604389760925 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.4943790103295973 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.4939584806383544 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.3572146671016386 # SSO:000004189__LOG family protein // 0.23592132449438863 # SSO:000039286__Sugar O-acetyltransferase // 0.2008449765119518 # SSO:000027090__beta-glucosidase (EC 3.2.1.21) // 0.18686683348043429 # SSO:000033834__xylulokinase // 0.1295763598702846 # SSO:000029307__aldolase // 0.12932615038744436 # SSO:000018973__Lipopolysaccharide biosynthesis protein // 0.12093897452174181 # SSO:000003145__Glutamate 5-kinase (EC 2.7.2.11) // 0.10652297069023832 # SSO:000034571__Aldose 1-epimerase family protein // 0.10622468707527992 # SSO:000008019__Thiazole synthase (EC 2.8.1.10) // 0.0955853055978401 # SSO:000039114__SapC family protein // 0.09364227634698569 # SSO:000002839__Flagellin // 0.09292513072020007 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.09289034276521649 # SSO:000007156__Ribulokinase (EC 2.7.1.16) // 0.0741708969041345 # SSO:000003304__Glycosyl transferase family 2 (EC 2.4.1.83) // 0.0733461382587808 # SSO:000003103__Gluconate permease // 0.06867847115529196 # SSO:000003484__Homoserine O-succinyltransferase (EC 2.3.1.46) // 0.06848874296369345


Numer of genomes:1297
Number of genomic features :4575
Shape of y:(1297,)
Count of 1: 494
Count of 0: 803
Running Decision Tree for phenotype N-acetylglucosamine--assimilation




Running Random Forest for phenotype N-acetylglucosamine--assimilation




Running CatBoost for phenotype N-acetylglucosamine--assimilation




######### Combined report for N-acetylglucosamine--assimilation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.773077,0.783128,0.773077,0.776694,0.754086,TP=144 TN=57 FP=35FN=24,SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.23761500248155992 # SSO:000044308__adenosylmethionine---8-amino-7-oxononanoate transaminase (EC 2.6.1.62) // 0.08309893432015936 # SSO:000004190__LPS export ABC transporter permease LptG // 0.054024485384212166 # SSO:000001879__Cytochrome O ubiquinol oxidase subunit III (EC 1.10.3.-) // 0.048259317253737756 # SSO:000012974__DUF1854 domain-containing protein // 0.03234368552742997 # SSO:000001468__Cell division protein FtsL // 0.02867961159452667 # SSO:000044159__glycine C-acetyltransferase (EC 2.3.1.29) // 0.023632493999138294 # SSO:000002796__Flagellar basal-body rod protein FlgC // 0.02031526797865109 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.020143880163161397 # SSO:000000563__ATP-dependent Clp protease ATP-binding subunit ClpA // 0.019806246600630753 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.019144612795641978 # SSO:000000824__Adenylosuccinate synthetase (EC 6.3.4.4) // 0.014336586339162074 # SSO:000002255__Deoxyribose-phosphate aldolase (EC 4.1.2.4) // 0.01393554183800958 # SSO:000017027__GTPase // 0.013657357631285501 # SSO:000024592__Serine/threonine protein phosphatase (EC 3.1.3.16) // 0.011409380607424396 # SSO:000000292__3-hydroxyanthranilate 34-dioxygenase (EC 1.13.11.6) // 0.01059498555582362 # SSO:000017589__HPT domain containing protein // 0.01043316628480324 # SSO:000029584__chromosome partitioning protein ParB // 0.009521013918015716 # SSO:000008131__Trans-aconitate 2-methyltransferase (EC 2.1.1.144) // 0.009438847285878677 # SSO:000005097__N-acetyl-gamma-glutamyl-phosphate reductase (EC 1.2.1.38) // 0.009392651710039264
Random Forest,0.815385,0.821466,0.815385,0.817619,0.798331,TP=151 TN=61 FP=28FN=20,SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.020995753123841193 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.006127820654248487 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.005868719217933413 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.005789380159967543 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.005671595489857778 # SSO:000043098__myo-inosose-2 dehydratase (EC 4.2.1.44) // 0.0052506870489410305 # SSO:000044159__glycine C-acetyltransferase (EC 2.3.1.29) // 0.004880113991181641 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.004663927311643161 # SSO:000012169__Chitinase (EC 3.2.1.14) // 0.004383746310811973 # SSO:000005278__Na+/H+ antiporter // 0.004356124881836891 # SSO:000029073__Xylose isomerase // 0.004350958431727153 # SSO:000029506__carbohydrate kinase // 0.004315460342806117 # SSO:000001024__Aquaporin Z // 0.0042341473110699395 # SSO:000042613__N-acetylglucosamine kinase (EC 2.7.1.59) // 0.004133607817335981 # SSO:000018792__L-threonine 3-dehydrogenase // 0.004023785384109869 # SSO:000043034__inositol 2-dehydrogenase (EC 1.1.1.18) // 0.003735940279352689 # SSO:000025241__Sugar kinase // 0.0036540777138754244 # SSO:000000381__4-hydroxyproline epimerase (EC 5.1.1.8) // 0.003635471380928179 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.0033151084662921488 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.0032961488901139878
CatBoost,0.803846,0.815145,0.803846,0.807436,0.793331,TP=147 TN=62 FP=32FN=19,SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 1.982558347942438 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.22451963462082414 # SSO:000018792__L-threonine 3-dehydrogenase // 0.18380918020930764 # SSO:000012169__Chitinase (EC 3.2.1.14) // 0.14635724085686497 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.14579723545762976 # SSO:000001024__Aquaporin Z // 0.13681006003803298 # SSO:000010198__Acid phosphatase (EC 3.1.3.2) // 0.13655944612771917 # SSO:000007808__Sulfate permease // 0.13560521984678386 # SSO:000043641__N4-(beta-N-acetylglucosaminyl)-L-asparaginase (EC 3.5.1.26) // 0.12232031184134619 # SSO:000044308__adenosylmethionine---8-amino-7-oxononanoate transaminase (EC 2.6.1.62) // 0.11063306220788667 # SSO:000037089__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit H // 0.10856082197230747 # SSO:000012972__DUF1801 domain-containing protein // 0.10660717114946074 # SSO:000033404__sulfite oxidase( EC:1.8.3.1 ) // 0.10600282315792209 # SSO:000030852__integrase family protein // 0.09464358300893189 # SSO:000000381__4-hydroxyproline epimerase (EC 5.1.1.8) // 0.09355111455547303 # SSO:000029073__Xylose isomerase // 0.09193307032860466 # SSO:000043034__inositol 2-dehydrogenase (EC 1.1.1.18) // 0.08808568615728225 # SSO:000017470__GreA/GreB family elongation factor // 0.07995965639754012 # SSO:000012974__DUF1854 domain-containing protein // 0.07107843899166498 # SSO:000000826__Adenylylsulfate kinase (EC 2.7.1.25) // 0.07007689694672987


Numer of genomes:1303
Number of genomic features :4664
Shape of y:(1303,)
Count of 1: 478
Count of 0: 825
Running Decision Tree for phenotype D-mannitol--assimilation




Running Random Forest for phenotype D-mannitol--assimilation




Running CatBoost for phenotype D-mannitol--assimilation




######### Combined report for D-mannitol--assimilation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.781609,0.784533,0.781609,0.782779,0.768888,TP=136 TN=68 FP=31FN=26,SSO:000033834__xylulokinase // 0.2169168956375437 # SSO:000042813__amino-acid N-acetyltransferase (EC 2.3.1.1) // 0.10031296428705164 # SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 0.04801911312694415 # SSO:000031624__prepilin-type N-terminal cleavage/methylation domain-containing protein // 0.04719378768178219 # SSO:000002255__Deoxyribose-phosphate aldolase (EC 4.1.2.4) // 0.03957114131594912 # SSO:000012503__Cu(I)-responsive transcriptional regulator // 0.03637105447978925 # SSO:000000536__AMP nucleosidase (EC 3.2.2.4) // 0.03416296974802993 # SSO:000033521__toprim domain-containing protein // 0.022160812822482032 # SSO:000033054__radical SAM protein // 0.02202225705671558 # SSO:000000870__Aldose 1-epimerase (EC 5.1.3.3) // 0.019158638636166445 # SSO:000001155__Bacteriocin production protein // 0.01636080736971628 # SSO:000033776__tyrosine recombinase // 0.015479992405096427 # SSO:000030106__heme-binding protein // 0.01365645517540293 # SSO:000017637__Haloalkane dehalogenase (EC 3.8.1.5) // 0.01336224885148691 # SSO:000042404__cobyrinate ac-diamide synthase (EC 6.3.5.11) // 0.013227899674972267 # SSO:000000306__3-isopropylmalate dehydratase large subunit (EC 4.2.1.33) // 0.012493564610315632 # SSO:000043793__3-hexulose-6-phosphate synthase (EC 4.1.2.43) // 0.012300292314853115 # SSO:000016627__Fatty acid cis/trans isomerase // 0.010845313466022726 # SSO:000017750__Heparinase II/III family protein // 0.010812065396536816 # SSO:000005515__Ornithine cyclodeaminase (EC 4.3.1.12) // 0.00954926072816799
Random Forest,0.812261,0.810103,0.812261,0.810557,0.78819,TP=146 TN=66 FP=21FN=28,SSO:000033834__xylulokinase // 0.01535954153449005 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.012028445584479125 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.009555025216606134 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.006657354161193396 # SSO:000043098__myo-inosose-2 dehydratase (EC 4.2.1.44) // 0.006611048319209781 # SSO:000029506__carbohydrate kinase // 0.005895079674964484 # SSO:000001540__Choline ABC transporter substrate-binding protein // 0.005733731975327383 # SSO:000018763__L-iditol 2-dehydrogenase // 0.005667629100214004 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.0046180059416261006 # SSO:000002078__DNA polymerase II (EC 2.7.7.7) // 0.004449651326663243 # SSO:000008865__Urease accessory protein UreF // 0.004023077090005971 # SSO:000000897__Allantoicase (EC 3.5.3.4) // 0.0039879372487069175 # SSO:000036484__L-arabinose ABC transporter ATP-binding protein AraG // 0.003986958535912809 # SSO:000002255__Deoxyribose-phosphate aldolase (EC 4.1.2.4) // 0.003757798231275476 # SSO:000001198__Betaine aldehyde dehydrogenase (EC 1.2.1.8) // 0.0035628363419968096 # SSO:000000159__2-hydroxy-3-oxopropionate reductase (EC 1.1.1.60) // 0.0035021080877704097 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.00342038107031345 # SSO:000043034__inositol 2-dehydrogenase (EC 1.1.1.18) // 0.0031547846386178074 # SSO:000001549__Choline-sulfatase (EC 3.1.6.6) // 0.003121613946949809 # SSO:000010586__Amino acid transporter // 0.003111221995070243
CatBoost,0.858238,0.857443,0.858238,0.857719,0.842719,TP=150 TN=74 FP=17FN=20,SSO:000033834__xylulokinase // 1.4665855172171085 # SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 0.45068419435789225 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.43970662727749427 # SSO:000018763__L-iditol 2-dehydrogenase // 0.20252896125946296 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.20021473423917435 # SSO:000033773__type IV secretion system protein // 0.1951678973733217 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.18312860426818886 # SSO:000018613__Iron-containing alcohol dehydrogenase // 0.152990100949517 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.1420528731563894 # SSO:000029964__fumarate hydratase( EC:4.2.1.2 ) // 0.12966825810010696 # SSO:000012964__DUF1232 domain-containing protein // 0.12730198853617572 # SSO:000002108__DNA repair protein RadC // 0.1222001424440342 # SSO:000029506__carbohydrate kinase // 0.12110208220302535 # SSO:000008105__TonB-dependent siderophore receptor // 0.12087931789411405 # SSO:000000464__8-amino-7-oxononanoate synthase (EC 2.3.1.47) // 0.11650445137613667 # SSO:000025231__Sugar binding protein // 0.11073167618307322 # SSO:000020771__Oligopeptide transporter OPT family // 0.10981614490285033 # SSO:000001427__Carboxynorspermidine decarboxylase (EC 4.1.1.96) // 0.10665815068598676 # SSO:000000870__Aldose 1-epimerase (EC 5.1.3.3) // 0.10235778208159726 # SSO:000017750__Heparinase II/III family protein // 0.09805797514547371


Numer of genomes:1317
Number of genomic features :4648
Shape of y:(1317,)
Count of 1: 571
Count of 0: 746
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:1315
Number of genomic features :4565
Shape of y:(1315,)
Count of 1: 485
Count of 0: 830
Running Decision Tree for phenotype L-arabinose--assimilation




Running Random Forest for phenotype L-arabinose--assimilation




Running CatBoost for phenotype L-arabinose--assimilation




######### Combined report for L-arabinose--assimilation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.741445,0.745113,0.741445,0.742891,0.729716,TP=128 TN=67 FP=37FN=31,SSO:000000142__2-dehydro-3-deoxy-6-phosphogalactonate aldolase (EC 4.1.2.21) // 0.18780286885005518 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.10855680099769215 # SSO:000002819__Flagellar motor switch protein FliM // 0.04718331338300086 # SSO:000022980__PspC domain-containing protein // 0.041770015955359244 # SSO:000002338__Dimethylsulfoniopropionate demethylase (EC 2.1.1.269) // 0.03906798544459533 # SSO:000009614__3-deoxy-D-manno-octulosonic acid kinase (EC 2.7.1.-) // 0.03484145191423257 # SSO:000029597__class II aldolase/adducin family protein // 0.02596750333063449 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.025538845310552047 # SSO:000008105__TonB-dependent siderophore receptor // 0.021389596147106355 # SSO:000017050__Gas vesicle protein // 0.01944563276514523 # SSO:000001094__Aspartate ammonia-lyase (EC 4.3.1.1) // 0.013883476465897108 # SSO:000021548__Phage tail assembly chaperone // 0.013405599404285146 # SSO:000017356__Glycogen synthase (EC 2.4.1.11) // 0.012367239899881184 # SSO:000029780__cytochrome b562 // 0.010631736877446613 # SSO:000010559__Amidohydrolase family protein // 0.009833199107891918 # SSO:000008622__UDP-N-acetylglucosamine--N-acetylmuramyl-(pentapeptide) pyrophosphoryl-undecaprenol N-acetylglucosamine transferase (EC 2.4.1.227) // 0.00928548834681306 # SSO:000044269__UDP-N-acetylmuramoyl-L-alanyl-D-glutamate---26-diaminopimelate ligase (EC 6.3.2.13) // 0.00894662660999365 # SSO:000042516__ribonucleoside-triphosphate reductase (EC 1.17.4.2) // 0.00874111669072849 # SSO:000017202__Glucoamylase (EC 3.2.1.3) // 0.008618520511651281 # SSO:000008849__Uracil phosphoribosyltransferase (EC 2.4.2.9) // 0.00844795792396174
Random Forest,0.813688,0.811702,0.813688,0.811678,0.791435,TP=145 TN=69 FP=20FN=29,SSO:000000142__2-dehydro-3-deoxy-6-phosphogalactonate aldolase (EC 4.1.2.21) // 0.013614331475682563 # SSO:000009528__2-dehydro-3-deoxygalactonokinase (EC 2.7.1.58) // 0.009335889717167945 # SSO:000033834__xylulokinase // 0.008368435227941865 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.007915428167076848 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.007499898101734157 # SSO:000034571__Aldose 1-epimerase family protein // 0.006754121607797662 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.006568951959308652 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.006465048185221539 # SSO:000029073__Xylose isomerase // 0.006091090059490142 # SSO:000025241__Sugar kinase // 0.005975701620042674 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.004886783142988925 # SSO:000000417__5-dehydro-4-deoxyglucarate dehydratase (EC 4.2.1.41) // 0.004881683233332029 # SSO:000036484__L-arabinose ABC transporter ATP-binding protein AraG // 0.004876853905278341 # SSO:000003007__Galactonate dehydratase (EC 4.2.1.6) // 0.004824950859987858 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.004810354489705951 # SSO:000000381__4-hydroxyproline epimerase (EC 5.1.1.8) // 0.00445000541957101 # SSO:000034554__Aldehyde dehydrogenase (NAD(P)(+)) (EC 1.2.1.5) // 0.003973855111625365 # SSO:000000936__Altronate dehydratase (EC 4.2.1.7) // 0.003654172408670827 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.003489564527783106 # SSO:000007156__Ribulokinase (EC 2.7.1.16) // 0.003381401514989143
CatBoost,0.813688,0.811702,0.813688,0.811678,0.791435,TP=145 TN=69 FP=20FN=29,SSO:000000142__2-dehydro-3-deoxy-6-phosphogalactonate aldolase (EC 4.1.2.21) // 1.183832160186353 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.38146389894829386 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.33128293493912514 # SSO:000009528__2-dehydro-3-deoxygalactonokinase (EC 2.7.1.58) // 0.19543981552129047 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.18774188024302677 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.1708766091443838 # SSO:000033404__sulfite oxidase( EC:1.8.3.1 ) // 0.17053470857957004 # SSO:000033667__transposase family protein // 0.16526321691217363 # SSO:000000417__5-dehydro-4-deoxyglucarate dehydratase (EC 4.2.1.41) // 0.1635784776938982 # SSO:000034571__Aldose 1-epimerase family protein // 0.15659568581039718 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.14185345565975324 # SSO:000009982__ANTAR domain-containing protein // 0.1409402928361696 # SSO:000010751__Arabinan endo-15-alpha-L-arabinosidase (EC 3.2.1.99) // 0.12557929841717488 # SSO:000026225__Tryptophan-rich sensory protein // 0.12503887132709632 # SSO:000023851__RNA-binding protein // 0.119647727369838 # SSO:000036484__L-arabinose ABC transporter ATP-binding protein AraG // 0.11047341268919743 # SSO:000021890__Polysaccharide biosynthesis protein // 0.10981041342345677 # SSO:000007156__Ribulokinase (EC 2.7.1.16) // 0.10855755237891222 # SSO:000021696__Phosphonate ABC transporter ATP-binding protein // 0.1084543646494603 # SSO:000033834__xylulokinase // 0.1072778651110264


Numer of genomes:1353
Number of genomic features :4764
Shape of y:(1353,)
Count of 1: 794
Count of 0: 559
Running Decision Tree for phenotype D-glucose--assimilation




Running Random Forest for phenotype D-glucose--assimilation




Running CatBoost for phenotype D-glucose--assimilation




######### Combined report for D-glucose--assimilation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.697417,0.699762,0.697417,0.698438,0.683757,TP=65 TN=124 FP=39FN=43,SSO:000030003__glucose-6-phosphate dehydrogenase // 0.11229175009578979 # SSO:000035257__Cystine ABC transporter substrate-binding protein // 0.06756219410340793 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.060494050089295814 # SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.0578893704084717 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.04217419560540098 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.03312836131785696 # SSO:000033686__trehalose-phosphatase // 0.03233811180551495 # SSO:000007612__Sodium/glutamate symporter // 0.031496338728661546 # SSO:000009584__2OG-Fe(II) oxygenase // 0.026998327166341606 # SSO:000033404__sulfite oxidase( EC:1.8.3.1 ) // 0.023511697381615845 # SSO:000012216__Chromate transporter // 0.02261283141645558 # SSO:000019203__METHYL-ACCEPTING CHEMOTAXIS PROTEIN // 0.022590447180883523 # SSO:000000151__2-dehydropantoate 2-reductase (EC 1.1.1.169) // 0.02242717991972984 # SSO:000013154__Dipeptidase // 0.017423277047381513 # SSO:000011135__Bile acid:sodium symporter // 0.013538343449380017 # SSO:000001024__Aquaporin Z // 0.013302212005139124 # SSO:000001856__Cysteine desulfurase (EC 2.8.1.7) // 0.013219995165110241 # SSO:000033819__virulence factor family protein // 0.01227686511973073 # SSO:000016804__Flagellar motor protein // 0.01219050631954636 # SSO:000041675__WD40 repeat domain-containing protein // 0.011520359694148847
Random Forest,0.763838,0.763036,0.763838,0.763394,0.748532,TP=71 TN=136 FP=33FN=31,SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.007771158378886526 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.006122588778518275 # SSO:000033834__xylulokinase // 0.006105368497227744 # SSO:000000457__6-phosphogluconolactonase (EC 3.1.1.31) // 0.005151694420424943 # SSO:000005968__Phosphogluconate dehydratase (EC 4.2.1.12) // 0.0041484220889604784 # SSO:000004698__Malto-oligosyltrehalose synthase (EC 5.4.99.15) // 0.004145694170249076 # SSO:000034571__Aldose 1-epimerase family protein // 0.004120747886650702 # SSO:000005278__Na+/H+ antiporter // 0.004079801310619625 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.003921119860364657 # SSO:000033819__virulence factor family protein // 0.003893716965044113 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.0037549400166860102 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.0037278519432961863 # SSO:000000142__2-dehydro-3-deoxy-6-phosphogalactonate aldolase (EC 4.1.2.21) // 0.003715455989062171 # SSO:000035257__Cystine ABC transporter substrate-binding protein // 0.0036997340972485743 # SSO:000042548__succinylglutamate-semialdehyde dehydrogenase (EC 1.2.1.71) // 0.00365415624604936 # SSO:000003445__Histidine utilization repressor // 0.003603676619756602 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.003184525628651252 # SSO:000004699__Malto-oligosyltrehalose trehalohydrolase (EC 3.2.1.141) // 0.0031176574512387853 # SSO:000030003__glucose-6-phosphate dehydrogenase // 0.0031116873309861106 # SSO:000029073__Xylose isomerase // 0.002959662534356123
CatBoost,0.771218,0.772104,0.771218,0.771618,0.759961,TP=74 TN=135 FP=30FN=32,SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.2586497349460534 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.24043410318958736 # SSO:000033834__xylulokinase // 0.19821411948939038 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.18725702443719391 # SSO:000005968__Phosphogluconate dehydratase (EC 4.2.1.12) // 0.18253494101053325 # SSO:000003097__Glucokinase (EC 2.7.1.2) // 0.1816421314655106 # SSO:000000142__2-dehydro-3-deoxy-6-phosphogalactonate aldolase (EC 4.1.2.21) // 0.15798423455078606 # SSO:000007828__Sulfoacetaldehyde acetyltransferase (EC 2.3.3.15) // 0.13941212405095318 # SSO:000030003__glucose-6-phosphate dehydrogenase // 0.1389105501929335 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.13652766760791912 # SSO:000000457__6-phosphogluconolactonase (EC 3.1.1.31) // 0.13609367905509331 # SSO:000005278__Na+/H+ antiporter // 0.1349292766392619 # SSO:000033404__sulfite oxidase( EC:1.8.3.1 ) // 0.1345018586368152 # SSO:000034571__Aldose 1-epimerase family protein // 0.12763973146592847 # SSO:000018761__L-glyceraldehyde 3-phosphate reductase // 0.11599721570390208 # SSO:000018763__L-iditol 2-dehydrogenase // 0.11192665655948209 # SSO:000042937__maltose alpha-D-glucosyltransferase (EC 5.4.99.16) // 0.10176643437144943 # SSO:000005853__Peroxidase (EC 1.11.1.7) // 0.0988095762978488 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.09506363435518317 # SSO:000019412__Membrane dipeptidase // 0.09441841433143083


Numer of genomes:3003
Number of genomic features :5734
Shape of y:(3003,)
Count of 1: 1027
Count of 0: 1976
Running Decision Tree for phenotype gelatin--hydrolysis




Running Random Forest for phenotype gelatin--hydrolysis




Running CatBoost for phenotype gelatin--hydrolysis




######### Combined report for gelatin--hydrolysis#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.702163,0.717816,0.702163,0.707076,0.693505,TP=283 TN=139 FP=109FN=70,SSO:000012169__Chitinase (EC 3.2.1.14) // 0.1118057082721249 # SSO:000044159__glycine C-acetyltransferase (EC 2.3.1.29) // 0.08888161282977476 # SSO:000000908__Alpha-D-ribose 1-methylphosphonate 5-triphosphate diphosphatase (EC 3.6.1.63) // 0.04391061950699785 # SSO:000000380__4-hydroxyphenylpyruvate dioxygenase (EC 1.13.11.27) // 0.0419295756227213 # SSO:000003696__Imidazolonepropionase (EC 3.5.2.7) // 0.02736779899684069 # SSO:000024110__Rhamnulokinase (EC 2.7.1.5) // 0.023938090685957095 # SSO:000008391__Tryptophan 23-dioxygenase (EC 1.13.11.11) // 0.017794838998139703 # SSO:000018206__IS3 family transposase // 0.017606778245578845 # SSO:000008877__Uridine kinase (EC 2.7.1.48) // 0.017220734062972108 # SSO:000037265__Nucleoside-specific channel-forming protein Tsx // 0.016440533919733404 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.01591742330661984 # SSO:000000270__3-aminobutyryl-CoA ammonia-lyase (EC 4.3.1.14) // 0.013987505067046473 # SSO:000024182__Ribosome maturation factor rimM // 0.013831719167155887 # SSO:000011121__Beta-xylosidase // 0.01318907861630161 # SSO:000009129__glutaredoxin 3 // 0.011264213533820606 # SSO:000000673__Acetoacetate decarboxylase (EC 4.1.1.4) // 0.010888758658555629 # SSO:000030016__glutaredoxin family protein // 0.010616103472885963 # SSO:000043047__thiamine-phosphate kinase (EC 2.7.4.16) // 0.010347031641505404 # SSO:000024142__Ribonuclease // 0.009994010551988603 # SSO:000005248__NADPH:quinone oxidoreductase (EC 1.6.5.5) // 0.009674125645028734
Random Forest,0.815308,0.816916,0.815308,0.815997,0.800343,TP=333 TN=157 FP=59FN=52,SSO:000008391__Tryptophan 23-dioxygenase (EC 1.13.11.11) // 0.008363002087077114 # SSO:000003696__Imidazolonepropionase (EC 3.5.2.7) // 0.007504986450728365 # SSO:000008881__Urocanate hydratase (EC 4.2.1.49) // 0.006443846094378919 # SSO:000044159__glycine C-acetyltransferase (EC 2.3.1.29) // 0.006218249886710505 # SSO:000003439__Histidine ammonia-lyase (EC 4.3.1.3) // 0.0053734564475565755 # SSO:000008877__Uridine kinase (EC 2.7.1.48) // 0.004870017791764929 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.004770044615767215 # SSO:000004673__Malate synthase G (EC 2.3.3.9) // 0.00457664858456086 # SSO:000004091__Kynureninase (EC 3.7.1.3) // 0.004432213353817845 # SSO:000012169__Chitinase (EC 3.2.1.14) // 0.004223158465905753 # SSO:000029348__amidinotransferase // 0.004083147732950149 # SSO:000000826__Adenylylsulfate kinase (EC 2.7.1.25) // 0.0036597989824800276 # SSO:000018792__L-threonine 3-dehydrogenase // 0.003638111835305697 # SSO:000029508__carbohydrate-binding protein // 0.003341141996241206 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.0032510705690654155 # SSO:000003525__Hydroxymethylglutaryl-CoA lyase (EC 4.1.3.4) // 0.0032188409747408732 # SSO:000000536__AMP nucleosidase (EC 3.2.2.4) // 0.0031787441880713994 # SSO:000017864__Homogentisate 12-dioxygenase (EC 1.13.11.5) // 0.003150753859579363 # SSO:000029178__Zinc carboxypeptidase // 0.002926887543198654 # SSO:000036507__L-glutamate gamma-semialdehyde dehydrogenase (EC 1.2.1.88) // 0.0029169671435120724
CatBoost,0.803661,0.805091,0.803661,0.804293,0.786947,TP=330 TN=153 FP=62FN=56,SSO:000008391__Tryptophan 23-dioxygenase (EC 1.13.11.11) // 0.3379884480430378 # SSO:000008877__Uridine kinase (EC 2.7.1.48) // 0.24036195695455204 # SSO:000003696__Imidazolonepropionase (EC 3.5.2.7) // 0.23999299319819561 # SSO:000018641__Isochorismatase family protein // 0.18952728387102097 # SSO:000024950__Sodium/solute symporter // 0.18518129721803592 # SSO:000008881__Urocanate hydratase (EC 4.2.1.49) // 0.18433875565583802 # SSO:000035962__Formate-dependent phosphoribosylglycinamide formyltransferase // 0.17157699820642094 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.15117027900124214 # SSO:000018206__IS3 family transposase // 0.14278819622145014 # SSO:000029508__carbohydrate-binding protein // 0.13304420264134356 # SSO:000000457__6-phosphogluconolactonase (EC 3.1.1.31) // 0.1242937562808172 # SSO:000003124__Glucose-1-phosphate adenylyltransferase (EC 2.7.7.27) // 0.12077470442582448 # SSO:000012684__D-alanyl-D-alanine carboxypeptidase family protein // 0.11787301449012037 # SSO:000044159__glycine C-acetyltransferase (EC 2.3.1.29) // 0.11301700196429357 # SSO:000025068__Sporulation protein // 0.11203285587472381 # SSO:000004091__Kynureninase (EC 3.7.1.3) // 0.11073409887725981 # SSO:000024633__Siderophore-interacting protein // 0.10383197452576738 # SSO:000012169__Chitinase (EC 3.2.1.14) // 0.09919995498884492 # SSO:000020504__Nickel-dependent hydrogenase large subunit // 0.09474999033444356 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.0935138211674705


Numer of genomes:3088
Number of genomic features :5558
Shape of y:(3088,)
Count of 1: 1864
Count of 0: 1224
Running Decision Tree for phenotype esculin--hydrolysis




Running Random Forest for phenotype esculin--hydrolysis




Running CatBoost for phenotype esculin--hydrolysis




######### Combined report for esculin--hydrolysis#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.7411,0.742487,0.7411,0.741669,0.735009,TP=178 TN=280 FP=76FN=84,SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.15074925211496962 # SSO:000027090__beta-glucosidase (EC 3.2.1.21) // 0.09871511729278076 # SSO:000022980__PspC domain-containing protein // 0.05210165719876465 # SSO:000008871__Ureidoglycolate lyase (EC 4.3.2.3) // 0.039996358081715366 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.038835078352831065 # SSO:000025241__Sugar kinase // 0.03862830962243628 # SSO:000000685__Acetolactate synthase large subunit (EC 2.2.1.6) // 0.02668720599313883 # SSO:000029265__acetyl-CoA carboxylase biotin carboxyl carrier protein // 0.019808994454127656 # SSO:000001303__CDP-diacylglycerol--serine O-phosphatidyltransferase (EC 2.7.8.8) // 0.016981768121548307 # SSO:000004938__Molybdenum cofactor guanylyltransferase (EC 2.7.7.77) // 0.015897798224761252 # SSO:000029073__Xylose isomerase // 0.013697792358268263 # SSO:000012746__DNA binding protein // 0.013670177378838352 # SSO:000009961__ADP-ribosylglycohydrolase family protein // 0.011456075207835564 # SSO:000037230__Nitronate monooxygenase (EC 1.13.12.16) // 0.010752961421402037 # SSO:000009556__2-oxo-hepta-3-ene-17-dioic acid hydratase (EC 4.2.-.-) // 0.010492298125452135 # SSO:000037035__NAD(P)/FAD-dependent oxidoreductase // 0.01043794675444246 # SSO:000005958__Phosphoenolpyruvate synthase (EC 2.7.9.2) // 0.009660403932258544 # SSO:000043747__(23-dihydroxybenzoyl)adenylate synthase (EC 2.7.7.58) // 0.008272310196476437 # SSO:000025362__TM2 domain containing protein // 0.008095848556096652 # SSO:000008849__Uracil phosphoribosyltransferase (EC 2.4.2.9) // 0.008072836920657665
Random Forest,0.81877,0.819393,0.81877,0.816129,0.802133,TP=180 TN=326 FP=74FN=38,SSO:000027090__beta-glucosidase (EC 3.2.1.21) // 0.009756254630000084 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.009098915791581693 # SSO:000025241__Sugar kinase // 0.007975916896140828 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.007357100768752473 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.007246095451409926 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.007008050598620956 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.005624292360683922 # SSO:000029073__Xylose isomerase // 0.005516456284826084 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.005356023563594795 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.004815386418041565 # SSO:000022980__PspC domain-containing protein // 0.004757435431613888 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.004366896984277665 # SSO:000010516__Endo-14-beta-xylanase (EC 3.2.1.8) // 0.0041316542435085426 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.0035155034126692496 # SSO:000024103__Rhamnogalacturonan acetylesterase // 0.003291456800196972 # SSO:000036741__Malate dehydrogenase (quinone) (EC 1.1.5.4) // 0.0032273197786015506 # SSO:000042143__Lipopolysaccharide heptosyltransferase I // 0.003151425476466729 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.0031492044424984043 # SSO:000018768__L-lactate permease // 0.0030944829461861624 # SSO:000017367__Glycosidase // 0.0030854171677227545
CatBoost,0.807443,0.806799,0.807443,0.807015,0.799061,TP=191 TN=308 FP=63FN=56,SSO:000027090__beta-glucosidase (EC 3.2.1.21) // 0.577457661636694 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.44003855140629083 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.2728475018903912 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.24759782798484337 # SSO:000025241__Sugar kinase // 0.23767782694055514 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.1662394127866552 # SSO:000018768__L-lactate permease // 0.1660508685692846 # SSO:000008131__Trans-aconitate 2-methyltransferase (EC 2.1.1.144) // 0.13772015542031324 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.13075614135637464 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.12962579099113086 # SSO:000016591__FMN-binding protein // 0.1292343924059291 # SSO:000009756__Membrane protein // 0.1263687427704605 # SSO:000010823__Arsenate reductase family protein // 0.1124191699987665 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.09801413649040501 # SSO:000005278__Na+/H+ antiporter // 0.09477951921650561 # SSO:000005916__Phosphate acetyltransferase (EC 2.3.1.8) // 0.09369462213884618 # SSO:000016939__GAF domain-containing protein // 0.09227806258777736 # SSO:000002027__D-glycero-beta-D-manno-heptose 7-phosphate kinase // 0.08864085301605973 # SSO:000002897__Formate--tetrahydrofolate ligase (EC 6.3.4.3) // 0.0824483988018927 # SSO:000000922__Alpha-glucuronidase (EC 3.2.1.139) // 0.08143856194253327


Numer of genomes:1811
Number of genomic features :5188
Shape of y:(1811,)
Count of 1: 595
Count of 0: 1216
Running Decision Tree for phenotype D-glucose--fermentation




Running Random Forest for phenotype D-glucose--fermentation




Running CatBoost for phenotype D-glucose--fermentation




######### Combined report for D-glucose--fermentation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.84022,0.84022,0.84022,0.84022,0.820248,TP=213 TN=92 FP=29FN=29,SSO:000007208__S-ribosylhomocysteine lyase (EC 4.4.1.21) // 0.2511523113126365 # SSO:000003483__Homoserine O-acetyltransferase (EC 2.3.1.31) // 0.07403798483534002 # SSO:000006811__Pyridoxamine 5'-phosphate oxidase (EC 1.4.3.5) // 0.04835303926940898 # SSO:000008678__Ubiquinol-cytochrome C reductase iron-sulfur subunit (EC 1.10.2.2) // 0.04353598184020243 # SSO:000044184__tRNAMet cytidine acetyltransferase (EC 2.3.1.193) // 0.028251509923637078 # SSO:000004155__L-lactate dehydrogenase (EC 1.1.1.27) // 0.025712367061382614 # SSO:000020158__Mycoredoxin (EC 1.20.4.3) // 0.014483834345094348 # SSO:000000948__Amino acid permease // 0.013006471787468837 # SSO:000043018__acetolactate decarboxylase (EC 4.1.1.5) // 0.01119627598620882 # SSO:000009472__1-acylglycerol-3-phosphate O-acyltransferase // 0.011160830045510887 # SSO:000043309__branched-chain-amino-acid transaminase (EC 2.6.1.42) // 0.011006178712468971 # SSO:000021890__Polysaccharide biosynthesis protein // 0.009679829686953634 # SSO:000034428__Acetoacetate decarboxylase family protein // 0.009133292105853624 # SSO:000037230__Nitronate monooxygenase (EC 1.13.12.16) // 0.008865897989323714 # SSO:000002885__Formate dehydrogenase beta subunit (EC 1.2.1.2) // 0.00801068505960034 # SSO:000033667__transposase family protein // 0.006988441362102235 # SSO:000017748__Heparin-binding hemagglutinin // 0.006979570258231723 # SSO:000024257__S-adenosylmethionine decarboxylase proenzyme // 0.006936984542094949 # SSO:000006855__Pyruvatephosphate dikinase (EC 2.7.9.1) // 0.006864600481546448 # SSO:000017208__Isochorismatase (EC 3.3.2.1) // 0.006754909908005054
Random Forest,0.887052,0.886035,0.887052,0.886175,0.865702,TP=225 TN=97 FP=17FN=24,SSO:000007208__S-ribosylhomocysteine lyase (EC 4.4.1.21) // 0.008079312709233594 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.007088547928470767 # SSO:000003114__Glucosamine-6-phosphate deaminase (EC 3.5.99.6) // 0.005948025094051275 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.005646037708036029 # SSO:000034727__Autonomous glycyl radical cofactor GrcA // 0.00559813232736366 # SSO:000009028__Protein-PII uridylyltransferase (EC 2.7.7.59) // 0.005102620699180009 # SSO:000000219__2-succinyl-6-hydroxy-24-cyclohexadiene-1-carboxylate synthase (EC 4.2.99.20) // 0.004902816152574533 # SSO:000000817__Adenosylhomocysteinase (EC 3.3.1.1) // 0.004770317862317421 # SSO:000002977__GMP reductase (EC 1.7.1.7) // 0.004464088389555269 # SSO:000005170__NAD(P)H-flavin reductase (EC 1.16.1.3) (EC 1.5.1.29) // 0.004339189219519059 # SSO:000036310__Methylenetetrahydrofolate reductase (NAD(P)H) (EC 1.5.1.20) // 0.003989150562341483 # SSO:000019001__Lipoprotein NlpI // 0.003968311881786731 # SSO:000043905__4a-hydroxytetrahydrobiopterin dehydratase (EC 4.2.1.96) // 0.00392699948911812 # SSO:000002246__Deoxyguanosinetriphosphate triphosphohydrolase (EC 3.1.5.1) // 0.0038372355284351494 # SSO:000000218__2-succinyl-5-enolpyruvyl-6-hydroxy-3-cyclohexene-1-carboxylic-acid synthase (EC 2.2.1.9) // 0.0037353948445467294 # SSO:000021631__Phosphocarrier protein HPr // 0.0037154830948837037 # SSO:000037085__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit D // 0.0035746747611638562 # SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 0.0034418642127614476 # SSO:000001476__Cell division protein ZapC // 0.003414788894745668 # SSO:000000035__14-dihydroxy-2-naphthoate polyprenyltransferase (EC 2.5.1.74) // 0.0034127704504170807
CatBoost,0.892562,0.89177,0.892562,0.891978,0.873967,TP=225 TN=99 FP=17FN=22,SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.43408820504314055 # SSO:000007208__S-ribosylhomocysteine lyase (EC 4.4.1.21) // 0.40140214025800935 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.2667090905720984 # SSO:000004155__L-lactate dehydrogenase (EC 1.1.1.27) // 0.22115878758194377 # SSO:000003114__Glucosamine-6-phosphate deaminase (EC 3.5.99.6) // 0.2141874181794137 # SSO:000004043__Isochorismate synthase (EC 5.4.4.2) // 0.19300152548855717 # SSO:000037084__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit C // 0.14757019463929938 # SSO:000018566__Intradiol ring-cleavage dioxygenase // 0.14061450113970173 # SSO:000029603__cobalamin B12-binding domain-containing protein // 0.12973387690987515 # SSO:000043018__acetolactate decarboxylase (EC 4.1.1.5) // 0.12741064550097492 # SSO:000002246__Deoxyguanosinetriphosphate triphosphohydrolase (EC 3.1.5.1) // 0.12012584558402474 # SSO:000001995__D-alanyl-D-alanine carboxypeptidase (EC 3.4.16.4) // 0.11930883299472217 # SSO:000010371__Glutathione-dependent formaldehyde dehydrogenase // 0.11569194494879866 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.1106677399264515 # SSO:000029953__formate/nitrite transporter family protein // 0.10824705612720999 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.10737645043720069 # SSO:000010617__Aminotransferase // 0.10680190719229854 # SSO:000021599__Phosphatase // 0.0995581444717059 # SSO:000001095__Aspartate carbamoyltransferase (EC 2.1.3.2) // 0.09646086996168995 # SSO:000012359__Collagen-like protein // 0.09641516625031327


Numer of genomes:482
Number of genomic features :3317
Shape of y:(482,)
Count of 1: 41
Count of 0: 441
Running Decision Tree for phenotype glycogen--fermentation




Running Random Forest for phenotype glycogen--fermentation




Running CatBoost for phenotype glycogen--fermentation




######### Combined report for glycogen--fermentation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.824742,0.921777,0.824742,0.860113,0.77381,TP=75 TN=5 FP=15FN=2,SSO:000026177__Trehalose-6-phosphate synthase // 0.20111045920786988 # SSO:000024959__Sodium:solute symporter family protein // 0.08025299949130367 # SSO:000028881__Uroporphyrinogen decarboxylase // 0.05618596863602213 # SSO:000009598__34-dihydroxyphenylacetate 23-dioxygenase (EC 1.13.11.15) // 0.053598322298699166 # SSO:000017367__Glycosidase // 0.04943017747827568 # SSO:000002795__Flagellar basal-body rod protein FlgB // 0.037133515290316964 # SSO:000000263__34-dihydroxy-2-butanone 4-phosphate synthase (EC 4.1.99.12) // 0.03437615318901977 # SSO:000029193__Zinc transporter ZupT // 0.0314024013055664 # SSO:000008674__UTP--glucose-1-phosphate uridylyltransferase (EC 2.7.7.9) // 0.02601188088753379 # SSO:000012109__ChaB family protein // 0.024804541230023704 # SSO:000025241__Sugar kinase // 0.02418737489472912 # SSO:000010002__ATP-binding protein // 0.021077103919118687 # SSO:000029317__alkylhydroperoxidase // 0.01861868261225401 # SSO:000021510__Phage holin // 0.01583627313227559 # SSO:000001094__Aspartate ammonia-lyase (EC 4.3.1.1) // 0.015018021132805648 # SSO:000017405__Glycosyl transferase family 28 // 0.014977021264241327 # SSO:000025277__Sulfotransferase // 0.014644301490484476 # SSO:000018191__IS110 family transposase // 0.009707447247963578 # SSO:000009334__tRNA (cytidine(34)-2'-O)-methyltransferase (EC 2.1.1.207) // 0.009267416988209301 # SSO:000005203__NADH dehydrogenase (EC 1.6.99.3) // 0.009029556703505387
Random Forest,0.958763,0.958763,0.958763,0.958763,0.846032,TP=88 TN=5 FP=2FN=2,SSO:000026177__Trehalose-6-phosphate synthase // 0.00987875435765538 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.0072763579101233385 # SSO:000003133__Glucosyl-3-phosphoglycerate synthase (EC 2.4.1.266) // 0.007161109658446453 # SSO:000043812__N-acetyl-1-D-myo-inositol-2-amino-2-deoxy-alpha-D-glucopyranoside deacetylase (EC 3.5.1.103) // 0.005138920391515284 # SSO:000001635__Cobalt-precorrin-6A reductase (EC 1.3.1.54) // 0.005130769037245357 # SSO:000019203__METHYL-ACCEPTING CHEMOTAXIS PROTEIN // 0.004448815465544457 # SSO:000010266__Acyl-CoA synthetase // 0.00440566161884051 # SSO:000023911__RecB family exonuclease // 0.0043778223170644615 # SSO:000013242__DoxX family protein // 0.004376591210008991 # SSO:000012979__DUF2017 domain-containing protein // 0.004317056877905165 # SSO:000025241__Sugar kinase // 0.004303219734183283 # SSO:000006305__Prephenate dehydrogenase (EC 1.3.1.12) // 0.0038462945903918553 # SSO:000002814__Flagellar hook-basal body complex protein FliE // 0.0037639987981266713 # SSO:000025597__Thioesterase family protein // 0.003629367436769078 # SSO:000002815__Flagellar hook-length control protein FliK // 0.0035190024727519485 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.0033646338042570157 # SSO:000024959__Sodium:solute symporter family protein // 0.0033047723749070595 # SSO:000016682__Ferredoxin reductase // 0.003291154760426011 # SSO:000000735__Aconitate hydratase (EC 4.2.1.3) // 0.003273487581247865 # SSO:000016805__Flagellar motor protein MotB // 0.0030864077845096437
CatBoost,0.948454,0.952088,0.948454,0.950028,0.840476,TP=87 TN=5 FP=3FN=2,SSO:000026177__Trehalose-6-phosphate synthase // 0.8938514212449549 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.5513180414367959 # SSO:000019203__METHYL-ACCEPTING CHEMOTAXIS PROTEIN // 0.3493448851224122 # SSO:000003133__Glucosyl-3-phosphoglycerate synthase (EC 2.4.1.266) // 0.3072298857404737 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.28416303817151833 # SSO:000017067__Phage major capsid protein // 0.18930924883912092 # SSO:000017367__Glycosidase // 0.18665715450387219 # SSO:000022348__Prenyltransferase // 0.1843695058024972 # SSO:000026421__Tyrosine recombinase XerC // 0.17000443876096324 # SSO:000023974__RelA/SpoT domain-containing protein // 0.16770690329695997 # SSO:000012221__Chromosome partitioning protein // 0.1637597339474874 # SSO:000018768__L-lactate permease // 0.1522710146026192 # SSO:000017669__Helicase // 0.1499025322717978 # SSO:000002972__GDP-mannose 46-dehydratase (EC 4.2.1.47) // 0.1493485429734119 # SSO:000013242__DoxX family protein // 0.14716774091302115 # SSO:000001864__Cysteine synthase (EC 2.5.1.47) // 0.1382206727040849 # SSO:000043536__GDP-L-fucose synthase (EC 1.1.1.271) // 0.135591057501321 # SSO:000000991__Anhydro-N-acetylmuramic acid kinase (EC 2.7.1.-) // 0.1276093639017396 # SSO:000025597__Thioesterase family protein // 0.1268265751219853 # SSO:000019056__Low temperature requirement protein A // 0.12235509813567431


Numer of genomes:959
Number of genomic features :4521
Shape of y:(959,)
Count of 1: 476
Count of 0: 483
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:588
Number of genomic features :3626
Shape of y:(588,)
Count of 1: 141
Count of 0: 447
Running Decision Tree for phenotype lactose--fermentation




Running Random Forest for phenotype lactose--fermentation




Running CatBoost for phenotype lactose--fermentation




######### Combined report for lactose--fermentation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.805085,0.811165,0.805085,0.807571,0.772014,TP=72 TN=23 FP=13FN=10,SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.20103591840547927 # SSO:000017291__Glutaminyl-peptide cyclotransferase (EC 2.3.2.5) // 0.07551377589457844 # SSO:000000449__6-phospho-3-hexuloisomerase (EC 5.3.1.27) // 0.060715010225341905 # SSO:000006371__Prolyl aminopeptidase (EC 3.4.11.5 ) // 0.043226097069429405 # SSO:000002795__Flagellar basal-body rod protein FlgB // 0.0396559360507606 # SSO:000000568__ATP-dependent Clp protease proteolytic subunit (EC 3.4.21.92) // 0.038770273679170304 # SSO:000004091__Kynureninase (EC 3.7.1.3) // 0.03089910678286743 # SSO:000004883__Methylisocitrate lyase (EC 4.1.3.30) // 0.021529819065377428 # SSO:000000298__3-hydroxyisobutyrate dehydrogenase (EC 1.1.1.31) // 0.020667279786033745 # SSO:000024950__Sodium/solute symporter // 0.020624950794634476 # SSO:000002060__DNA (cytosine-5-)-methyltransferase (EC 2.1.1.37 ) // 0.020617795896106655 # SSO:000012884__DNA topology modulation protein // 0.019098788814954134 # SSO:000033834__xylulokinase // 0.01633485299662901 # SSO:000043018__acetolactate decarboxylase (EC 4.1.1.5) // 0.014805311501985665 # SSO:000003528__Hydroxymethylglutaryl-CoA synthase (EC 2.3.3.10) // 0.014575312967812124 # SSO:000012178__Chitosanase // 0.014033663610843244 # SSO:000005271__Na(+)-translocating NADH-quinone reductase subunit A (EC 1.6.5.-) // 0.01150484007488775 # SSO:000005313__Nicotinate phosphoribosyltransferase (EC 6.3.4.21) // 0.011305048762404367 # SSO:000021338__Peptidase M23 // 0.009578628629654794 # SSO:000029953__formate/nitrite transporter family protein // 0.009276349216697441
Random Forest,0.864407,0.864407,0.864407,0.864407,0.831729,TP=77 TN=25 FP=8FN=8,SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.011349201828876455 # SSO:000012628__Cytochrome c oxidase subunit I // 0.00759381387587723 # SSO:000026177__Trehalose-6-phosphate synthase // 0.006156959168351284 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.006015988121675001 # SSO:000029482__c-type cytochrome // 0.005867571931062953 # SSO:000029759__coproporphyrinogen III oxidase // 0.004733848582611842 # SSO:000013422__Enoyl-CoA hydratase/isomerase family protein // 0.004552868327576558 # SSO:000043061__asparagine---tRNA ligase (EC 6.1.1.22) // 0.00437860972340865 # SSO:000000543__ATP phosphoribosyltransferase regulatory subunit (EC 2.4.2.17) // 0.0038854181319028635 # SSO:000010245__Acyl-CoA dehydrogenase // 0.0038449129303864886 # SSO:000043162__1-(5-phosphoribosyl)-5-(5-phosphoribosylamino)methylideneaminoimidazole-4-carboxamide isomerase (EC 5.3.1.16) // 0.0037957171487311807 # SSO:000003133__Glucosyl-3-phosphoglycerate synthase (EC 2.4.1.266) // 0.0037263399486195383 # SSO:000020985__Oxygen-independent coproporphyrinogen III oxidase // 0.0035123411357909436 # SSO:000006827__Pyrimidine-nucleoside phosphorylase (EC 2.4.2.2) // 0.0034667468466116204 # SSO:000043727__thiamine diphosphokinase (EC 2.7.6.2) // 0.003431332240054848 # SSO:000001635__Cobalt-precorrin-6A reductase (EC 1.3.1.54) // 0.003367207358456931 # SSO:000000457__6-phosphogluconolactonase (EC 3.1.1.31) // 0.0032129939679162744 # SSO:000009370__tRNA(1)(Val) (adenine(37)-N(6))-methyltransferase (EC 2.1.1.223) // 0.0032035105904548358 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.0031862790911661282 # SSO:000033686__trehalose-phosphatase // 0.003135936159598172
CatBoost,0.864407,0.862333,0.864407,0.863076,0.82246,TP=78 TN=24 FP=7FN=9,SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.7417291659121389 # SSO:000020611__Non-ribosomal peptide synthetase // 0.23761149226594236 # SSO:000012150__Chemotaxis protein CheX // 0.20411824495121503 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.19625223328292613 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.19418199598917152 # SSO:000013422__Enoyl-CoA hydratase/isomerase family protein // 0.16345413109995605 # SSO:000029482__c-type cytochrome // 0.1614596544870225 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.15864132955146884 # SSO:000028897__VTC domain-containing protein // 0.1556676550471261 # SSO:000006213__Precorrin-6A synthase (deacetylating) (EC 2.1.1.152) // 0.14590103265110735 # SSO:000012628__Cytochrome c oxidase subunit I // 0.12942483084282838 # SSO:000001094__Aspartate ammonia-lyase (EC 4.3.1.1) // 0.12728027271984138 # SSO:000000568__ATP-dependent Clp protease proteolytic subunit (EC 3.4.21.92) // 0.12602996894818483 # SSO:000021529__Phage portal protein // 0.1185482060211478 # SSO:000000449__6-phospho-3-hexuloisomerase (EC 5.3.1.27) // 0.11565332622008397 # SSO:000034554__Aldehyde dehydrogenase (NAD(P)(+)) (EC 1.2.1.5) // 0.11546359399538877 # SSO:000007796__Sucrose-6-phosphate hydrolase (EC 3.2.1.26) // 0.11276050929248826 # SSO:000021554__Phage tail protein // 0.10557949721242835 # SSO:000000450__6-phospho-beta-galactosidase (EC 3.2.1.85) // 0.10272689252166574 # SSO:000043162__1-(5-phosphoribosyl)-5-(5-phosphoribosylamino)methylideneaminoimidazole-4-carboxamide isomerase (EC 5.3.1.16) // 0.10230019984954594


Numer of genomes:576
Number of genomic features :3629
Shape of y:(576,)
Count of 1: 214
Count of 0: 362
Running Decision Tree for phenotype maltose--fermentation




Running Random Forest for phenotype maltose--fermentation




Running CatBoost for phenotype maltose--fermentation




######### Combined report for maltose--fermentation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.741379,0.754725,0.741379,0.744549,0.743056,TP=53 TN=33 FP=19FN=11,SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.1735846780924813 # SSO:000043061__asparagine---tRNA ligase (EC 6.1.1.22) // 0.10396435177494344 # SSO:000002402__Ectoine synthase (EC 4.2.1.108) // 0.0635193518505394 # SSO:000021899__Polysaccharide deacetylase family protein // 0.049512269524275 # SSO:000004605__Lysophospholipase (EC 3.1.1.5) // 0.038636685344105685 # SSO:000036082__Glucose-6-phosphate dehydrogenase (coenzyme F420) (EC 1.1.98.2) // 0.03480599808762585 # SSO:000020367__NERD domain-containing protein // 0.027607899334712572 # SSO:000031477__polyhydroxybutyrate depolymerase // 0.02440521958864071 # SSO:000006147__Polyketide synthase // 0.022359601727109556 # SSO:000029459__beta-phosphoglucomutase family hydrolase // 0.01853314460273577 # SSO:000012961__DUF1015 domain-containing protein // 0.018434882507726402 # SSO:000021770__Phytoene desaturase (EC 1.14.99.-) // 0.018208550270806974 # SSO:000010242__Acyl transferase // 0.017924334491483555 # SSO:000029948__flavohemoprotein // 0.016240532063055344 # SSO:000002607__Excinuclease ABC subunit A // 0.014832137436532392 # SSO:000031187__nucleic acid binding protein // 0.01477759606861196 # SSO:000025627__Thioredoxin domain-containing protein // 0.014605680925019848 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.013844753297877668 # SSO:000022788__Protein kinase // 0.013306278905037699 # SSO:000023742__Pyridoxal phosphate-dependent aminotransferase // 0.012790353042873985
Random Forest,0.827586,0.827586,0.827586,0.827586,0.816919,TP=62 TN=34 FP=10FN=10,SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.009899567338622942 # SSO:000035277__Cytochrome c oxidase subunit 4 (EC 1.9.3.1) // 0.006801418000982362 # SSO:000012589__Cytochrome P-450 // 0.0066017456399348065 # SSO:000007208__S-ribosylhomocysteine lyase (EC 4.4.1.21) // 0.006395475547370279 # SSO:000039072__S-(hydroxymethyl)mycothiol dehydrogenase (EC 1.1.1.306) // 0.006270443941179647 # SSO:000043620__mycothiol synthase (EC 2.3.1.189) // 0.0061926602371715775 # SSO:000002029__D-inositol-3-phosphate glycosyltransferase (EC 2.4.1.250) // 0.005252762684059082 # SSO:000000817__Adenosylhomocysteinase (EC 3.3.1.1) // 0.005155559017217759 # SSO:000010813__Leucyl aminopeptidase (EC 3.4.11.10) // 0.0050698962969109775 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.00498691339411629 # SSO:000012628__Cytochrome c oxidase subunit I // 0.004821894132122453 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.004246134467706724 # SSO:000001853__Cystathionine beta-synthase (EC 4.2.1.22) // 0.004217481185125688 # SSO:000029482__c-type cytochrome // 0.004103004693819031 # SSO:000009559__2-phospho-L-lactate guanylyltransferase (EC 2.7.7.68) // 0.003846501544502271 # SSO:000007096__Ribonuclease Y // 0.0038371968159749154 # SSO:000043727__thiamine diphosphokinase (EC 2.7.6.2) // 0.003755668945454894 # SSO:000007570__Single-stranded-DNA-specific exonuclease RecJ (EC 3.1.-.-) // 0.0036875350771438907 # SSO:000003696__Imidazolonepropionase (EC 3.5.2.7) // 0.003617516916998717 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.003586123269806716
CatBoost,0.827586,0.826455,0.827586,0.826762,0.8125,TP=63 TN=33 FP=9FN=11,SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.5622298389338655 # SSO:000005432__Nucleotidyltransferase (EC 2.7.7.-) // 0.5363680784612799 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.5067747139702251 # SSO:000012589__Cytochrome P-450 // 0.35678596185270545 # SSO:000020367__NERD domain-containing protein // 0.3355492183479116 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.27013301459540867 # SSO:000000817__Adenosylhomocysteinase (EC 3.3.1.1) // 0.2670060919455198 # SSO:000039072__S-(hydroxymethyl)mycothiol dehydrogenase (EC 1.1.1.306) // 0.21885101998316356 # SSO:000001777__Copper-translocating P-type ATPase (EC 3.6.3.4) // 0.21119802909008994 # SSO:000022925__Protein tyrosine phosphatase (EC 3.1.3.48) // 0.2065892764109205 # SSO:000025277__Sulfotransferase // 0.18182113299576513 # SSO:000012972__DUF1801 domain-containing protein // 0.17829147003525525 # SSO:000010295__Acyltransferase family protein // 0.16252060303995902 # SSO:000021899__Polysaccharide deacetylase family protein // 0.15852043509623492 # SSO:000007212__SAM-dependent methyltransferase (EC 2.1.1.-) // 0.1408752421411249 # SSO:000043018__acetolactate decarboxylase (EC 4.1.1.5) // 0.1380605750009361 # SSO:000030895__iron-sulfur cluster assembly accessory protein // 0.12686242721974367 # SSO:000000449__6-phospho-3-hexuloisomerase (EC 5.3.1.27) // 0.10952438076975744 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.10555046807235705 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.10421374378481733


Numer of genomes:875
Number of genomic features :4356
Shape of y:(875,)
Count of 1: 351
Count of 0: 524
Running Decision Tree for phenotype D-mannitol--fermentation




Running Random Forest for phenotype D-mannitol--fermentation




Running CatBoost for phenotype D-mannitol--fermentation




######### Combined report for D-mannitol--fermentation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.817143,0.826313,0.817143,0.819054,0.820688,TP=87 TN=56 FP=21FN=11,SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 0.3001501697262392 # SSO:000029108__YaiI/YqxD family protein // 0.11260392021354726 # SSO:000029337__alpha-xylosidase (EC 3.2.1.177) // 0.04569847627694271 # SSO:000021650__Phosphoglycerate dehydrogenase // 0.032318007385541477 # SSO:000023921__Redox-sensing transcriptional repressor Rex // 0.03161139914431324 # SSO:000000406__5-(carboxyamino)imidazole ribonucleotide synthase (EC 6.3.4.18) // 0.028926457308107573 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.025000064790556584 # SSO:000003012__Galactose-1-phosphate uridylyltransferase (EC 2.7.7.10) // 0.022621631623685634 # SSO:000000212__2-phosphosulfolactate phosphatase (EC 3.1.3.71 ) // 0.01335737000508144 # SSO:000005037__Murein hydrolase activator NlpD // 0.011925120499744422 # SSO:000003262__Glycerophosphoryl diester phosphodiesterase (EC 3.1.4.46) // 0.011210057387726246 # SSO:000028897__VTC domain-containing protein // 0.010342726942852449 # SSO:000016592__FMN-dependent NADH-azoreductase (EC 1.7.1.6) // 0.009264499301989809 # SSO:000033054__radical SAM protein // 0.009189178861947091 # SSO:000007627__Spermidine synthase (EC 2.5.1.16) // 0.009122688843894194 # SSO:000024577__Serine/threonine kinase // 0.007865092285992048 # SSO:000025504__Teicoplanin resistance protein vanZ // 0.0073510270083908475 # SSO:000044172__hydroxyisourate hydrolase (EC 3.5.2.17) // 0.0069826280794311 # SSO:000031187__nucleic acid binding protein // 0.0068151472452070575 # SSO:000020367__NERD domain-containing protein // 0.0056592500359597675
Random Forest,0.897143,0.899146,0.897143,0.897663,0.896835,TP=97 TN=60 FP=11FN=7,SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 0.01595637931628706 # SSO:000002977__GMP reductase (EC 1.7.1.7) // 0.007565088220326706 # SSO:000023873__RNase E specificity factor CsrD // 0.005809862477107817 # SSO:000029108__YaiI/YqxD family protein // 0.005074566295499131 # SSO:000005953__Phosphoenolpyruvate carboxykinase ATP (EC 4.1.1.49) // 0.004681260935468183 # SSO:000012882__DNA topoisomerase IV subunit B // 0.004529937315769333 # SSO:000000624__ATP-dependent protease subunit HslV (EC 3.4.25.2) // 0.004302261944176144 # SSO:000017332__Glycerol-3-phosphate transporter // 0.004286873844419081 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.004122970663969892 # SSO:000005654__PTS IIA-like nitrogen-regulatory protein PtsN (EC 2.7.1.69 ) // 0.004030401555613603 # SSO:000043162__1-(5-phosphoribosyl)-5-(5-phosphoribosylamino)methylideneaminoimidazole-4-carboxamide isomerase (EC 5.3.1.16) // 0.003973734689501773 # SSO:000006028__Phosphopentomutase (EC 5.4.2.7) // 0.003694171938277651 # SSO:000016592__FMN-dependent NADH-azoreductase (EC 1.7.1.6) // 0.003563402975466035 # SSO:000000219__2-succinyl-6-hydroxy-24-cyclohexadiene-1-carboxylate synthase (EC 4.2.99.20) // 0.0034987196574965376 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.003433058342701112 # SSO:000029759__coproporphyrinogen III oxidase // 0.00300394091246649 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.0029550412725100265 # SSO:000002138__DNA-binding protein HU-alpha // 0.002907359277456981 # SSO:000042555__erythrose-4-phosphate dehydrogenase (EC 1.2.1.72) // 0.0029014889791344617 # SSO:000008961__Xanthine phosphoribosyltransferase (EC 2.4.2.22) // 0.002872678735545832
CatBoost,0.897143,0.900762,0.897143,0.897878,0.899668,TP=96 TN=61 FP=12FN=6,SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 1.9635008790652346 # SSO:000004554__Low-specificity L-threonine aldolase (EC 4.1.2.48) // 0.3529225800690037 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.30441194803676275 # SSO:000026218__Trypsin-like serine protease // 0.2725048994344145 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.23564094406964772 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.19184960935875656 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.18463429665982306 # SSO:000005188__NAD-dependent malic enzyme (EC 1.1.1.38) // 0.177089712453829 # SSO:000002977__GMP reductase (EC 1.7.1.7) // 0.15097135656713828 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.14369081905092124 # SSO:000025723__Toxin // 0.14196974642575094 # SSO:000000936__Altronate dehydratase (EC 4.2.1.7) // 0.12458561115945103 # SSO:000016969__GGDEF domain containing protein // 0.11263567207576956 # SSO:000001777__Copper-translocating P-type ATPase (EC 3.6.3.4) // 0.10969509955471325 # SSO:000004475__Lipopolysaccharide N-acetylmannosaminouronosyltransferase (EC 2.4.1.180) // 0.10635297969733246 # SSO:000031442__phosphotriesterase // 0.09682758704768744 # SSO:000029759__coproporphyrinogen III oxidase // 0.08931379825514717 # SSO:000002138__DNA-binding protein HU-alpha // 0.08918154852361253 # SSO:000004890__Methylmalonyl-CoA epimerase (EC 5.1.99.1) // 0.08742473462133195 # SSO:000029506__carbohydrate kinase // 0.08590029098519684


Numer of genomes:490
Number of genomic features :3359
Shape of y:(490,)
Count of 1: 61
Count of 0: 429
Running Decision Tree for phenotype D-xylose--fermentation




Running Random Forest for phenotype D-xylose--fermentation




Running CatBoost for phenotype D-xylose--fermentation




######### Combined report for D-xylose--fermentation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.877551,0.898931,0.877551,0.886331,0.754545,TP=80 TN=6 FP=8FN=4,SSO:000026177__Trehalose-6-phosphate synthase // 0.16564270491111732 # SSO:000029073__Xylose isomerase // 0.11857279358130378 # SSO:000036082__Glucose-6-phosphate dehydrogenase (coenzyme F420) (EC 1.1.98.2) // 0.06538526995645436 # SSO:000002811__Flagellar hook protein FlgE // 0.051999143061119295 # SSO:000025721__Toxic anion resistance protein // 0.044076944025398024 # SSO:000000417__5-dehydro-4-deoxyglucarate dehydratase (EC 4.2.1.41) // 0.04156285049016205 # SSO:000044350__aspartate---tRNA ligase (EC 6.1.1.12) // 0.03863466769186476 # SSO:000001301__CDP-diacylglycerol--glycerol-3-phosphate 3-phosphatidyltransferase (EC 2.7.8.5) // 0.036250515646532824 # SSO:000017027__GTPase // 0.03613082084793031 # SSO:000034599__Amino acid aminotransferase // 0.014646199032803833 # SSO:000020170__N-6 DNA methylase // 0.014348725313390185 # SSO:000000536__AMP nucleosidase (EC 3.2.2.4) // 0.008966713869537929 # SSO:000029598__coagulation factor 5/8 type domain protein // 0.008304291883076981 # SSO:000044309__deoxyribonuclease I (EC 3.1.21.1) // 0.007612260493517752 # SSO:000025517__Tellurium resistance protein terC // 0.0058261289721763315 # SSO:000010699__Anti-sigma-factor antagonist // 0.005344125730875372 # SSO:000011502__CAAX protease // 0.0048008168914168954 # SSO:000024628__Sialidase (EC 3.2.1.18) // 0.0045040899212850155 # SSO:000002626__Exonuclease SbcD // 0.0012029532248415344 # SSO:000000365__4-hydroxy-tetrahydrodipicolinate reductase (EC 1.17.1.8) // 0.0
Random Forest,0.928571,0.921507,0.928571,0.922867,0.738636,TP=86 TN=5 FP=2FN=5,SSO:000029073__Xylose isomerase // 0.012371218872993904 # SSO:000026177__Trehalose-6-phosphate synthase // 0.007840822596412743 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.007760046833754474 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.007649514977296886 # SSO:000029337__alpha-xylosidase (EC 3.2.1.177) // 0.007157407114097785 # SSO:000001635__Cobalt-precorrin-6A reductase (EC 1.3.1.54) // 0.00704461901987604 # SSO:000005954__Phosphoenolpyruvate carboxykinase GTP (EC 4.1.1.32) // 0.006774398302982954 # SSO:000019203__METHYL-ACCEPTING CHEMOTAXIS PROTEIN // 0.0065169183411918526 # SSO:000010727__Antitoxin // 0.0063125009121150575 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.00607118058610511 # SSO:000036947__Mycothione reductase (EC 1.8.1.15) // 0.006006179572759826 # SSO:000043812__N-acetyl-1-D-myo-inositol-2-amino-2-deoxy-alpha-D-glucopyranoside deacetylase (EC 3.5.1.103) // 0.005648639409718849 # SSO:000010503__Alpha-ketoglutarate-dependent dioxygenase AlkB (EC 1.14.11.33) // 0.005292473319588054 # SSO:000029482__c-type cytochrome // 0.004629971471096562 # SSO:000043620__mycothiol synthase (EC 2.3.1.189) // 0.00460579486885468 # SSO:000035277__Cytochrome c oxidase subunit 4 (EC 1.9.3.1) // 0.004377703896666833 # SSO:000012160__Chemotaxis response regulator protein-glutamate methylesterase (EC 3.1.1.61) // 0.004265382144572665 # SSO:000021874__Polyphosphate kinase 2 (EC 2.7.4.1) // 0.004175886472304919 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.004036339141467338 # SSO:000004891__Methylmalonyl-CoA mutase (EC 5.4.99.2) // 0.003985050581591721
CatBoost,0.94898,0.945952,0.94898,0.944905,0.794318,TP=87 TN=6 FP=1FN=4,SSO:000029073__Xylose isomerase // 1.4464173482961855 # SSO:000029337__alpha-xylosidase (EC 3.2.1.177) // 0.46404668008984074 # SSO:000026177__Trehalose-6-phosphate synthase // 0.4273594113712628 # SSO:000010727__Antitoxin // 0.3552099945056375 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.2638924937200976 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.2204068566240417 # SSO:000012150__Chemotaxis protein CheX // 0.2084423071590562 # SSO:000001635__Cobalt-precorrin-6A reductase (EC 1.3.1.54) // 0.20513445020081522 # SSO:000021874__Polyphosphate kinase 2 (EC 2.7.4.1) // 0.18572670749626025 # SSO:000024618__Shikimate kinase // 0.16433875081816512 # SSO:000005475__O-succinylhomoserine sulfhydrylase (EC 2.5.1.48) // 0.16182327634131174 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.15700781249935306 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.15612417336699563 # SSO:000013400__Endonuclease/exonuclease/phosphatase family protein // 0.15123296416470958 # SSO:000000027__12-phenylacetyl-CoA epoxidase subunit A (EC 1.14.13.149) // 0.14876880563243816 # SSO:000004710__Mannose-1-phosphate guanylyltransferase (EC 2.7.7.13) // 0.13923843597303392 # SSO:000007801__Sulfatase (EC 3.1.6.-) // 0.13840607563125756 # SSO:000000417__5-dehydro-4-deoxyglucarate dehydratase (EC 4.2.1.41) // 0.1378208613704461 # SSO:000037530__Undecaprenyldiphospho-muramoylpentapeptide beta-N-acetylglucosaminyltransferase (EC 2.4.1.227) // 0.13737975767946978 # SSO:000009310__sortase // 0.13373772985483942


Numer of genomes:546
Number of genomic features :3385
Shape of y:(546,)
Count of 1: 116
Count of 0: 430
Running Decision Tree for phenotype D-ribose--fermentation




Running Random Forest for phenotype D-ribose--fermentation




Running CatBoost for phenotype D-ribose--fermentation




######### Combined report for D-ribose--fermentation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.790909,0.841243,0.790909,0.811197,0.666667,TP=80 TN=7 FP=16FN=7,SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.166346999627195 # SSO:000000449__6-phospho-3-hexuloisomerase (EC 5.3.1.27) // 0.06109895184327017 # SSO:000038656__Primary-amine oxidase (EC 1.4.3.21) // 0.050256638008182405 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.04757618716236278 # SSO:000033137__rubrerythrin family protein // 0.04419449710849097 # SSO:000033453__tRNA-dihydrouridine synthase DusB // 0.038098506681075464 # SSO:000017841__Histone deacetylase family protein // 0.029889048327837498 # SSO:000013437__Epoxide hydrolase // 0.023475555602342064 # SSO:000010002__ATP-binding protein // 0.019527086791892658 # SSO:000009637__3-oxoadipate enol-lactonase // 0.018902476011630583 # SSO:000024332__SH3 domain-containing protein // 0.017475206004014986 # SSO:000010617__Aminotransferase // 0.015402119133937358 # SSO:000018447__Inorganic phosphate transporter // 0.015378547581252714 # SSO:000017037__Galactitol-1-phosphate 5-dehydrogenase (EC 1.1.1.251) // 0.013318105719050542 # SSO:000018213__IS630 family transposase // 0.013050448812621903 # SSO:000025239__Sugar isomerase // 0.012802763271375637 # SSO:000020436__Na+/H+ antiporter family protein // 0.011592150315500423 # SSO:000025517__Tellurium resistance protein terC // 0.011560300397376766 # SSO:000010570__Amino acid carrier protein // 0.010820168541334389 # SSO:000012966__DUF1275 domain-containing protein // 0.010623504762758607
Random Forest,0.863636,0.876315,0.863636,0.869152,0.738839,TP=87 TN=8 FP=9FN=6,SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.014121098179461377 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.009028783006336244 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.007901349940344622 # SSO:000017977__Hydroxylase // 0.006996993643487823 # SSO:000006811__Pyridoxamine 5'-phosphate oxidase (EC 1.4.3.5) // 0.0069221053009327835 # SSO:000003114__Glucosamine-6-phosphate deaminase (EC 3.5.99.6) // 0.006791950469673038 # SSO:000039072__S-(hydroxymethyl)mycothiol dehydrogenase (EC 1.1.1.306) // 0.005979943308805628 # SSO:000036082__Glucose-6-phosphate dehydrogenase (coenzyme F420) (EC 1.1.98.2) // 0.0049810885322377155 # SSO:000010245__Acyl-CoA dehydrogenase // 0.0037549542957996678 # SSO:000036507__L-glutamate gamma-semialdehyde dehydrogenase (EC 1.2.1.88) // 0.0037517896818927703 # SSO:000012589__Cytochrome P-450 // 0.0036462170017698753 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.003596253213416282 # SSO:000010239__Acyl dehydratase // 0.00351097909332805 # SSO:000004155__L-lactate dehydrogenase (EC 1.1.1.27) // 0.0034710123573278968 # SSO:000018961__Lipid-transfer protein // 0.003308736156400288 # SSO:000004891__Methylmalonyl-CoA mutase (EC 5.4.99.2) // 0.0032932630351325104 # SSO:000034830__D-ribose pyranase (EC 5.4.99.62) // 0.003290956888426341 # SSO:000020822__Organic hydroperoxide resistance protein // 0.003227902693480875 # SSO:000024197__Rieske (2Fe-2S) protein // 0.0032162781181040126 # SSO:000001864__Cysteine synthase (EC 2.5.1.47) // 0.0032004224519673243
CatBoost,0.863636,0.876315,0.863636,0.869152,0.738839,TP=87 TN=8 FP=9FN=6,SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.8174115767267015 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.59622461849972 # SSO:000034830__D-ribose pyranase (EC 5.4.99.62) // 0.4799548092610805 # SSO:000010058__ATP-dependent helicase HrpB // 0.4533483484866284 # SSO:000012966__DUF1275 domain-containing protein // 0.33814703088282644 # SSO:000036507__L-glutamate gamma-semialdehyde dehydrogenase (EC 1.2.1.88) // 0.28869768859310513 # SSO:000042567__mercury(II) reductase (EC 1.16.1.1) // 0.27479191084182913 # SSO:000003114__Glucosamine-6-phosphate deaminase (EC 3.5.99.6) // 0.2651413788662477 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.2208233735882724 # SSO:000000449__6-phospho-3-hexuloisomerase (EC 5.3.1.27) // 0.20191267427362014 # SSO:000010295__Acyltransferase family protein // 0.1938410536885045 # SSO:000000870__Aldose 1-epimerase (EC 5.1.3.3) // 0.1629248820584008 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.15140611812485683 # SSO:000006811__Pyridoxamine 5'-phosphate oxidase (EC 1.4.3.5) // 0.1456848863672355 # SSO:000025520__TerC family protein // 0.14257433387763682 # SSO:000021890__Polysaccharide biosynthesis protein // 0.1400386692274151 # SSO:000020822__Organic hydroperoxide resistance protein // 0.13033436568254755 # SSO:000010239__Acyl dehydratase // 0.127168457307523 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.12578114167951795 # SSO:000002765__Ferrous iron transport protein A // 0.12173735653515345


Numer of genomes:656
Number of genomic features :4008
Shape of y:(656,)
Count of 1: 237
Count of 0: 419
Running Decision Tree for phenotype casein--hydrolysis




Running Random Forest for phenotype casein--hydrolysis




Running CatBoost for phenotype casein--hydrolysis




######### Combined report for casein--hydrolysis#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.606061,0.632855,0.606061,0.614643,0.59387,TP=55 TN=25 FP=32FN=20,SSO:000008881__Urocanate hydratase (EC 4.2.1.49) // 0.12194187339088725 # SSO:000012503__Cu(I)-responsive transcriptional regulator // 0.08668641188306404 # SSO:000019936__MmgE/PrpD family protein // 0.0537465455116174 # SSO:000035891__Fe3+ ABC transporter (EC 7.2.2.7) substrate-binding protein // 0.04829179444899088 # SSO:000029515__carboxylesterase family protein // 0.03680717910595681 # SSO:000012746__DNA binding protein // 0.030125682980306497 # SSO:000013635__FHA domain-containing protein // 0.026206458544391468 # SSO:000044350__aspartate---tRNA ligase (EC 6.1.1.12) // 0.024447507226913547 # SSO:000009310__sortase // 0.02405633003490206 # SSO:000024007__Replication protein // 0.023926398131717756 # SSO:000012328__Colanic acid biosynthesis acetyltransferase WcaF (EC 2.3.1.-) // 0.023699492493644673 # SSO:000024899__Small acid-soluble spore protein O // 0.022768922484330434 # SSO:000020510__Nicotinamide mononucleotide adenylyltransferase // 0.020718012416734953 # SSO:000016742__Fic family protein // 0.01947402984517056 # SSO:000008399__Tryptophanase (EC 4.1.99.1) // 0.019147127346594118 # SSO:000020822__Organic hydroperoxide resistance protein // 0.018848620817301917 # SSO:000043497__glutathione synthase (EC 6.3.2.3) // 0.01870340851839062 # SSO:000017510__HAD family hydrolase // 0.016696815488385124 # SSO:000036646__Lipopolysaccharide assembly protein LapB // 0.014542513521832145 # SSO:000020342__NADH:flavin oxidoreductase/NADH oxidase // 0.013684871923819165
Random Forest,0.757576,0.763596,0.757576,0.759882,0.740996,TP=69 TN=31 FP=18FN=14,SSO:000008881__Urocanate hydratase (EC 4.2.1.49) // 0.0048529427167822384 # SSO:000044308__adenosylmethionine---8-amino-7-oxononanoate transaminase (EC 2.6.1.62) // 0.0047474903640101545 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.004720558584127132 # SSO:000003696__Imidazolonepropionase (EC 3.5.2.7) // 0.004245699247710287 # SSO:000036507__L-glutamate gamma-semialdehyde dehydrogenase (EC 1.2.1.88) // 0.003692468624430254 # SSO:000018792__L-threonine 3-dehydrogenase // 0.0030790165219695674 # SSO:000033511__thiolase family protein // 0.0029659358688507074 # SSO:000002230__DegV family protein // 0.00294326981648462 # SSO:000019936__MmgE/PrpD family protein // 0.0029008738397200765 # SSO:000017864__Homogentisate 12-dioxygenase (EC 1.13.11.5) // 0.0028743622447072767 # SSO:000003439__Histidine ammonia-lyase (EC 4.3.1.3) // 0.002817223054961216 # SSO:000006362__Proline dehydrogenase (EC 1.5.5.2) // 0.0027291013788838907 # SSO:000000464__8-amino-7-oxononanoate synthase (EC 2.3.1.47) // 0.002668323999455575 # SSO:000000380__4-hydroxyphenylpyruvate dioxygenase (EC 1.13.11.27) // 0.0026670940912750774 # SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.002626375576281416 # SSO:000006040__Phosphoribosylanthranilate isomerase (EC 5.3.1.24) // 0.00236680395143856 # SSO:000024791__Site-specific DNA-methyltransferase // 0.0023536178046965357 # SSO:000044159__glycine C-acetyltransferase (EC 2.3.1.29) // 0.002326447670591911 # SSO:000043561__dethiobiotin synthase (EC 6.3.3.3) // 0.002242542786768012 # SSO:000043061__asparagine---tRNA ligase (EC 6.1.1.22) // 0.0021396106374912873
CatBoost,0.727273,0.730368,0.727273,0.728652,0.701916,TP=68 TN=28 FP=19FN=17,SSO:000033511__thiolase family protein // 0.49303869818307017 # SSO:000044308__adenosylmethionine---8-amino-7-oxononanoate transaminase (EC 2.6.1.62) // 0.3747324426374826 # SSO:000018202__IS21 family transposase // 0.3467213011731026 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.28588602886851605 # SSO:000024791__Site-specific DNA-methyltransferase // 0.2704979899524193 # SSO:000008881__Urocanate hydratase (EC 4.2.1.49) // 0.23203062604078964 # SSO:000019936__MmgE/PrpD family protein // 0.2075482447610778 # SSO:000010198__Acid phosphatase (EC 3.1.3.2) // 0.20753299568485625 # SSO:000028963__Voltage-gated chloride channel family protein // 0.20306155380972873 # SSO:000025571__Thermonuclease family protein // 0.17001704129745077 # SSO:000017006__GPW/gp25 family protein // 0.16806187147203028 # SSO:000020822__Organic hydroperoxide resistance protein // 0.16654288680740642 # SSO:000029515__carboxylesterase family protein // 0.16368773312656368 # SSO:000024584__Serine/threonine protein kinase // 0.1310981396858313 # SSO:000006362__Proline dehydrogenase (EC 1.5.5.2) // 0.12796923172318891 # SSO:000003696__Imidazolonepropionase (EC 3.5.2.7) // 0.12614372127263962 # SSO:000021409__Peptidoglycan-binding protein // 0.1237264492098835 # SSO:000001076__Arylamine N-acetyltransferase (EC 2.3.1.5) // 0.1082557342594734 # SSO:000024558__Serine protease // 0.10733990653538819 # SSO:000001042__Arginase (EC 3.5.3.1) // 0.10421641945870647


Numer of genomes:460
Number of genomic features :3904
Shape of y:(460,)
Count of 1: 126
Count of 0: 334
Running Decision Tree for phenotype glucose--fermentation




Running Random Forest for phenotype glucose--fermentation




Running CatBoost for phenotype glucose--fermentation




######### Combined report for glucose--fermentation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.728261,0.741168,0.728261,0.733958,0.65,TP=56 TN=11 FP=14FN=11,SSO:000043905__4a-hydroxytetrahydrobiopterin dehydratase (EC 4.2.1.96) // 0.13433480544626877 # SSO:000017669__Helicase // 0.08761688915229761 # SSO:000029482__c-type cytochrome // 0.08229547920156625 # SSO:000008391__Tryptophan 23-dioxygenase (EC 1.13.11.11) // 0.06864645905638687 # SSO:000029307__aldolase // 0.04662220631349394 # SSO:000017481__GtrA family protein // 0.04481054277503661 # SSO:000013541__Exopolysaccharide biosynthesis protein // 0.04185783015172489 # SSO:000023742__Pyridoxal phosphate-dependent aminotransferase // 0.037075960022701346 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.032994932665164615 # SSO:000011951__Carboxylate-amine ligase // 0.031009114672145347 # SSO:000043641__N4-(beta-N-acetylglucosaminyl)-L-asparaginase (EC 3.5.1.26) // 0.03030777974485678 # SSO:000024197__Rieske (2Fe-2S) protein // 0.03013308052414819 # SSO:000002619__Exodeoxyribonuclease III (EC 3.1.11.2) // 0.02084565485509565 # SSO:000002401__Ectoine hydroxylase (EC 1.17.-.-) // 0.019198456153708902 # SSO:000000701__Acetyl-CoA C-acyltransferase (EC 2.3.1.16) // 0.011790040313615049 # SSO:000012783__DNA methylase // 0.01154513870037242 # SSO:000019395__Mechanosensitive ion channel family protein // 0.011468560784103422 # SSO:000009528__2-dehydro-3-deoxygalactonokinase (EC 2.7.1.58) // 0.01096215207562467 # SSO:000004419__Leucine dehydrogenase (EC 1.4.1.9) // 0.010552363465038538 # SSO:000016591__FMN-binding protein // 0.009824976999507051
Random Forest,0.815217,0.807671,0.815217,0.810395,0.722727,TP=63 TN=12 FP=7FN=10,SSO:000012628__Cytochrome c oxidase subunit I // 0.007006951973240018 # SSO:000043905__4a-hydroxytetrahydrobiopterin dehydratase (EC 4.2.1.96) // 0.006925810667775965 # SSO:000000701__Acetyl-CoA C-acyltransferase (EC 2.3.1.16) // 0.006845451610167713 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.005831674566746017 # SSO:000003525__Hydroxymethylglutaryl-CoA lyase (EC 4.1.3.4) // 0.005542874880602437 # SSO:000010682__Anti-sigma factor // 0.005355054612456637 # SSO:000008391__Tryptophan 23-dioxygenase (EC 1.13.11.11) // 0.004864483045563281 # SSO:000000670__Acetate kinase (EC 2.7.2.1) // 0.004753763123367398 # SSO:000029482__c-type cytochrome // 0.0046441710257468195 # SSO:000004091__Kynureninase (EC 3.7.1.3) // 0.004516578674378282 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.00441586899681958 # SSO:000029372__anaerobic ribonucleoside-triphosphate reductase activating protein // 0.004412218721346508 # SSO:000009028__Protein-PII uridylyltransferase (EC 2.7.7.59) // 0.0038920894980209605 # SSO:000000380__4-hydroxyphenylpyruvate dioxygenase (EC 1.13.11.27) // 0.0037981495729496866 # SSO:000004152__L-histidine N(alpha)-methyltransferase (EC 2.1.1.44) // 0.0033985553989387125 # SSO:000005968__Phosphogluconate dehydratase (EC 4.2.1.12) // 0.0031314841225788693 # SSO:000017864__Homogentisate 12-dioxygenase (EC 1.13.11.5) // 0.0030679755206591336 # SSO:000012589__Cytochrome P-450 // 0.0030635418109571807 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.002843827808241371 # SSO:000002299__Dihydroneopterin aldolase (EC 4.1.2.25) // 0.0027834046812217936
CatBoost,0.836957,0.830679,0.836957,0.832702,0.752597,TP=64 TN=13 FP=6FN=9,SSO:000043905__4a-hydroxytetrahydrobiopterin dehydratase (EC 4.2.1.96) // 0.3799543105587642 # SSO:000000701__Acetyl-CoA C-acyltransferase (EC 2.3.1.16) // 0.3686515207242705 # SSO:000026362__Type II secretion system protein // 0.32019821644443847 # SSO:000009961__ADP-ribosylglycohydrolase family protein // 0.28227950380909045 # SSO:000003525__Hydroxymethylglutaryl-CoA lyase (EC 4.1.3.4) // 0.2591929247486283 # SSO:000011529__CDP-alcohol phosphatidyltransferase family protein // 0.25185246755118945 # SSO:000010682__Anti-sigma factor // 0.2404193817361676 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.22759146265085067 # SSO:000017669__Helicase // 0.20637596256731147 # SSO:000025571__Thermonuclease family protein // 0.20609124573181503 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.20088437342140222 # SSO:000034830__D-ribose pyranase (EC 5.4.99.62) // 0.18521026972060786 # SSO:000009590__3'-5' exonuclease // 0.1826264033522657 # SSO:000033963__Peptide-methionine (R)-S-oxide reductase MsrB (EC 1.8.4.12) // 0.18070899459495227 # SSO:000012589__Cytochrome P-450 // 0.16532713164785914 # SSO:000001193__Beta-phosphoglucomutase (EC 5.4.2.6) // 0.16528065227488004 # SSO:000006048__Phosphoribulokinase (EC 2.7.1.19) // 0.15368889276529693 # SSO:000018447__Inorganic phosphate transporter // 0.14595094501235667 # SSO:000017470__GreA/GreB family elongation factor // 0.13715283266218367 # SSO:000007612__Sodium/glutamate symporter // 0.1362869945228766


Numer of genomes:915
Number of genomic features :4301
Shape of y:(915,)
Count of 1: 330
Count of 0: 585
Running Decision Tree for phenotype starch--hydrolysis




Running Random Forest for phenotype starch--hydrolysis




Running CatBoost for phenotype starch--hydrolysis




######### Combined report for starch--hydrolysis#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.617486,0.656359,0.617486,0.628334,0.613008,TP=77 TN=36 FP=46FN=24,SSO:000008877__Uridine kinase (EC 2.7.1.48) // 0.1346413288411643 # SSO:000017381__Glycosyl hydrolase // 0.09142390141767553 # SSO:000037230__Nitronate monooxygenase (EC 1.13.12.16) // 0.04353204156311656 # SSO:000018777__L-rhamnose isomerase // 0.03648429688161093 # SSO:000009334__tRNA (cytidine(34)-2'-O)-methyltransferase (EC 2.1.1.207) // 0.03017332541185831 # SSO:000012992__DUF350 domain-containing protein // 0.030154487535540262 # SSO:000013427__Enterochelin esterase // 0.027239897780429147 # SSO:000005298__Nicotinamidase (EC 3.5.1.19) // 0.025839350405778953 # SSO:000000948__Amino acid permease // 0.023316935988921095 # SSO:000043066__hypoxanthine phosphoribosyltransferase (EC 2.4.2.8) // 0.02075597374708793 # SSO:000042727__UDP-glucose---hexose-1-phosphate uridylyltransferase (EC 2.7.7.12) // 0.020736882326542447 # SSO:000021298__Penicillin-binding protein // 0.019884899603386432 # SSO:000025253__Sugar transferase // 0.018622947852480864 # SSO:000021696__Phosphonate ABC transporter ATP-binding protein // 0.018467494469374648 # SSO:000030003__glucose-6-phosphate dehydrogenase // 0.016717687934047272 # SSO:000005113__N-acetylmannosamine-6-phosphate 2-epimerase (EC 5.1.3.9) // 0.016640941856357755 # SSO:000025628__Thioredoxin family protein // 0.015782671229413157 # SSO:000043239__Cytochrome c oxidase subunit 2a (EC 1.9.3.1) // 0.014405009080508123 # SSO:000012473__Cytochrome c // 0.013921479610587012 # SSO:000007074__Ribonuclease E inhibitor RraB // 0.013034213317004049
Random Forest,0.655738,0.683839,0.655738,0.664319,0.645732,TP=83 TN=37 FP=40FN=23,SSO:000008877__Uridine kinase (EC 2.7.1.48) // 0.006666598610656029 # SSO:000029508__carbohydrate-binding protein // 0.005990098754986703 # SSO:000005955__Phosphoenolpyruvate carboxylase (EC 4.1.1.31) // 0.004994159962477871 # SSO:000003006__Galactokinase (EC 2.7.1.6) // 0.004747132891327944 # SSO:000042727__UDP-glucose---hexose-1-phosphate uridylyltransferase (EC 2.7.7.12) // 0.0040956300714935275 # SSO:000017381__Glycosyl hydrolase // 0.003837952506428972 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.003643796610204673 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.0034504449443399235 # SSO:000000670__Acetate kinase (EC 2.7.2.1) // 0.0033376753379072673 # SSO:000024550__Serine O-acetyltransferase // 0.0029836768173582087 # SSO:000024558__Serine protease // 0.0028576989993993533 # SSO:000000298__3-hydroxyisobutyrate dehydrogenase (EC 1.1.1.31) // 0.00272609638348892 # SSO:000013144__DinB family protein // 0.0024454933821839073 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.0023499757795366885 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.002340626611521027 # SSO:000003212__Glutathione S-transferase family protein // 0.002290864029882133 # SSO:000029108__YaiI/YqxD family protein // 0.002185248141409638 # SSO:000004673__Malate synthase G (EC 2.3.3.9) // 0.0020925962611913936 # SSO:000020231__N-formylglutamate amidohydrolase // 0.002082134504940177 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.002054107158913391
CatBoost,0.612022,0.634165,0.612022,0.62016,0.587602,TP=81 TN=31 FP=42FN=29,SSO:000005955__Phosphoenolpyruvate carboxylase (EC 4.1.1.31) // 0.4905843930018286 # SSO:000008877__Uridine kinase (EC 2.7.1.48) // 0.28165889053301413 # SSO:000024558__Serine protease // 0.25554272930850763 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.21020450545643113 # SSO:000042727__UDP-glucose---hexose-1-phosphate uridylyltransferase (EC 2.7.7.12) // 0.20799144468155428 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.17285372804404228 # SSO:000029508__carbohydrate-binding protein // 0.1649911033795131 # SSO:000017381__Glycosyl hydrolase // 0.15759899449254405 # SSO:000013094__Dienelactone hydrolase // 0.13544080399698982 # SSO:000031462__plasmid stabilization protein // 0.12593769776075556 # SSO:000020231__N-formylglutamate amidohydrolase // 0.12053133686459162 # SSO:000009090__dTDP-4-dehydrorhamnose reductase (EC 1.1.1.133) // 0.11967394547877563 # SSO:000024584__Serine/threonine protein kinase // 0.1191637738732701 # SSO:000025068__Sporulation protein // 0.10339379887396781 # SSO:000039438__TetR family transcriptional regulator // 0.10278112366336983 # SSO:000006915__RNA polymerase sigma-70 factor (EC 2.7.7.6 ) // 0.09594466860095677 # SSO:000021897__Polysaccharide deacetylase // 0.0948200988642731 # SSO:000044308__adenosylmethionine---8-amino-7-oxononanoate transaminase (EC 2.6.1.62) // 0.09338210108924883 # SSO:000020300__NAD(P)H-quinone oxidoreductase subunit 3 (EC 1.6.5.2) // 0.09320816159689327 # SSO:000024328__SET domain-containing protein // 0.09043383881198418


Numer of genomes:152
Number of genomic features :3224
Shape of y:(152,)
Count of 1: 76
Count of 0: 76
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:191
Number of genomic features :3311
Shape of y:(191,)
Count of 1: 74
Count of 0: 117
Running Decision Tree for phenotype propionate--assimilation




Running Random Forest for phenotype propionate--assimilation




Running CatBoost for phenotype propionate--assimilation




######### Combined report for propionate--assimilation#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.641026,0.714286,0.641026,0.650624,0.673077,TP=15 TN=10 FP=11FN=3,SSO:000007776__Succinate dehydrogenase cytochrome b-556 subunit // 0.22179186922790925 # SSO:000006305__Prephenate dehydrogenase (EC 1.3.1.12) // 0.10082688531549845 # SSO:000031196__nucleotidyltransferase family protein // 0.09507756147327165 # SSO:000013044__Dehydrogenase // 0.05045955389832164 # SSO:000003525__Hydroxymethylglutaryl-CoA lyase (EC 4.1.3.4) // 0.04405837388374152 # SSO:000031342__peptidase M42 // 0.04372677780454536 # SSO:000012708__D-sedoheptulose 7-phosphate isomerase (EC 5.3.1.28) // 0.043123402756286205 # SSO:000034061__3-hydroxybenzoate 6-monooxygenase (EC 1.14.13.24) // 0.03245584557875516 # SSO:000003497__Hydantoin racemase (EC 5.1.99.5) // 0.026152762357317096 # SSO:000008864__Urease accessory protein UreE // 0.022511935685955296 # SSO:000018641__Isochorismatase family protein // 0.01607452097648176 # SSO:000021342__Peptidase M48 // 0.014819339373811174 # SSO:000043419__Cobalamin biosynthesis protein CbiG // 0.013675213820689721 # SSO:000025605__Thiol-disulfide isomerase // 0.004304927099044746 # SSO:000012882__DNA topoisomerase IV subunit B // 0.0 # SSO:000005945__Phosphatidylserine decarboxylase (EC 4.1.1.65) // 0.0 # SSO:000004933__Molybdenum ABC transporter ATP-binding protein ModC // 0.0 # SSO:000021269__Patatin family protein // 0.0 # SSO:000018213__IS630 family transposase // 0.0 # SSO:000021611__Phosphate starvation-inducible protein PsiF // 0.0
Random Forest,0.717949,0.756614,0.717949,0.725692,0.730769,TP=18 TN=10 FP=8FN=3,SSO:000007776__Succinate dehydrogenase cytochrome b-556 subunit // 0.010456762908958597 # SSO:000001478__Cell division protein ZapE // 0.008141407834926883 # SSO:000003160__Glutamate--cysteine ligase (EC 6.3.2.2) // 0.007833380395150686 # SSO:000000563__ATP-dependent Clp protease ATP-binding subunit ClpA // 0.00738972319913372 # SSO:000000298__3-hydroxyisobutyrate dehydrogenase (EC 1.1.1.31) // 0.006490904825459081 # SSO:000003209__Glutathione S-transferase (EC 2.5.1.18) // 0.005778785075312017 # SSO:000000670__Acetate kinase (EC 2.7.2.1) // 0.005043847448794339 # SSO:000006305__Prephenate dehydrogenase (EC 1.3.1.12) // 0.005039335618150567 # SSO:000010610__Aminopeptidase N // 0.004671057457866073 # SSO:000000464__8-amino-7-oxononanoate synthase (EC 2.3.1.47) // 0.004581867376909285 # SSO:000021359__Peptidase T (EC 3.4.11.-) // 0.004333371525664485 # SSO:000006033__Phosphoribosyl-AMP cyclohydrolase (EC 3.5.4.19) // 0.0042890097915744795 # SSO:000007779__Succinate dehydrogenase flavoprotein subunit (EC 1.3.99.1) // 0.0038373419794033928 # SSO:000009028__Protein-PII uridylyltransferase (EC 2.7.7.59) // 0.003789813163067818 # SSO:000002447__Electron transfer flavoprotein-ubiquinone oxidoreductase (EC 1.5.5.1) // 0.0034900804265591825 # SSO:000043061__asparagine---tRNA ligase (EC 6.1.1.22) // 0.003420070633977794 # SSO:000044308__adenosylmethionine---8-amino-7-oxononanoate transaminase (EC 2.6.1.62) // 0.00339105811557336 # SSO:000010266__Acyl-CoA synthetase // 0.003388032118170776 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.003304240350641885 # SSO:000001157__Bacterioferritin (EC 1.16.3.1) // 0.0032941394046459957
CatBoost,0.717949,0.756614,0.717949,0.725692,0.730769,TP=18 TN=10 FP=8FN=3,SSO:000007776__Succinate dehydrogenase cytochrome b-556 subunit // 0.42118718698641044 # SSO:000006305__Prephenate dehydrogenase (EC 1.3.1.12) // 0.3954098754704544 # SSO:000000670__Acetate kinase (EC 2.7.2.1) // 0.37668342069148447 # SSO:000003209__Glutathione S-transferase (EC 2.5.1.18) // 0.36479922494763 # SSO:000031342__peptidase M42 // 0.32258788450076714 # SSO:000044308__adenosylmethionine---8-amino-7-oxononanoate transaminase (EC 2.6.1.62) // 0.2973081380586809 # SSO:000003160__Glutamate--cysteine ligase (EC 6.3.2.2) // 0.2943007480078069 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.292163134815713 # SSO:000000298__3-hydroxyisobutyrate dehydrogenase (EC 1.1.1.31) // 0.24137553853416752 # SSO:000001478__Cell division protein ZapE // 0.2065183645433451 # SSO:000034061__3-hydroxybenzoate 6-monooxygenase (EC 1.14.13.24) // 0.2038980109560627 # SSO:000002447__Electron transfer flavoprotein-ubiquinone oxidoreductase (EC 1.5.5.1) // 0.1795144263411733 # SSO:000010266__Acyl-CoA synthetase // 0.17310340528388282 # SSO:000042404__cobyrinate ac-diamide synthase (EC 6.3.5.11) // 0.15331758028965956 # SSO:000017666__Heavy metal transport/detoxification protein // 0.1419879304986772 # SSO:000043378__Gamma-glutamyl cyclotransferase (EC 2.3.2.4) // 0.1396792370550213 # SSO:000021359__Peptidase T (EC 3.4.11.-) // 0.13075710994586134 # SSO:000031196__nucleotidyltransferase family protein // 0.13061922482274896 # SSO:000001067__Arsenate reductase (EC 1.20.4.1) // 0.1284465903081066 # SSO:000020771__Oligopeptide transporter OPT family // 0.12636653821881408


Numer of genomes:842
Number of genomic features :4125
Shape of y:(842,)
Count of 1: 188
Count of 0: 654
Running Decision Tree for phenotype glycerol--builds_acid_from




Running Random Forest for phenotype glycerol--builds_acid_from




Running CatBoost for phenotype glycerol--builds_acid_from




######### Combined report for glycerol--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.792899,0.786317,0.792899,0.788064,0.731176,TP=105 TN=29 FP=14FN=21,SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.16740435502540765 # SSO:000006304__Prephenate dehydratase (EC 4.2.1.51) // 0.07816708525957881 # SSO:000007416__Septum formation protein Maf // 0.07098970955177514 # SSO:000029128__YeeE/YedE family protein // 0.04197939163996505 # SSO:000012207__Choloylglycine hydrolase family protein // 0.03753853772461104 # SSO:000008817__Undecaprenyl-diphosphatase (EC 3.6.1.27) // 0.03248137462903843 # SSO:000018792__L-threonine 3-dehydrogenase // 0.024981817976874335 # SSO:000001303__CDP-diacylglycerol--serine O-phosphatidyltransferase (EC 2.7.8.8) // 0.024394180925736957 # SSO:000009959__ADP-ribosylglycohydrolase // 0.017808091440078076 # SSO:000001052__Argininosuccinate synthase (EC 6.3.4.5) // 0.01774707006479694 # SSO:000029430__bacteriocin // 0.017433302400483127 # SSO:000004595__Lysine 23-aminomutase (EC 5.4.3.2) // 0.015492487012474033 # SSO:000012915__Transcriptional regulator // 0.015492403637822599 # SSO:000004710__Mannose-1-phosphate guanylyltransferase (EC 2.7.7.13) // 0.015347679892790273 # SSO:000003426__Hexokinase (EC 2.7.1.1) // 0.014746880117679552 # SSO:000002683__Fatty acid desaturase (EC 1.14.19.1) // 0.012638107504550583 # SSO:000034727__Autonomous glycyl radical cofactor GrcA // 0.012603038020873494 # SSO:000024142__Ribonuclease // 0.012593179089220616 # SSO:000006038__Phosphoribosylaminoimidazole-succinocarboxamide synthase (EC 6.3.2.6) // 0.01180818786336571 # SSO:000001067__Arsenate reductase (EC 1.20.4.1) // 0.011514029671709808
Random Forest,0.846154,0.846154,0.846154,0.846154,0.815378,TP=106 TN=37 FP=13FN=13,SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.007805326080564223 # SSO:000006525__Protein translocase subunit SecF // 0.00776858143057873 # SSO:000007094__Ribonuclease PH (EC 2.7.7.56) // 0.006862667186297533 # SSO:000010953__BCCT family transporter // 0.0067081617615654026 # SSO:000007779__Succinate dehydrogenase flavoprotein subunit (EC 1.3.99.1) // 0.006344008670030935 # SSO:000024550__Serine O-acetyltransferase // 0.0057302467228024815 # SSO:000019235__MOSC domain containing protein // 0.004819718913597385 # SSO:000006304__Prephenate dehydratase (EC 4.2.1.51) // 0.004641994851798924 # SSO:000043047__thiamine-phosphate kinase (EC 2.7.4.16) // 0.004328923284500068 # SSO:000006523__Protein translocase subunit SecD // 0.0042470116013289295 # SSO:000029028__Xaa-Pro dipeptidyl-peptidase // 0.004205375534897663 # SSO:000000035__14-dihydroxy-2-naphthoate polyprenyltransferase (EC 2.5.1.74) // 0.004102986943296001 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.00400924276306381 # SSO:000010813__Leucyl aminopeptidase (EC 3.4.11.10) // 0.003925463575325617 # SSO:000004504__Lipoyl synthase (EC 2.8.1.8) // 0.0038258853465982833 # SSO:000008887__Uroporphyrinogen-III synthase (EC 4.2.1.75) // 0.003613214282305163 # SSO:000004670__Malate dehydrogenase (EC 1.1.1.37) // 0.003522001917859366 # SSO:000005425__Nucleoside diphosphate kinase (EC 2.7.4.6) // 0.003490010798649406 # SSO:000010610__Aminopeptidase N // 0.0034107045348545357 # SSO:000006161__Polyribonucleotide nucleotidyltransferase (EC 2.7.7.8) // 0.0033215315251469875
CatBoost,0.840237,0.836193,0.840237,0.836506,0.787983,TP=109 TN=33 FP=10FN=17,SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.4658931528383868 # SSO:000010953__BCCT family transporter // 0.45486303295802966 # SSO:000000035__14-dihydroxy-2-naphthoate polyprenyltransferase (EC 2.5.1.74) // 0.25118286373088045 # SSO:000000886__Alkaline phosphatase (EC 3.1.3.1) // 0.2168664257218155 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.1952562557670939 # SSO:000007094__Ribonuclease PH (EC 2.7.7.56) // 0.19321308834746972 # SSO:000002985__GTP cyclohydrolase II (EC 3.5.4.25) // 0.14370129785344457 # SSO:000009299__rhodanese-related sulfurtransferase (EC 3.1.2.6 ) // 0.12866793422953265 # SSO:000012210__Chorismate mutase // 0.12843064640440235 # SSO:000018905__Leucine-rich repeat protein // 0.12594495850835652 # SSO:000004880__Methylglyoxal synthase (EC 4.2.3.3) // 0.1108381105455052 # SSO:000029128__YeeE/YedE family protein // 0.1092632241819177 # SSO:000004670__Malate dehydrogenase (EC 1.1.1.37) // 0.1050354688265017 # SSO:000018889__LemA family protein // 0.10460524721548561 # SSO:000010266__Acyl-CoA synthetase // 0.10449420590244962 # SSO:000010813__Leucyl aminopeptidase (EC 3.4.11.10) // 0.103879189215332 # SSO:000043034__inositol 2-dehydrogenase (EC 1.1.1.18) // 0.10346103870270947 # SSO:000020511__Nicotinamide mononucleotide transporter // 0.10114256878651996 # SSO:000013192__DnaD domain protein // 0.10066581666619448 # SSO:000009959__ADP-ribosylglycohydrolase // 0.09324621257806132


Numer of genomes:953
Number of genomic features :4244
Shape of y:(953,)
Count of 1: 378
Count of 0: 575
Running Decision Tree for phenotype D-xylose--builds_acid_from




Running Random Forest for phenotype D-xylose--builds_acid_from




Running CatBoost for phenotype D-xylose--builds_acid_from




######### Combined report for D-xylose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.769634,0.784663,0.769634,0.773032,0.773022,TP=92 TN=55 FP=29FN=15,SSO:000029073__Xylose isomerase // 0.3442955269024262 # SSO:000000670__Acetate kinase (EC 2.7.2.1) // 0.06696440315524989 # SSO:000009310__sortase // 0.03410439541983924 # SSO:000033834__xylulokinase // 0.02946552138646909 # SSO:000025241__Sugar kinase // 0.021905289765601847 # SSO:000000870__Aldose 1-epimerase (EC 5.1.3.3) // 0.017097457155967836 # SSO:000020510__Nicotinamide mononucleotide adenylyltransferase // 0.0135317289781363 # SSO:000017244__Glutamate 23-aminomutase (EC 5.4.3.9) // 0.013325833962860838 # SSO:000007633__Spheroidene monooxygenase (EC 1.14.15.9) // 0.012915208407589664 # SSO:000026174__Trehalose utilization protein ThuA // 0.011134756494807142 # SSO:000028997__WGR domain-containing protein // 0.010871179534474968 # SSO:000020611__Non-ribosomal peptide synthetase // 0.010266408258958672 # SSO:000011528__CDP-alcohol phosphatidyltransferase // 0.009120864248376754 # SSO:000012074__Cell wall-binding protein // 0.008236626143080704 # SSO:000019294__Maleate cis-trans isomerase // 0.008012172756003145 # SSO:000017027__GTPase // 0.007412189311772468 # SSO:000010855__Asparaginase // 0.007030819233703779 # SSO:000013147__Dinitrogenase iron-molybdenum cofactor biosynthesis protein // 0.007010359418797352 # SSO:000023748__Pyridoxal-dependent decarboxylase // 0.006840833432913417 # SSO:000044126__methanol dehydrogenase (EC 1.1.1.244) // 0.0063059606905672486
Random Forest,0.816754,0.83913,0.816754,0.819896,0.831287,TP=94 TN=62 FP=27FN=8,SSO:000029073__Xylose isomerase // 0.030591368613829013 # SSO:000033834__xylulokinase // 0.022207649686358986 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.014593004281605534 # SSO:000029337__alpha-xylosidase (EC 3.2.1.177) // 0.009922859710939314 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.009438040954329239 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.008570142394479834 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.008504806367413062 # SSO:000010479__Alpha-N-arabinofuranosidase( EC:3.2.1.55 ) // 0.0070287664942633695 # SSO:000025241__Sugar kinase // 0.0053922683088730595 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.004795350482497642 # SSO:000000457__6-phosphogluconolactonase (EC 3.1.1.31) // 0.004459566468551564 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.004251736666517101 # SSO:000010516__Endo-14-beta-xylanase (EC 3.2.1.8) // 0.004174927070195775 # SSO:000039420__Tagaturonate reductase (EC 1.1.1.58) // 0.0036983326281945124 # SSO:000000936__Altronate dehydratase (EC 4.2.1.7) // 0.003539644840846396 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.0034381694559467916 # SSO:000003118__Glucose 1-dehydrogenase (EC 1.1.1.47) // 0.0034091037008358006 # SSO:000033632__transglutaminase family protein // 0.0032918991779365904 # SSO:000000453__6-phosphofructokinase (EC 2.7.1.11) // 0.003144327517504734 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.0031295248857197795
CatBoost,0.82199,0.842514,0.82199,0.824977,0.835419,TP=95 TN=62 FP=26FN=8,SSO:000029073__Xylose isomerase // 3.3622294851409937 # SSO:000033834__xylulokinase // 0.4609319329734219 # SSO:000029337__alpha-xylosidase (EC 3.2.1.177) // 0.26690987519149856 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.2063198518869754 # SSO:000029597__class II aldolase/adducin family protein // 0.12924890552762203 # SSO:000030003__glucose-6-phosphate dehydrogenase // 0.12285349947570798 # SSO:000009310__sortase // 0.11885500416593459 # SSO:000019052__Low molecular weight phosphotyrosine protein phosphatase // 0.1032179233449624 # SSO:000039420__Tagaturonate reductase (EC 1.1.1.58) // 0.0921225696649255 # SSO:000013035__DedA family protein // 0.0912123745278888 # SSO:000005185__NAD-dependent dihydropyrimidine dehydrogenase subunit PreA (EC 1.3.1.1) // 0.08916612548016585 # SSO:000033632__transglutaminase family protein // 0.07565159298756124 # SSO:000025241__Sugar kinase // 0.0655219210588353 # SSO:000018191__IS110 family transposase // 0.0653503270204461 # SSO:000018641__Isochorismatase family protein // 0.06009734175083912 # SSO:000033163__sel1-like repeat protein // 0.0563540199693457 # SSO:000007097__Ribonuclease Z (EC 3.1.26.11) // 0.05624244389307702 # SSO:000033511__thiolase family protein // 0.0550483830131451 # SSO:000042678__glucuronate isomerase (EC 5.3.1.12) // 0.054651906032381084 # SSO:000005916__Phosphate acetyltransferase (EC 2.3.1.8) // 0.05448818939126564


Numer of genomes:822
Number of genomic features :4066
Shape of y:(822,)
Count of 1: 362
Count of 0: 460
The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.


Numer of genomes:144
Number of genomic features :3269
Shape of y:(144,)
Count of 1: 137
Count of 0: 7
Running Decision Tree for phenotype arginine--carbon_source




Running Random Forest for phenotype arginine--carbon_source




Running CatBoost for phenotype arginine--carbon_source




######### Combined report for arginine--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.931034,0.866825,0.931034,0.897783,0.5,TP=0 TN=27 FP=2FN=0,SSO:000004100__LL-diaminopimelate aminotransferase (EC 2.6.1.83) // 0.2503133297053052 # SSO:000003439__Histidine ammonia-lyase (EC 4.3.1.3) // 0.21480153453264816 # SSO:000006816__Pyridoxine 5'-phosphate synthase (EC 2.6.99.2) // 0.11756404144347542 # SSO:000020125__Murein DD-endopeptidase MepM // 0.0 # SSO:000002915__Formylmethanofuran dehydrogenase subunit B (EC 1.2.99.5) // 0.0 # SSO:000024245__Rubredoxin-type Fe(Cys)4 protein // 0.0 # SSO:000009632__3-methyladenine DNA glycosylase // 0.0 # SSO:000030910__kinase-like protein // 0.0 # SSO:000001489__Cell filamentation protein fic // 0.0 # SSO:000009606__3-carboxyethylcatechol 23-dioxygenase (EC 1.13.11.16) // 0.0 # SSO:000036684__Long-chain-fatty-acyl-CoA reductase (EC 1.2.1.50) // 0.0 # SSO:000002791__Flagellar assembly protein FliH // 0.0 # SSO:000029327__alpha-L-rhamnosidase // 0.0 # SSO:000029062__Xylan 14-beta-xylosidase (EC 3.2.1.37) // 0.0 # SSO:000021345__Peptidase M61 // 0.0 # SSO:000013192__DnaD domain protein // 0.0 # SSO:000010833__Arylesterase // 0.0 # SSO:000012875__DNA sulfur modification protein DndB // 0.0 # SSO:000029894__ethanolamine utilization protein EutH // 0.0 # SSO:000004608__Lysyl endopeptidase (EC 3.4.21.50) // 0.0
Random Forest,0.931034,0.866825,0.931034,0.897783,0.5,TP=0 TN=27 FP=2FN=0,SSO:000004100__LL-diaminopimelate aminotransferase (EC 2.6.1.83) // 0.018066290323950476 # SSO:000003439__Histidine ammonia-lyase (EC 4.3.1.3) // 0.012971017449762133 # SSO:000008881__Urocanate hydratase (EC 4.2.1.49) // 0.012151621566845742 # SSO:000003696__Imidazolonepropionase (EC 3.5.2.7) // 0.011286793252432469 # SSO:000017850__HlyD family secretion protein // 0.0112476744736006 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.010933199575051051 # SSO:000007111__Ribose 5-phosphate isomerase B (EC 5.3.1.6) // 0.009377698025891489 # SSO:000004938__Molybdenum cofactor guanylyltransferase (EC 2.7.7.77) // 0.00835011676723415 # SSO:000002027__D-glycero-beta-D-manno-heptose 7-phosphate kinase // 0.007903540055293576 # SSO:000004091__Kynureninase (EC 3.7.1.3) // 0.007635389338262921 # SSO:000010953__BCCT family transporter // 0.0073776531564003645 # SSO:000021529__Phage portal protein // 0.0067196866719957445 # SSO:000030016__glutaredoxin family protein // 0.006375446839275551 # SSO:000008066__Thymidine phosphorylase (EC 2.4.2.4) // 0.006374159489657165 # SSO:000021874__Polyphosphate kinase 2 (EC 2.7.4.1) // 0.006213442175016545 # SSO:000042222__Arginyltransferase( EC:2.3.2.8 ) // 0.006196435367556238 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.005974366866365283 # SSO:000019998__Molybdenum ABC transporter ATP-binding protein // 0.005754613110021036 # SSO:000005105__N-acetylglucosamine-6-phosphate deacetylase (EC 3.5.1.25) // 0.005526065696629238 # SSO:000022976__PspA/IM30 family protein // 0.00535300400874009
CatBoost,0.931034,0.866825,0.931034,0.897783,0.5,TP=0 TN=27 FP=2FN=0,SSO:000004100__LL-diaminopimelate aminotransferase (EC 2.6.1.83) // 1.2605561628040516 # SSO:000008881__Urocanate hydratase (EC 4.2.1.49) // 0.8730309771571986 # SSO:000003696__Imidazolonepropionase (EC 3.5.2.7) // 0.7720261930257543 # SSO:000007111__Ribose 5-phosphate isomerase B (EC 5.3.1.6) // 0.5586573583015422 # SSO:000002027__D-glycero-beta-D-manno-heptose 7-phosphate kinase // 0.4412138029257228 # SSO:000004938__Molybdenum cofactor guanylyltransferase (EC 2.7.7.77) // 0.42906954947687953 # SSO:000017850__HlyD family secretion protein // 0.41931488594330385 # SSO:000006816__Pyridoxine 5'-phosphate synthase (EC 2.6.99.2) // 0.3599489297312036 # SSO:000010953__BCCT family transporter // 0.2780247117006133 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.23639282868808634 # SSO:000018210__IS5 family transposase // 0.1850441951738448 # SSO:000003439__Histidine ammonia-lyase (EC 4.3.1.3) // 0.16055134836857377 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.15747050342467625 # SSO:000023973__TonB family protein // 0.14940577513791728 # SSO:000004091__Kynureninase (EC 3.7.1.3) // 0.14858027689256914 # SSO:000042222__Arginyltransferase( EC:2.3.2.8 ) // 0.1464448511176619 # SSO:000021760__Phytanoyl-CoA dioxygenase // 0.1310594036489768 # SSO:000011545__CHAP domain containing protein // 0.11582994436030762 # SSO:000020584__Nitroreductase family protein // 0.1088268562110787 # SSO:000010058__ATP-dependent helicase HrpB // 0.09539146254750464


Numer of genomes:1606
Number of genomic features :4937
Shape of y:(1606,)
Count of 1: 1582
Count of 0: 24
Running Decision Tree for phenotype glucose--carbon_source




Running Random Forest for phenotype glucose--carbon_source




Running CatBoost for phenotype glucose--carbon_source




######### Combined report for glucose--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.975155,0.981308,0.975155,0.978222,0.492163,TP=0 TN=314 FP=3FN=5,SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.08861629424103584 # SSO:000021298__Penicillin-binding protein // 0.06710493114415102 # SSO:000005137__N-methyl-L-tryptophan oxidase (EC 1.5.3.-) // 0.05120394138608538 # SSO:000007801__Sulfatase (EC 3.1.6.-) // 0.04528977291977059 # SSO:000020367__NERD domain-containing protein // 0.04305875640904669 # SSO:000020611__Non-ribosomal peptide synthetase // 0.041751790415321954 # SSO:000000142__2-dehydro-3-deoxy-6-phosphogalactonate aldolase (EC 4.1.2.21) // 0.035459949638112324 # SSO:000025993__Transferase // 0.03543915968808557 # SSO:000006977__Regulatory protein RecX // 0.03272443084207444 # SSO:000017510__HAD family hydrolase // 0.032107233046712626 # SSO:000013618__FAD-binding protein // 0.02925976581475544 # SSO:000005240__NADP-specific glutamate dehydrogenase (EC 1.4.1.4) // 0.026275529967155455 # SSO:000020342__NADH:flavin oxidoreductase/NADH oxidase // 0.022734881631699552 # SSO:000000843__Agmatinase (EC 3.5.3.11) // 0.020716576539652826 # SSO:000017059__GcrA cell cycle regulator // 0.019809564707045573 # SSO:000020029__Monooxygenase // 0.019210137399011333 # SSO:000010516__Endo-14-beta-xylanase (EC 3.2.1.8) // 0.016235153317372766 # SSO:000043116__Shikimate 5-dehydrogenase (EC 1.1.1.25) // 0.011463664953807363 # SSO:000006885__RNA 3'-terminal phosphate cyclase (EC 6.5.1.4) // 0.009194050816769992 # SSO:000006212__Precorrin-6A reductase (EC 1.3.1.54) // 0.006203983215759477
Random Forest,0.990683,0.981453,0.990683,0.986047,0.5,TP=0 TN=319 FP=3FN=0,SSO:000020611__Non-ribosomal peptide synthetase // 0.006623239973605561 # SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.005541347585011152 # SSO:000027090__beta-glucosidase (EC 3.2.1.21) // 0.004334339395381151 # SSO:000009528__2-dehydro-3-deoxygalactonokinase (EC 2.7.1.58) // 0.004092951823600686 # SSO:000005939__Phosphatidylglycerophosphatase A (EC 3.1.3.27) // 0.0038695905788486335 # SSO:000001198__Betaine aldehyde dehydrogenase (EC 1.2.1.8) // 0.0038092123064326277 # SSO:000042917__5-dehydro-2-deoxygluconokinase (EC 2.7.1.92) // 0.003754427429785813 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.0037372964868710003 # SSO:000000142__2-dehydro-3-deoxy-6-phosphogalactonate aldolase (EC 4.1.2.21) // 0.003633937427942448 # SSO:000001856__Cysteine desulfurase (EC 2.8.1.7) // 0.0035920187708031667 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.0035439455812216825 # SSO:000042613__N-acetylglucosamine kinase (EC 2.7.1.59) // 0.003456934124609402 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.003447632563981261 # SSO:000013265__EAL domain containing protein // 0.003417508794167908 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.003261642480344013 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.0030958012333200274 # SSO:000013106__Diguanylate cyclase // 0.0030597036464214537 # SSO:000018566__Intradiol ring-cleavage dioxygenase // 0.0029525842099893287 # SSO:000002805__Flagellar biosynthesis protein FliQ // 0.002916842799407604 # SSO:000017356__Glycogen synthase (EC 2.4.1.11) // 0.00273334781795037
CatBoost,0.990683,0.981453,0.990683,0.986047,0.5,TP=0 TN=319 FP=3FN=0,SSO:000008065__Thymidine kinase (EC 2.7.1.21) // 0.6921639449489273 # SSO:000020611__Non-ribosomal peptide synthetase // 0.5119625107200562 # SSO:000029769__cyclic nucleotide-binding domain-containing protein // 0.28437232083084124 # SSO:000024194__Rieske (2Fe-2S) domain-containing protein // 0.23399699966884882 # SSO:000005240__NADP-specific glutamate dehydrogenase (EC 1.4.1.4) // 0.2328552091548621 # SSO:000009528__2-dehydro-3-deoxygalactonokinase (EC 2.7.1.58) // 0.21639106345873715 # SSO:000013254__DsrE family protein // 0.20458437233518703 # SSO:000013106__Diguanylate cyclase // 0.18855584130479497 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.18269500379537296 # SSO:000019560__Methionine biosynthesis protein MetW // 0.18163884754679882 # SSO:000026218__Trypsin-like serine protease // 0.17684172638777795 # SSO:000016742__Fic family protein // 0.1708173743730039 # SSO:000001873__Cytidine deaminase (EC 3.5.4.5) // 0.1587299531017265 # SSO:000000420__5-deoxy-glucuronate isomerase (EC 5.3.1.-) // 0.1555296740632056 # SSO:000012290__CoA-binding protein // 0.1515416257231984 # SSO:000013265__EAL domain containing protein // 0.1503270535371358 # SSO:000000212__2-phosphosulfolactate phosphatase (EC 3.1.3.71 ) // 0.14012112841236873 # SSO:000001198__Betaine aldehyde dehydrogenase (EC 1.2.1.8) // 0.1362007494167984 # SSO:000003257__Glycerol-3-phosphate dehydrogenase (EC 1.1.5.3) // 0.13085198863133232 # SSO:000007068__Ribokinase (EC 2.7.1.15) // 0.12986233563354319


Numer of genomes:479
Number of genomic features :3844
Shape of y:(479,)
Count of 1: 444
Count of 0: 35
Running Decision Tree for phenotype sorbitol--carbon_source




Running Random Forest for phenotype sorbitol--carbon_source




Running CatBoost for phenotype sorbitol--carbon_source




######### Combined report for sorbitol--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.833333,0.892157,0.833333,0.861742,0.43956,TP=0 TN=80 FP=5FN=11,SSO:000001856__Cysteine desulfurase (EC 2.8.1.7) // 0.11772845749800054 # SSO:000000151__2-dehydropantoate 2-reductase (EC 1.1.1.169) // 0.0963230951150239 # SSO:000005240__NADP-specific glutamate dehydrogenase (EC 1.4.1.4) // 0.06248767582278011 # SSO:000005955__Phosphoenolpyruvate carboxylase (EC 4.1.1.31) // 0.057678994971070624 # SSO:000005475__O-succinylhomoserine sulfhydrylase (EC 2.5.1.48) // 0.05760586574574225 # SSO:000004108__L-2-hydroxyglutarate oxidase (EC 1.1.3.15) // 0.04880372128657171 # SSO:000002972__GDP-mannose 46-dehydratase (EC 4.2.1.47) // 0.04130652614497994 # SSO:000006156__Polynucleotide kinase (EC 2.7.1.78) // 0.035567547194874864 # SSO:000025307__Superoxide dismutase // 0.033372997107426715 # SSO:000043047__thiamine-phosphate kinase (EC 2.7.4.16) // 0.030188753775267557 # SSO:000004712__Mannose-6-phosphate isomerase (EC 5.3.1.8) // 0.023370790425690537 # SSO:000017291__Glutaminyl-peptide cyclotransferase (EC 2.3.2.5) // 0.020692585580291 # SSO:000008902__V-type ATP synthase subunit K (EC 3.6.3.14) // 0.016150198926496682 # SSO:000021284__Pectate lyase (EC 4.2.2.2) // 0.016120785374191177 # SSO:000008901__V-type ATP synthase subunit I (EC 3.6.3.14) // 0.015602887453655519 # SSO:000007424__Serine hydroxymethyltransferase (EC 2.1.2.1) // 0.014966504463316222 # SSO:000029454__beta-ketoacyl synthase // 0.014632586727007983 # SSO:000021037__PAS domain-containing protein // 0.013717368671895373 # SSO:000005994__Phosphomevalonate kinase (EC 2.7.4.2) // 0.010036955452497135 # SSO:000024552__Serine esterase // 0.0018185345495454676
Random Forest,0.947917,0.898546,0.947917,0.922571,0.5,TP=0 TN=91 FP=5FN=0,SSO:000001856__Cysteine desulfurase (EC 2.8.1.7) // 0.007444438844267571 # SSO:000017291__Glutaminyl-peptide cyclotransferase (EC 2.3.2.5) // 0.006275025014522537 # SSO:000018718__LD-transpeptidase // 0.00595201491166054 # SSO:000002148__DNA-cytosine methyltransferase (EC 2.1.1.37) // 0.005369638043844178 # SSO:000005317__Nicotinate-nucleotide--dimethylbenzimidazole phosphoribosyltransferase (EC 2.4.2.21) // 0.004592248949795225 # SSO:000042404__cobyrinate ac-diamide synthase (EC 6.3.5.11) // 0.0044514119996904465 # SSO:000010242__Acyl transferase // 0.003824245068251559 # SSO:000005188__NAD-dependent malic enzyme (EC 1.1.1.38) // 0.003761561921722891 # SSO:000004449__Lipase (EC 3.1.1.3) // 0.003690171652708534 # SSO:000003322__Glyoxylate carboligase (EC 4.1.1.47) // 0.003662604602574325 # SSO:000044159__glycine C-acetyltransferase (EC 2.3.1.29) // 0.0035395078764868405 # SSO:000019394__Mechanosensitive ion channel // 0.003501791492000509 # SSO:000003324__Guanine deaminase (EC 3.5.4.3) // 0.003474499558099211 # SSO:000030003__glucose-6-phosphate dehydrogenase // 0.0033123246748395455 # SSO:000009621__3-hydroxybutyrate dehydrogenase (EC 1.1.1.30) // 0.0033086144589147284 # SSO:000034157__56-dimethylbenzimidazole synthase (EC 1.13.11.79) // 0.0032767387201274915 # SSO:000005475__O-succinylhomoserine sulfhydrylase (EC 2.5.1.48) // 0.0031657518947785287 # SSO:000018283__IclR-family transcriptional regulator // 0.003147698218403643 # SSO:000029148__YibE/F family protein // 0.0030185532915237086 # SSO:000043092__8-oxoguanine deaminase (EC 3.5.4.32) // 0.0029452759096433904
CatBoost,0.947917,0.898546,0.947917,0.922571,0.5,TP=0 TN=91 FP=5FN=0,SSO:000001856__Cysteine desulfurase (EC 2.8.1.7) // 0.7878261302025074 # SSO:000017291__Glutaminyl-peptide cyclotransferase (EC 2.3.2.5) // 0.41403793360582025 # SSO:000002972__GDP-mannose 46-dehydratase (EC 4.2.1.47) // 0.3216527543492433 # SSO:000010242__Acyl transferase // 0.3164221006545176 # SSO:000002148__DNA-cytosine methyltransferase (EC 2.1.1.37) // 0.305731710301243 # SSO:000005317__Nicotinate-nucleotide--dimethylbenzimidazole phosphoribosyltransferase (EC 2.4.2.21) // 0.28871755428314116 # SSO:000018283__IclR-family transcriptional regulator // 0.2839068742170252 # SSO:000004449__Lipase (EC 3.1.1.3) // 0.20729982050893536 # SSO:000009621__3-hydroxybutyrate dehydrogenase (EC 1.1.1.30) // 0.20277532615553756 # SSO:000003322__Glyoxylate carboligase (EC 4.1.1.47) // 0.19753822547087288 # SSO:000010350__AhpC/TSA family protein // 0.17137369508705294 # SSO:000005188__NAD-dependent malic enzyme (EC 1.1.1.38) // 0.1655600452505322 # SSO:000018718__LD-transpeptidase // 0.16033755593895466 # SSO:000000151__2-dehydropantoate 2-reductase (EC 1.1.1.169) // 0.14577660547756469 # SSO:000031082__methylenetetrahydrofolate reductase // 0.14136781538389806 # SSO:000006574__Pterin-4-alpha-carbinolamine dehydratase (EC 4.2.1.96) // 0.13567825891969623 # SSO:000017443__Glyoxalase // 0.13421593128740106 # SSO:000033667__transposase family protein // 0.12739136103787418 # SSO:000001076__Arylamine N-acetyltransferase (EC 2.3.1.5) // 0.12712452814917788 # SSO:000035974__Fructose-6-phosphate aldolase // 0.12642206668375636


Numer of genomes:1408
Number of genomic features :4751
Shape of y:(1408,)
Count of 1: 1268
Count of 0: 140
Running Decision Tree for phenotype sucrose--carbon_source




Running Random Forest for phenotype sucrose--carbon_source




Running CatBoost for phenotype sucrose--carbon_source




######### Combined report for sucrose--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.836879,0.879677,0.836879,0.85604,0.599476,TP=7 TN=229 FP=15FN=31,SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.12015259199150427 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.05612669764654971 # SSO:000003277__Glycine oxidase ThiO (EC 1.4.3.19) // 0.05304383984747689 # SSO:000007801__Sulfatase (EC 3.1.6.-) // 0.0415079884292471 # SSO:000008961__Xanthine phosphoribosyltransferase (EC 2.4.2.22) // 0.03467171773159213 # SSO:000003500__Hydrogen peroxide-inducible genes activator // 0.02886867077243602 # SSO:000007803__Sulfate adenylyltransferase (EC 2.7.7.4) // 0.02669864288778545 # SSO:000012222__Chromosome partitioning protein ParA // 0.022727557804552698 # SSO:000020611__Non-ribosomal peptide synthetase // 0.021623321229461968 # SSO:000001301__CDP-diacylglycerol--glycerol-3-phosphate 3-phosphatidyltransferase (EC 2.7.8.5) // 0.021194039432171224 # SSO:000005515__Ornithine cyclodeaminase (EC 4.3.1.12) // 0.020565303259178557 # SSO:000012979__DUF2017 domain-containing protein // 0.020102248836425623 # SSO:000006555__Pseudaminic acid synthase (EC 4.1.3.-) // 0.02001164911217706 # SSO:000008896__V-type ATP synthase subunit D (EC 3.6.3.14) // 0.019441223267412374 # SSO:000011964__Carboxypeptidase // 0.01798251326387112 # SSO:000012053__Cell surface protein // 0.01763484563960714 # SSO:000017381__Glycosyl hydrolase // 0.016833368476588208 # SSO:000004832__Methionine ABC transporter substrate-binding protein // 0.016769274342418837 # SSO:000003124__Glucose-1-phosphate adenylyltransferase (EC 2.7.7.27) // 0.016639041438408653 # SSO:000023851__RNA-binding protein // 0.016143449589655662
Random Forest,0.929078,0.913541,0.929078,0.913459,0.607867,TP=5 TN=257 FP=17FN=3,SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.007335264331140217 # SSO:000005945__Phosphatidylserine decarboxylase (EC 4.1.1.65) // 0.0041350590514100454 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.004045335224236149 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.003306745815817485 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.0031681275606100117 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.0031168671684136504 # SSO:000016590__FMN-binding negative transcriptional regulator // 0.00299868942230722 # SSO:000003519__Hydroxyacylglutathione hydrolase (EC 3.1.2.6) // 0.0029512330217521887 # SSO:000000280__3-deoxy-D-manno-octulosonic acid transferase (EC 2.4.99.13) (EC 2.4.99.12) // 0.0028837779977746616 # SSO:000042222__Arginyltransferase( EC:2.3.2.8 ) // 0.0027643339351739963 # SSO:000029792__cytochrome c-1 // 0.0027413530248122124 # SSO:000029181__Zinc finger domain-containing protein // 0.002733450759787334 # SSO:000004190__LPS export ABC transporter permease LptG // 0.002651513221862159 # SSO:000009621__3-hydroxybutyrate dehydrogenase (EC 1.1.1.30) // 0.0025855394953185127 # SSO:000010245__Acyl-CoA dehydrogenase // 0.0025821656090787375 # SSO:000008391__Tryptophan 23-dioxygenase (EC 1.13.11.11) // 0.0025581379691406326 # SSO:000021054__PASTA domain containing protein // 0.002527547097138375 # SSO:000022980__PspC domain-containing protein // 0.0023243389698776695 # SSO:000000563__ATP-dependent Clp protease ATP-binding subunit ClpA // 0.002308057693787183 # SSO:000003525__Hydroxymethylglutaryl-CoA lyase (EC 4.1.3.4) // 0.0022997351059840915
CatBoost,0.91844,0.89961,0.91844,0.905691,0.602098,TP=5 TN=254 FP=17FN=6,SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.5770029318182524 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.32997190064511456 # SSO:000012746__DNA binding protein // 0.2393679690753004 # SSO:000003500__Hydrogen peroxide-inducible genes activator // 0.1841826985738169 # SSO:000000280__3-deoxy-D-manno-octulosonic acid transferase (EC 2.4.99.13) (EC 2.4.99.12) // 0.18257247985671546 # SSO:000016590__FMN-binding negative transcriptional regulator // 0.17334703736173696 # SSO:000000824__Adenylosuccinate synthetase (EC 6.3.4.4) // 0.17080942520331543 # SSO:000029337__alpha-xylosidase (EC 3.2.1.177) // 0.16468314191109387 # SSO:000007177__Rod shape-determining protein MreB // 0.15982133058453363 # SSO:000044172__hydroxyisourate hydrolase (EC 3.5.2.17) // 0.15463997492883363 # SSO:000020611__Non-ribosomal peptide synthetase // 0.15436315193435962 # SSO:000005114__N-acetylmuramic acid 6-phosphate etherase (EC 4.2.-.-) // 0.15268037226516143 # SSO:000008391__Tryptophan 23-dioxygenase (EC 1.13.11.11) // 0.14570520962485112 # SSO:000037230__Nitronate monooxygenase (EC 1.13.12.16) // 0.14545634398317558 # SSO:000029506__carbohydrate kinase // 0.13687768374240147 # SSO:000003277__Glycine oxidase ThiO (EC 1.4.3.19) // 0.12160480037518956 # SSO:000031442__phosphotriesterase // 0.10027240041367787 # SSO:000020702__O-acetyl-ADP-ribose deacetylase // 0.09779111027954372 # SSO:000029148__YibE/F family protein // 0.094230876771389 # SSO:000018657__Isoprenylcysteine carboxyl methyltransferase family protein // 0.09224310072732593


Numer of genomes:700
Number of genomic features :4340
Shape of y:(700,)
Count of 1: 681
Count of 0: 19
Running Decision Tree for phenotype xylose--carbon_source




Running Random Forest for phenotype xylose--carbon_source




Running CatBoost for phenotype xylose--carbon_source




######### Combined report for xylose--carbon_source#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.95,0.929348,0.95,0.93956,0.492593,TP=0 TN=133 FP=5FN=2,SSO:000013618__FAD-binding protein // 0.07676389669938877 # SSO:000000453__6-phosphofructokinase (EC 2.7.1.11) // 0.07112952953797141 # SSO:000029506__carbohydrate kinase // 0.0700911600571729 # SSO:000042344__threonine ammonia-lyase (EC 4.3.1.19) // 0.06105923967899305 # SSO:000019421__Membrane metalloprotease // 0.052275472125326794 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.05221589312359443 # SSO:000033054__radical SAM protein // 0.05019155758354116 # SSO:000010027__ATP-dependent DNA helicase // 0.050093057988858374 # SSO:000029603__cobalamin B12-binding domain-containing protein // 0.038411160210232685 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.036848417730868964 # SSO:000003324__Guanine deaminase (EC 3.5.4.3) // 0.03664823642823951 # SSO:000012031__Cell division protein // 0.019809060417121507 # SSO:000043186__galactose oxidase (EC 1.1.3.9) // 0.002612066756391799 # SSO:000009704__4Fe-4S ferredoxin // 0.002256433619452263 # SSO:000025776__Transcriptional activator of acetoin/glycerol metabolism // 0.0 # SSO:000000643__ATPase with chaperone activity associated with Flp pilus assembly // 0.0 # SSO:000005211__NADH dehydrogenase subunit 5 // 0.0 # SSO:000043918__DNA-formamidopyrimidine glycosylase (EC 3.2.2.23) // 0.0 # SSO:000033640__translation initiation factor IF-2 // 0.0 # SSO:000009095__dTDP-glucose 46-dehydratase (EC 4.2.1.46) // 0.0
Random Forest,0.964286,0.929847,0.964286,0.946753,0.5,TP=0 TN=135 FP=5FN=0,SSO:000018283__IclR-family transcriptional regulator // 0.008829686540010366 # SSO:000003324__Guanine deaminase (EC 3.5.4.3) // 0.005917506549705795 # SSO:000000453__6-phosphofructokinase (EC 2.7.1.11) // 0.005626722840560329 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.005006832591434178 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.004711729320319742 # SSO:000029603__cobalamin B12-binding domain-containing protein // 0.004635868537171146 # SSO:000003007__Galactonate dehydratase (EC 4.2.1.6) // 0.004580688885013024 # SSO:000007212__SAM-dependent methyltransferase (EC 2.1.1.-) // 0.00434876200700295 # SSO:000034554__Aldehyde dehydrogenase (NAD(P)(+)) (EC 1.2.1.5) // 0.004049169059972939 # SSO:000013106__Diguanylate cyclase // 0.004013697386600914 # SSO:000002820__Flagellar motor switch protein FliN // 0.003541462026233835 # SSO:000011068__Phenylacetate CoA-ligase (EC 6.2.1.30) // 0.0035371772085948296 # SSO:000010027__ATP-dependent DNA helicase // 0.003535276679125843 # SSO:000029506__carbohydrate kinase // 0.003482139575925822 # SSO:000002128__DNA translocase FtsK // 0.0033665737636909597 # SSO:000009756__Membrane protein // 0.0032562745702905647 # SSO:000000794__Acyl-CoA thioesterase II (EC 3.1.2.-) // 0.0032369360950208893 # SSO:000013044__Dehydrogenase // 0.003152045215471928 # SSO:000033511__thiolase family protein // 0.0031167359130506085 # SSO:000019203__METHYL-ACCEPTING CHEMOTAXIS PROTEIN // 0.0030980141087499436
CatBoost,0.971429,0.972251,0.971429,0.962113,0.6,TP=1 TN=135 FP=4FN=0,SSO:000018283__IclR-family transcriptional regulator // 0.8272255561646731 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.5217279520461828 # SSO:000029506__carbohydrate kinase // 0.4341723600200312 # SSO:000025246__Sugar phosphate isomerase/epimerase // 0.35302563319033686 # SSO:000011068__Phenylacetate CoA-ligase (EC 6.2.1.30) // 0.3264374713853573 # SSO:000010027__ATP-dependent DNA helicase // 0.28765456282737634 # SSO:000003324__Guanine deaminase (EC 3.5.4.3) // 0.27154638943081283 # SSO:000009756__Membrane protein // 0.2523456845312231 # SSO:000003224__Glutathione peroxidase (EC 1.11.1.9) // 0.2143004465072267 # SSO:000003007__Galactonate dehydratase (EC 4.2.1.6) // 0.18765268944214958 # SSO:000013044__Dehydrogenase // 0.18419252989328064 # SSO:000000263__34-dihydroxy-2-butanone 4-phosphate synthase (EC 4.1.99.12) // 0.17736973948982462 # SSO:000013618__FAD-binding protein // 0.17423556396473322 # SSO:000001856__Cysteine desulfurase (EC 2.8.1.7) // 0.16673451142490536 # SSO:000029603__cobalamin B12-binding domain-containing protein // 0.16576070652398622 # SSO:000010242__Acyl transferase // 0.1445725825913777 # SSO:000007212__SAM-dependent methyltransferase (EC 2.1.1.-) // 0.1332326451724607 # SSO:000013622__FAD-dependent monooxygenase // 0.11408659224275262 # SSO:000000453__6-phosphofructokinase (EC 2.7.1.11) // 0.11179928992300306 # SSO:000039114__SapC family protein // 0.10960190013775513


Numer of genomes:418
Number of genomic features :3310
Shape of y:(418,)
Count of 1: 93
Count of 0: 325
Running Decision Tree for phenotype gluconate--builds_acid_from




Running Random Forest for phenotype gluconate--builds_acid_from




Running CatBoost for phenotype gluconate--builds_acid_from




######### Combined report for gluconate--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.738095,0.79904,0.738095,0.758881,0.694853,TP=52 TN=10 FP=16FN=6,SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.16403631906084343 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.14322110421722914 # SSO:000043998__3-deoxy-7-phosphoheptulonate synthase (EC 2.5.1.54) // 0.07537754492772157 # SSO:000013473__Esterase // 0.06825232463547215 # SSO:000042727__UDP-glucose---hexose-1-phosphate uridylyltransferase (EC 2.7.7.12) // 0.04251160019536473 # SSO:000023719__Response regulator // 0.039256386958079335 # SSO:000017665__Heavy metal translocating P-type ATPase // 0.031292596873399595 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.0280949006976197 # SSO:000017669__Helicase // 0.02579753976478939 # SSO:000001046__Arginine deiminase (EC 3.5.3.6) // 0.02325770940340909 # SSO:000021696__Phosphonate ABC transporter ATP-binding protein // 0.022985209004077307 # SSO:000017510__HAD family hydrolase // 0.020730305673672202 # SSO:000017589__HPT domain containing protein // 0.0200493490858074 # SSO:000036190__Glycosyltransferase family 1 // 0.016514220413854336 # SSO:000001582__Citrate lyase holo-acyl-carrier-protein synthase (EC 2.7.7.61) // 0.01533938782486643 # SSO:000033847__zinc-binding dehydrogenase // 0.014423869755625925 # SSO:000008061__Threonine synthase (EC 4.2.3.1) // 0.014225774011383408 # SSO:000018191__IS110 family transposase // 0.013397475872325437 # SSO:000021897__Polysaccharide deacetylase // 0.01099113582264739 # SSO:000020331__NADH-dependent flavin oxidoreductase // 0.009024981725852551
Random Forest,0.869048,0.86158,0.869048,0.863687,0.751838,TP=64 TN=9 FP=4FN=7,SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.011907036980926428 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.009694252061562297 # SSO:000000466__A/G-specific adenine glycosylase (EC 3.2.2.-) // 0.00744222540300807 # SSO:000003103__Gluconate permease // 0.006473368879301731 # SSO:000043117__shikimate dehydrogenase (EC 1.1.1.25) // 0.005908222899029767 # SSO:000043998__3-deoxy-7-phosphoheptulonate synthase (EC 2.5.1.54) // 0.005624026962863264 # SSO:000013473__Esterase // 0.005200355822840995 # SSO:000010823__Arsenate reductase family protein // 0.004479434218912951 # SSO:000000864__Aldehyde dehydrogenase (EC 1.2.1.3) // 0.004458915308176977 # SSO:000042727__UDP-glucose---hexose-1-phosphate uridylyltransferase (EC 2.7.7.12) // 0.004302542819196467 # SSO:000000457__6-phosphogluconolactonase (EC 3.1.1.31) // 0.004172550852467488 # SSO:000000049__1-phosphofructokinase (EC 2.7.1.56) // 0.004134099022597809 # SSO:000005239__NADP-dependent malic enzyme (EC 1.1.1.40) // 0.004121457738690564 # SSO:000018613__Iron-containing alcohol dehydrogenase // 0.003904189198765482 # SSO:000001968__Cytosine permease // 0.0037137861334347586 # SSO:000018739__L-arabinose isomerase (EC 5.3.1.4) // 0.003700274747724109 # SSO:000021578__Phenolic acid decarboxylase (EC 4.1.1.-) // 0.0034284896951201965 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.0032327876017569056 # SSO:000000860__Alcohol dehydrogenase (EC 1.1.1.1) // 0.0030437158688939323 # SSO:000005939__Phosphatidylglycerophosphatase A (EC 3.1.3.27) // 0.003032601044056923
CatBoost,0.904762,0.900794,0.904762,0.89932,0.797794,TP=66 TN=10 FP=2FN=6,SSO:000003108__Gluconokinase (EC 2.7.1.12) // 1.0727122805575102 # SSO:000019343__Mannose-6-phosphate isomerase class I (EC 5.3.1.8) // 0.5684047868074937 # SSO:000042727__UDP-glucose---hexose-1-phosphate uridylyltransferase (EC 2.7.7.12) // 0.41017643772881285 # SSO:000005239__NADP-dependent malic enzyme (EC 1.1.1.40) // 0.381091502249188 # SSO:000013473__Esterase // 0.37752138848874983 # SSO:000023719__Response regulator // 0.34781773022516044 # SSO:000000466__A/G-specific adenine glycosylase (EC 3.2.2.-) // 0.33781876114256365 # SSO:000017850__HlyD family secretion protein // 0.22057962219666347 # SSO:000043117__shikimate dehydrogenase (EC 1.1.1.25) // 0.2169548302713094 # SSO:000013593__Extracellular solute-binding protein // 0.1916028116567955 # SSO:000043998__3-deoxy-7-phosphoheptulonate synthase (EC 2.5.1.54) // 0.1465343375799155 # SSO:000010823__Arsenate reductase family protein // 0.13630861404362535 # SSO:000001777__Copper-translocating P-type ATPase (EC 3.6.3.4) // 0.13628761502190465 # SSO:000017481__GtrA family protein // 0.13474196317042014 # SSO:000017578__HNH endonuclease // 0.1327034373334867 # SSO:000033209__sialate O-acetylesterase // 0.1296119707880866 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.12414367178972865 # SSO:000029803__dUTP diphosphatase (EC 3.6.1.23) // 0.12291587560719239 # SSO:000024005__Replication initiation protein // 0.12285803376444887 # SSO:000033667__transposase family protein // 0.12166422161519859


Numer of genomes:633
Number of genomic features :3741
Shape of y:(633,)
Count of 1: 71
Count of 0: 562
Running Decision Tree for phenotype L-fucose--builds_acid_from




Running Random Forest for phenotype L-fucose--builds_acid_from




Running CatBoost for phenotype L-fucose--builds_acid_from




######### Combined report for L-fucose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.866142,0.863081,0.866142,0.864535,0.713303,TP=101 TN=9 FP=8FN=9,SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.21464209302018786 # SSO:000041675__WD40 repeat domain-containing protein // 0.08827790751583528 # SSO:000009028__Protein-PII uridylyltransferase (EC 2.7.7.59) // 0.06194708773233844 # SSO:000033225__sigma-54-dependent transcriptional regulator // 0.05291943562584429 # SSO:000010823__Arsenate reductase family protein // 0.05221242620015753 # SSO:000012031__Cell division protein // 0.045920513676729884 # SSO:000018910__LicD family protein // 0.039329415200664276 # SSO:000022969__Protoporphyrinogen oxidase // 0.025427554901407905 # SSO:000005432__Nucleotidyltransferase (EC 2.7.7.-) // 0.023481175220425143 # SSO:000019374__Maturase // 0.02133071604275206 # SSO:000029444__beta-N-acetylhexosaminidase // 0.015701283909789213 # SSO:000012974__DUF1854 domain-containing protein // 0.012953134957092514 # SSO:000020300__NAD(P)H-quinone oxidoreductase subunit 3 (EC 1.6.5.2) // 0.009389973044004551 # SSO:000017312__Glutathione-independent formaldehyde dehydrogenase (EC 1.2.1.46) // 0.007778052483733184 # SSO:000025751__Transcription elongation factor // 0.006857363842266013 # SSO:000025603__Thiol oxidoreductase // 0.004837151205217284 # SSO:000000809__Adenine phosphoribosyltransferase (EC 2.4.2.7) // 0.004621709180166723 # SSO:000039261__Stress response protein AzuC // 0.004528801566800598 # SSO:000024404__Sarcosine oxidase // 0.004066726830183315 # SSO:000022950__Protein-disulfide reductase // 0.003573839117070623
Random Forest,0.874016,0.85885,0.874016,0.863105,0.671509,TP=104 TN=7 FP=5FN=11,SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.012402653695588158 # SSO:000036573__LPS-assembly protein LptD // 0.009523296943232342 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.007530450065257577 # SSO:000029482__c-type cytochrome // 0.006650479801542604 # SSO:000004460__Lipid-A-disaccharide synthase (EC 2.4.1.182) // 0.006291694577090617 # SSO:000007937__Tetraacyldisaccharide 4'-kinase (EC 2.7.1.130) // 0.006237998807836636 # SSO:000004504__Lipoyl synthase (EC 2.8.1.8) // 0.005717785628162464 # SSO:000042364__uroporphyrinogen-III C-methyltransferase (EC 2.1.1.107) // 0.005157831429832392 # SSO:000033632__transglutaminase family protein // 0.005016455196315836 # SSO:000007415__Septation ring formation regulator EzrA // 0.004489124899059185 # SSO:000000281__3-deoxy-manno-octulosonate cytidylyltransferase (EC 2.7.7.38) // 0.004436939948386697 # SSO:000004144__L-fucose isomerase (EC 5.3.1.25) // 0.004111804842120464 # SSO:000018777__L-rhamnose isomerase // 0.004087663518747561 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.003923460408586305 # SSO:000008604__UDP-3-O-3-hydroxymyristoyl glucosamine N-acyltransferase (EC 2.3.1.-) // 0.003886140816860104 # SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.003470046415875837 # SSO:000043994__3-deoxy-8-phosphooctulonate synthase (EC 2.5.1.55) // 0.00342299040073722 # SSO:000001785__Crossover junction endodeoxyribonuclease RuvC (EC 3.1.22.4) // 0.0034135404180972947 # SSO:000006811__Pyridoxamine 5'-phosphate oxidase (EC 1.4.3.5) // 0.003258017622738563 # SSO:000001022__Apolipoprotein N-acyltransferase (EC 2.3.1.-) // 0.0031757173404519676
CatBoost,0.897638,0.89536,0.897638,0.896409,0.778033,TP=103 TN=11 FP=6FN=7,SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 1.138948392554047 # SSO:000004144__L-fucose isomerase (EC 5.3.1.25) // 0.6731244877542245 # SSO:000004709__Mannonate dehydratase (EC 4.2.1.8) // 0.4965841863253689 # SSO:000036573__LPS-assembly protein LptD // 0.39517460249619235 # SSO:000033632__transglutaminase family protein // 0.34485017347114033 # SSO:000029482__c-type cytochrome // 0.32207819016154243 # SSO:000002972__GDP-mannose 46-dehydratase (EC 4.2.1.47) // 0.2640194523238161 # SSO:000042364__uroporphyrinogen-III C-methyltransferase (EC 2.1.1.107) // 0.23339493331922512 # SSO:000041675__WD40 repeat domain-containing protein // 0.22077907985363826 # SSO:000043034__inositol 2-dehydrogenase (EC 1.1.1.18) // 0.20603863311850726 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.1938359227559753 # SSO:000002060__DNA (cytosine-5-)-methyltransferase (EC 2.1.1.37 ) // 0.18982962489605387 # SSO:000033225__sigma-54-dependent transcriptional regulator // 0.18980750094188623 # SSO:000018910__LicD family protein // 0.1628732800692119 # SSO:000010823__Arsenate reductase family protein // 0.14670220575603404 # SSO:000031325__penicillin-binding protein 2 // 0.13184651474088493 # SSO:000011529__CDP-alcohol phosphatidyltransferase family protein // 0.12615166037058206 # SSO:000012031__Cell division protein // 0.11699156530070697 # SSO:000012746__DNA binding protein // 0.11668611317485514 # SSO:000021409__Peptidoglycan-binding protein // 0.10568481950798735


Numer of genomes:639
Number of genomic features :3754
Shape of y:(639,)
Count of 1: 50
Count of 0: 589
Running Decision Tree for phenotype D-fucose--builds_acid_from




Running Random Forest for phenotype D-fucose--builds_acid_from




Running CatBoost for phenotype D-fucose--builds_acid_from




######### Combined report for D-fucose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.945312,0.948239,0.945312,0.946636,0.816527,TP=115 TN=6 FP=4FN=3,SSO:000000206__2-oxoglutarate dehydrogenase E1 component (EC 1.2.4.2) // 0.25370337437702245 # SSO:000010617__Aminotransferase // 0.0766406312295361 # SSO:000008887__Uroporphyrinogen-III synthase (EC 4.2.1.75) // 0.07186351588750013 # SSO:000024867__Small acid-soluble spore protein alpha/beta type // 0.06931491553665091 # SSO:000005515__Ornithine cyclodeaminase (EC 4.3.1.12) // 0.06676397390028405 # SSO:000009661__4-alpha-glucanotransferase // 0.05686819394037682 # SSO:000020511__Nicotinamide mononucleotide transporter // 0.04801445970032917 # SSO:000026073__Transposase // 0.015633243634881246 # SSO:000019522__Metal-dependent phosphohydrolase // 0.015111852475636156 # SSO:000021523__Phage major tail tube protein // 0.009750195754551175 # SSO:000017074__General stress protein // 0.006788710434848792 # SSO:000021593__PhoH family protein // 0.0030245424379123555 # SSO:000024113__Rhodanese // 0.0019539260830649234 # SSO:000012272__Stringent starvation protein B // 0.0006726908321340057 # SSO:000010853__Asp/Glu racemase // 0.0 # SSO:000029488__cag pathogenicity island protein (cag12) // 0.0 # SSO:000004170__L-ribulose-5-phosphate 4-epimerase (EC 5.1.3.4) // 0.0 # SSO:000021611__Phosphate starvation-inducible protein PsiF // 0.0 # SSO:000013144__DinB family protein // 0.0 # SSO:000021269__Patatin family protein // 0.0
Random Forest,0.953125,0.949177,0.953125,0.950391,0.769374,TP=117 TN=5 FP=2FN=4,SSO:000012473__Cytochrome c // 0.008804539973319484 # SSO:000000206__2-oxoglutarate dehydrogenase E1 component (EC 1.2.4.2) // 0.008037012774522526 # SSO:000005945__Phosphatidylserine decarboxylase (EC 4.1.1.65) // 0.007539003523463594 # SSO:000022980__PspC domain-containing protein // 0.007331770961379004 # SSO:000042364__uroporphyrinogen-III C-methyltransferase (EC 2.1.1.107) // 0.006405021792785358 # SSO:000002250__Deoxyribodipyrimidine photolyase (EC 4.1.99.3) // 0.006055884690472408 # SSO:000005983__Phosphoglycolate phosphatase (EC 3.1.3.18) // 0.006054340100789041 # SSO:000036573__LPS-assembly protein LptD // 0.005967759632931874 # SSO:000007937__Tetraacyldisaccharide 4'-kinase (EC 2.7.1.130) // 0.0058620904227074935 # SSO:000004460__Lipid-A-disaccharide synthase (EC 2.4.1.182) // 0.005836045579669377 # SSO:000000281__3-deoxy-manno-octulosonate cytidylyltransferase (EC 2.7.7.38) // 0.00555305378030293 # SSO:000004190__LPS export ABC transporter permease LptG // 0.005494467456696697 # SSO:000008604__UDP-3-O-3-hydroxymyristoyl glucosamine N-acyltransferase (EC 2.3.1.-) // 0.005325971749859282 # SSO:000001541__Choline dehydrogenase (EC 1.1.99.1) // 0.00511965852861142 # SSO:000002350__Diphosphomevalonate decarboxylase (EC 4.1.1.33) // 0.004957649447763018 # SSO:000007776__Succinate dehydrogenase cytochrome b-556 subunit // 0.004306822971983257 # SSO:000030104__heme exporter protein CcmD // 0.0042913149520688795 # SSO:000003257__Glycerol-3-phosphate dehydrogenase (EC 1.1.5.3) // 0.0040452882343178405 # SSO:000000563__ATP-dependent Clp protease ATP-binding subunit ClpA // 0.004026531804254162 # SSO:000000364__4-hydroxy-3-methylbut-2-enyl diphosphate reductase (EC 1.17.1.2) // 0.0039708274994082905
CatBoost,0.921875,0.929864,0.921875,0.92545,0.752568,TP=113 TN=5 FP=6FN=4,SSO:000009661__4-alpha-glucanotransferase // 0.5970723617185599 # SSO:000005945__Phosphatidylserine decarboxylase (EC 4.1.1.65) // 0.4029997534244539 # SSO:000012473__Cytochrome c // 0.40197575916064016 # SSO:000036573__LPS-assembly protein LptD // 0.40126074925177196 # SSO:000010617__Aminotransferase // 0.34733356092921613 # SSO:000007937__Tetraacyldisaccharide 4'-kinase (EC 2.7.1.130) // 0.3356231697715458 # SSO:000001541__Choline dehydrogenase (EC 1.1.99.1) // 0.26714239183828037 # SSO:000022980__PspC domain-containing protein // 0.21610881781153973 # SSO:000029769__cyclic nucleotide-binding domain-containing protein // 0.21169207905063794 # SSO:000005983__Phosphoglycolate phosphatase (EC 3.1.3.18) // 0.20250784735297864 # SSO:000024867__Small acid-soluble spore protein alpha/beta type // 0.1997336804964137 # SSO:000011552__CHRD domain containing protein // 0.19732992839469657 # SSO:000001198__Betaine aldehyde dehydrogenase (EC 1.2.1.8) // 0.17399307810781872 # SSO:000002250__Deoxyribodipyrimidine photolyase (EC 4.1.99.3) // 0.15013060698709935 # SSO:000013622__FAD-dependent monooxygenase // 0.14641034550743445 # SSO:000017631__Haloacid dehalogenase type II (EC 3.8.1.2) // 0.14174956183910653 # SSO:000044172__hydroxyisourate hydrolase (EC 3.5.2.17) // 0.1297650420891251 # SSO:000004460__Lipid-A-disaccharide synthase (EC 2.4.1.182) // 0.10961637288688192 # SSO:000043309__branched-chain-amino-acid transaminase (EC 2.6.1.42) // 0.10603429036640505 # SSO:000000035__14-dihydroxy-2-naphthoate polyprenyltransferase (EC 2.5.1.74) // 0.1009186640584065


Numer of genomes:641
Number of genomic features :3762
Shape of y:(641,)
Count of 1: 70
Count of 0: 571
Running Decision Tree for phenotype D-lyxose--builds_acid_from




Running Random Forest for phenotype D-lyxose--builds_acid_from




Running CatBoost for phenotype D-lyxose--builds_acid_from




######### Combined report for D-lyxose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.79845,0.856079,0.79845,0.82359,0.589744,TP=99 TN=4 FP=18FN=8,SSO:000006176__Porphobilinogen synthase (EC 4.2.1.24) // 0.1516932063316927 # SSO:000000307__3-isopropylmalate dehydratase small subunit (EC 4.2.1.33) // 0.06759085684968993 # SSO:000012371__Competence protein // 0.04432606664428581 # SSO:000002623__Exodeoxyribonuclease VII large subunit (EC 3.1.11.6) // 0.04007193931100489 # SSO:000012151__Chemotaxis protein CheY // 0.03956809385494878 # SSO:000006212__Precorrin-6A reductase (EC 1.3.1.54) // 0.03462010197013559 # SSO:000001185__Beta-glucuronidase (EC 3.2.1.31) // 0.03435831711739514 # SSO:000008105__TonB-dependent siderophore receptor // 0.03418886840419709 # SSO:000002988__GTP pyrophosphokinase (EC 2.7.6.5) // 0.03379358166823204 # SSO:000002305__Dihydroorotase (EC 3.5.2.3) // 0.028531949077219256 # SSO:000043056__arylformamidase (EC 3.5.1.9) // 0.028520814943165627 # SSO:000043015__UDP-N-acetylmuramate dehydrogenase (EC 1.1.1.158) // 0.026263464930900954 # SSO:000023930__Regulator // 0.02251877573196051 # SSO:000043507__S-methyl-5-thioribose kinase (EC 2.7.1.100) // 0.017319334899513886 # SSO:000017050__Gas vesicle protein // 0.016038658260595664 # SSO:000016774__Fis family transcriptional regulator // 0.01580434985558031 # SSO:000012787__DNA methyltransferase // 0.014777950400010637 # SSO:000002647__Exosporium protein C // 0.013902548608467897 # SSO:000043727__thiamine diphosphokinase (EC 2.7.6.2) // 0.012400378592821705 # SSO:000026186__Tributyrin esterase // 0.011433180587467536
Random Forest,0.891473,0.915637,0.891473,0.900898,0.790598,TP=107 TN=8 FP=10FN=4,SSO:000042364__uroporphyrinogen-III C-methyltransferase (EC 2.1.1.107) // 0.008153568321179526 # SSO:000006176__Porphobilinogen synthase (EC 4.2.1.24) // 0.007481443551917735 # SSO:000025627__Thioredoxin domain-containing protein // 0.00604962418788674 # SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.005337128454946765 # SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.0051153621640490626 # SSO:000004504__Lipoyl synthase (EC 2.8.1.8) // 0.0048862239770319825 # SSO:000001022__Apolipoprotein N-acyltransferase (EC 2.3.1.-) // 0.004412328834735844 # SSO:000006304__Prephenate dehydratase (EC 4.2.1.51) // 0.003951097793337658 # SSO:000004670__Malate dehydrogenase (EC 1.1.1.37) // 0.003947754381097938 # SSO:000006853__Pyruvate oxidase (EC 1.2.3.3) // 0.003939932943313496 # SSO:000000168__2-isopropylmalate synthase (EC 2.3.3.13) // 0.0039038305398868406 # SSO:000005727__Pantoate--beta-alanine ligase (EC 6.3.2.1) // 0.0038988078687864055 # SSO:000005483__Oleate hydratase (EC 4.2.1.53) // 0.0037303008283945064 # SSO:000013192__DnaD domain protein // 0.0035130598070194268 # SSO:000037091__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit J // 0.0034997299725456964 # SSO:000012290__CoA-binding protein // 0.003416597030332492 # SSO:000000307__3-isopropylmalate dehydratase small subunit (EC 4.2.1.33) // 0.0033901492534070075 # SSO:000018454__Inositol monophosphatase // 0.003348095352302496 # SSO:000004698__Malto-oligosyltrehalose synthase (EC 5.4.99.15) // 0.0033361385453619208 # SSO:000021274__Patatin-like phospholipase family protein // 0.0032614137185541167
CatBoost,0.860465,0.879862,0.860465,0.869219,0.661325,TP=106 TN=5 FP=11FN=7,SSO:000025627__Thioredoxin domain-containing protein // 0.389456076658938 # SSO:000009961__ADP-ribosylglycohydrolase family protein // 0.3298554951078608 # SSO:000012151__Chemotaxis protein CheY // 0.30272647850160905 # SSO:000006176__Porphobilinogen synthase (EC 4.2.1.24) // 0.2784249990996177 # SSO:000042364__uroporphyrinogen-III C-methyltransferase (EC 2.1.1.107) // 0.2620841560688239 # SSO:000018641__Isochorismatase family protein // 0.24269314302184633 # SSO:000002919__Formyltetrahydrofolate deformylase (EC 3.5.1.10) // 0.23766803633218486 # SSO:000025231__Sugar binding protein // 0.23762421482576146 # SSO:000002629__Exopolyphosphatase (EC 3.6.1.11) // 0.22825992079848556 # SSO:000000847__Alanine dehydrogenase (EC 1.4.1.1) // 0.22428748770217 # SSO:000018191__IS110 family transposase // 0.18718896062277232 # SSO:000017037__Galactitol-1-phosphate 5-dehydrogenase (EC 1.1.1.251) // 0.1729035753263727 # SSO:000038542__Phosphomethylpyrimidine synthase ThiC (EC 4.1.99.17) // 0.16320236651847747 # SSO:000010798__Arginine repressor // 0.1559279666494379 # SSO:000025241__Sugar kinase // 0.15321007145176735 # SSO:000020331__NADH-dependent flavin oxidoreductase // 0.1524639086497219 # SSO:000023842__RNA polymerase sigma factor SigY // 0.12661292975238395 # SSO:000004504__Lipoyl synthase (EC 2.8.1.8) // 0.12137114986151015 # SSO:000004699__Malto-oligosyltrehalose trehalohydrolase (EC 3.2.1.141) // 0.11676680178542831 # SSO:000009749__Adenosine kinase (EC 2.7.1.20) // 0.11579978190682655


Numer of genomes:647
Number of genomic features :3776
Shape of y:(647,)
Count of 1: 187
Count of 0: 460
Running Decision Tree for phenotype turanose--builds_acid_from




Running Random Forest for phenotype turanose--builds_acid_from




Running CatBoost for phenotype turanose--builds_acid_from




######### Combined report for turanose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.761538,0.756123,0.761538,0.758393,0.695001,TP=79 TN=20 FP=14FN=17,SSO:000029028__Xaa-Pro dipeptidyl-peptidase // 0.15271368217910145 # SSO:000018641__Isochorismatase family protein // 0.06615213653269686 # SSO:000000280__3-deoxy-D-manno-octulosonic acid transferase (EC 2.4.99.13) (EC 2.4.99.12) // 0.05855613460566332 # SSO:000001052__Argininosuccinate synthase (EC 6.3.4.5) // 0.046845592921792634 # SSO:000005240__NADP-specific glutamate dehydrogenase (EC 1.4.1.4) // 0.041573245958484974 # SSO:000002988__GTP pyrophosphokinase (EC 2.7.6.5) // 0.0382035555146122 # SSO:000002256__Deoxyuridine 5'-triphosphate nucleotidohydrolase (EC 3.6.1.23) // 0.03572761100569639 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.03433920670024736 # SSO:000003447__Histidinol-phosphatase (EC 3.1.3.15) // 0.03207876696890202 # SSO:000005035__Muramoyltetrapeptide carboxypeptidase (EC 3.4.17.13) // 0.02459326046805548 # SSO:000012811__DNA polymerase III subunit delta' // 0.02382716362605478 # SSO:000000195__2-methylcitrate synthase (EC 2.3.3.5) // 0.0203336849356328 # SSO:000018986__Lipoprotein // 0.017959474917279086 # SSO:000029430__bacteriocin // 0.017415872458601184 # SSO:000012684__D-alanyl-D-alanine carboxypeptidase family protein // 0.01580122400466601 # SSO:000000735__Aconitate hydratase (EC 4.2.1.3) // 0.013867982473011761 # SSO:000012870__DNA replication protein // 0.013202726631700124 # SSO:000010953__BCCT family transporter // 0.012751070264896212 # SSO:000003169__Glutamine synthetase (EC 6.3.1.2) // 0.012639794350591878 # SSO:000001070__Arsenical pump-driving ATPase (EC 3.6.3.16) // 0.011972449109993726
Random Forest,0.815385,0.815385,0.815385,0.815385,0.773322,TP=81 TN=25 FP=12FN=12,SSO:000029028__Xaa-Pro dipeptidyl-peptidase // 0.006618933958131229 # SSO:000033632__transglutaminase family protein // 0.004714662483098304 # SSO:000001052__Argininosuccinate synthase (EC 6.3.4.5) // 0.004163373373104632 # SSO:000001051__Argininosuccinate lyase (EC 4.3.2.1) // 0.00393995560355752 # SSO:000001098__Aspartate--ammonia ligase (EC 6.3.1.1) // 0.003923145449877035 # SSO:000025627__Thioredoxin domain-containing protein // 0.0039044622918708856 # SSO:000003118__Glucose 1-dehydrogenase (EC 1.1.1.47) // 0.0036980264869767524 # SSO:000003145__Glutamate 5-kinase (EC 2.7.2.11) // 0.0034977864320766263 # SSO:000036105__Glutamate-5-semialdehyde dehydrogenase (EC 1.2.1.41) // 0.003304607028526087 # SSO:000000568__ATP-dependent Clp protease proteolytic subunit (EC 3.4.21.92) // 0.0032466380208557924 # SSO:000002988__GTP pyrophosphokinase (EC 2.7.6.5) // 0.0030696364506539627 # SSO:000013154__Dipeptidase // 0.0030419365626511764 # SSO:000005097__N-acetyl-gamma-glutamyl-phosphate reductase (EC 1.2.1.38) // 0.003005534344797434 # SSO:000013635__FHA domain-containing protein // 0.002981559458518356 # SSO:000000995__Anthranilate phosphoribosyltransferase (EC 2.4.2.18) // 0.002951521275269991 # SSO:000000168__2-isopropylmalate synthase (EC 2.3.3.13) // 0.002865772569055729 # SSO:000003528__Hydroxymethylglutaryl-CoA synthase (EC 2.3.3.10) // 0.0027735071620004943 # SSO:000029759__coproporphyrinogen III oxidase // 0.002717763773949065 # SSO:000043945__histidinol-phosphate transaminase (EC 2.6.1.9) // 0.0027061411639130064 # SSO:000001558__Chorismate synthase (EC 4.2.3.5) // 0.0026698481298798853
CatBoost,0.792308,0.787769,0.792308,0.789568,0.732781,TP=81 TN=22 FP=12FN=15,SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.5287078518547064 # SSO:000029995__gluconate transporter // 0.3182518566518231 # SSO:000003118__Glucose 1-dehydrogenase (EC 1.1.1.47) // 0.3063806664396742 # SSO:000000919__Alpha-galactosidase (EC 3.2.1.22) // 0.2332335920848065 # SSO:000029028__Xaa-Pro dipeptidyl-peptidase // 0.21145163896201555 # SSO:000018202__IS21 family transposase // 0.18581501154140845 # SSO:000000442__6-carboxytetrahydropterin synthase (EC 4.1.2.50) // 0.17852999461420763 # SSO:000000568__ATP-dependent Clp protease proteolytic subunit (EC 3.4.21.92) // 0.1548817255773073 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.14733113066301376 # SSO:000009109__flavodoxin // 0.14305122048678406 # SSO:000002988__GTP pyrophosphokinase (EC 2.7.6.5) // 0.14064892006960739 # SSO:000025627__Thioredoxin domain-containing protein // 0.13726263760385496 # SSO:000019412__Membrane dipeptidase // 0.12965392190953903 # SSO:000011951__Carboxylate-amine ligase // 0.12390534390976327 # SSO:000001052__Argininosuccinate synthase (EC 6.3.4.5) // 0.10925924216324653 # SSO:000000826__Adenylylsulfate kinase (EC 2.7.1.25) // 0.10730583628109892 # SSO:000013154__Dipeptidase // 0.10646323161637636 # SSO:000017255__Glutamate synthase // 0.10509310987893918 # SSO:000018207__IS30 family transposase // 0.10320509360316099 # SSO:000024083__Restriction endonuclease // 0.10071114628929868


Numer of genomes:620
Number of genomic features :3757
Shape of y:(620,)
Count of 1: 249
Count of 0: 371
Running Decision Tree for phenotype gentiobiose--builds_acid_from




Running Random Forest for phenotype gentiobiose--builds_acid_from




Running CatBoost for phenotype gentiobiose--builds_acid_from




######### Combined report for gentiobiose--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.66129,0.669219,0.66129,0.663721,0.657838,TP=50 TN=32 FP=24FN=18,SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.13484878908629447 # SSO:000005171__NAD(P)H-hydrate epimerase (EC 5.1.99.6) // 0.1140852727911361 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.0752983595410956 # SSO:000003108__Gluconokinase (EC 2.7.1.12) // 0.07382512319794184 # SSO:000037083__NADH-quinone oxidoreductase (EC 7.1.1.2) subunit B // 0.04796213045281594 # SSO:000029193__Zinc transporter ZupT // 0.03417565588765099 # SSO:000005280__Na+/H+ antiporter NhaB // 0.03056363775456001 # SSO:000000722__Acetylornithine deacetylase (EC 3.5.1.16) // 0.027034758252551794 # SSO:000001046__Arginine deiminase (EC 3.5.3.6) // 0.026678725226478937 # SSO:000012589__Cytochrome P-450 // 0.026203781704100128 # SSO:000002765__Ferrous iron transport protein A // 0.02522545096002101 # SSO:000017587__HPP family protein // 0.022787612990761562 # SSO:000024592__Serine/threonine protein phosphatase (EC 3.1.3.16) // 0.022642830493976716 # SSO:000025068__Sporulation protein // 0.021890017782428498 # SSO:000002985__GTP cyclohydrolase II (EC 3.5.4.25) // 0.019067921754437663 # SSO:000001067__Arsenate reductase (EC 1.20.4.1) // 0.018302487181989565 # SSO:000019153__Lysozyme // 0.01754467644225048 # SSO:000001940__Cytochrome d ubiquinol oxidase subunit II (EC 1.10.3.-) // 0.016300341488849065 # SSO:000024950__Sodium/solute symporter // 0.014369322644573876 # SSO:000006574__Pterin-4-alpha-carbinolamine dehydratase (EC 4.2.1.96) // 0.012668414721523826
Random Forest,0.733871,0.746305,0.733871,0.736242,0.738108,TP=53 TN=38 FP=21FN=12,SSO:000000925__Alpha-mannosidase (EC 3.2.1.24) // 0.007478250946979808 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.0063940657352342094 # SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.006355791359400935 # SSO:000005171__NAD(P)H-hydrate epimerase (EC 5.1.99.6) // 0.006119679368616947 # SSO:000003124__Glucose-1-phosphate adenylyltransferase (EC 2.7.7.27) // 0.005910269181555263 # SSO:000024110__Rhamnulokinase (EC 2.7.1.5) // 0.005762186707195954 # SSO:000013593__Extracellular solute-binding protein // 0.004680575370556042 # SSO:000002108__DNA repair protein RadC // 0.0044645455266381275 # SSO:000002472__Endonuclease III (EC 4.2.99.18) // 0.004097415649140263 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.003845829234560011 # SSO:000018777__L-rhamnose isomerase // 0.0036973942913299035 # SSO:000044352__formate C-acetyltransferase (EC 2.3.1.54) // 0.003594937734649056 # SSO:000024227__Rod shape-determining protein // 0.003580735134946828 # SSO:000000453__6-phosphofructokinase (EC 2.7.1.11) // 0.003572795559367206 # SSO:000042756__Dihydroorotate dehydrogenase electron transfer subunit (EC 1.3.3.1) // 0.0035258960220187667 # SSO:000008176__Transcription termination factor Rho // 0.003491856521012299 # SSO:000007796__Sucrose-6-phosphate hydrolase (EC 3.2.1.26) // 0.0034688594140955178 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.00315975651374217 # SSO:000036545__L-rhamnose mutarotase (EC 5.1.3.32) // 0.0030407988465070194 # SSO:000033632__transglutaminase family protein // 0.002969496096524928
CatBoost,0.774194,0.781055,0.774194,0.775814,0.775135,TP=57 TN=39 FP=17FN=11,SSO:000000451__6-phospho-beta-glucosidase (EC 3.2.1.86) // 0.8525332630232526 # SSO:000002108__DNA repair protein RadC // 0.405748720664788 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.39129927635128314 # SSO:000005171__NAD(P)H-hydrate epimerase (EC 5.1.99.6) // 0.2126013243233537 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.1691499299282907 # SSO:000021665__Phosphohydrolase // 0.16673334642076412 # SSO:000001182__Beta-galactosidase (EC 3.2.1.23) // 0.1643465504804713 # SSO:000024592__Serine/threonine protein phosphatase (EC 3.1.3.16) // 0.14489856737493467 # SSO:000000925__Alpha-mannosidase (EC 3.2.1.24) // 0.14384922988128787 # SSO:000024110__Rhamnulokinase (EC 2.7.1.5) // 0.14251901447973442 # SSO:000042756__Dihydroorotate dehydrogenase electron transfer subunit (EC 1.3.3.1) // 0.13946198502321686 # SSO:000022363__Prevent-host-death protein // 0.12999517397363383 # SSO:000010295__Acyltransferase family protein // 0.1245952342706216 # SSO:000019412__Membrane dipeptidase // 0.11797821336160103 # SSO:000011127__Bifunctional DNA primase/polymerase // 0.11257764071937468 # SSO:000024953__Sodium:alanine symporter family protein // 0.11026692484386809 # SSO:000003124__Glucose-1-phosphate adenylyltransferase (EC 2.7.7.27) // 0.1072595490751838 # SSO:000017470__GreA/GreB family elongation factor // 0.10523629117910843 # SSO:000009057__biotin--acetyl-CoA-carboxylase ligase (EC 6.3.4.15 ) // 0.10075526496309731 # SSO:000010823__Arsenate reductase family protein // 0.10016785625007753


Numer of genomes:656
Number of genomic features :3781
Shape of y:(656,)
Count of 1: 45
Count of 0: 611
Running Decision Tree for phenotype xylitol--builds_acid_from




Running Random Forest for phenotype xylitol--builds_acid_from




Running CatBoost for phenotype xylitol--builds_acid_from




######### Combined report for xylitol--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.871212,0.91442,0.871212,0.890822,0.594857,TP=113 TN=2 FP=12FN=5,SSO:000042364__uroporphyrinogen-III C-methyltransferase (EC 2.1.1.107) // 0.23104403992502795 # SSO:000001098__Aspartate--ammonia ligase (EC 6.3.1.1) // 0.06959833873833648 # SSO:000043309__branched-chain-amino-acid transaminase (EC 2.6.1.42) // 0.05838948668218115 # SSO:000001582__Citrate lyase holo-acyl-carrier-protein synthase (EC 2.7.7.61) // 0.04600912307306807 # SSO:000033963__Peptide-methionine (R)-S-oxide reductase MsrB (EC 1.8.4.12) // 0.04382451924355683 # SSO:000000922__Alpha-glucuronidase (EC 3.2.1.139) // 0.03267596918233612 # SSO:000038785__Putative aminohydrolase SsnA // 0.024415108488187252 # SSO:000044197__UMP kinase (EC 2.7.4.22) // 0.023145420371158725 # SSO:000034727__Autonomous glycyl radical cofactor GrcA // 0.017424385262752418 # SSO:000008860__Urea carboxylase (EC 6.3.4.6) // 0.01554160318690248 # SSO:000017067__Phage major capsid protein // 0.014244201351975199 # SSO:000029193__Zinc transporter ZupT // 0.013469857181022355 # SSO:000028897__VTC domain-containing protein // 0.010493311251563515 # SSO:000010889__Autolysin (EC 3.5.1.28) // 0.010115738344132525 # SSO:000005893__Phenylacetic acid degradation operon negative regulatory protein PaaX // 0.008966787421084182 # SSO:000006212__Precorrin-6A reductase (EC 1.3.1.54) // 0.008804394884779396 # SSO:000024883__Small multidrug export protein // 0.008351939690662367 # SSO:000008849__Uracil phosphoribosyltransferase (EC 2.4.2.9) // 0.0069726664317402666 # SSO:000000819__Adenylate cyclase (EC 4.6.1.1) // 0.005106740225123078 # SSO:000019030__Methyltransferase type 12 // 0.0041909771362417724
Random Forest,0.931818,0.915838,0.931818,0.922925,0.559429,TP=122 TN=1 FP=3FN=6,SSO:000042364__uroporphyrinogen-III C-methyltransferase (EC 2.1.1.107) // 0.01089803230304929 # SSO:000019235__MOSC domain containing protein // 0.007450511009106646 # SSO:000006206__Precorrin-2 C(20)-methyltransferase (EC 2.1.1.130) // 0.005368004672914299 # SSO:000029028__Xaa-Pro dipeptidyl-peptidase // 0.005293135341527037 # SSO:000008887__Uroporphyrinogen-III synthase (EC 4.2.1.75) // 0.005170032508689258 # SSO:000002316__Dihydroxy-acid dehydratase (EC 4.2.1.9) // 0.005047572346465298 # SSO:000006176__Porphobilinogen synthase (EC 4.2.1.24) // 0.004965531387169694 # SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.004376781445969081 # SSO:000008176__Transcription termination factor Rho // 0.004277592190494987 # SSO:000029506__carbohydrate kinase // 0.004207161879378872 # SSO:000016736__Fibronectin/fibrinogen-binding protein // 0.004044588203100502 # SSO:000001098__Aspartate--ammonia ligase (EC 6.3.1.1) // 0.003991593924790336 # SSO:000000168__2-isopropylmalate synthase (EC 2.3.3.13) // 0.003980226176489289 # SSO:000043973__adenosylcobinamide-GDP ribazoletransferase (EC 2.7.8.26) // 0.003939912143927703 # SSO:000004083__Ketol-acid reductoisomerase (EC 1.1.1.86) // 0.003834221071062009 # SSO:000011942__Carbon-nitrogen hydrolase family protein // 0.0038329061696341423 # SSO:000000308__3-isopropylmalate dehydrogenase (EC 1.1.1.85) // 0.003820022094601239 # SSO:000002250__Deoxyribodipyrimidine photolyase (EC 4.1.99.3) // 0.0037186139988232376 # SSO:000001541__Choline dehydrogenase (EC 1.1.99.1) // 0.0036203720800289307 # SSO:000001940__Cytochrome d ubiquinol oxidase subunit II (EC 1.10.3.-) // 0.003527750914869668
CatBoost,0.909091,0.909091,0.909091,0.909091,0.547429,TP=119 TN=1 FP=6FN=6,SSO:000042364__uroporphyrinogen-III C-methyltransferase (EC 2.1.1.107) // 0.7884831555374854 # SSO:000006206__Precorrin-2 C(20)-methyltransferase (EC 2.1.1.130) // 0.42451777972663635 # SSO:000017037__Galactitol-1-phosphate 5-dehydrogenase (EC 1.1.1.251) // 0.39705120791860576 # SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.247119953836067 # SSO:000008887__Uroporphyrinogen-III synthase (EC 4.2.1.75) // 0.22968261806286383 # SSO:000033511__thiolase family protein // 0.2103716654774206 # SSO:000001098__Aspartate--ammonia ligase (EC 6.3.1.1) // 0.1977775173601483 # SSO:000029307__aldolase // 0.19560800048322644 # SSO:000010149__Acetamidase/formamidase family protein // 0.19305214919037078 # SSO:000019235__MOSC domain containing protein // 0.18181604291124737 # SSO:000021554__Phage tail protein // 0.16433756553461587 # SSO:000034554__Aldehyde dehydrogenase (NAD(P)(+)) (EC 1.2.1.5) // 0.15249171096801645 # SSO:000044172__hydroxyisourate hydrolase (EC 3.5.2.17) // 0.14649815980714007 # SSO:000029506__carbohydrate kinase // 0.14098452426081606 # SSO:000024142__Ribonuclease // 0.13566818538426734 # SSO:000029028__Xaa-Pro dipeptidyl-peptidase // 0.13099842765546738 # SSO:000007212__SAM-dependent methyltransferase (EC 2.1.1.-) // 0.127262994591511 # SSO:000019339__Mannitol-1-phosphate 5-dehydrogenase (EC 1.1.1.17) // 0.12655499352011898 # SSO:000002109__DNA repair protein RecN // 0.1257462261124096 # SSO:000006211__Precorrin-4 C(11)-methyltransferase (EC 2.1.1.133) // 0.12019133940516243


Numer of genomes:669
Number of genomic features :3846
Shape of y:(669,)
Count of 1: 196
Count of 0: 473
Running Decision Tree for phenotype starch--builds_acid_from




Running Random Forest for phenotype starch--builds_acid_from




Running CatBoost for phenotype starch--builds_acid_from




######### Combined report for starch--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.783582,0.802358,0.783582,0.789396,0.771795,TP=76 TN=29 FP=19FN=10,SSO:000006161__Polyribonucleotide nucleotidyltransferase (EC 2.7.7.8) // 0.1531585038481317 # SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.13865266965776407 # SSO:000007212__SAM-dependent methyltransferase (EC 2.1.1.-) // 0.05688014621827298 # SSO:000017367__Glycosidase // 0.044908460172405115 # SSO:000021791__Pilus assembly protein // 0.04097369987418026 # SSO:000005958__Phosphoenolpyruvate synthase (EC 2.7.9.2) // 0.03624041999814389 # SSO:000000338__3-oxoadipyl-CoA thiolase (EC 2.3.1.174) // 0.034720190483374234 # SSO:000000968__Aminopeptidase C (EC 3.4.22.40) // 0.03242275177370872 # SSO:000012004__Cation:proton antiporter // 0.025483594814115315 # SSO:000018191__IS110 family transposase // 0.024923508629843417 # SSO:000012169__Chitinase (EC 3.2.1.14) // 0.024818074037141404 # SSO:000002476__Endonuclease VIII // 0.01928941370325109 # SSO:000010823__Arsenate reductase family protein // 0.017700070737623655 # SSO:000005132__N-formylglutamate deformylase (EC 3.5.1.68) // 0.01754047260913974 # SSO:000019394__Mechanosensitive ion channel // 0.016924035389801886 # SSO:000010242__Acyl transferase // 0.015946767461185358 # SSO:000000286__3-hydroxyacyl-CoA dehydrogenase (EC 1.1.1.35) // 0.014572794806198974 # SSO:000009689__4-hydroxybutyrate dehydrogenase (EC 1.1.1.61) // 0.012957802228684798 # SSO:000002029__D-inositol-3-phosphate glycosyltransferase (EC 2.4.1.250) // 0.012620881327490821 # SSO:000004630__Magnesium and cobalt transport protein CorA // 0.010473969101019779
Random Forest,0.835821,0.860277,0.835821,0.841232,0.846424,TP=78 TN=34 FP=17FN=5,SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.008688464380195207 # SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.008036534599164879 # SSO:000017367__Glycosidase // 0.005682738521309134 # SSO:000006161__Polyribonucleotide nucleotidyltransferase (EC 2.7.7.8) // 0.005596984827298145 # SSO:000036869__Methyltransferase // 0.005486322560433982 # SSO:000020651__Nucleoside 2-deoxyribosyltransferase // 0.005094289387253188 # SSO:000013261__Dyp-type peroxidase // 0.0048765245237267635 # SSO:000005994__Phosphomevalonate kinase (EC 2.7.4.2) // 0.004863836669641895 # SSO:000013391__Endonuclease // 0.0044202350319463846 # SSO:000003528__Hydroxymethylglutaryl-CoA synthase (EC 2.3.3.10) // 0.004006944817439855 # SSO:000017300__Glutaredoxin-like protein nrdH // 0.003889145657874175 # SSO:000029525__cation diffusion facilitator family transporter // 0.003402345427269318 # SSO:000001968__Cytosine permease // 0.003380240428081995 # SSO:000012290__CoA-binding protein // 0.0033189068403850375 # SSO:000000045__1-deoxy-D-xylulose 5-phosphate reductoisomerase (EC 1.1.1.267) // 0.003262252440085266 # SSO:000009299__rhodanese-related sulfurtransferase (EC 3.1.2.6 ) // 0.0031673228641739583 # SSO:000022976__PspA/IM30 family protein // 0.0031442587974285104 # SSO:000043018__acetolactate decarboxylase (EC 4.1.1.5) // 0.0030909956255984242 # SSO:000003103__Gluconate permease // 0.003081321211153798 # SSO:000009370__tRNA(1)(Val) (adenine(37)-N(6))-methyltransferase (EC 2.1.1.223) // 0.003077763935399485
CatBoost,0.88806,0.894467,0.88806,0.889919,0.883266,TP=85 TN=34 FP=10FN=5,SSO:000000917__Alpha-amylase (EC 3.2.1.1) // 0.6970430840867028 # SSO:000017367__Glycosidase // 0.4780171205329358 # SSO:000000920__Alpha-glucosidase (EC 3.2.1.20) // 0.37161410627424746 # SSO:000003103__Gluconate permease // 0.3091874801091139 # SSO:000012169__Chitinase (EC 3.2.1.14) // 0.2590033267502672 # SSO:000036869__Methyltransferase // 0.21770707200388317 # SSO:000017443__Glyoxalase // 0.20153423597441933 # SSO:000013391__Endonuclease // 0.20085099315750998 # SSO:000022976__PspA/IM30 family protein // 0.16716106982625545 # SSO:000005115__N-acetylmuramoyl-L-alanine amidase (EC 3.5.1.28) // 0.16683369762485062 # SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.16662437867189778 # SSO:000029525__cation diffusion facilitator family transporter // 0.15934446854106613 # SSO:000001968__Cytosine permease // 0.1522389911567738 # SSO:000033834__xylulokinase // 0.14188178548119196 # SSO:000010243__Acyl-ACP thioesterase // 0.13947672573454944 # SSO:000019394__Mechanosensitive ion channel // 0.1259140146176118 # SSO:000019936__MmgE/PrpD family protein // 0.12512675842590315 # SSO:000009012__Citrate pro-3S-lyase ligase (EC 6.2.1.22) // 0.11630451607574512 # SSO:000033667__transposase family protein // 0.1146963487301074 # SSO:000013261__Dyp-type peroxidase // 0.09757763688541718


Numer of genomes:684
Number of genomic features :3807
Shape of y:(684,)
Count of 1: 67
Count of 0: 617
Running Decision Tree for phenotype inulin--builds_acid_from




Running Random Forest for phenotype inulin--builds_acid_from




Running CatBoost for phenotype inulin--builds_acid_from




######### Combined report for inulin--builds_acid_from#####################


Model,Accuracy,Precision,Recall,F1-score,Balanced Accuracy,Confusion Matrix,Top features
Decision Tree,0.861314,0.891905,0.861314,0.874009,0.716811,TP=111 TN=7 FP=13FN=6,SSO:000006161__Polyribonucleotide nucleotidyltransferase (EC 2.7.7.8) // 0.1520101246411414 # SSO:000001095__Aspartate carbamoyltransferase (EC 2.1.3.2) // 0.08900278525896534 # SSO:000002839__Flagellin // 0.08403758797793681 # SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.07074791189232378 # SSO:000001582__Citrate lyase holo-acyl-carrier-protein synthase (EC 2.7.7.61) // 0.03166766908893832 # SSO:000013391__Endonuclease // 0.030335136188589865 # SSO:000029953__formate/nitrite transporter family protein // 0.027392540543640233 # SSO:000002472__Endonuclease III (EC 4.2.99.18) // 0.023922696547339 # SSO:000018910__LicD family protein // 0.023803501511427776 # SSO:000010110__chromosome segregation ATPase // 0.022765521029719075 # SSO:000005986__Phospholipid ABC transporter ATP-binding protein MlaF // 0.022499338949142744 # SSO:000021240__Panthothenate synthetase // 0.020814892151567 # SSO:000012222__Chromosome partitioning protein ParA // 0.01714777786382731 # SSO:000021554__Phage tail protein // 0.016648478881478378 # SSO:000021407__Peptidoglycan-binding domain-containing protein // 0.016188204649652305 # SSO:000022955__Proteinase // 0.01419899755977531 # SSO:000019522__Metal-dependent phosphohydrolase // 0.012536756477480737 # SSO:000005522__Orotidine 5'-phosphate decarboxylase (EC 4.1.1.23) // 0.01186355353101211 # SSO:000013242__DoxX family protein // 0.011843798713401145 # SSO:000004719__Mannosylglycerate hydrolase (EC 3.2.1.170) // 0.01175766188495225
Random Forest,0.919708,0.908285,0.919708,0.910943,0.680211,TP=121 TN=5 FP=3FN=8,SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.008128861011224491 # SSO:000006161__Polyribonucleotide nucleotidyltransferase (EC 2.7.7.8) // 0.007801559209531781 # SSO:000012290__CoA-binding protein // 0.006819758069173731 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.005997822332392071 # SSO:000000847__Alanine dehydrogenase (EC 1.4.1.1) // 0.005607297290585355 # SSO:000025307__Superoxide dismutase // 0.005543997955286842 # SSO:000029482__c-type cytochrome // 0.005462640842199833 # SSO:000006304__Prephenate dehydratase (EC 4.2.1.51) // 0.004868192877523228 # SSO:000013242__DoxX family protein // 0.004319847785986231 # SSO:000029148__YibE/F family protein // 0.004300579079706383 # SSO:000004083__Ketol-acid reductoisomerase (EC 1.1.1.86) // 0.0041199691088657665 # SSO:000002316__Dihydroxy-acid dehydratase (EC 4.2.1.9) // 0.004031911554475591 # SSO:000042364__uroporphyrinogen-III C-methyltransferase (EC 2.1.1.107) // 0.003916573801270422 # SSO:000006853__Pyruvate oxidase (EC 1.2.3.3) // 0.003642682664540713 # SSO:000001967__Cytosine deaminase (EC 3.5.4.1) // 0.0035696349830918036 # SSO:000002313__Dihydropteroate synthase (EC 2.5.1.15) // 0.0034843727577425036 # SSO:000020985__Oxygen-independent coproporphyrinogen III oxidase // 0.0033762955382074184 # SSO:000002839__Flagellin // 0.003362558459580918 # SSO:000025241__Sugar kinase // 0.003272764464967433 # SSO:000005425__Nucleoside diphosphate kinase (EC 2.7.4.6) // 0.003101086181464383
CatBoost,0.905109,0.889408,0.905109,0.894751,0.637717,TP=120 TN=4 FP=4FN=9,SSO:000002839__Flagellin // 0.5573797876767007 # SSO:000000913__Alpha-L-fucosidase (EC 3.2.1.51) // 0.3986833437683793 # SSO:000002538__Enoyl-CoA hydratase (EC 4.2.1.17) // 0.3762264146510528 # SSO:000012290__CoA-binding protein // 0.2638187689851313 # SSO:000025241__Sugar kinase // 0.2576304136259988 # SSO:000025307__Superoxide dismutase // 0.2518645376152835 # SSO:000000686__Acetolactate synthase small subunit (EC 2.2.1.6) // 0.22101415452868411 # SSO:000043945__histidinol-phosphate transaminase (EC 2.6.1.9) // 0.2066378253555224 # SSO:000025721__Toxic anion resistance protein // 0.19644187562081755 # SSO:000034080__3-oxoacid CoA-transferase subunit B // 0.19035519496058279 # SSO:000029128__YeeE/YedE family protein // 0.17570516132912897 # SSO:000025597__Thioesterase family protein // 0.17257489550273386 # SSO:000017367__Glycosidase // 0.1643500225800233 # SSO:000009528__2-dehydro-3-deoxygalactonokinase (EC 2.7.1.58) // 0.15151212719329393 # SSO:000029148__YibE/F family protein // 0.14502829897184463 # SSO:000001967__Cytosine deaminase (EC 3.5.4.1) // 0.14289395469533356 # SSO:000012762__DNA helicase // 0.12362595543345033 # SSO:000029506__carbohydrate kinase // 0.11822933064911748 # SSO:000031624__prepilin-type N-terminal cleavage/methylation domain-containing protein // 0.10866813731233578 # SSO:000025362__TM2 domain containing protein // 0.10822966982034203


Numer of genomes:476
Number of genomic features :3503
Shape of y:(476,)
Count of 1: 360
Count of 0: 116
Running Decision Tree for phenotype esculin--builds_acid_from




Running Random Forest for phenotype esculin--builds_acid_from




Running CatBoost for phenotype esculin--builds_acid_from
