In [271]:
import pandas as pd
import numpy as np
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [272]:
data = pd.read_csv("isolates.csv")
data.head(10)

Unnamed: 0,#Organism group,Isolate,AMR genotypes,AST phenotypes
0,Listeria monocytogenes,PDT000077416.3,"fosX=COMPLETE,lin=COMPLETE","chloramphenicol=S,clindamycin=R,erythromycin=S..."
1,Listeria monocytogenes,PDT000095192.3,"fosX=COMPLETE,lin=COMPLETE","ampicillin=S,penicillin=S"
2,Salmonella enterica,PDT000003687.3,"mdsA=COMPLETE,mdsB=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
3,Salmonella enterica,PDT000003688.4,"mdsA=COMPLETE,mdsB=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
4,Salmonella enterica,PDT000003689.4,"mdsA=COMPLETE,mdsB=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
5,Salmonella enterica,PDT000003690.3,"aph(3'')-Ib=COMPLETE,aph(6)-Id=COMPLETE,mdsA=C...","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
6,Salmonella enterica,PDT000003691.3,"mdsA=COMPLETE,mdsB=COMPLETE,tet(B)=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
7,Salmonella enterica,PDT000003692.3,"mdsA=COMPLETE,mdsB=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
8,Salmonella enterica,PDT000003693.3,"aph(3'')-Ib=COMPLETE,aph(6)-Id=COMPLETE,mdsA=C...","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
9,Salmonella enterica,PDT000003694.4,"fosA7=COMPLETE,mdsA=COMPLETE,mdsB=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."


In [273]:
def transform_dataframe(df):
    new_data = []

    for idx, row in df.iterrows():
        # split AMR genotypes and remove "=COMPLETE"
        amr_genotypes = [i.split('=')[0] for i in row['AMR genotypes'].split(',')]

        # split AST phenotypes
        ast_phenotypes = row['AST phenotypes'].split(',')

        # process each phenotype
        for pheno in ast_phenotypes:
            drug, resistance = pheno.split('=')
            if resistance == 'R':
                resistance_score = 1
            elif resistance == 'S':
                resistance_score = 0
            else:  # assuming 'I' as per your description
                resistance_score = 0.5

            # create a new row
            new_row = {'#Organism group': row['#Organism group'],
                       'Isolate': row['Isolate'],
                       'AMR genotypes': ', '.join(amr_genotypes),
                       'drug': drug,
                       'resistance': resistance_score}
            new_data.append(new_row)

    # create a new dataframe
    new_df = pd.DataFrame(new_data)

    return new_df


In [274]:
df = transform_dataframe(data)


In [275]:
df.head(10)

Unnamed: 0,#Organism group,Isolate,AMR genotypes,drug,resistance
0,Listeria monocytogenes,PDT000077416.3,"fosX, lin",chloramphenicol,0.0
1,Listeria monocytogenes,PDT000077416.3,"fosX, lin",clindamycin,1.0
2,Listeria monocytogenes,PDT000077416.3,"fosX, lin",erythromycin,0.0
3,Listeria monocytogenes,PDT000077416.3,"fosX, lin",gentamicin,0.0
4,Listeria monocytogenes,PDT000077416.3,"fosX, lin",levofloxacin,0.0
5,Listeria monocytogenes,PDT000077416.3,"fosX, lin",oxacillin,1.0
6,Listeria monocytogenes,PDT000077416.3,"fosX, lin",penicillin,0.0
7,Listeria monocytogenes,PDT000077416.3,"fosX, lin",rifampin,0.0
8,Listeria monocytogenes,PDT000077416.3,"fosX, lin",tetracycline,0.0
9,Listeria monocytogenes,PDT000077416.3,"fosX, lin",trimethoprim-sulfamethoxazole,0.0


In [276]:
df.shape

(316071, 5)

In [277]:
df.to_csv("BasicData.csv",index=False)

In [278]:
unique_all_genes = set()
unique_all_antibiotics = set()

def create_empty_gene_antibiotic_df(df):
    for idx, row in df.iterrows():
        genotypes = row['AMR genotypes'].split(', ')
        antibiotic = row['drug']
        
        unique_all_genes.update(genotypes)
        unique_all_antibiotics.add(antibiotic)
    
    gene_antibiotic_df = pd.DataFrame(index=unique_all_genes, columns=unique_all_antibiotics)
    gene_antibiotic_df = gene_antibiotic_df.fillna(-1)
    
    return gene_antibiotic_df

# Assuming your DataFrame is named 'new_dataframe'
gene_antibiotic_df = create_empty_gene_antibiotic_df(new_dataframe)
gene_antibiotic_df



Unnamed: 0,chloramphenicol,dicloxacillin,ciprofloxacin,ceftiofur,fosfomycin-glucose-6-phosphate,amoxicillin-clavulanic acid,benzylpenicillin,metronidazole,linezolid,piperacillin,...,trimethoprim-sulfamethoxazole,aztreonam,norfloxacin,neomycin,Imipenem-EDTA-PA,delafloxacin,zoliflodacin,vancomycin,ertapenem,cefiderocol
qnrB48,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
blaADC-155,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
oqxB19,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
blaACT-37,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
blaTEM-19,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
blaOXA-494,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
qnrB38,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
tmexD3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
tet(O),-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1


***********************************************

In [279]:
def gene_per_drug(drug):
    drug_df = df[df['drug'] == drug]
    unique_genes = set()
    for genes in drug_df['AMR genotypes'].str.split(', '):
        unique_genes.update(genes)
    return drug_df, unique_genes

In [280]:
def create_gene_df(drug_df, unique_genes):
    gene_arrays = []
    labels = []

    for idx, row in drug_df.iterrows():
        gene_array = np.zeros(len(unique_genes), dtype=int)
        genes = row['AMR genotypes'].split(', ')
        for gene in genes:
            gene_index = list(unique_genes).index(gene)
            gene_array[gene_index] = 1

        gene_arrays.append(gene_array.tolist())
        labels.append(row['resistance'])

    gene_df = pd.DataFrame(gene_arrays, columns=unique_genes)
    gene_df['label'] = labels

    return gene_df


In [281]:

def Model(drug_df_for_model, epochs, batch_size):
    # Assuming your DataFrame with gene features and labels is named `oxacillin_df_for_model`
    # Split the data into features (genes) and labels
    features = drug_df_for_model.drop('label', axis=1).values
    labels = drug_df_for_model['label'].values

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

    # Define the neural network model
    model = keras.Sequential()
    model.add(keras.layers.Dense(64, activation='relu', input_shape=(features.shape[1],)))
    model.add(keras.layers.Dense(32, activation='relu'))
    model.add(keras.layers.Dense(16, activation='relu'))
    model.add(keras.layers.Dense(8, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))  # Sigmoid activation for probability between 0 and 1

    # Compile the model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['mse'])

    # Train the model
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)

    # Evaluate the model
    loss, mse = model.evaluate(X_test, y_test)
    pred = model.predict(X_test)
    print('Loss:', loss)
    print('Mean Squared Error:', mse)
    print('len of pred: ', pred.shape, 'y_test: ', y_test.shape)
    # Make predictions
    return y_test, pred, model

In [282]:
def evaluation(y_test, predictions, drug):
    # Calculate mean squared error
    mse = mean_squared_error(y_test, predictions)

    # Calculate mean absolute error
    mae = mean_absolute_error(y_test, predictions)

    # Calculate R^2 score
    r2 = r2_score(y_test, predictions)

    
    print('Mean Squared Error:', mse)
    print('Mean Absolute Error:', mae)
    print('R^2 Score:', r2)
    return r2


In [283]:
def Create_df_one_gene(drug_df_for_model):
    # Extract the column names (excluding the label column)
    column_names = drug_df_for_model.columns[:-1]

    # Create a new DataFrame with zeros
    df_one_gene = pd.DataFrame(0, index=np.arange(len(column_names)), columns=column_names)

    # Set the value at the corresponding index position in each row
    for i in range(len(column_names)):
        df_one_gene.iloc[i, i] = 1

    # Display the new DataFrame
    return df_one_gene

In [284]:
def PredR_Antibiotic(drug):
    print('************************\n', drug)
    
    drug_df,drug_genes =   gene_per_drug(drug)
    drug_df_for_model = create_gene_df(drug_df, drug_genes)
    y_test, predictions, model = Model(drug_df_for_model, 20, 10)
    r2 = evaluation(y_test, predictions, drug)
    df_gene = Create_df_one_gene(drug_df_for_model)
    pred = model.predict(df_gene)
    for i, col in enumerate(df_gene.columns):
        gene_antibiotic_df.loc[col, drug] = pred[i]
    return r2

In [285]:
antibiotic_few_samples = []
antibiotic_precent = {} 
for antibiotic in unique_all_antibiotics:
    if len(df[df['drug'] == antibiotic]) < 50:
        antibiotic_few_samples.append(antibiotic)
        continue   
    r2 = PredR_Antibiotic(antibiotic)
    antibiotic_precent[antibiotic] = r2

************************
 chloramphenicol
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.08884400129318237
Mean Squared Error: 0.008962753228843212
len of pred:  (2455, 1) y_test:  (2455,)
Mean Squared Error: 0.008962752681900058
Mean Absolute Error: 0.021740988646161148
R^2 Score: 0.8508268383147382
************************
 ciprofloxacin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.13792715966701508
Mean Squared Error: 0.01766994781792164
len of pred:  (4257, 1) y_test:  (4257,)
Mean Squared Error: 0.017669952370086433
Mean Absolute Error: 0.03426082227434104
R^2 Score: 0.8901616339665916
****************

Loss: 0.0913119688630104
Mean Squared Error: 0.010456227697432041
len of pred:  (971, 1) y_test:  (971,)
Mean Squared Error: 0.010456228171436235
Mean Absolute Error: 0.016321102735249643
R^2 Score: 0.9201845849123188
************************
 amoxicillin-clavulanic acid
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.15934565663337708
Mean Squared Error: 0.017963353544473648
len of pred:  (2932, 1) y_test:  (2932,)
Mean Squared Error: 0.017963354180902585
Mean Absolute Error: 0.04003121256732465
R^2 Score: 0.8420372349370869
************************
 linezolid
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.05

Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.5315083265304565
Mean Squared Error: 0.010045635513961315
len of pred:  (178, 1) y_test:  (178,)
Mean Squared Error: 0.010045635629526569
Mean Absolute Error: 0.030767884007845237
R^2 Score: 0.8592278110190537
************************
 cefpodoxime
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.10398753732442856
Mean Squared Error: 0.024456826969981194
len of pred:  (56, 1) y_test:  (56,)
Mean Squared Error: 0.02445682699228175
Mean Absolute Error: 0.0397644080221653
R^2 Score: 0.8956508714995979
************************
 nitrofurantoin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoc

Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.18558287620544434
Mean Squared Error: 0.03122231923043728
len of pred:  (731, 1) y_test:  (731,)
Mean Squared Error: 0.03122232015659518
Mean Absolute Error: 0.04209062051908887
R^2 Score: 0.6034749385937574
************************
 azithromycin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.03634196147322655
Mean Squared Error: 0.002866617636755109
len of pred:  (2468, 1) y_test:  (2468,)
Mean Squared Error: 0.0028666170951937415
Mean Absolute Error: 0.00425649405349193
R^2 Score: 0.7648399525166955
************************
 cefotetan
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Ep

Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.45551028847694397
Mean Squared Error: 0.01859632320702076
len of pred:  (180, 1) y_test:  (180,)
Mean Squared Error: 0.018596325155284494
Mean Absolute Error: 0.057485615373787574
R^2 Score: 0.816862937680481
************************
 cefazolin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.3298443853855133
Mean Squared Error: 0.06031707301735878
len of pred:  (840, 1) y_test:  (840,)
Mean Squared Error: 0.06031707071580407
Mean Absolute Error: 0.10674151340173185
R^2 Score: 0.7417300430579861
************************
 ceftazidime
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

Epoch 19/20
Epoch 20/20
Loss: 0.2208239585161209
Mean Squared Error: 0.04354272782802582
len of pred:  (1000, 1) y_test:  (1000,)
Mean Squared Error: 0.04354273480640977
Mean Absolute Error: 0.08920105249757204
R^2 Score: 0.7911055869681399
************************
 sulfamethoxazole
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.12764956057071686
Mean Squared Error: 0.02701125480234623
len of pred:  (157, 1) y_test:  (157,)
Mean Squared Error: 0.027011254578117316
Mean Absolute Error: 0.03533804342738326
R^2 Score: 0.8871142058162066
************************
 teicoplanin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.1189538761973381
Mean Squared Error: 0.020168181508779526
len of pred:  (4071, 1) y_test:  (4071,)
Mean Squared Error: 0.020168176485671627
Mean Absolute Error: 0.04019458254822581
R^2 Score: 0.9176841347776014
************************
 cefoxitin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.15081046521663666
Mean Squared Error: 0.022057775408029556
len of pred:  (2785, 1) y_test:  (2785,)
Mean Squared Error: 0.02205777417633074
Mean Absolute Error: 0.03528976247004801
R^2 Score: 0.8436106162921411
************************
 sulfisoxazole
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20


************************
 trimethoprim
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.08081331849098206
Mean Squared Error: 0.02441420778632164
len of pred:  (87, 1) y_test:  (87,)
Mean Squared Error: 0.024414205801380907
Mean Absolute Error: 0.04255963897661389
R^2 Score: 0.8977076536337382
************************
 colistin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.652277946472168
Mean Squared Error: 0.054162848740816116
len of pred:  (376, 1) y_test:  (376,)
Mean Squared Error: 0.054162842412810136
Mean Absolute Error: 0.10384983898827974
R^2 Score: 0.4258046222407105
************************
 doripen

Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.13446912169456482
Mean Squared Error: 0.018387820571660995
len of pred:  (2401, 1) y_test:  (2401,)
Mean Squared Error: 0.018387819518461863
Mean Absolute Error: 0.02739441570331449
R^2 Score: 0.7876013349833672
************************
 penicillin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.264810711145401
Mean Squared Error: 0.038317061960697174
len of pred:  (264, 1) y_test:  (264,)
Mean Squared Error: 0.038317063185237886
Mean Absolute Error: 0.07084454764637328
R^2 Score: 0.8113488248263394
************************
 daptomycin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoc

Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.13043396174907684
Mean Squared Error: 0.01588558591902256
len of pred:  (3275, 1) y_test:  (3275,)
Mean Squared Error: 0.01588558622276718
Mean Absolute Error: 0.022680108208005513
R^2 Score: 0.9327923145436982
************************
 temocillin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.006034639663994312
Mean Squared Error: 0.0007536220946349204
len of pred:  (68, 1) y_test:  (68,)
Mean Squared Error: 0.0007536220867544344
Mean Absolute Error: 0.00561279447255494
R^2 Score: 0.0
************************
 cefuroxime
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/

Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.07002595067024231
Mean Squared Error: 0.011923651210963726
len of pred:  (765, 1) y_test:  (765,)
Mean Squared Error: 0.011923651314354215
Mean Absolute Error: 0.023322541030381793
R^2 Score: 0.8576248668079339
************************
 cefepime
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.3849961459636688
Mean Squared Error: 0.07863057404756546
len of pred:  (998, 1) y_test:  (998,)
Mean Squared Error: 0.0786305637450245
Mean Absolute Error: 0.13086925546142186
R^2 Score: 0.6267699418955013
************************
 ceftolozane-tazobactam
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/

Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.09824595600366592
Mean Squared Error: 0.011431610211730003
len of pred:  (3350, 1) y_test:  (3350,)
Mean Squared Error: 0.011431608706264156
Mean Absolute Error: 0.01774323521515394
R^2 Score: 0.9311230654285423
************************
 ticarcillin-clavulanic acid
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.014783608727157116
Mean Squared Error: 0.0024331840686500072
len of pred:  (15, 1) y_test:  (15,)
Mean Squared Error: 0.002433184143476789
Mean Absolute Error: 0.013386066754659016
R^2 Score: 0.0
************************
 gentamicin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20


************************
 telithromycin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.0435878187417984
Mean Squared Error: 0.00796943623572588
len of pred:  (477, 1) y_test:  (477,)
Mean Squared Error: 0.007969437459411009
Mean Absolute Error: 0.01358275154242252
R^2 Score: 0.6993902294923195
************************
 imipenem
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.28296226263046265
Mean Squared Error: 0.050013020634651184
len of pred:  (1065, 1) y_test:  (1065,)
Mean Squared Error: 0.05001301848250768
Mean Absolute Error: 0.08034900487120529
R^2 Score: 0.7182736861244029
************************
 st

Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.5354993343353271
Mean Squared Error: 0.08403632789850235
len of pred:  (866, 1) y_test:  (866,)
Mean Squared Error: 0.08403631522476844
Mean Absolute Error: 0.1310363802738279
R^2 Score: 0.5309745289955519
************************
 polymyxin B
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.5067335367202759
Mean Squared Error: 0.03929107263684273
len of pred:  (181, 1) y_test:  (181,)
Mean Squared Error: 0.03929107149102575
Mean Absolute Error: 0.09583405056369779
R^2 Score: 0.4861417991546928
************************
 nalidixic acid
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 

Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.6345207691192627
Mean Squared Error: 0.12374252080917358
len of pred:  (99, 1) y_test:  (99,)
Mean Squared Error: 0.1237425208142644
Mean Absolute Error: 0.16339997249676658
R^2 Score: 0.4302088576459454
************************
 cefotaxime
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.231235533952713
Mean Squared Error: 0.04293729364871979
len of pred:  (525, 1) y_test:  (525,)
Mean Squared Error: 0.04293729201336789
Mean Absolute Error: 0.06603869915541799
R^2 Score: 0.7348079927580131
************************
 ceftaroline
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoc

Loss: 0.6286078095436096
Mean Squared Error: 0.13712739944458008
len of pred:  (28, 1) y_test:  (28,)
Mean Squared Error: 0.137127396732227
Mean Absolute Error: 0.17042450451897043
R^2 Score: -0.08868738266395937
************************
 florfenicol
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.02958829328417778
Mean Squared Error: 0.0009302502148784697
len of pred:  (539, 1) y_test:  (539,)
Mean Squared Error: 0.0009302502915217012
Mean Absolute Error: 0.0021594406069124048
R^2 Score: 0.6640680609780285
************************
 kanamycin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.08572622388601303
Mea

Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.4044978618621826
Mean Squared Error: 0.071756511926651
len of pred:  (154, 1) y_test:  (154,)
Mean Squared Error: 0.07175651338985281
Mean Absolute Error: 0.14045631861710556
R^2 Score: 0.4642180333557657
************************
 levofloxacin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.2785760760307312
Mean Squared Error: 0.03937237709760666
len of pred:  (1076, 1) y_test:  (1076,)
Mean Squared Error: 0.03937237298316839
Mean Absolute Error: 0.06315839602788534
R^2 Score: 0.8303854062778492
************************
 amoxicillin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 1

Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.46686190366744995
Mean Squared Error: 0.05957912653684616
len of pred:  (79, 1) y_test:  (79,)
Mean Squared Error: 0.05957912272821779
Mean Absolute Error: 0.14051755017872097
R^2 Score: 0.6102376258419211
************************
 spectinomycin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.03671509027481079
Mean Squared Error: 0.0027632846031337976
len of pred:  (97, 1) y_test:  (97,)
Mean Squared Error: 0.0027632846358285465
Mean Absolute Error: 0.0063247121624661565
R^2 Score: 0.720432847973002
************************
 meropenem-vaborbactam
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 

************************
 synercid
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.0004395211290102452
Mean Squared Error: 3.272152753197588e-07
len of pred:  (34, 1) y_test:  (34,)
Mean Squared Error: 3.2721525569119215e-07
Mean Absolute Error: 0.00043931342126768413
R^2 Score: 0.0
************************
 trimethoprim-sulfamethoxazole
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.14117173850536346
Mean Squared Error: 0.02047952078282833
len of pred:  (3311, 1) y_test:  (3311,)
Mean Squared Error: 0.020479522460801883
Mean Absolute Error: 0.031238185743270537
R^2 Score: 0.8491313791614852
******************

************************
 norfloxacin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.05719386041164398
Mean Squared Error: 0.006970186717808247
len of pred:  (23, 1) y_test:  (23,)
Mean Squared Error: 0.0069701861995849854
Mean Absolute Error: 0.05306738744611326
R^2 Score: 0.959030794449106
************************
 Imipenem-EDTA-PA
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.6931505799293518
Mean Squared Error: 1.6906886912693153e-06
len of pred:  (17, 1) y_test:  (17,)
Mean Squared Error: 1.6906886490024716e-06
Mean Absolute Error: 0.0007541092003093046
R^2 Score: 0.0
************************
 delafloxa

Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.6931474208831787
Mean Squared Error: 0.0027473827358335257
len of pred:  (91, 1) y_test:  (91,)
Mean Squared Error: 0.002747382618270931
Mean Absolute Error: 0.005580147544106284
R^2 Score: -0.011158909417848362
************************
 vancomycin
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Loss: 0.17736217379570007
Mean Squared Error: 0.037711746990680695
len of pred:  (168, 1) y_test:  (168,)
Mean Squared Error: 0.03771174526388843
Mean Absolute Error: 0.04237385432079812
R^2 Score: 0.2098171504617764
************************
 ertapenem
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoc

In [287]:
antibiotic_precent

{'chloramphenicol': 0.8508268383147382,
 'ciprofloxacin': 0.8901616339665916,
 'ceftiofur': 0.9201845849123188,
 'amoxicillin-clavulanic acid': 0.8420372349370869,
 'linezolid': 0.5219898616209999,
 'moxifloxacin': 0.697561348655386,
 'ceftazidime-clavulanic acid': 0.8592278110190537,
 'cefpodoxime': 0.8956508714995979,
 'nitrofurantoin': 0.7809815795597038,
 'clindamycin': 0.6034749385937574,
 'azithromycin': 0.7648399525166955,
 'cefotetan': 0.6374714974407787,
 'ampicillin-sulbactam': 0.5691193948696184,
 'cefotaxime-clavulanic acid': 0.816862937680481,
 'cefazolin': 0.7417300430579861,
 'ceftazidime': 0.7393697845528249,
 'tobramycin': 0.7911055869681399,
 'sulfamethoxazole': 0.8871142058162066,
 'teicoplanin': -0.6618370964132141,
 'rifampin': -0.019198334012066498,
 'tetracycline': 0.9176841347776014,
 'cefoxitin': 0.8436106162921411,
 'sulfisoxazole': 0.947007800584611,
 'amikacin': 0.648132360110915,
 'trimethoprim': 0.8977076536337382,
 'colistin': 0.4258046222407105,
 'doripe

In [294]:
gene_antibiotic_df[gene_antibiotic_df['chloramphenicol'] >0.9]


Unnamed: 0,chloramphenicol,dicloxacillin,ciprofloxacin,ceftiofur,fosfomycin-glucose-6-phosphate,amoxicillin-clavulanic acid,benzylpenicillin,metronidazole,linezolid,piperacillin,...,trimethoprim-sulfamethoxazole,aztreonam,norfloxacin,neomycin,Imipenem-EDTA-PA,delafloxacin,zoliflodacin,vancomycin,ertapenem,cefiderocol
floR,0.989191,-1,0.383982,0.029786,-1,0.00342,-1,-1,0.214189,-1,...,0.187397,0.21124,0.242684,-1,0.500366,-1.0,-1.0,0.026373,0.181844,-1
blaKPC-3,0.915442,-1,0.000643,-1.0,-1,0.999996,-1,-1,-1.0,-1,...,0.035525,0.999147,0.497124,-1,0.50032,-1.0,-1.0,-1.0,0.999213,-1
acrB_R717L,0.981041,-1,0.000885,0.052554,-1,0.000296,-1,-1,-1.0,-1,...,0.000421,-1.0,-1.0,-1,-1.0,-1.0,-1.0,-1.0,-1.0,-1
ramR_T50P,0.964853,-1,0.008513,0.006366,-1,0.06185,-1,-1,-1.0,-1,...,0.005394,-1.0,-1.0,-1,-1.0,-1.0,-1.0,-1.0,-1.0,-1
blaACT-46,0.910554,-1,0.034263,-1.0,-1,0.999488,-1,-1,-1.0,-1,...,0.449539,0.90536,-1.0,-1,-1.0,-1.0,-1.0,-1.0,0.996365,-1
catA,0.969188,-1,0.068733,-1.0,-1,0.953578,-1,-1,0.001171,-1,...,0.058672,0.358767,0.234233,-1,0.500712,0.588127,-1.0,0.001455,0.146779,-1
catA1,0.987824,-1,0.02553,0.038221,-1,0.090949,-1,-1,-1.0,-1,...,0.084136,0.074413,0.532254,-1,0.501128,0.565999,-1.0,-1.0,0.002486,-1
qnrA1,0.998487,-1,0.542146,0.068088,-1,0.000448,-1,-1,-1.0,-1,...,0.198038,0.032708,-1.0,-1,0.500406,-1.0,-1.0,-1.0,0.083709,-1
blaTEM-103,0.97865,-1,0.000827,0.015264,-1,0.000109,-1,-1,-1.0,-1,...,0.458142,0.05215,-1.0,-1,-1.0,-1.0,-1.0,-1.0,0.066118,-1
blaNDM-7,0.953085,-1,0.815491,-1.0,-1,0.940765,-1,-1,0.017239,-1,...,0.000313,0.036701,-1.0,-1,0.501057,-1.0,-1.0,0.051955,0.996282,-1


In [295]:

gene_antibiotic_df.to_csv("gene_antibiotic_df_precent.csv",index=False)

### model - accuraccy & MSE

In [32]:
predictions

array([[0.00322947],
       [0.9983469 ],
       [0.00322947],
       ...,
       [0.00265715],
       [0.00217324],
       [0.0024083 ]], dtype=float32)

### Evaluation - continues (MSE)

Mean Squared Error: 0.01619927366086918
Mean Absolute Error: 0.03154599644390347
R^2 Score: 0.8993035344650906


### one examle test

In [17]:
y_test[1]

1.0

In [18]:
model.predict(X_test[1].reshape(1, -1))

array([[0.99743843]], dtype=float32)

### Test for each Gene seperate

Unnamed: 0,qnrB48,blaADC-155,oqxB19,blaACT-37,blaTEM-19,cmx,rmtF2,blaCTX-M-24,qnrB77,blaOXY-2-6,...,gyrA_D95N,mgrB_M27K,blaCDD,blaOXA-735,pmrB_T140P,blaOXA-494,qnrB38,tmexD3,tet(O),murA_D278E
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1100,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1101,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1102,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [34]:
pred_gene = model.predict(df_one_gene)
data = pd.DataFrame(data=pred_gene)

In [50]:
data

Unnamed: 0,0
0,0.062887
1,0.002434
2,0.015267
3,0.001975
4,0.001146
...,...
1099,0.016005
1100,0.001224
1101,0.049610
1102,0.007509


In [69]:
genes_list = list(oxacillin_genes)

# Access the element at index 29
gene_at_index = genes_list[1]

print(gene_at_index)

blaADC-155


In [48]:
data = pd.DataFrame(data=pred_gene)
data[data > 0.5].dropna()

Unnamed: 0,0
8,0.642578
12,0.824058
29,0.997132
42,0.636477
90,0.988201
95,0.888557
146,0.908533
150,0.604366
195,0.978971
227,0.639507


In [36]:
pred_gene.max()

0.9973835

In [37]:
pred_gene.min()

1.4436484e-05

In [None]:
# going over all the bacteria that are resistant to "chloramphenicol":
for gene_lst in df[(df.drug == "ciprofloxacin") & (df.resistance == 0)]['AMR genotypes'].values:
# going over all the genes of each bacteria :
    for g in gene_lst.split():
        print(g)
        v = np.zeros(X.shape[1])
        v[feature_names.index(g)]=1
        print(classifier.predict_proba([v]))
    print("***************************")