In [162]:
import pandas as pd
import numpy as np
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [163]:
data = pd.read_csv("isolates.csv")
data.head(10)

Unnamed: 0,#Organism group,Isolate,AMR genotypes,AST phenotypes
0,Listeria monocytogenes,PDT000077416.3,"fosX=COMPLETE,lin=COMPLETE","chloramphenicol=S,clindamycin=R,erythromycin=S..."
1,Listeria monocytogenes,PDT000095192.3,"fosX=COMPLETE,lin=COMPLETE","ampicillin=S,penicillin=S"
2,Salmonella enterica,PDT000003687.3,"mdsA=COMPLETE,mdsB=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
3,Salmonella enterica,PDT000003688.4,"mdsA=COMPLETE,mdsB=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
4,Salmonella enterica,PDT000003689.4,"mdsA=COMPLETE,mdsB=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
5,Salmonella enterica,PDT000003690.3,"aph(3'')-Ib=COMPLETE,aph(6)-Id=COMPLETE,mdsA=C...","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
6,Salmonella enterica,PDT000003691.3,"mdsA=COMPLETE,mdsB=COMPLETE,tet(B)=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
7,Salmonella enterica,PDT000003692.3,"mdsA=COMPLETE,mdsB=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
8,Salmonella enterica,PDT000003693.3,"aph(3'')-Ib=COMPLETE,aph(6)-Id=COMPLETE,mdsA=C...","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."
9,Salmonella enterica,PDT000003694.4,"fosA7=COMPLETE,mdsA=COMPLETE,mdsB=COMPLETE","amikacin=S,amoxicillin-clavulanic acid=S,ampic..."


In [164]:
def transform_dataframe(df):
    new_data = []

    for idx, row in df.iterrows():
        # split AMR genotypes and remove "=COMPLETE"
        amr_genotypes = [i.split('=')[0] for i in row['AMR genotypes'].split(',')]

        # split AST phenotypes
        ast_phenotypes = row['AST phenotypes'].split(',')

        # process each phenotype
        for pheno in ast_phenotypes:
            drug, resistance = pheno.split('=')
            if resistance == 'R':
                resistance_score = 1
            elif resistance == 'S':
                resistance_score = 0
            else:  # assuming 'I' as per your description
                resistance_score = 0.5

            # create a new row
            new_row = {'#Organism group': row['#Organism group'],
                       'Isolate': row['Isolate'],
                       'AMR genotypes': ', '.join(amr_genotypes),
                       'drug': drug,
                       'resistance': resistance_score}
            new_data.append(new_row)

    # create a new dataframe
    new_df = pd.DataFrame(new_data)

    return new_df


In [165]:
df = transform_dataframe(data)


In [166]:
df.head(10)

Unnamed: 0,#Organism group,Isolate,AMR genotypes,drug,resistance
0,Listeria monocytogenes,PDT000077416.3,"fosX, lin",chloramphenicol,0.0
1,Listeria monocytogenes,PDT000077416.3,"fosX, lin",clindamycin,1.0
2,Listeria monocytogenes,PDT000077416.3,"fosX, lin",erythromycin,0.0
3,Listeria monocytogenes,PDT000077416.3,"fosX, lin",gentamicin,0.0
4,Listeria monocytogenes,PDT000077416.3,"fosX, lin",levofloxacin,0.0
5,Listeria monocytogenes,PDT000077416.3,"fosX, lin",oxacillin,1.0
6,Listeria monocytogenes,PDT000077416.3,"fosX, lin",penicillin,0.0
7,Listeria monocytogenes,PDT000077416.3,"fosX, lin",rifampin,0.0
8,Listeria monocytogenes,PDT000077416.3,"fosX, lin",tetracycline,0.0
9,Listeria monocytogenes,PDT000077416.3,"fosX, lin",trimethoprim-sulfamethoxazole,0.0


In [167]:
df.shape

(316071, 5)

In [168]:
df.to_csv("BasicData.csv",index=False)

In [169]:
unique_all_genes = set()
unique_all_antibiotics = set()

def create_empty_gene_antibiotic_df(df):
    for idx, row in df.iterrows():
        genotypes = row['AMR genotypes'].split(', ')
        antibiotic = row['drug']
        
        unique_all_genes.update(genotypes)
        unique_all_antibiotics.add(antibiotic)
    
    gene_antibiotic_df = pd.DataFrame(index=unique_all_genes, columns=unique_all_antibiotics)
    gene_antibiotic_df = gene_antibiotic_df.fillna(-1)
    
    return gene_antibiotic_df

# Assuming your DataFrame is named 'new_dataframe'
gene_antibiotic_df = create_empty_gene_antibiotic_df(new_dataframe)
gene_antibiotic_df



Unnamed: 0,chloramphenicol,dicloxacillin,ciprofloxacin,ceftiofur,fosfomycin-glucose-6-phosphate,amoxicillin-clavulanic acid,benzylpenicillin,metronidazole,linezolid,piperacillin,...,trimethoprim-sulfamethoxazole,aztreonam,norfloxacin,neomycin,Imipenem-EDTA-PA,delafloxacin,zoliflodacin,vancomycin,ertapenem,cefiderocol
qnrB48,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
blaADC-155,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
oqxB19,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
blaACT-37,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
blaTEM-19,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
blaOXA-494,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
qnrB38,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
tmexD3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
tet(O),-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1


***********************************************

In [170]:
def gene_per_drug(drug):
    drug_df = df[df['drug'] == drug]
    unique_genes = set()
    for genes in drug_df['AMR genotypes'].str.split(', '):
        unique_genes.update(genes)
    return drug_df, unique_genes

In [171]:
def create_gene_df(drug_df, unique_genes):
    gene_arrays = []
    labels = []

    for idx, row in drug_df.iterrows():
        gene_array = np.zeros(len(unique_genes), dtype=int)
        genes = row['AMR genotypes'].split(', ')
        for gene in genes:
            gene_index = list(unique_genes).index(gene)
            gene_array[gene_index] = 1

        gene_arrays.append(gene_array.tolist())
        labels.append(row['resistance'])

    gene_df = pd.DataFrame(gene_arrays, columns=unique_genes)
    gene_df['label'] = labels

    return gene_df


In [172]:

def Model(drug_df_for_model):
    # Assuming your DataFrame with gene features and labels is named `oxacillin_df_for_model`
    # Split the data into features (genes) and labels
    features = drug_df_for_model.drop('label', axis=1).values
    labels = drug_df_for_model['label'].values

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

    # Define the neural network model
    model = keras.Sequential()
    model.add(keras.layers.Dense(64, activation='relu', input_shape=(features.shape[1],)))
    model.add(keras.layers.Dense(32, activation='relu'))
    model.add(keras.layers.Dense(16, activation='relu'))
    model.add(keras.layers.Dense(8, activation='relu'))
    model.add(keras.layers.Dense(1, activation='sigmoid'))  # Sigmoid activation for probability between 0 and 1

    # Compile the model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['mse'])

    # Train the model
    model.fit(X_train, y_train, epochs=10, batch_size=32)

    # Evaluate the model
    loss, mse = model.evaluate(X_test, y_test)
    pred = model.predict(X_test)
    print('Loss:', loss)
    print('Mean Squared Error:', mse)
    print('len of pred: ', pred.shape, 'y_test: ', y_test.shape)
    # Make predictions
    return y_test, pred, model

In [173]:
def evaluation(y_test, predictions, drug):
    # Calculate mean squared error
    mse = mean_squared_error(y_test, predictions)

    # Calculate mean absolute error
    mae = mean_absolute_error(y_test, predictions)

    # Calculate R^2 score
    r2 = r2_score(y_test, predictions)

    
    print('Mean Squared Error:', mse)
    print('Mean Absolute Error:', mae)
    print('R^2 Score:', r2)


In [180]:
def Create_df_one_gene(drug_df_for_model):
    # Extract the column names (excluding the label column)
    column_names = drug_df_for_model.columns[:-1]

    # Create a new DataFrame with zeros
    df_one_gene = pd.DataFrame(0, index=np.arange(len(column_names)), columns=column_names)

    # Set the value at the corresponding index position in each row
    for i in range(len(column_names)):
        df_one_gene.iloc[i, i] = 1

    # Display the new DataFrame
    return df_one_gene

In [181]:
def PredR_Antibiotic(drug):
    print('************************\n', drug)
    
    drug_df,drug_genes =   gene_per_drug(drug)
    drug_df_for_model = create_gene_df(drug_df, drug_genes)
    y_test, predictions, model = Model(drug_df_for_model)
    evaluation(y_test, predictions, drug)
    df_gene = Create_df_one_gene(drug_df_for_model)
    pred = model.predict(df_gene)
    for i, col in enumerate(df_gene.columns):
        gene_antibiotic_df.loc[col, drug] = pred[i]

In [182]:
antibiotic_one_sample = []
for antibiotic in unique_all_antibiotics:
    if len(df[df['drug'] == antibiotic]) == 1:
        antibiotic_one_sample.append(antibiotic)
        continue
    PredR_Antibiotic(antibiotic)

************************
 chloramphenicol
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.06305903941392899
Mean Squared Error: 0.00778650026768446
len of pred:  (2455, 1) y_test:  (2455,)
Mean Squared Error: 0.007786501268264286
Mean Absolute Error: 0.016110380873614213
R^2 Score: 0.8704039870475323
************************
 ciprofloxacin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.08848624676465988
Mean Squared Error: 0.016445808112621307
len of pred:  (4257, 1) y_test:  (4257,)
Mean Squared Error: 0.016445806893712712
Mean Absolute Error: 0.03268668112290046
R^2 Score: 0.8977710567933166
************************
 ceftiofur
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.059577591717243195
Mean Squared Error: 0.008957244455814362
len of pred:  (971, 1) y_test:  (971,)
Me

Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.055715397000312805
Mean Squared Error: 0.009895863011479378
len of pred:  (168, 1) y_test:  (168,)
Mean Squared Error: 0.009895862792818939
Mean Absolute Error: 0.023048485401513323
R^2 Score: 0.47671975369644637
************************
 piperacillin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6618738174438477
Mean Squared Error: 0.2337503731250763
len of pred:  (9, 1) y_test:  (9,)
Mean Squared Error: 0.23375036433889523
Mean Absolute Error: 0.46725347969267106
R^2 Score: -0.05187663952502852
************************
 moxifloxacin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.39162445068359375
Mean Squared Error: 0.11697658151388168
len of pred:  (97, 1) y_test:  (97,)
Mean Squared Error: 0.11697656688893372
Mean Absolute Error: 0.221393554173794
R^2 Score: 0.4563435327942813

Epoch 9/10
Epoch 10/10
Loss: 0.2458273470401764
Mean Squared Error: 0.0546860471367836
len of pred:  (303, 1) y_test:  (303,)
Mean Squared Error: 0.05468604532979819
Mean Absolute Error: 0.09365488871162747
R^2 Score: 0.7560116080338505
************************
 clindamycin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.14295119047164917
Mean Squared Error: 0.029567478224635124
len of pred:  (731, 1) y_test:  (731,)
Mean Squared Error: 0.029567481516394947
Mean Absolute Error: 0.040861862909798624
R^2 Score: 0.6244914738842717
************************
 tylosin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6931673288345337
Mean Squared Error: 1.0096913683810271e-05
len of pred:  (2, 1) y_test:  (2,)
Mean Squared Error: 1.0096913334312063e-05
Mean Absolute Error: 0.003116592764854431
R^2 Score: 0.0
************************
 azithromycin
Epoch 1/

************************
 cefalexin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6936531662940979
Mean Squared Error: 0.0002528541081119329
len of pred:  (1, 1) y_test:  (1,)
Mean Squared Error: 0.00025285410018582866
Mean Absolute Error: 0.015901386737823486
R^2 Score: nan
************************
 cefazolin




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.24001096189022064
Mean Squared Error: 0.05183839052915573
len of pred:  (840, 1) y_test:  (840,)
Mean Squared Error: 0.05183838690301729
Mean Absolute Error: 0.10464577391034081
R^2 Score: 0.7780346791629293
************************
 ceftazidime
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.2684631049633026
Mean Squared Error: 0.058816853910684586
len of pred:  (1158, 1) y_test:  (1158,)
Mean Squared Error: 0.05881685390012083
Mean Absolute Error: 0.10304216690040559
R^2 Score: 0.7486350481453997
************************
 polymyxin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6859470009803772
Mean Squared Error: 0.2454104870557785
len of pred:  (3, 1) y_test:  (3,)
Mean Squared Error: 0.2454104848183567
Mean Absolute Error: 

Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.1120484247803688
Mean Squared Error: 0.02324584499001503
len of pred:  (157, 1) y_test:  (157,)
Mean Squared Error: 0.023245844758988197
Mean Absolute Error: 0.03945184873927171
R^2 Score: 0.9028506565845541
************************
 teicoplanin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.04249093309044838
Mean Squared Error: 0.011640968732535839
len of pred:  (59, 1) y_test:  (59,)
Mean Squared Error: 0.011640969891969503
Mean Absolute Error: 0.02966857209043988
R^2 Score: 0.3013411001043822
************************
 oxytetracycline
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6499327421188354
Mean Squared Error: 0.22843167185783386
len of pred:  (2, 1) y_test:  (2,)
Mean Squared Error: 0.22843166284019745
Mean Absolute Error: 0.47763824462890625
R^2 Score: 0.0
***

Mean Squared Error: 0.018760241075123092
Mean Absolute Error: 0.034180899688729435
R^2 Score: 0.9234305849625694
************************
 cefoxitin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.11033368110656738
Mean Squared Error: 0.020015127956867218
len of pred:  (2785, 1) y_test:  (2785,)
Mean Squared Error: 0.020015126899787943
Mean Absolute Error: 0.03934879588502143
R^2 Score: 0.8580929637020556
************************
 chlortetracycline
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6186875104904175
Mean Squared Error: 0.21284550428390503
len of pred:  (2, 1) y_test:  (2,)
Mean Squared Error: 0.21284550833183724
Mean Absolute Error: 0.46133242547512054
R^2 Score: 0.0
************************
 sulfisoxazole
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.0567964352

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.4887636601924896
Mean Squared Error: 0.050790123641490936
len of pred:  (376, 1) y_test:  (376,)
Mean Squared Error: 0.05079012488012053
Mean Absolute Error: 0.11330102159091261
R^2 Score: 0.46155974016820045
************************
 cefalotin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.47588735818862915
Mean Squared Error: 0.14339540898799896
len of pred:  (2, 1) y_test:  (2,)
Mean Squared Error: 0.1433954086487148
Mean Absolute Error: 0.3786519765853882
R^2 Score: 0.0
************************
 doripenem
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.3306127190589905
Mean Squared Error: 0.08954421430826187
len of pred:  (251, 1) y_test:  (251,)
Mean Squared Error: 0.08954422234073499
Mean Absolute Error: 0.14219858803122168
R^2 Score: 0.6282821

Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.21221891045570374
Mean Squared Error: 0.04823889583349228
len of pred:  (131, 1) y_test:  (131,)
Mean Squared Error: 0.0482388964614257
Mean Absolute Error: 0.0897803302029617
R^2 Score: 0.18319910984259846
************************
 sulbactam
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.7471866011619568
Mean Squared Error: 0.2769944667816162
len of pred:  (1, 1) y_test:  (1,)
Mean Squared Error: 0.27699446431288166
Mean Absolute Error: 0.5263026356697083
R^2 Score: nan
************************
 sulfonamide




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.529464602470398
Mean Squared Error: 0.16898661851882935
len of pred:  (1, 1) y_test:  (1,)
Mean Squared Error: 0.16898662167585954
Mean Absolute Error: 0.4110798239707947
R^2 Score: nan
************************
 cephalothin




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.35553932189941406
Mean Squared Error: 0.07682547718286514
len of pred:  (67, 1) y_test:  (67,)
Mean Squared Error: 0.0768254751744014
Mean Absolute Error: 0.17253452094633187
R^2 Score: 0.5535669151354202
************************
 sulfadimethoxine
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6931543946266174
Mean Squared Error: 3.6004025787406135e-06
len of pred:  (2, 1) y_test:  (2,)
Mean Squared Error: 3.600402639136746e-06
Mean Absolute Error: 0.0016491413116455078
R^2 Score: 0.0
************************
 ampicillin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.08591552078723907
Mean Squared Error: 0.015141676180064678
len of pred:  (3275, 1) y_test:  (3275,)
Mean Squared Error: 0.015141674482295317
Mean Absolute Error: 0

Loss: 0.37511545419692993
Mean Squared Error: 0.0992983877658844
len of pred:  (2, 1) y_test:  (2,)
Mean Squared Error: 0.09929838948460556
Mean Absolute Error: 0.3108494281768799
R^2 Score: 0.0
************************
 cefuroxime
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.228299081325531
Mean Squared Error: 0.060718417167663574
len of pred:  (62, 1) y_test:  (62,)
Mean Squared Error: 0.06071841643443077
Mean Absolute Error: 0.13911722360118742
R^2 Score: 0.7114937048529643
************************
 tulathromycin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.5373148918151855
Mean Squared Error: 0.17282292246818542
len of pred:  (2, 1) y_test:  (2,)
Mean Squared Error: 0.1728229128561054
Mean Absolute Error: 0.41559898853302
R^2 Score: 0.0
************************
 erythromycin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6

Loss: 0.4451563060283661
Mean Squared Error: 0.12919485569000244
len of pred:  (90, 1) y_test:  (90,)
Mean Squared Error: 0.12919484921694216
Mean Absolute Error: 0.2903639045026567
R^2 Score: 0.4616881282627411
************************
 quinupristin-dalfopristin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6355189681053162
Mean Squared Error: 0.17169161140918732
len of pred:  (5, 1) y_test:  (5,)
Mean Squared Error: 0.17169161122920934
Mean Absolute Error: 0.37114242315292356
R^2 Score: -0.0730725701825583
************************
 tiamulin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6931477785110474
Mean Squared Error: 2.939221417364024e-07
len of pred:  (2, 1) y_test:  (2,)
Mean Squared Error: 2.9392214884182977e-07
Mean Absolute Error: 0.0005219578742980957
R^2 Score: 0.0
************************
 fusidic acid
Epoch 1/10
Epoch 2/10
Epo

Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6661899089813232
Mean Squared Error: 0.2366127073764801
len of pred:  (15, 1) y_test:  (15,)
Mean Squared Error: 0.2366126927537285
Mean Absolute Error: 0.48566083908081054
R^2 Score: 0.0
************************
 gentamicin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.11337442696094513
Mean Squared Error: 0.018868878483772278
len of pred:  (4159, 1) y_test:  (4159,)
Mean Squared Error: 0.018868876967685272
Mean Absolute Error: 0.037545050553288224
R^2 Score: 0.8553235991208604
************************
 oxacillin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.19076071679592133
Mean Squared Error: 0.05496927350759506
len of pred:  (130, 1) y_test:  (130,)
Mean Squared Error: 0.054969276112092555
Mean Absolute Error: 0.1058766761651406
R^2 Score: 0.7097842029695832
************************
 t

Loss: 0.3626193404197693
Mean Squared Error: 0.07381698489189148
len of pred:  (866, 1) y_test:  (866,)
Mean Squared Error: 0.07381697677672178
Mean Absolute Error: 0.12430518713590488
R^2 Score: 0.5880109425523454
************************
 polymyxin B
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.4595767557621002
Mean Squared Error: 0.026539338752627373
len of pred:  (181, 1) y_test:  (181,)
Mean Squared Error: 0.026539339201568847
Mean Absolute Error: 0.09735782386848281
R^2 Score: 0.6529120592484643
************************
 tilmicosin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.693149209022522
Mean Squared Error: 1.0082978860737057e-06
len of pred:  (2, 1) y_test:  (2,)
Mean Squared Error: 1.0082979153835936e-06
Mean Absolute Error: 0.000994950532913208
R^2 Score: 0.0
************************
 nalidixic acid
Epoch 1/10
Epoch 2/10
Epoch 

Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6629818081855774
Mean Squared Error: 0.20767971873283386
len of pred:  (9, 1) y_test:  (9,)
Mean Squared Error: 0.20767972717566727
Mean Absolute Error: 0.42555905050701565
R^2 Score: 0.06544122770949734
************************
 cefotaxime
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.19371429085731506
Mean Squared Error: 0.04253482073545456
len of pred:  (525, 1) y_test:  (525,)
Mean Squared Error: 0.04253482067401421
Mean Absolute Error: 0.07140247552596043
R^2 Score: 0.7372937615928838
************************
 ceftaroline
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.2588144540786743
Mean Squared Error: 0.0753331109881401
len of pred:  (72, 1) y_test:  (72,)
Mean Squared Error: 0.0753331172106953
Mean Absolute Error: 0.13233901146385404
R^2 Score: 0.042

Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.08277582377195358
Mean Squared Error: 0.015682848170399666
len of pred:  (778, 1) y_test:  (778,)
Mean Squared Error: 0.01568284871164675
Mean Absolute Error: 0.03337469662522902
R^2 Score: 0.8543664412759745
************************
 tigecycline
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.28303879499435425
Mean Squared Error: 0.03854838013648987
len of pred:  (442, 1) y_test:  (442,)
Mean Squared Error: 0.03854837632031747
Mean Absolute Error: 0.0865854001913532
R^2 Score: 0.30113539426108915
************************
 minocycline
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.3624809682369232
Mean Squared Error: 0.06455957144498825
len of pred:  (154, 1) y_test:  (154,)
Mean Squared Error: 0.06455956808440648
Mean Absolute Error: 0.1431731151544985
R^2 Score: 0.517955224969765
***********



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.19277428090572357
Mean Squared Error: 0.027843814343214035
len of pred:  (47, 1) y_test:  (47,)
Mean Squared Error: 0.027843814132731775
Mean Absolute Error: 0.12356298908274224
R^2 Score: 0.8232557890252744
************************
 enrofloxacin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.5893208384513855
Mean Squared Error: 0.11522936820983887
len of pred:  (3, 1) y_test:  (3,)
Mean Squared Error: 0.11522936966283599
Mean Absolute Error: 0.28337250153223675
R^2 Score: -1.0741286539310475
************************
 doxycycline
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.4545571804046631
Mean Squared Error: 0.10229488462209702
len of pred:  (47, 1) y_test:  (47,)
Mean Squared Error: 0.10229488087620871
Mean Absolute Error:

Loss: 0.6793636679649353
Mean Squared Error: 0.24311192333698273
len of pred:  (5, 1) y_test:  (5,)
Mean Squared Error: 0.24311192000358056
Mean Absolute Error: 0.49291799068450926
R^2 Score: 0.0
************************
 bacitracin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6944892406463623
Mean Squared Error: 0.0006701312377117574
len of pred:  (1, 1) y_test:  (1,)
Mean Squared Error: 0.0006701312432966233
Mean Absolute Error: 0.025886893272399902
R^2 Score: nan
************************
 mupirocin




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.485422819852829
Mean Squared Error: 0.0786757618188858
len of pred:  (79, 1) y_test:  (79,)
Mean Squared Error: 0.07867576339058979
Mean Absolute Error: 0.1999812092207655
R^2 Score: 0.48530876381481036
************************
 spectinomycin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.0378851555287838
Mean Squared Error: 0.002661979291588068
len of pred:  (97, 1) y_test:  (97,)
Mean Squared Error: 0.0026619794037431453
Mean Absolute Error: 0.008866408894729042
R^2 Score: 0.7306821052707606
************************
 eravacycline
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6934252381324768
Mean Squared Error: 0.00013898671022616327
len of pred:  (1, 1) y_test:  (1,)
Mean Squared Error: 0.0001389867054548688
Mean Absolute E



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.4674932062625885
Mean Squared Error: 0.13394354283809662
len of pred:  (25, 1) y_test:  (25,)
Mean Squared Error: 0.13394353771211884
Mean Absolute Error: 0.33984372854232786
R^2 Score: -0.6742942214014853
************************
 cefamandole
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.8010504841804504
Mean Squared Error: 0.3037584125995636
len of pred:  (1, 1) y_test:  (1,)
Mean Squared Error: 0.3037583989714818
Mean Absolute Error: 0.5511428117752075
R^2 Score: nan
************************
 synercid




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.18343107402324677
Mean Squared Error: 0.0322054959833622
len of pred:  (34, 1) y_test:  (34,)
Mean Squared Error: 0.032205495658900955
Mean Absolute Error: 0.1646291683701908
R^2 Score: 0.0
************************
 trimethoprim-sulfamethoxazole
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.08303695917129517
Mean Squared Error: 0.017328497022390366
len of pred:  (3311, 1) y_test:  (3311,)
Mean Squared Error: 0.01732849688766331
Mean Absolute Error: 0.031115396452138993
R^2 Score: 0.872344365858622
************************
 aztreonam
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.2341720312833786
Mean Squared Error: 0.05329693853855133
len of pred:  (847, 1) y_test:  (847,)
Mean Squared Error: 0.05329693509274234
Mean Absolute 

Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6931968927383423
Mean Squared Error: 2.4871527784853242e-05
len of pred:  (17, 1) y_test:  (17,)
Mean Squared Error: 2.4871526399242663e-05
Mean Absolute Error: 0.003312133690890144
R^2 Score: 0.0
************************
 delafloxacin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6756774187088013
Mean Squared Error: 0.24139179289340973
len of pred:  (13, 1) y_test:  (13,)
Mean Squared Error: 0.24139178755820723
Mean Absolute Error: 0.48714261788588303
R^2 Score: -0.019880302433425312
************************
 zoliflodacin
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.6931594014167786
Mean Squared Error: 0.0027533583343029022
len of pred:  (91, 1) y_test:  (91,)
Mean Squared Error: 0.002753358463932689
Mean Absolute Error: 0.006092990820224469
R^2 Score: -0.01335828621

************************
 cefiderocol
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss: 0.7282583713531494
Mean Squared Error: 0.2675324082374573
len of pred:  (4, 1) y_test:  (4,)
Mean Squared Error: 0.2675323857098224
Mean Absolute Error: 0.5170634984970093
R^2 Score: 0.0


In [183]:
antibiotic_one_sample

['dicloxacillin',
 'fosfomycin-glucose-6-phosphate',
 'fidaxomicin',
 'apramycin',
 'cephalexin',
 'ceftizoxime',
 'surotomycin',
 'rifaximin',
 'pefloxacin',
 'mecillinam']

In [184]:
gene_antibiotic_df

Unnamed: 0,chloramphenicol,dicloxacillin,ciprofloxacin,ceftiofur,fosfomycin-glucose-6-phosphate,amoxicillin-clavulanic acid,benzylpenicillin,metronidazole,linezolid,piperacillin,...,trimethoprim-sulfamethoxazole,aztreonam,norfloxacin,neomycin,Imipenem-EDTA-PA,delafloxacin,zoliflodacin,vancomycin,ertapenem,cefiderocol
qnrB48,-1.000000,-1,0.039699,-1.000000,-1,-1.000000,-1.0,-1.0,-1.000000,-1.000000,...,0.041179,0.282364,-1.000000,-1.000000,0.501277,0.519180,-1.0,-1.000000,0.655702,0.504569
blaADC-155,-1.000000,-1,0.006509,-1.000000,-1,-1.000000,-1.0,-1.0,-1.000000,-1.000000,...,0.004928,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.000000
oqxB19,0.017815,-1,0.022884,0.121675,-1,0.017955,-1.0,-1.0,0.058612,0.567754,...,0.111477,0.136410,0.426273,0.533598,0.503210,0.510799,-1.0,0.049670,0.116142,-1.000000
blaACT-37,-1.000000,-1,0.004970,-1.000000,-1,0.356468,-1.0,-1.0,-1.000000,-1.000000,...,0.007567,0.681791,-1.000000,-1.000000,-1.000000,-1.000000,-1.0,-1.000000,0.514734,-1.000000
blaTEM-19,-1.000000,-1,0.004653,-1.000000,-1,0.124477,-1.0,-1.0,-1.000000,-1.000000,...,0.010286,0.071376,-1.000000,-1.000000,-1.000000,0.523927,-1.0,-1.000000,0.075586,-1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
blaOXA-494,-1.000000,-1,0.017026,-1.000000,-1,0.422314,-1.0,-1.0,-1.000000,0.551732,...,0.034108,0.102046,-1.000000,-1.000000,0.501589,0.521212,-1.0,-1.000000,0.093444,0.498074
qnrB38,-1.000000,-1,0.002331,-1.000000,-1,0.693029,-1.0,-1.0,-1.000000,-1.000000,...,0.007532,0.047051,-1.000000,-1.000000,-1.000000,-1.000000,-1.0,-1.000000,0.289587,-1.000000
tmexD3,-1.000000,-1,0.039345,-1.000000,-1,-1.000000,-1.0,-1.0,-1.000000,-1.000000,...,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.000000
tet(O),-1.000000,-1,0.009538,-1.000000,-1,0.002270,-1.0,-1.0,-1.000000,-1.000000,...,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.0,-1.000000,-1.000000,-1.000000


### model - accuraccy & MSE

In [32]:
predictions

array([[0.00322947],
       [0.9983469 ],
       [0.00322947],
       ...,
       [0.00265715],
       [0.00217324],
       [0.0024083 ]], dtype=float32)

### Evaluation - continues (MSE)

Mean Squared Error: 0.01619927366086918
Mean Absolute Error: 0.03154599644390347
R^2 Score: 0.8993035344650906


### one examle test

In [17]:
y_test[1]

1.0

In [18]:
model.predict(X_test[1].reshape(1, -1))

array([[0.99743843]], dtype=float32)

### Test for each Gene seperate

Unnamed: 0,qnrB48,blaADC-155,oqxB19,blaACT-37,blaTEM-19,cmx,rmtF2,blaCTX-M-24,qnrB77,blaOXY-2-6,...,gyrA_D95N,mgrB_M27K,blaCDD,blaOXA-735,pmrB_T140P,blaOXA-494,qnrB38,tmexD3,tet(O),murA_D278E
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1100,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1101,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1102,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [34]:
pred_gene = model.predict(df_one_gene)
data = pd.DataFrame(data=pred_gene)

In [50]:
data

Unnamed: 0,0
0,0.062887
1,0.002434
2,0.015267
3,0.001975
4,0.001146
...,...
1099,0.016005
1100,0.001224
1101,0.049610
1102,0.007509


In [69]:
genes_list = list(oxacillin_genes)

# Access the element at index 29
gene_at_index = genes_list[1]

print(gene_at_index)

blaADC-155


In [48]:
data = pd.DataFrame(data=pred_gene)
data[data > 0.5].dropna()

Unnamed: 0,0
8,0.642578
12,0.824058
29,0.997132
42,0.636477
90,0.988201
95,0.888557
146,0.908533
150,0.604366
195,0.978971
227,0.639507


In [36]:
pred_gene.max()

0.9973835

In [37]:
pred_gene.min()

1.4436484e-05

In [None]:
# going over all the bacteria that are resistant to "chloramphenicol":
for gene_lst in df[(df.drug == "ciprofloxacin") & (df.resistance == 0)]['AMR genotypes'].values:
# going over all the genes of each bacteria :
    for g in gene_lst.split():
        print(g)
        v = np.zeros(X.shape[1])
        v[feature_names.index(g)]=1
        print(classifier.predict_proba([v]))
    print("***************************")