# Import Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Import Dataset

In [3]:
# Common URL prefix shared by every CSV listed below.
DATA_BASE_URL = 'https://raw.githubusercontent.com/safaet/antimicrobial_resistance/main/Data/Read%20Data/'

# File-name stems in their original order; later cells index into `dataset`
# by position, so the order must not change.
DATASET_STEMS = (
    'doripenem_PA',
    'clindamycin_CJ',
    'doripenem_EcS',
    'doripenem_KN',
    'ertapenem_EcS',
    'ertapenem_KN',
    'imipenem_EcS',
    'imipenem_KN',
    'kanamycin_SE',
    'meropenem_EcS',
    'meropenem_KN',
    'streptomycin_SE',
)

# Full download URLs: <base>amr_ast_<stem>.csv
dataset = [f'{DATA_BASE_URL}amr_ast_{stem}.csv' for stem in DATASET_STEMS]

In [5]:
# Load the first CSV in the `dataset` list into a DataFrame.
data = pd.read_csv(filepath_or_buffer=dataset[0])

# Add a sum column and scale it with the Min-Max method

In [6]:
# Select every column except the first and the last one (by position).
# NOTE(review): this cell is not idempotent. After it runs once, 'sum' becomes
# the last column, so re-running it without reloading `data` slices a
# different set of columns.
df2 = data.iloc[:, 1:-1]

# Row-wise total of the selected columns, stored as a new 'sum' column.
data['sum'] = df2.sum(axis='columns')

In [7]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale the 'sum' column. The scaler needs a 2-D input, hence the
# reshape to a single-column array.
# Note: the name `scaler` is rebound to a StandardScaler in a later cell.
scaler = MinMaxScaler()
sum_2d = data['sum'].to_numpy().reshape(-1, 1)
scaled_column = scaler.fit(sum_2d).transform(sum_2d)
data['sum'] = scaled_column

# Split the dataset

In [8]:
# Features: every column after the first one, minus the label itself.
# The previous `data.iloc[:, 1:]` kept 'doripenem' inside X whenever the label
# was not the first column, which leaks the target into the features.
# `errors='ignore'` keeps this a no-op if the positional slice already
# excluded the label.
# Any hand-built feature vector passed to the model must have X.shape[1] entries.
X = data.iloc[:, 1:].drop(columns=['doripenem'], errors='ignore')

# Target: the 'doripenem' label column.
y = data['doripenem']

In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Implement Algorithm
## Standardize the features

In [10]:
# Fit the standardizer on the training split only, then apply that same
# fitted transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Create individual models (LogR, Gradient Boosting, Neural Network, XGBoost)

In [11]:
# Base learners for the four-model ensemble; all share random_state=42.
log_reg_model = LogisticRegression(random_state=42, max_iter=1000)
gb_model = GradientBoostingClassifier(random_state=42)
nn_model = MLPClassifier(
    random_state=42,
    max_iter=1000,
    hidden_layer_sizes=(64, 32),  # two hidden layers: 64 then 32 units
)
xgb_model = XGBClassifier(random_state=42)

### Create an ensemble model using a VotingClassifier

In [12]:
# Voting ensemble over LogR, gradient boosting, the MLP and XGBoost.
# voting='soft' combines predicted class probabilities; 'hard' would take a
# majority vote over predicted labels instead.
# NOTE(review): `ensemble_model` is reassigned by the later LogR/RF/SVC cell
# before `fit` is called, so as written this four-model ensemble is never trained.
ensemble_model = VotingClassifier(
    voting='soft',
    estimators=[
        ('log_reg', log_reg_model),
        ('gb', gb_model),
        ('nn', nn_model),
        ('xgb', xgb_model),
    ],
)

## Create individual models (LogR, Random Forest, SVC)

In [13]:
# Base learners for the three-model ensemble, all seeded with 42.
# SVC needs probability=True so it can take part in soft voting.
clf1, clf2, clf3 = (
    LogisticRegression(random_state=42),
    RandomForestClassifier(random_state=42, n_estimators=100),
    SVC(random_state=42, probability=True),
)

### Create an ensemble model using a VotingClassifier

In [14]:
# Soft-voting ensemble over logistic regression, random forest and SVC.
# This rebinds `ensemble_model`; it is the ensemble that the following cells
# fit, evaluate and pickle.
ensemble_model = VotingClassifier(
    voting='soft',
    estimators=[
        ('lr', clf1),
        ('rf', clf2),
        ('svc', clf3),
    ],
)

## Train the ensemble model


In [15]:
# Train every base estimator of the ensemble on the standardized training split.
ensemble_model.fit(X=X_train_scaled, y=y_train)

## Make Prediction

In [16]:
# Predict labels for the standardized held-out split.
y_pred = ensemble_model.predict(X=X_test_scaled)

# Evaluate the ensemble model

In [17]:
# Print a heading, then the per-class precision/recall/F1 table on its own lines.
print(
    "Ensemble Model Classification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

Ensemble Model Classification Report:
              precision    recall  f1-score   support

           0       0.60      1.00      0.75         3
           1       1.00      0.67      0.80         6

    accuracy                           0.78         9
   macro avg       0.80      0.83      0.77         9
weighted avg       0.87      0.78      0.78         9



In [19]:
### Create a Pickle file using serialization
import pickle

# Serialize the fitted ensemble to models.pkl. The `with` block closes the
# file even if `pickle.dump` raises, which the manual open()/close() pair did not.
# NOTE(review): only the ensemble is saved. The StandardScaler fitted on the
# training split is not, so whoever loads this file has to apply the same
# scaling before calling predict.
with open("models.pkl", "wb") as pickle_out:
    pickle.dump(ensemble_model, pickle_out)

In [22]:
# Hand-written feature vector for a single sample.
# It must contain one value per feature column the model was trained on.
sample = [[1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
]]

# The ensemble was fitted on StandardScaler output, so raw values must pass
# through the same fitted scaler first. Previously the unscaled vector was fed
# straight to predict.
sample_scaled = scaler.transform(sample)
ensemble_model.predict(sample_scaled)

array([0])