In [None]:
# Code to find the best random states (train/test split seed and SMOTE seed)


import warnings
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from joblib import Parallel, delayed
import logging

# Silence scikit-learn's column-vector conversion warning.  The filter has to
# select on the warning *category*: the ``message`` argument is a regular
# expression matched against the start of the warning text, so passing the
# class name there never matches anything.
# NOTE(review): filters installed here may not reach joblib worker
# processes -- confirm for the backend in use.
from sklearn.exceptions import DataConversionWarning

warnings.filterwarnings("ignore", category=DataConversionWarning)
logging.basicConfig(level=logging.INFO, format='%(message)s')

# Number of candidate seeds tried for the split and for SMOTE
# (the search below evaluates num_runs * num_runs combinations).
num_runs = 100  # Adjust as needed

# Running best: highest mean accuracy seen and the seeds that produced it.
maxvalu = 0
splitran = 0
smoteran = 0

# Define the models outside the loop so every experiment scores the same
# (name, estimator) pairs.
models = [
    ("Support Vector Machine", SVC(random_state=357, gamma='auto')),
    ("Random Forest", RandomForestClassifier(random_state=42, n_estimators=100)),
    ("MultinomialNB", MultinomialNB())
]

def run_experiment(i, j):
    """Score one (split seed, SMOTE seed) pair by mean test accuracy.

    Reads the module-level ``data`` frame (target column ``'hospitalized'``)
    and the module-level ``models`` list of ``(name, estimator)`` pairs.
    The data is split 80/20, the training part is oversampled with SMOTE,
    every model is fitted on the resampled training set and scored on the
    untouched test set.

    NOTE(review): picking seeds by test-set accuracy tunes the pipeline to
    that particular test split -- confirm this is intended.

    Args:
        i: ``random_state`` passed to ``train_test_split``.
        j: ``random_state`` passed to ``SMOTE``.

    Returns:
        Tuple ``(mean_accuracy_percent, i, j)`` where the mean is taken over
        all entries of ``models``.
    """
    logging.info("%s %s", i, j)

    X = data.drop('hospitalized', axis=1)
    # Select the target as a 1-D Series; a one-column DataFrame makes the
    # estimators emit a column-vector conversion warning on every fit.
    y = data['hospitalized']

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=i
    )

    # Oversample the training portion only; the test set stays as drawn.
    smote = SMOTE(sampling_strategy='auto', random_state=j)
    X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

    total_accuracy = 0
    for _name, model in models:
        model.fit(X_resampled, y_resampled)
        predictions = model.predict(X_test)
        total_accuracy += accuracy_score(y_test, predictions) * 100

    # Average over however many models are configured rather than a fixed 3.
    mean_accuracy = total_accuracy / len(models)

    return mean_accuracy, i, j

# Evaluate every (split seed, SMOTE seed) combination on all available cores.
# The split seed is the outer loop, so results are ordered by split seed first.
results = Parallel(n_jobs=-1)(
    delayed(run_experiment)(split_seed, smote_seed)
    for split_seed in range(num_runs)
    for smote_seed in range(num_runs)
)

# Keep the earliest result with the highest mean accuracy, and only when it
# strictly beats the starting value of ``maxvalu``.
best_result = max(results, key=lambda outcome: outcome[0], default=None)
if best_result is not None and best_result[0] > maxvalu:
    maxvalu, splitran, smoteran = best_result

print(f"Best splitran: {splitran}")
print(f"Best smoteran: {smoteran}")

In [None]:
# Code snippet to find the best random states for better accuracy



import warnings
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from joblib import Parallel, delayed
import logging

# Silence scikit-learn's column-vector conversion warning.  The filter has to
# select on the warning *category*: the ``message`` argument is a regular
# expression matched against the start of the warning text, so passing the
# class name there never matches anything.
# NOTE(review): filters installed here may not reach joblib worker
# processes -- confirm for the backend in use.
from sklearn.exceptions import DataConversionWarning

warnings.filterwarnings("ignore", category=DataConversionWarning)
logging.basicConfig(level=logging.INFO, format='%(message)s')

num_runs = 100  # Adjust as needed

# Running best: highest mean accuracy seen and the seeds that produced it.
maxvalu = 0
splitran = 0
smoteran = 0
svm_random_state = 0
rf_random_state = 0

# Define the models outside the loop; the seeds given here are placeholders
# that each experiment overrides for the SVM and the random forest.
models = [
    ("Support Vector Machine", SVC(random_state=357, gamma='auto')),
    ("Random Forest", RandomForestClassifier(random_state=42, n_estimators=100)),
    ("MultinomialNB", MultinomialNB())
]

def run_experiment(i, j, k, l):
    """Score one combination of four seeds by mean test accuracy.

    Reads the module-level ``data`` frame (target column ``'hospitalized'``)
    and the module-level ``models`` list of ``(name, estimator)`` pairs.
    The data is split 80/20, the training part is oversampled with SMOTE,
    and a freshly seeded copy of every model is fitted on the resampled
    training set and scored on the untouched test set.

    NOTE(review): scikit-learn documents ``SVC.random_state`` as ignored
    unless ``probability=True``; with the estimator configured in ``models``
    the value of ``k`` is therefore not expected to change the score --
    confirm before spending a search dimension on it.

    Args:
        i: ``random_state`` passed to ``SMOTE``.
        j: ``random_state`` passed to ``train_test_split``.
        k: ``random_state`` applied to the "Support Vector Machine" model.
        l: ``random_state`` applied to the "Random Forest" model.

    Returns:
        Tuple ``(mean_accuracy_percent, i, j, k, l)`` where the mean is taken
        over all entries of ``models``.
    """
    # Imported here so the function does not depend on an extra file-level
    # import being present.
    from sklearn.base import clone

    logging.info(
        "SMOTE: %s, Data Split: %s, SVM Random State: %s, RF Random State: %s",
        i, j, k, l,
    )

    X = data.drop('hospitalized', axis=1)
    # Select the target as a 1-D Series; a one-column DataFrame makes the
    # estimators emit a column-vector conversion warning on every fit.
    y = data['hospitalized']

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=j
    )

    # Oversample the training portion only; the test set stays as drawn.
    smote = SMOTE(sampling_strategy='auto', random_state=i)
    X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

    # Seed to apply per model name; models not listed keep their parameters.
    seed_by_model = {"Support Vector Machine": k, "Random Forest": l}

    total_accuracy = 0
    for name, model in models:
        # Work on an unfitted copy so the shared estimators in ``models``
        # are never re-seeded or re-fitted by an experiment.
        estimator = clone(model)
        if name in seed_by_model:
            estimator.set_params(random_state=seed_by_model[name])

        estimator.fit(X_resampled, y_resampled)
        predictions = estimator.predict(X_test)
        total_accuracy += accuracy_score(y_test, predictions) * 100

    # Average over however many models are configured rather than a fixed 3.
    mean_accuracy = total_accuracy / len(models)

    return mean_accuracy, i, j, k, l

# Candidate seeds for each source of randomness.  With 100 values apiece the
# full grid below is 100**4 = 100,000,000 experiments, all of whose results
# are collected in ``results`` before the best one is picked.
smote_range = range(100)
split_range = range(100)
svm_random_state_range = range(100)
rf_random_state_range = range(100)

# Evaluate every combination on all available cores.  Nesting order is
# SMOTE seed (outermost), split seed, SVM seed, RF seed (innermost).
results = Parallel(n_jobs=-1)(
    delayed(run_experiment)(smote_seed, split_seed, svm_seed, rf_seed)
    for smote_seed in smote_range
    for split_seed in split_range
    for svm_seed in svm_random_state_range
    for rf_seed in rf_random_state_range
)

# Keep the earliest result with the highest mean accuracy, and only when it
# strictly beats the starting value of ``maxvalu``.
best_result = max(results, key=lambda outcome: outcome[0], default=None)
if best_result is not None and best_result[0] > maxvalu:
    maxvalu, smoteran, splitran, svm_random_state, rf_random_state = best_result

print(f"Best SMOTE: {smoteran}")
print(f"Best Data Split: {splitran}")
print(f"Best SVM Random State: {svm_random_state}")
print(f"Best RF Random State: {rf_random_state}")
