In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# Load the cross-sectional dataset. `data_cross` is left exactly as read from
# disk; all cleaning below produces new objects so this cell is idempotent.
data_cross = pd.read_csv('oasis_cross-sectional.csv')

# Rows without a label cannot be used for supervised learning. Filling the
# target with a column mean would invent labels, so such rows are dropped.
data_cross_labeled = data_cross.dropna(subset=['Diagnosis'])

# Splitting features and target
X_cross = data_cross_labeled.drop(['ID', 'M/F', 'Diagnosis'], axis=1)
y_cross = data_cross_labeled['Diagnosis']

# Split BEFORE imputing/scaling so that no statistic computed from the test
# rows can influence the training data (avoids train/test leakage).
X_cross_train, X_cross_test, y_cross_train, y_cross_test = train_test_split(
    X_cross, y_cross, test_size=0.2, random_state=42
)

# Handling missing values: the column means are learned from the training
# partition only and then applied unchanged to the test partition.
# NOTE(review): mean imputation and scaling both require every remaining
# feature column to be numeric - confirm against the CSV schema.
imputer = SimpleImputer(strategy='mean')
X_cross_train_imputed = imputer.fit_transform(X_cross_train)
X_cross_test_imputed = imputer.transform(X_cross_test)

# Feature scaling, likewise fitted on the training partition only
scaler = StandardScaler()
X_cross_train_scaled = scaler.fit_transform(X_cross_train_imputed)
X_cross_test_scaled = scaler.transform(X_cross_test_imputed)


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Tune and evaluate a random forest on the cross-sectional data.
# The train/test partitions and scaled feature matrices produced by the
# preprocessing cell are reused here; the generic X_train / X_test / y_train /
# y_test names are kept as aliases so any later cell relying on them still works.
X_train, X_test = X_cross_train_scaled, X_cross_test_scaled
y_train, y_test = y_cross_train, y_cross_test

# Define the Random Forest model. The forest is seeded as well as the search:
# RandomizedSearchCV's random_state only fixes which parameter combinations
# are sampled, not the randomness inside each forest.
rf_model = RandomForestClassifier(random_state=42)

# Define the hyperparameter grid for tuning
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Create the RandomizedSearchCV object: 10 sampled combinations, 5-fold CV,
# all available cores.
random_search = RandomizedSearchCV(
    rf_model,
    param_distributions=param_grid,
    n_iter=10,
    cv=5,
    verbose=2,
    n_jobs=-1,
    random_state=42,
)

# Fit the search on the training partition only; the test partition stays
# untouched until the final evaluation below.
random_search.fit(X_train, y_train)

# Get the best parameters and the best model (refit on the full training set)
best_params = random_search.best_params_
best_model = random_search.best_estimator_

# Make predictions on the test set
y_pred = best_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Best Parameters: {best_params}")
print(f"Accuracy on Test Set: {accuracy}")
