In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

In [5]:
# Load the datasets
train_df = pd.read_csv('project_adult.csv', na_values='?', skipinitialspace=True)
X_validation = pd.read_csv('project_validation_inputs.csv', na_values='?', skipinitialspace=True)

# Separate features (X) and target (y) from the training data
X_train_full = train_df.drop('income', axis=1)
y_train_full = train_df['income'].apply(lambda x: 1 if x == '>50K' else 0)

# Identify categorical and numerical features
categorical_features = X_train_full.select_dtypes(include=['object']).columns
numerical_features = X_train_full.select_dtypes(include=['int64', 'float64']).columns

# Preprocessing pipelines for numerical and categorical features

numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine the transformers using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Fit and transform the training data
X_train_processed = preprocessor.fit_transform(X_train_full)
X_validation_processed = preprocessor.transform(X_validation)

# Split the processed training data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_train_processed, y_train_full, test_size=0.2, random_state=42)

In [6]:

models = {
    "model_1": MLPClassifier(hidden_layer_sizes=(50,), max_iter=500, random_state=42),
    "model_2": MLPClassifier(hidden_layer_sizes=(100,50), max_iter=500, random_state=42),
    "model_3": MLPClassifier(hidden_layer_sizes=(100,), activation='tanh', max_iter=500, random_state=42),
}

results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results[name] = accuracy_score(y_test, y_pred)
results



{'model_1': 0.8362763915547025,
 'model_2': 0.8182341650671785,
 'model_3': 0.8211132437619961}