In [21]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
data_url =  "https://raw.githubusercontent.com/mcnakhaee/palmerpenguins/refs/heads/master/palmerpenguins/data/penguins.csv"
penguins = pd.read_csv(data_url)
penguins

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,male,2007
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,female,2007
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,female,2007
3,Adelie,Torgersen,,,,,,2007
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,female,2007
...,...,...,...,...,...,...,...,...
339,Chinstrap,Dream,55.8,19.8,207.0,4000.0,male,2009
340,Chinstrap,Dream,43.5,18.1,202.0,3400.0,female,2009
341,Chinstrap,Dream,49.6,18.2,193.0,3775.0,male,2009
342,Chinstrap,Dream,50.8,19.0,210.0,4100.0,male,2009


In [22]:
def examine_penguins(penguins, attribs, target): 

  # Plot the pair grid
  g = sns.PairGrid(penguins[attribs], hue=target)
  g.map_diag(sns.histplot)
  g.map_offdiag(sns.scatterplot)
  g.add_legend()
  return g 


In [23]:
sex_mapping = {
    "male": 0, 
    "female": 1
}

penguins = penguins.dropna()

penguins.loc[:,"sex_no"] = penguins.loc[:,"sex"].map(sex_mapping).astype(int)
penguins

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  penguins.loc[:,"sex_no"] = penguins.loc[:,"sex"].map(sex_mapping).astype(int)


Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year,sex_no
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,male,2007,0
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,female,2007,1
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,female,2007,1
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,female,2007,1
5,Adelie,Torgersen,39.3,20.6,190.0,3650.0,male,2007,0
...,...,...,...,...,...,...,...,...,...
339,Chinstrap,Dream,55.8,19.8,207.0,4000.0,male,2009,0
340,Chinstrap,Dream,43.5,18.1,202.0,3400.0,female,2009,1
341,Chinstrap,Dream,49.6,18.2,193.0,3775.0,male,2009,0
342,Chinstrap,Dream,50.8,19.0,210.0,4100.0,male,2009,0


In [None]:
from sklearn.model_selection import train_test_split

# Choosing continuous measurement attributes for classification
y = penguins.loc[:,"species"]
X = penguins.loc[:,["bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g"]]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=20250331)


In [29]:
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

# Scale your data!

scaler = StandardScaler()
scaler.fit(X)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Grid CV for MLPClassifier

# This is a Python dictionary. 
# This variable tells the GridSearchCV class what hyperparameters to train on.
param = {
    "hidden_layer_sizes": [(50,), (100,), (50, 50)],
}

# Initialize our model
mlp_classifier = MLPClassifier(max_iter=1000, random_state=20250331)

gs_mlp = GridSearchCV(mlp_classifier, 
                      param_grid=param,
                      scoring="accuracy",
                      cv=5
                     )

gs_mlp.fit(X_train, y_train)

display(gs_mlp.best_params_)

display(gs_mlp.score(X_train, y_train))
display(gs_mlp.score(X_test, y_test))


{'hidden_layer_sizes': (50,)}

0.9962406015037594

1.0