In [26]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate

# Column selection: the label column and the numerical feature columns
target_name = "class"
numerical_columns = ["age", "capital-gain", "capital-loss", "hours-per-week"]

# Load the census CSV, then split it into the feature frame and the label column
df = pd.read_csv("CensusDataset.csv")
data = df[numerical_columns]
target = df[target_name]

# Preview the first rows of the feature frame
data.head()

Unnamed: 0,age,capital-gain,capital-loss,hours-per-week
0,25,0,0,40
1,38,0,0,50
2,28,0,0,40
3,44,7688,0,40
4,18,0,0,30


In [30]:
# Build the pipeline: standardize the features, then fit a logistic regression.
# The step names are the prefixes used in the `<step>__<param>` parameter keys.
model = Pipeline(
    steps=[
        ("Preprocessor", StandardScaler()),
        ("Classifier", LogisticRegression()),
    ]
)

In [31]:
# List every tunable parameter of the pipeline, including those of its steps
# (nested parameters appear as `<step name>__<parameter>`).
model.get_params(deep=True)

{'memory': None,
 'steps': [('Preprocessor', StandardScaler()),
  ('Classifier', LogisticRegression())],
 'verbose': False,
 'Preprocessor': StandardScaler(),
 'Classifier': LogisticRegression(),
 'Preprocessor__copy': True,
 'Preprocessor__with_mean': True,
 'Preprocessor__with_std': True,
 'Classifier__C': 1.0,
 'Classifier__class_weight': None,
 'Classifier__dual': False,
 'Classifier__fit_intercept': True,
 'Classifier__intercept_scaling': 1,
 'Classifier__l1_ratio': None,
 'Classifier__max_iter': 100,
 'Classifier__multi_class': 'auto',
 'Classifier__n_jobs': None,
 'Classifier__penalty': 'l2',
 'Classifier__random_state': None,
 'Classifier__solver': 'lbfgs',
 'Classifier__tol': 0.0001,
 'Classifier__verbose': 0,
 'Classifier__warm_start': False}

In [32]:
# Baseline: cross-validated accuracy of the pipeline with its default parameters
results = cross_validate(model, X=data, y=target)
scores = results["test_score"]
print(f"Accuracy cross-validation: {scores.mean():.3f} +/- {scores.std():.3f}")

Accuracy cross-validation: 0.800 +/- 0.003


In [34]:
# Accuracy after changing C.
# NOTE: set_params modifies `model` in place, so the pipeline keeps C=1e-3
# for every cell executed after this one until it is set again.
model.set_params(Classifier__C=1e-3)
results = cross_validate(model, X=data, y=target)
scores = results["test_score"]
print(f"Accuracy cross-validation: {scores.mean():.3f} +/- {scores.std():.3f}")

Accuracy cross-validation: 0.787 +/- 0.002


In [35]:
# Confirm the value of C currently stored on the pipeline's classifier step
print(model.get_params(deep=True)["Classifier__C"])

0.001


### Set C manually

In [54]:
# Sweep the logistic regression's C parameter and report the cross-validated
# accuracy (mean +/- std of the test scores) for each value.
# NOTE: set_params modifies `model` in place, so after this cell the pipeline
# keeps the last value tried (C=10).
for C in [1e-3, 1e-2, 1e-1, 1, 10]:
    model.set_params(Classifier__C=C)
    # Reuse `results` (previously misspelled `resutls`) so no stray global is
    # created and `results` always holds the most recent run.
    results = cross_validate(model, data, target)
    scores = results["test_score"]
    score_text = f"{scores.mean():.3f} +/- {scores.std():.3f}"
    # Left-align the "C=...:" label in a fixed-width field so the scores line
    # up in one column; labels longer than the field are printed unpadded.
    label = f"C={C}:"
    print(f"Accuracy cross-validation {label:<8} {score_text}")

Accuracy cross-validation C=0.001: 0.787 +/- 0.002
Accuracy cross-validation C=0.01:  0.799 +/- 0.003
Accuracy cross-validation C=0.1:   0.800 +/- 0.003
Accuracy cross-validation C=1:     0.800 +/- 0.003
Accuracy cross-validation C=10:    0.800 +/- 0.003


In [57]:
# Inspect the pipeline parameters again: the loop above ended on C=10,
# and set_params changed `model` in place.
model.get_params(deep=True)

{'memory': None,
 'steps': [('Preprocessor', StandardScaler()),
  ('Classifier', LogisticRegression(C=10))],
 'verbose': False,
 'Preprocessor': StandardScaler(),
 'Classifier': LogisticRegression(C=10),
 'Preprocessor__copy': True,
 'Preprocessor__with_mean': True,
 'Preprocessor__with_std': True,
 'Classifier__C': 10,
 'Classifier__class_weight': None,
 'Classifier__dual': False,
 'Classifier__fit_intercept': True,
 'Classifier__intercept_scaling': 1,
 'Classifier__l1_ratio': None,
 'Classifier__max_iter': 100,
 'Classifier__multi_class': 'auto',
 'Classifier__n_jobs': None,
 'Classifier__penalty': 'l2',
 'Classifier__random_state': None,
 'Classifier__solver': 'lbfgs',
 'Classifier__tol': 0.0001,
 'Classifier__verbose': 0,
 'Classifier__warm_start': False}