In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import svm
from sklearn import neighbors
from sklearn import linear_model
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer
from sklearn.metrics import confusion_matrix

In [2]:
salaries = pd.read_csv('Salaries.csv')
salaries.drop(['education', 'fnlwgt'], axis = 1)

Unnamed: 0,age,workclass,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,salary
0,39,State-gov,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,27,Private,12,Married-civ-spouse,Tech-support,Wife,White,Female,0,0,38,United-States,<=50K
32557,40,Private,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,40,United-States,>50K
32558,58,Private,9,Widowed,Adm-clerical,Unmarried,White,Female,0,0,40,United-States,<=50K
32559,22,Private,9,Never-married,Adm-clerical,Own-child,White,Male,0,0,20,United-States,<=50K


In [3]:
# Clean salaries
salaries = pd.read_csv('Salaries.csv')
salaries = salaries.drop(['education', 'fnlwgt'], axis = 1)

salaries = salaries[(salaries['workclass'] != ' ?') & (salaries['occupation'] != ' ?') & (salaries['native-country'] != ' ?')]
gender_mapping = {' Male': 0, ' Female': 1}
salaries['sex'] = salaries['sex'].map(gender_mapping)

# One-hot workclass
Federal_gov = {' Federal-gov': 1, ' Local-gov': 0, ' Private': 0, ' Self-emp-inc': 0, ' Self-emp-not-inc': 0, ' State-gov': 0, ' Without-pay': 0}
Local_gov = {' Federal-gov': 0, ' Local-gov': 1, ' Private': 0, ' Self-emp-inc': 0, ' Self-emp-not-inc': 0, ' State-gov': 0, ' Without-pay': 0}
Private = {' Federal-gov': 0, ' Local-gov': 0, ' Private': 1, ' Self-emp-inc': 0, ' Self-emp-not-inc': 0, ' State-gov': 0, ' Without-pay': 0}
Self_emp_inc = {' Federal-gov': 0, ' Local-gov': 0, ' Private': 0, ' Self-emp-inc': 1, ' Self-emp-not-inc': 0, ' State-gov': 0, ' Without-pay': 0}
Self_emp_not_inc = {' Federal-gov': 0, ' Local-gov': 0, ' Private': 0, ' Self-emp-inc': 0, ' Self-emp-not-inc': 1, ' State-gov': 0, ' Without-pay': 0}
State_gov = {' Federal-gov': 0, ' Local-gov': 0, ' Private': 0, ' Self-emp-inc': 0, ' Self-emp-not-inc': 0, ' State-gov': 1, ' Without-pay': 0}
Without_pay = {' Federal-gov': 0, ' Local-gov': 0, ' Private': 0, ' Self-emp-inc': 0, ' Self-emp-not-inc': 0, ' State-gov': 0, ' Without-pay': 1}
salaries['Federal_gov'] = salaries['workclass'].map(Federal_gov)
salaries['Local_gov'] = salaries['workclass'].map(Local_gov)
salaries['Private'] = salaries['workclass'].map(Private)
salaries['Self_emp_inc'] = salaries['workclass'].map(Self_emp_inc)
salaries['Self_emp_not_inc'] = salaries['workclass'].map(Self_emp_not_inc)
salaries['State_gov'] = salaries['workclass'].map(State_gov)
salaries['Without_pay'] = salaries['workclass'].map(Without_pay)
salaries = salaries.drop(['workclass'], axis = 1)


salaries['age'] = salaries['age'] / max(salaries['age'])
salaries['education-num'] = salaries['education-num'] / max(salaries['education-num'])
salaries['capital-gain'] = salaries['capital-gain'] / max(salaries['capital-gain'])
salaries['capital-loss'] = salaries['capital-loss'] / max(salaries['capital-loss'])
salaries['hours-per-week'] = salaries['hours-per-week'] / max(salaries['hours-per-week'])


salaries = pd.get_dummies(salaries, columns = ['marital-status', 'occupation', 'relationship', 'race', 'native-country'], dtype = int)
# Delete columns because there are too few of them
salaries = salaries.drop(['occupation_ Armed-Forces', 'native-country_ Cambodia', \
                          'native-country_ Holand-Netherlands', 'native-country_ Honduras', 'native-country_ Hong', \
                         'native-country_ Hungary', 'native-country_ Ireland', 'native-country_ Laos', \
                          "native-country_ Outlying-US(Guam-USVI-etc)", 'native-country_ Scotland', 'native-country_ Thailand', \
                         'native-country_ Trinadad&Tobago', 'native-country_ Yugoslavia'], axis = 1)

Y_mapping = {' <=50K': -1, ' >50K': 1}
salaries['salary'] = salaries['salary'].map(Y_mapping)

In [4]:
X = salaries.drop('salary', axis = 1)
Y = salaries[['salary']]

In [5]:
def calc_error(X, Y, classifier):
    
    # Hint: Use classifier.predict()
    Y_pred = classifier.predict(X)

    # Hint: Use accuracy_score().
    precision = accuracy_score(Y, Y_pred)
    recall = recall_score(Y, Y_pred)
    f_value = (2 * precision * recall) / (precision + recall)
    return precision, recall, f_value

In [6]:
def SVM(X_train, Y_train):
    C_list = [1, 10, 100, 1000]
    opt_validation = 0.0
    opt_C          = None  # Optimal C.

    for C in C_list:
        classifier = svm.LinearSVC(penalty = 'l2', loss = 'hinge', max_iter = 100000, C = C)
    
        cv_results = cross_validate(classifier, X_train, Y_train)
        validation_accuracy = np.mean(cv_results['test_score'])
    
        # Judge if it is the optimal one.
        if validation_accuracy > opt_validation:
            opt_validation = validation_accuracy
            opt_classifier = classifier
            opt_C = C
            
    
    opt_classifier = svm.LinearSVC(penalty = 'l2', loss = 'hinge', max_iter = 100000, C = opt_C)
    opt_classifier.fit(X_train, Y_train)
    opt_error = calc_error(X_train, Y_train, opt_classifier)
    
    test_error = calc_error(X_test, Y_test, opt_classifier)

    print('Best parameter C* = {}'.format(opt_C))
    print('Training accuracy: {}'.format(opt_error[0]))
    print('Training recall: {}'.format(opt_error[1]))
    print('Training F-value: {}'.format(opt_error[2]))
    
    print('Validation accuracy: {}'.format(opt_validation))

    print('Testing accuracy: {}'.format(test_error[0]))
    print('Testing recall: {}'.format(test_error[1]))
    print('Testing F-value: {}'.format(test_error[2]))

In [7]:
def KNN(X_train, Y_train):
    opt_validation = 0.0
    opt_n          = None  # Optimal C.
    n_list = [3,4,5,6,7,8]

    for n in n_list:
        classifier = neighbors.KNeighborsClassifier(n_neighbors = n)
    
        cv_results = cross_validate(classifier, X_train, Y_train)
        validation_accuracy = np.mean(cv_results['test_score'])
    
    
        # Judge if it is the optimal one.
        if validation_accuracy > opt_validation:
            opt_validation = validation_accuracy
            opt_classifier = classifier
            opt_n = n
            
    
    opt_classifier = neighbors.KNeighborsClassifier(n_neighbors = opt_n)
    opt_classifier.fit(X_train, Y_train)
    opt_error = calc_error(X_train, Y_train, opt_classifier)
    
    test_error = calc_error(X_test, Y_test, opt_classifier)

    print('Best parameter C* = {}'.format(opt_n))
    print('Training accuracy: {}'.format(opt_error[0]))
    print('Training recall: {}'.format(opt_error[1]))
    print('Training F-value: {}'.format(opt_error[2]))
    
    print('Validation accuracy: {}'.format(opt_validation))

    print('Testing accuracy: {}'.format(test_error[0]))
    print('Testing recall: {}'.format(test_error[1]))
    print('Testing F-value: {}'.format(test_error[2]))

In [8]:
def LogisticRegression(X_train, Y_train):
    C_list = [1, 10, 100, 1000]
    opt_validation = 0.0
    opt_C          = None  # Optimal C.

    for C in C_list:
        classifier = linear_model.LogisticRegression(penalty = 'l2', C = C)
    
        cv_results = cross_validate(classifier, X_train, Y_train)
        validation_accuracy = np.mean(cv_results['test_score'])
    
        # Judge if it is the optimal one.
        if validation_accuracy > opt_validation:
            opt_validation = validation_accuracy
            opt_classifier = classifier
            opt_C = C
            
    
    opt_classifier = linear_model.LogisticRegression(penalty = 'l2', C = C)
    opt_classifier.fit(X_train, Y_train)
    opt_error = calc_error(X_train, Y_train, opt_classifier)
    
    test_error = calc_error(X_test, Y_test, opt_classifier)

    print('Best parameter C* = {}'.format(opt_C))
    print('Training accuracy: {}'.format(opt_error[0]))
    print('Training recall: {}'.format(opt_error[1]))
    print('Training F-value: {}'.format(opt_error[2]))
    
    print('Validation accuracy: {}'.format(opt_validation))

    print('Testing accuracy: {}'.format(test_error[0]))
    print('Testing recall: {}'.format(test_error[1]))
    print('Testing F-value: {}'.format(test_error[2]))

# 20/80 Partition
# Seed 1

In [9]:
# Seed 1, 20/80 partition

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.2, random_state=1)

In [10]:
# car_eval svm
# Partition: [20, 80]
# Seed 1
np.random.seed(1)
SVM(X_train, Y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Best parameter C* = 10
Training accuracy: 0.8469827586206896
Training recall: 0.6369509043927648
Training F-value: 0.7271031684974124
Validation accuracy: 0.8406839044215542
Testing accuracy: 0.8457936179030253
Testing recall: 0.6273489932885906
Testing F-value: 0.720375299295935


In [11]:
# car_eval KNN
# Partition: [20, 80]
# Seed 1
KNN(X_train, Y_train)

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


Best parameter C* = 4
Training accuracy: 0.8736737400530504
Training recall: 0.6052971576227391
Training F-value: 0.7151354125693776
Validation accuracy: 0.8153176399142096
Testing accuracy: 0.8150849564857024
Testing recall: 0.4686241610738255
Testing F-value: 0.5951013336466316


In [12]:
# car_eval LR
# Partition: [20, 80]
# Seed 1
LogisticRegression(X_train, Y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html

Best parameter C* = 100
Training accuracy: 0.8453249336870027
Training recall: 0.6253229974160207
Training F-value: 0.7188683438696183
Validation accuracy: 0.8410151671908339
Testing accuracy: 0.8479486116866971
Testing recall: 0.6303691275167785
Testing F-value: 0.7231471453708038


# 20/80 Partition
# Seed 2

In [13]:
# Seed 2, 20/80 partition

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.2, random_state=2)

In [14]:
# car_eval svm
# Partition: [20, 80]
# Seed 2
np.random.seed(1)
SVM(X_train, Y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Best parameter C* = 100
Training accuracy: 0.8502984084880637
Training recall: 0.5865845311430528
Training F-value: 0.694241513372971
Validation accuracy: 0.8416787919007558
Testing accuracy: 0.8457936179030253
Testing recall: 0.5930213328923433
Testing F-value: 0.6972038459336587


In [15]:
# car_eval KNN
# Partition: [20, 80]
# Seed 2
KNN(X_train, Y_train)

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


Best parameter C* = 7
Training accuracy: 0.8577586206896551
Training recall: 0.6399726214921286
Training F-value: 0.7330314246374042
Validation accuracy: 0.8192982890023783
Testing accuracy: 0.8176129299627021
Testing recall: 0.5437406978667108
Testing F-value: 0.6531284980400014


In [16]:
# car_eval LR
# Partition: [20, 80]
# Seed 2
LogisticRegression(X_train, Y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

Best parameter C* = 100
Training accuracy: 0.8512931034482759
Training recall: 0.5995893223819302
Training F-value: 0.7036080194477403
Validation accuracy: 0.8459897419832624
Testing accuracy: 0.844218814753419
Testing recall: 0.594013560443195
Testing F-value: 0.6973524342702379


# 20/80 Partition
# Seed 3

In [17]:
# Seed 1, 20/80 partition

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.2, random_state=3)

In [18]:
# car_eval svm
# Partition: [20, 80]
# Seed 3
np.random.seed(1)
SVM(X_train, Y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Best parameter C* = 10
Training accuracy: 0.8463196286472149
Training recall: 0.6175882744836776
Training F-value: 0.7140846469918484
Validation accuracy: 0.8376989671911088
Testing accuracy: 0.8470783257355988
Testing recall: 0.610121524887631
Testing F-value: 0.7093340279660693


In [19]:
# car_eval KNN
# Partition: [20, 80]
# Seed 3
KNN(X_train, Y_train)

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


Best parameter C* = 8
Training accuracy: 0.8499668435013262
Training recall: 0.5429713524317122
Training F-value: 0.6626390860491764
Validation accuracy: 0.8138246904115161
Testing accuracy: 0.8211355159552425
Testing recall: 0.48659896787081736
Testing F-value: 0.6110777065033601


In [20]:
# car_eval LR
# Partition: [20, 80]
# Seed 3
LogisticRegression(X_train, Y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html

Best parameter C* = 10
Training accuracy: 0.8471485411140584
Training recall: 0.6162558294470353
Training F-value: 0.7134873140619682
Validation accuracy: 0.841014617605153
Testing accuracy: 0.8464152507252383
Testing recall: 0.6059597136673881
Testing F-value: 0.7062825448628751


# 50/50 Partition
# Seed 1

In [21]:
# Seed 1, 50/50 partition

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.5, random_state=1)

In [22]:
# car_eval svm
# Partition: [50, 50]
# Seed 1
np.random.seed(1)
SVM(X_train, Y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Best parameter C* = 100
Training accuracy: 0.8488163914859758
Training recall: 0.6051571313456889
Training F-value: 0.7065703528224739
Validation accuracy: 0.8468934218034146
Testing accuracy: 0.8470923678801141
Testing recall: 0.6031704095112286
Testing F-value: 0.7046185813954887


In [23]:
# car_eval KNN
# Partition: [50, 50]
# Seed 1
KNN(X_train, Y_train)

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


Best parameter C* = 7
Training accuracy: 0.8638021351369273
Training recall: 0.6578028471662638
Training F-value: 0.7468581011364592
Validation accuracy: 0.8224924807171388
Testing accuracy: 0.8253431470061667
Testing recall: 0.5788639365918098
Testing F-value: 0.6804714043898744


In [24]:
# car_eval LR
# Partition: [50, 50]
# Seed 1
LogisticRegression(X_train, Y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

Best parameter C* = 1000
Training accuracy: 0.8487500828857503
Training recall: 0.607037335482138
Training F-value: 0.7078272312352256
Validation accuracy: 0.8466284115916087
Testing accuracy: 0.8483522312843976
Testing recall: 0.6110964332892999
Testing F-value: 0.7104395451447355


# 50/50 Partition
# Seed 2

In [25]:
# Seed 2, 50/50 partition

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.5, random_state=2)

In [26]:
# car_eval svm
# Partition: [50, 50]
# Seed 2
np.random.seed(1)
SVM(X_train, Y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Best parameter C* = 10
Training accuracy: 0.8470260592798886
Training recall: 0.5914732865623313
Training F-value: 0.6965498990795741
Validation accuracy: 0.845435085356279
Testing accuracy: 0.8486174656852994
Testing recall: 0.5983692793266702
Testing F-value: 0.701853867171309


In [27]:
# car_eval KNN
# Partition: [50, 50]
# Seed 2
KNN(X_train, Y_train)

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


Best parameter C* = 8
Training accuracy: 0.8563755719116769
Training recall: 0.555045871559633
Training F-value: 0.6735447132290333
Validation accuracy: 0.8259399653071146
Testing accuracy: 0.8231549631987268
Testing recall: 0.4889531825355076
Testing F-value: 0.6134924781686992


In [28]:
# car_eval LR
# Partition: [50, 50]
# Seed 2
LogisticRegression(X_train, Y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

Best parameter C* = 1000
Training accuracy: 0.8462303560771832
Training recall: 0.5971397733405289
Training F-value: 0.7001915762600143
Validation accuracy: 0.84490462533706
Testing accuracy: 0.8494794774882303
Testing recall: 0.6125723303524461
Testing F-value: 0.711831988881545


# 50/50 Partition
# Seed 3

In [29]:
# Seed 3, 50/50 partition

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.5, random_state=3)

In [30]:
# car_eval svm
# Partition: [50, 50]
# Seed 3
np.random.seed(1)
SVM(X_train, Y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Best parameter C* = 100
Training accuracy: 0.8460977388767323
Training recall: 0.5996793158738642
Training F-value: 0.7018887338747384
Validation accuracy: 0.8443731762277246
Testing accuracy: 0.8499436376898084
Testing recall: 0.6096654275092936
Testing F-value: 0.7100274499326671


In [31]:
# car_eval KNN
# Partition: [50, 50]
# Seed 3
KNN(X_train, Y_train)

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


Best parameter C* = 8
Training accuracy: 0.8548504741064916
Training recall: 0.5689470871191876
Training F-value: 0.6831935949471031
Validation accuracy: 0.8214965329094459
Testing accuracy: 0.823022345998276
Testing recall: 0.5061072756240043
Testing F-value: 0.6267825056858722


In [32]:
# car_eval LR
# Partition: [50, 50]
# Seed 3
LogisticRegression(X_train, Y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html

Best parameter C* = 100
Training accuracy: 0.8486837742855249
Training recall: 0.6114377338321753
Training F-value: 0.7107864390796288
Validation accuracy: 0.847224437295643
Testing accuracy: 0.8479543796830449
Testing recall: 0.6141795007966012
Testing F-value: 0.7123782638032836


# 80/20 Partition
# Seed 1

In [33]:
# Seed 1, 80/20 partition

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.8, random_state=1)

In [34]:
# car_eval svm
# Partition: [80, 20]
# Seed 1
np.random.seed(1)
SVM(X_train, Y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Best parameter C* = 100
Training accuracy: 0.8478594222719549
Training recall: 0.6022155085599195
Training F-value: 0.704231322553668
Validation accuracy: 0.8463675041710594
Testing accuracy: 0.849494447207028
Testing recall: 0.6012903225806452
Testing F-value: 0.7041606733524461


In [35]:
# car_eval KNN
# Partition: [80, 20]
# Seed 1
KNN(X_train, Y_train)

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


Best parameter C* = 8
Training accuracy: 0.8575158522939201
Training recall: 0.5755287009063444
Training F-value: 0.6887782844925237
Validation accuracy: 0.8271790238110064
Testing accuracy: 0.8289408254599702
Testing recall: 0.5270967741935484
Testing F-value: 0.6444246608042846


In [36]:
# car_eval LR
# Partition: [80, 20]
# Seed 1
LogisticRegression(X_train, Y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

Best parameter C* = 10
Training accuracy: 0.8473206514981972
Training recall: 0.6028868747901981
Training F-value: 0.7045039972097826
Validation accuracy: 0.8469891627604362
Testing accuracy: 0.8493286921929388
Testing recall: 0.6167741935483871
Testing F-value: 0.7146074457386088


# 80/20 Partition
# Seed 2

In [37]:
# Seed 2, 80/20 partition

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.8, random_state=2)

In [38]:
# car_eval svm
# Partition: [80, 20]
# Seed 2
np.random.seed(1)
SVM(X_train, Y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Best parameter C* = 100
Training accuracy: 0.8485225247627336
Training recall: 0.6082232993481531
Training F-value: 0.7085534909940886
Validation accuracy: 0.847196201920083
Testing accuracy: 0.8450190618266202
Testing recall: 0.600655737704918
Testing F-value: 0.702184956285687


In [39]:
# car_eval KNN
# Partition: [80, 20]
# Seed 2
KNN(X_train, Y_train)

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


Best parameter C* = 8
Training accuracy: 0.8594637158605827
Training recall: 0.5828179842888184
Training F-value: 0.6946089801948605
Validation accuracy: 0.828380881623503
Testing accuracy: 0.8244654400795625
Testing recall: 0.5095081967213114
Testing F-value: 0.6298053995150815


In [40]:
# car_eval LR
# Partition: [80, 20]
# Seed 2
LogisticRegression(X_train, Y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

Best parameter C* = 100
Training accuracy: 0.8479423100833022
Training recall: 0.6103961223466489
Training F-value: 0.7098224754127757
Validation accuracy: 0.8467818401619895
Testing accuracy: 0.8488314271506713
Testing recall: 0.6026229508196721
Testing F-value: 0.7048451637774628


# 80/20 Partition
# Seed 3

In [41]:
# Seed 3, 80/20 partition

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.8, random_state=3)

In [42]:
# car_eval svm
# Partition: [80, 20]
# Seed 3
np.random.seed(1)
SVM(X_train, Y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


Best parameter C* = 1000
Training accuracy: 0.84848108085706
Training recall: 0.5954980682009071
Training F-value: 0.6998284495797985
Validation accuracy: 0.8479420152928115
Testing accuracy: 0.8468423669816012
Testing recall: 0.6115755627009646
Testing F-value: 0.7102327619059313


In [43]:
# car_eval KNN
# Partition: [80, 20]
# Seed 3
KNN(X_train, Y_train)

  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)
  return self._fit(X, y)


Best parameter C* = 7
Training accuracy: 0.8656803017116333
Training recall: 0.6527801108684697
Training F-value: 0.7443050587901444
Validation accuracy: 0.828214923911713
Testing accuracy: 0.824133930051384
Testing recall: 0.5652733118971061
Testing F-value: 0.6705894456597266


In [44]:
# car_eval LR
# Partition: [80, 20]
# Seed 3
LogisticRegression(X_train, Y_train)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/

Best parameter C* = 10
Training accuracy: 0.8501802809896805
Training recall: 0.6086007055266253
Training F-value: 0.7093872536285083
Validation accuracy: 0.8485637597727337
Testing accuracy: 0.8448533068125311
Testing recall: 0.6186495176848874
Testing F-value: 0.7142700130470115
