In [4]:
import numpy
import sklearn, sklearn.datasets, sklearn.utils, sklearn.model_selection, sklearn.svm
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import cross_val_score


def study_C_fix_split(C_range):
    """
    Pick the SVC penalty ``C`` that scores best on one fixed train/test split.

    For every candidate in ``C_range`` a linear-kernel ``SVC`` is fitted on
    80% of the breast-cancer dataset and scored by accuracy on the remaining
    20% (the split uses ``random_state=1``). When several candidates reach
    the same top accuracy, the one that appears first in ``C_range`` wins.

    Parameters
    ----------
    C_range : iterable of floats or integers
        Candidate values for ``C``, evaluated in iteration order.

    Returns
    -------
    The element of ``C_range`` with the highest test accuracy, or ``0`` when
    ``C_range`` is empty.

    Examples
    -----------
    >>> study_C_fix_split([1,10,100,1000])
    10
    >>> study_C_fix_split([10**p for p in range(-5,5)])
    10
    >>> study_C_fix_split([])
    0
    """
    # Materialise once so one-shot iterables work and emptiness can be checked
    # before any data is loaded or any model is trained.
    candidates = list(C_range)
    if not candidates:
        return 0  # nothing to evaluate; keep the historical placeholder value

    # load the data
    data = sklearn.datasets.load_breast_cancer()
    X, y = data["data"], data["target"]

    # prepare the training and testing data
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X, y, test_size=0.2, random_state=1)

    best_C = 0
    # Start below every possible accuracy so that even a candidate scoring
    # exactly 0.0 replaces the placeholder.
    best_score = float("-inf")

    for candidate in candidates:
        classifier = sklearn.svm.SVC(kernel='linear', random_state=1, C=candidate)
        classifier.fit(X_train, y_train)

        score = accuracy_score(y_test, classifier.predict(X_test))
        # Strict '>' keeps the earliest candidate when scores tie.
        if score > best_score:
            best_score, best_C = score, candidate

    return best_C

def study_C_gridCV(C_range):
    """
    Pick the SVC penalty ``C`` with the best mean 5-fold cross-validation score.

    The breast-cancer dataset is shuffled (``random_state=1``) and, for every
    candidate in ``C_range``, a linear-kernel ``SVC`` is evaluated with
    ``cross_val_score(..., cv=5)``. The candidate with the highest mean score
    is returned; on ties the one that appears first in ``C_range`` wins.

    Parameters
    ----------
    C_range : 1-D list (or other iterable) of floats or integers
        Candidate values for ``C``, evaluated in iteration order.

    Returns
    -------
    The element of ``C_range`` with the highest mean cross-validation score,
    or ``0`` when ``C_range`` is empty.

    Examples
    --------------
    >>> study_C_gridCV([1,2,3,4,5])
    2
    >>> study_C_gridCV(numpy.array([0.1, 1, 10]))
    10.0
    >>> study_C_gridCV([])
    0
    """
    # Materialise once so one-shot iterables work and emptiness can be checked
    # before any data is loaded or any model is trained.
    candidates = list(C_range)
    if not candidates:
        return 0  # nothing to evaluate; keep the historical placeholder value

    # load the data
    data = sklearn.datasets.load_breast_cancer()
    X, y = data["data"], data["target"]

    # shuffle the data
    X, y = sklearn.utils.shuffle(X, y, random_state=1)

    best_C = 0
    # Start below every possible score so that even a candidate whose mean
    # score is exactly 0.0 replaces the placeholder.
    best_mean = float("-inf")

    for candidate in candidates:
        model = sklearn.svm.SVC(kernel='linear', random_state=1, C=candidate)
        fold_scores = cross_val_score(estimator=model, X=X, y=y, cv=5)
        mean_score = fold_scores.mean()

        # Strict '>' keeps the earliest candidate when mean scores tie.
        if mean_score > best_mean:
            best_mean, best_C = mean_score, candidate

    return best_C
   

def study_C_and_sigma_gridCV(C_range, sigma_range):
    """
    Grid-search ``C`` and the RBF kernel width for an SVC on the breast-cancer data.

    The dataset is shuffled (``random_state=1``) and every combination of
    ``C_range`` x ``sigma_range`` is evaluated by ``GridSearchCV`` with its
    default cross-validation and scoring settings.

    Note that the entries of ``sigma_range`` are handed to ``SVC`` unchanged
    as its ``gamma`` parameter; no sigma-to-gamma conversion is applied here.

    Parameters
    ----------
    C_range : sequence of floats or integers
        Candidate values for ``C``.
    sigma_range : sequence of floats or integers
        Candidate values searched as ``gamma``.

    Returns
    -------
    tuple
        ``(best_C, best_sigma)`` taken from the search's ``best_params_``.

    Examples
    ------------
    >>> study_C_and_sigma_gridCV([0.1, 0.5, 1, 3, 9, 100], numpy.array([0.1, 1, 10]))
    (0.1, 0.1)
    >>> study_C_and_sigma_gridCV([10**p for p in range(-5, 5, 1)], [10**p for p in range(-5, 5, 1)])
    (1000, 1e-05)
    """
    # Load features and labels, then shuffle them together reproducibly.
    features, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    features, labels = sklearn.utils.shuffle(features, labels, random_state=1)

    # Exhaustive search over both hyper-parameters of an RBF-kernel SVC.
    search = sklearn.model_selection.GridSearchCV(
        estimator=sklearn.svm.SVC(kernel='rbf', random_state=1),
        param_grid={'C': C_range, 'gamma': sigma_range},
    )
    search.fit(features, labels)

    winner = search.best_params_
    return winner['C'], winner['gamma']

In [5]:
# Check the functions defined above against their docstring examples
study_C_fix_split([1,10,100,1000])

10

In [6]:
study_C_fix_split([10**p for p in range(-5,5)])

10

In [7]:
study_C_gridCV([1,2,3,4,5])

2

In [8]:
study_C_gridCV(numpy.array([0.1, 1, 10]))

10.0

In [9]:
study_C_and_sigma_gridCV([0.1, 0.5, 1, 3, 9, 100], numpy.array([0.1, 1, 10]))

(0.1, 0.1)

In [11]:
study_C_and_sigma_gridCV([10**p for p in range(-5, 5, 1)], [10**p for p in range(-5, 5, 1)])

(1000, 1e-05)