# Regularization

In [346]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor

In [347]:
np.random.seed(42)

In [348]:
df_iris = pd.read_csv('iris.csv')
print("df_iris.shape: {}".format(df_iris.shape))

df_iris.shape: (150, 5)


In [349]:
df_iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [350]:
df_iris = df_iris.iloc[:100]

In [351]:
df_iris = df_iris.sample(frac=1).reset_index(drop=True)
df_iris['species'] = df_iris['species'].astype('category').cat.codes
df_iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,6.0,2.7,5.1,1.6,1
1,5.5,2.3,4.0,1.3,1
2,5.9,3.2,4.8,1.8,1
3,4.8,3.0,1.4,0.3,0
4,5.1,3.8,1.9,0.4,0


In [352]:
X_train = df_iris[['sepal_length', 'sepal_width']][:67].values
Y_train = df_iris['species'][:67]

X_test = df_iris[['sepal_length', 'sepal_width']][67:].values
Y_test = df_iris['species'][67:]

print("X_train.shape: {}".format(X_train.shape))
print("Y_train.shape: {}".format(Y_train.shape))
print("X_test.shape: {}".format(X_test.shape))
print("Y_test.shape: {}".format(Y_test.shape))

X_train.shape: (67, 2)
Y_train.shape: (67,)
X_test.shape: (33, 2)
Y_test.shape: (33,)


## Underfitting (недообучение)

## Regularization

In [424]:
def net_input(W, X, b):
    return (np.dot(W, X.T) + b).flatten()

In [425]:
def sigmoid(Z):
    return 1.0/(1 + np.exp(-Z))

In [426]:
def cost_function(W, X, b, Y, REGULARIZATION_PARAMETER):
    return -np.sum(Y * np.log(sigmoid(net_input(W, X, b))) + (1 - Y) * np.log(1 - sigmoid(net_input(W, X, b)))) / X.shape[0] + REGULARIZATION_PARAMETER * np.sum(W ** 2)

In [433]:
def cost_function_derivative_w(W, X, b, Y):
    return (np.dot(X.T, sigmoid(net_input(W, X, b)) - Y) / X.shape[0]).reshape(1, -1)

In [434]:
def cost_function_derivative_b(W, X, b, Y):
    return (np.sum(sigmoid(net_input(W, X, b)) - Y) / X.shape[0]).reshape(1, -1)

In [435]:
def accuracy(Y_hat, Y):
    return np.sum(Y_hat == Y) / Y.shape[0]

In [436]:
W = np.zeros((1, X_train.shape[1]))
X_train.shape
# (sigmoid(net_input(W, X_train, b)) - Y_train).shape
# np.dot(X_train.T, sigmoid(net_input(W, X_train, b)) - Y_train)

(67, 2)

In [438]:
# print(X_train.shape)
# print(Y_train.shape)

W = np.zeros((1, X_train.shape[1]))
print(W.shape)
b = 0

NUM_ITERATIONS = 1000
LEARNING_RATE = 10e-2
REGULARIZATION_PARAMETER = 0.1

for i in range(NUM_ITERATIONS):
    print('{} step'.format(i))
    print(cost_function(W, X_train, b, Y_train, REGULARIZATION_PARAMETER))
    W = W - LEARNING_RATE * (cost_function_derivative_w(W, X_train, b, Y_train) - REGULARIZATION_PARAMETER * W / X_train.shape[0])
    b = b - LEARNING_RATE * (cost_function_derivative_b(W, X_train, b, Y_train))
    Y_pred = np.array([1 if sigmoid(net_input(W, x, b)) > 0.5 else 0 for x in X_test])
    print('Accuracy: {}'.format(accuracy(Y_pred, Y_test)))
    
print(sigmoid(net_input(W, X_train, b)))

(1, 2)
0 step
0.6931471805599454
Accuracy: 0.3333333333333333
1 step
0.6789915197766154
Accuracy: 0.3333333333333333
2 step
0.6729464535167834
Accuracy: 0.3333333333333333
3 step
0.6671111705571422
Accuracy: 0.3333333333333333
4 step
0.6614866498088082
Accuracy: 0.3333333333333333
5 step
0.6560691654129969
Accuracy: 0.3333333333333333
6 step
0.6508547199319525
Accuracy: 0.3333333333333333
7 step
0.6458393583703845
Accuracy: 0.3333333333333333
8 step
0.6410191800227635
Accuracy: 0.36363636363636365
9 step
0.6363903395454239
Accuracy: 0.3939393939393939
10 step
0.6319490475607018
Accuracy: 0.3939393939393939
11 step
0.6276915711554166
Accuracy: 0.42424242424242425
12 step
0.6236142342897911
Accuracy: 0.42424242424242425
13 step
0.6197134181208581
Accuracy: 0.42424242424242425
14 step
0.6159855612440188
Accuracy: 0.45454545454545453
15 step
0.6124271598564324
Accuracy: 0.48484848484848486
16 step
0.6090347678459308
Accuracy: 0.5757575757575758
17 step
0.6058049968091388
Accuracy: 0.666666

Accuracy: 0.9696969696969697
159 step
0.8886064531920419
Accuracy: 0.9696969696969697
160 step
0.8926231442538253
Accuracy: 0.9696969696969697
161 step
0.8966479746476218
Accuracy: 0.9696969696969697
162 step
0.9006807765937576
Accuracy: 0.9696969696969697
163 step
0.9047213854453069
Accuracy: 0.9696969696969697
164 step
0.9087696396238869
Accuracy: 0.9696969696969697
165 step
0.912825380556908
Accuracy: 0.9696969696969697
166 step
0.9168884526162453
Accuracy: 0.9696969696969697
167 step
0.9209587030582986
Accuracy: 0.9696969696969697
168 step
0.9250359819654024
Accuracy: 0.9696969696969697
169 step
0.9291201421885555
Accuracy: 0.9696969696969697
170 step
0.9332110392914388
Accuracy: 0.9696969696969697
171 step
0.9373085314956873
Accuracy: 0.9696969696969697
172 step
0.9414124796273879
Accuracy: 0.9696969696969697
173 step
0.9455227470647718
Accuracy: 0.9696969696969697
174 step
0.9496391996870734
Accuracy: 0.9696969696969697
175 step
0.9537617058245262
Accuracy: 0.9696969696969697
176

Accuracy: 0.9696969696969697
318 step
1.567624017196175
Accuracy: 0.9696969696969697
319 step
1.5719390608669495
Accuracy: 0.9696969696969697
320 step
1.5762533428959884
Accuracy: 0.9696969696969697
321 step
1.58056685310968
Accuracy: 0.9696969696969697
322 step
1.5848795815226495
Accuracy: 0.9696969696969697
323 step
1.5891915183348726
Accuracy: 0.9696969696969697
324 step
1.5935026539288328
Accuracy: 0.9696969696969697
325 step
1.5978129788667266
Accuracy: 0.9696969696969697
326 step
1.6021224838877162
Accuracy: 0.9696969696969697
327 step
1.6064311599052232
Accuracy: 0.9696969696969697
328 step
1.6107389980042701
Accuracy: 0.9696969696969697
329 step
1.6150459894388651
Accuracy: 0.9696969696969697
330 step
1.6193521256294248
Accuracy: 0.9696969696969697
331 step
1.623657398160245
Accuracy: 0.9696969696969697
332 step
1.6279617987770054
Accuracy: 0.9696969696969697
333 step
1.632265319384325
Accuracy: 0.9696969696969697
334 step
1.6365679520433436
Accuracy: 0.9696969696969697
335 ste

Accuracy: 0.9696969696969697
483 step
2.2655131903965997
Accuracy: 0.9696969696969697
484 step
2.2696477309520264
Accuracy: 0.9696969696969697
485 step
2.27378113424149
Accuracy: 0.9696969696969697
486 step
2.277913401629526
Accuracy: 0.9696969696969697
487 step
2.2820445344961873
Accuracy: 0.9696969696969697
488 step
2.286174534236793
Accuracy: 0.9696969696969697
489 step
2.290303402261682
Accuracy: 0.9696969696969697
490 step
2.29443113999597
Accuracy: 0.9696969696969697
491 step
2.298557748879306
Accuracy: 0.9696969696969697
492 step
2.3026832303656364
Accuracy: 0.9696969696969697
493 step
2.3068075859229733
Accuracy: 0.9696969696969697
494 step
2.3109308170331557
Accuracy: 0.9696969696969697
495 step
2.3150529251916314
Accuracy: 0.9696969696969697
496 step
2.319173911907225
Accuracy: 0.9696969696969697
497 step
2.3232937787019186
Accuracy: 0.9696969696969697
498 step
2.327412527110633
Accuracy: 0.9696969696969697
499 step
2.3315301586810118
Accuracy: 0.9696969696969697
500 step
2.3

Accuracy: 0.9696969696969697
637 step
2.889872692916928
Accuracy: 0.9696969696969697
638 step
2.8938534715749604
Accuracy: 0.9696969696969697
639 step
2.897833409413393
Accuracy: 0.9696969696969697
640 step
2.9018125085271986
Accuracy: 0.9696969696969697
641 step
2.9057907710105226
Accuracy: 0.9696969696969697
642 step
2.909768198956645
Accuracy: 0.9696969696969697
643 step
2.91374479445796
Accuracy: 0.9696969696969697
644 step
2.9177205596059363
Accuracy: 0.9696969696969697
645 step
2.9216954964910964
Accuracy: 0.9696969696969697
646 step
2.9256696072029813
Accuracy: 0.9696969696969697
647 step
2.929642893830128
Accuracy: 0.9696969696969697
648 step
2.9336153584600355
Accuracy: 0.9696969696969697
649 step
2.937587003179143
Accuracy: 0.9696969696969697
650 step
2.9415578300727985
Accuracy: 0.9696969696969697
651 step
2.9455278412252333
Accuracy: 0.9696969696969697
652 step
2.9494970387195365
Accuracy: 0.9696969696969697
653 step
2.9534654246376295
Accuracy: 0.9696969696969697
654 step


Accuracy: 0.9696969696969697
794 step
3.5058473557998786
Accuracy: 0.9696969696969697
795 step
3.509720760853667
Accuracy: 0.9696969696969697
796 step
3.513593632224119
Accuracy: 0.9696969696969697
797 step
3.517465971686832
Accuracy: 0.9696969696969697
798 step
3.5213377810149145
Accuracy: 0.9696969696969697
799 step
3.5252090619789866
Accuracy: 0.9696969696969697
800 step
3.5290798163471804
Accuracy: 0.9696969696969697
801 step
3.532950045885139
Accuracy: 0.9696969696969697
802 step
3.536819752356018
Accuracy: 0.9696969696969697
803 step
3.540688937520484
Accuracy: 0.9696969696969697
804 step
3.544557603136719
Accuracy: 0.9696969696969697
805 step
3.548425750960417
Accuracy: 0.9696969696969697
806 step
3.552293382744783
Accuracy: 0.9696969696969697
807 step
3.5561605002405416
Accuracy: 0.9696969696969697
808 step
3.560027105195927
Accuracy: 0.9696969696969697
809 step
3.563893199356694
Accuracy: 0.9696969696969697
810 step
3.5677587844661116
Accuracy: 0.9696969696969697
811 step
3.57

Accuracy: 0.9696969696969697
944 step
4.081823401620426
Accuracy: 0.9696969696969697
945 step
4.0856352532069335
Accuracy: 0.9696969696969697
946 step
4.089446810555005
Accuracy: 0.9696969696969697
947 step
4.093258075089461
Accuracy: 0.9696969696969697
948 step
4.0970690482330205
Accuracy: 0.9696969696969697
949 step
4.100879731406299
Accuracy: 0.9696969696969697
950 step
4.104690126027827
Accuracy: 0.9696969696969697
951 step
4.108500233514039
Accuracy: 0.9696969696969697
952 step
4.112310055279289
Accuracy: 0.9696969696969697
953 step
4.116119592735841
Accuracy: 0.9696969696969697
954 step
4.119928847293886
Accuracy: 0.9696969696969697
955 step
4.123737820361541
Accuracy: 0.9696969696969697
956 step
4.127546513344845
Accuracy: 0.9696969696969697
957 step
4.131354927647774
Accuracy: 0.9696969696969697
958 step
4.1351630646722395
Accuracy: 0.9696969696969697
959 step
4.138970925818095
Accuracy: 0.9696969696969697
960 step
4.142778512483132
Accuracy: 0.9696969696969697
961 step
4.14658