In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import accuracy_score
import numpy as np

In [2]:
df_test = pd.read_table('http://work.caltech.edu/data/out.dta', 
                        delim_whitespace=True, header=None, names=['x1', 'x2', 'y'])
df_train = pd.read_table('http://work.caltech.edu/data/in.dta', 
                         delim_whitespace=True, header=None, names=['x1', 'x2', 'y'])

In [3]:
X_train, y_train = df_train[['x1', 'x2']], df_train[['y']]
X_test, y_test = df_test[['x1', 'x2']], df_test[['y']]

In [4]:
X_train['x0'] = np.ones(len(X_train))
X_train['x3'] = X_train['x1']**2
X_train['x4'] = X_train['x2']**2
X_train['x5'] = X_train['x1']*X_train['x2']
X_train['x6'] = np.abs(X_train['x1']-X_train['x2'])
X_train['x7'] = np.abs(X_train['x1']+X_train['x2'])

In [5]:
X_test['x0'] = np.ones(len(X_test))
X_test['x3'] = X_test['x1']**2
X_test['x4'] = X_test['x2']**2
X_test['x5'] = X_test['x1']*X_test['x2']
X_test['x6'] = np.abs(X_test['x1']-X_test['x2'])
X_test['x7'] = np.abs(X_test['x1']+X_test['x2'])

## Линейная регрессия:

In [6]:
lr = LinearRegression()
lr.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [7]:
y_pred_train = lr.predict(X_train)
y_pred_train = np.where(y_pred_train > 0, 1, -1)

In [8]:
y_pred_test = lr.predict(X_test)
y_pred_test = np.where(y_pred_test > 0, 1, -1)

### Значения ошибок:

In [9]:
print('E_in: ', 1 - accuracy_score(y_train, y_pred_train))
print('Е_out: ', 1 - accuracy_score(y_test, y_pred_test))

E_in:  0.02857142857142858
Е_out:  0.08399999999999996


## L2-регуляризация

In [10]:
ridge = Ridge(alpha=10e-3)
ridge.fit(X_train, y_train)

Ridge(alpha=0.01, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [11]:
y_pred_train = ridge.predict(X_train)
y_pred_train = np.where(y_pred_train > 0, 1, -1)

In [12]:
y_pred_test = ridge.predict(X_test)
y_pred_test = np.where(y_pred_test > 0, 1, -1)

### Значения ошибок

In [13]:
print('E_in: ', 1 - accuracy_score(y_train, y_pred_train))
print('Е_out: ', 1 - accuracy_score(y_test, y_pred_test))

E_in:  0.02857142857142858
Е_out:  0.07999999999999996
