In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn as sk
from sklearn import linear_model
import warnings

In [2]:
# Load the in-sample data set: whitespace-separated rows of (x, y, sign).
# `sep=r'\s+'` is the documented equivalent of `delim_whitespace=True`,
# which recent pandas releases deprecate.
sample = pd.read_table(filepath_or_buffer='in.dta',
                       sep=r'\s+',
                       names=['x', 'y', 'sign'],
                       dtype={'x': np.float64, 'y': np.float64, 'sign': np.int32})
sample.head()

Unnamed: 0,x,y,sign
0,-0.77947,0.838221,1
1,0.155635,0.895377,1
2,-0.059908,-0.71778,1
3,0.207596,0.758933,1
4,-0.195983,-0.375487,-1


In [3]:
# Load the out-of-sample data set; same layout as 'in.dta':
# whitespace-separated rows of (x, y, sign).
# `sep=r'\s+'` is the documented equivalent of `delim_whitespace=True`,
# which recent pandas releases deprecate.
test = pd.read_table(filepath_or_buffer='out.dta',
                     sep=r'\s+',
                     names=['x', 'y', 'sign'],
                     dtype={'x': np.float64, 'y': np.float64, 'sign': np.int32})
test.head()

Unnamed: 0,x,y,sign
0,-0.106006,-0.081467,-1
1,0.17793,-0.345951,-1
2,0.102162,0.718258,1
3,0.694078,0.623397,-1
4,0.023541,0.727432,1


In [4]:
# Non-linear feature map for a 2-D point `p` (indexable as p[0], p[1]).
# The position of each entry in the list is the feature index.
transformations = [
    lambda p: 1,                     # 0: constant term
    lambda p: p[0],                  # 1: first coordinate
    lambda p: p[1],                  # 2: second coordinate
    lambda p: p[0]**2,               # 3: first coordinate squared
    lambda p: p[1]**2,               # 4: second coordinate squared
    lambda p: p[0] * p[1],           # 5: product of the coordinates
    lambda p: np.abs(p[0] - p[1]),   # 6: absolute difference
    lambda p: np.abs(p[0] + p[1])    # 7: absolute sum
]

def create_func(n, vector):
    """Build a classifier from the first ``n + 1`` transformations.

    Parameters
    ----------
    n : int
        Index of the last entry of ``transformations`` to use; entries
        ``0..n`` are included.
    vector : indexable
        Weights; ``vector[i]`` multiplies ``transformations[i](p)``.

    Returns
    -------
    callable
        ``func(p)`` returning ``int(np.sign(...))`` of the weighted sum of
        the transformed point, i.e. -1, 0 or 1.
    """
    def func(p):
        # Accumulate left to right, starting from 0 exactly like `sum`.
        weighted_total = 0
        for idx in range(n + 1):
            weighted_total = weighted_total + transformations[idx](p) * vector[idx]
        return int(np.sign(weighted_total))
    return func

In [5]:
def run_experiment(training_size, from_head=True):
    """Fit nested linear models on a training split and print their errors.

    The global ``sample`` frame is split into a training part of
    ``training_size`` rows and a validation part holding the remaining rows.
    For k = 1..len(transformations) - 1 a ``LinearRegression`` is fitted on
    features 1..k (the intercept plays the role of feature 0), turned into a
    sign classifier with ``create_func``, and its classification error is
    printed for the validation split (E_val) and for the global ``test``
    frame (E_out).

    Side effect: the transformed features are stored in ``sample`` as
    columns labelled 0..len(transformations) - 1 (overwritten on each call).

    Parameters
    ----------
    training_size : int
        Number of rows used for training; must satisfy
        ``0 < training_size < len(sample)`` so both splits are non-empty.
    from_head : bool, default True
        If True, train on the first ``training_size`` rows and validate on
        the rest; otherwise train on the last ``training_size`` rows and
        validate on the rows before them.

    Raises
    ------
    ValueError
        If ``training_size`` would leave an empty training or validation set.
    """
    # Import explicitly: `import sklearn as sk` alone does not guarantee that
    # the `sk.metrics` attribute has been loaded.
    from sklearn.metrics import accuracy_score

    n_samples = len(sample)
    if not 0 < training_size < n_samples:
        raise ValueError(
            'training_size must be between 1 and {0}, got {1}'.format(
                n_samples - 1, training_size))

    # raw=True hands each row to the callable as a plain ndarray, so the
    # positional lookups x[0] / x[1] inside the transformations do not rely on
    # pandas' deprecated positional fallback for label-indexed Series.
    points = sample[['x', 'y']]
    for i, t in enumerate(transformations):
        sample[i] = points.apply(t, axis=1, raw=True)

    # Split by position; the split point is derived from the actual sample
    # length instead of a hard-coded row count.
    if from_head:
        split = training_size
        training, validation = sample.iloc[:split], sample.iloc[split:]
    else:
        split = n_samples - training_size
        training, validation = sample.iloc[split:], sample.iloc[:split]

    regr = sk.linear_model.LinearRegression()

    print("Training size: ", training_size)
    print('{0:10}{1:25}{2:25}'.format('Model', 'E_val', 'E_out'))

    for k in range(1, len(transformations)):
        # Select feature columns 1..k by label rather than by column offset.
        feature_cols = list(range(1, k + 1))
        regr.fit(training[feature_cols], training.sign)

        # Weight vector in transformation order: intercept first (feature 0).
        w = np.append(regr.intercept_, regr.coef_)
        f = create_func(k, w)

        val_pred = validation[['x', 'y']].apply(f, axis=1, raw=True)
        out_pred = test[['x', 'y']].apply(f, axis=1, raw=True)
        e_val = 1 - accuracy_score(validation.sign, val_pred)
        e_out = 1 - accuracy_score(test.sign, out_pred)

        print('{0:<10}{1:<25}{2:<25}'.format(k, e_val, e_out))

In [6]:
# Train on the first 25 rows of `sample`; validate on the remaining rows.
run_experiment(training_size=25, from_head=True)

Training size:  25
Model     E_val                    E_out                    
1         0.5                      0.488                    
2         0.7                      0.648                    
3         0.30000000000000004      0.42000000000000004      
4         0.5                      0.41600000000000004      
5         0.19999999999999996      0.18799999999999994      
6         0.0                      0.08399999999999996      
7         0.09999999999999998      0.07199999999999995      


In [7]:
# Swap the roles: train on the 10-row tail of `sample`, validate on the rows before it.
run_experiment(training_size=10, from_head=False)

Training size:  10
Model     E_val                    E_out                    
1         0.43999999999999995      0.472                    
2         0.43999999999999995      0.5                      
3         0.28                     0.396                    
4         0.36                     0.388                    
5         0.19999999999999996      0.28400000000000003      
6         0.07999999999999996      0.19199999999999995      
7         0.12                     0.19599999999999995      
