In [1]:
import numpy as np
import pandas as pd
import sys

In [2]:
def activation(signal):
    """ Activation function maps received signal to set {-1, 1}
    f(x) = 1 for every x > 0
    f(x) = -1 for every x <= 0
    Works with numbers, lists and numpy arrays
    """
    if type(signal) in (list, np.ndarray):
        return list(map(
                lambda x: 1 if x==True else -1,
                np.array(signal)>0)
        )
    elif type(signal) in (int, float, np.int, np.int64, np.float, np.float64):
        return 1 if signal > 0 else -1
    else:
        return sys.exit("output signal must be number or array")

In [3]:
class Perceptron(object):
    """Class represents single layer perceptron"""
    def __init__(self, activation=None, velocity=0.1):
        self.weights = np.array([], dtype=float)
        self.velocity = velocity # 0 < velocity < 1
        self.activation = activation
            
    def fit(self, X, y):
        # Insert one as bias in start of each feature set
        X = np.insert(X, 0, 1, axis=1)
        self.weights = np.random.uniform(low=0, high=1, size=len(X[0]))
        # Fitting
        for i, features in enumerate(X):
            output_signal = self.activation(features @ self.weights)
            if output_signal != y[i]:
                self.weights += self.velocity * y[i] * features
        return self.weights
            
    def predict(self, X):
        X = np.insert(X.to_numpy(), 0, 1, axis=1) # for bias
        return self.activation(X @ self.weights)
    
    def evaluate_error(self, X, y):
        """ Accuracy
        score = right_predicted / all
        """
        X = np.insert(X, 0, 1, axis=1) # for bias
        right_predicted = 0
        
        for i, features in enumerate(X):
            res = self.activation(features @ self.weights)
            if res == y[i]:
                right_predicted += 1
        
        return 100 * right_predicted / len(X)

### Подготовка данных

In [4]:
data = pd.read_csv("weatherAUS.csv")
for column in data.columns:
    data = data[pd.notnull(data[column])]

tmp = {"Yes": 1, "No": -1}
data["RainTomorrow"] = list(map(lambda x: tmp[x], data["RainTomorrow"]));

data = data._get_numeric_data() # it's illigal

X_train = data[:50000]
del X_train["RainTomorrow"]
y_train = data["RainTomorrow"][:50000]

X_test = data[50000:]
del X_test["RainTomorrow"]
y_test = data["RainTomorrow"][50000:]

### Проверка

In [5]:
p = Perceptron(activation=activation, velocity=0.888889)
p.fit(X_train.to_numpy(), y_train.to_numpy())

array([ 9.20053997e+00,  2.69214025e+03,  5.80319230e+03, -1.31664867e+04,
        4.08852507e+03, -7.52745784e+03,  1.30841320e+04, -6.61623205e+02,
       -4.43320480e+02, -1.04463525e+04,  1.75160648e+04,  1.07266240e+03,
       -4.18417677e+03,  1.99945971e+03,  4.52897009e+03,  4.84554105e+03,
        3.76067896e+03,  3.28468653e+04])

In [6]:
p.evaluate_error(X_test.to_numpy(), y_test.to_numpy())

81.4797507788162

### Тюнинг модели

In [7]:
%%time

velocity_range = np.linspace(0, 1, 100)
max = (-1, 0)

for v in velocity_range:
    p = Perceptron(activation=activation, velocity=0.1)
    p.fit(X_train.to_numpy(), y_train.to_numpy())
    score = p.evaluate_error(X_test.to_numpy(), y_test.to_numpy())
    print("Velocity: %f\tScore: %f" % (v, score))
    if score > max[1]:
        max = (v, score)
print("\nBest velocity: %f\tScore: %f" % max)

Velocity: 0.000000	Score: 88.816199
Velocity: 0.010101	Score: 81.542056
Velocity: 0.020202	Score: 81.526480
Velocity: 0.030303	Score: 81.588785
Velocity: 0.040404	Score: 81.542056
Velocity: 0.050505	Score: 90.342679
Velocity: 0.060606	Score: 81.526480
Velocity: 0.070707	Score: 81.542056
Velocity: 0.080808	Score: 81.542056
Velocity: 0.090909	Score: 81.510903
Velocity: 0.101010	Score: 90.467290
Velocity: 0.111111	Score: 81.526480
Velocity: 0.121212	Score: 81.526480
Velocity: 0.131313	Score: 81.697819
Velocity: 0.141414	Score: 81.573209
Velocity: 0.151515	Score: 81.510903
Velocity: 0.161616	Score: 81.526480
Velocity: 0.171717	Score: 81.542056
Velocity: 0.181818	Score: 81.573209
Velocity: 0.191919	Score: 81.542056
Velocity: 0.202020	Score: 81.542056
Velocity: 0.212121	Score: 82.274143
Velocity: 0.222222	Score: 81.744548
Velocity: 0.232323	Score: 88.707165
Velocity: 0.242424	Score: 81.542056
Velocity: 0.252525	Score: 81.682243
Velocity: 0.262626	Score: 81.542056
Velocity: 0.272727	Score: 88