In [32]:
import pickle
import numpy as np

X_train, y_train, X_train_skel_features = None, None, None
with open("train_info", "rb") as fin:
    data = pickle.load(fin)
    X_train, y_train, X_train_skel_features = data["data"], data["labels"], data["skel_features"]

X_test, y_test, X_test_skel_features = None, None, None
with open("test_info", "rb") as fin:
    data = pickle.load(fin)
    X_test, y_test, X_test_skel_features = data["data"], data["labels"], data["skel_features"]

In [5]:
def get_edges_ged_rad(skel_features):
    edges_x = []
    edges_y = []
    degs = set()
    rads = set()
    for i, d in enumerate(skel_features):
        if i % 4 == 0:
            edges_x.append(d)
        if i % 4 == 1:
            edges_y.append(d)
        if i % 4 == 2:
            degs.add((edges_x[-1], edges_y[-1], d))
        if i % 4 == 3:
            rads.add((edges_x[-1], edges_y[-1], d))
    
    return edges_x, edges_y, list(degs), list(rads)

In [6]:
def draw_skeleton(image, features):
    res_x, res_y, res_deg, res_rad = get_edges_ged_rad(features)
    
    fig = plt.figure(figsize=(18, 8))

    ax = fig.add_subplot(121)
    ax.imshow(image.reshape((s, s)), cmap="gray", interpolation="none")

    for i in range(0, len(res_x), 2):
        ax.plot(res_x[i:i + 2], s - np.array(res_y[i:i + 2]), color="black")

    for i in range(len(res_deg)):
        x, y, d = res_deg[i]
        ax.scatter([x], [s - y], color="blue", s=5)
        ax.text(x, s - y, str(int(d)), fontsize=16, color="red")

    ax = fig.add_subplot(122)

    ax.imshow(image.reshape((s, s)), cmap="gray", interpolation="none")

    for i in range(0, len(res_x), 2):
        ax.plot(res_x[i:i + 2], s - np.array(res_y[i:i + 2]), color="black")

    for i in range(len(res_rad)):
        x, y, r = res_deg[i]
        circle = plt.Circle((x, s - y), r, color="cyan", ls="-", linewidth=2, fill=False)
        ax.scatter([x], [s - y], color="blue", s=5)
        ax.add_patch(circle)

    fig.show()

In [7]:
import matplotlib.pyplot as plt
from skimage import io
%matplotlib inline

In [8]:
s = int(np.sqrt(X_train.shape[1]))

**ВНИМАНИЕ! Здесь отрисовщик автоматически отражает картинку симметрично по оси Y**

In [None]:
for i in range(10):
    idx1, idx2 = np.where(y_train == i)[0][:2]
    draw_skeleton(X_train[idx1, :], X_train_skel_features[idx1])
#     res_x, res_y, res_deg, res_rad = get_edges_ged_rad(X_train_skel_features[idx1])
#     y = np.unique(res_deg, axis=0)
#     z = [] 
#     for i in y:
#         z.append(tuple(i))
#     print(len(res_deg), len(z))
    draw_skeleton(X_train[idx2, :], X_train_skel_features[idx2])

## Data

In [9]:
import pandas as pd
import math
from tqdm import tqdm_notebook as tqdm

In [12]:
def add_stat_features(df, array, name):
    df[name + '_min'] = [array.min()]
    df[name + '_max'] = [array.max()]
    df[name + '_std'] = [array.std()]
    df[name + '_mean'] = [array.mean()]
    

def get_features(skelet):
    try:
        res_x, res_y, res_deg, res_rad = skelet
        res_x, res_y = np.array(res_x), np.array(res_y)

        vectors = list(zip(res_x[1::2] - res_x[:-1:2], res_y[1::2] - res_y[:-1:2]))

        features = pd.DataFrame()
        features['num_of_points'] = [len(vectors)]

        vec_x = np.array(list(zip(*vectors))[0])
        vec_y = np.array(list(zip(*vectors))[1])
        add_stat_features(features, vec_x, 'vec_x')
        add_stat_features(features, vec_y, 'vec_y')


        deg_x, deg_y, deg_r = zip(*res_deg)
        deg_r = np.array(deg_r)
        add_stat_features(features, deg_r, 'radius')

        deg_x, deg_y = np.array(deg_x), np.array(deg_y)
        add_stat_features(features, deg_x, 'x_coord')
        add_stat_features(features, deg_y, 'y_coord')

        vec_lenghts = np.apply_along_axis(lambda x: math.sqrt(x[0] ** 2 + x[1] ** 2), 1, vectors)
        add_stat_features(features, vec_lenghts, 'vec_len')

        vec_angles = np.apply_along_axis(lambda x: math.atan2(x[1], x[0]), 1, vectors)
        add_stat_features(features, vec_angles, 'vec_angles')
    
        ## hist
        num_of_bins = 10
        for i in range(num_of_bins + 1):
            features['angle_hist_' + str(i)] = 0
        for angle in vec_angles:
            bar_long = 2 * math.pi / num_of_bins
            bar_num = int((angle + math.pi) / bar_long)
            feature_name = 'angle_hist_' + str(bar_num)
            features[feature_name] = features[feature_name][0] + 1
        for i in range(num_of_bins + 1):
            features['angle_norm_hist_' + str(i)] = features['angle_hist_' + str(i)] / len(vectors)
        return features
    except Exception:
        raise Exception
        return None

In [36]:
train = pd.DataFrame()
for i in tqdm(range(10), desc='class'):
    indexes = np.where(y_train == i)[0]
    for ind in tqdm(indexes, desc='instance', leave=False):
        skelet = get_edges_ged_rad(X_train_skel_features[ind])
        y = y_train[ind]
        to_add = get_features(skelet)
        if to_add is not None:
            to_add['target'] = y
            train = pd.concat([train, to_add], ignore_index=True)

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget

A Jupyter Widget




In [37]:
from sklearn.model_selection import train_test_split

In [38]:
X, y = train.drop(['target'], axis=1), train.target
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.33, random_state=42)

## LightGDM

In [59]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
import lightgbm as lgb

gbm = lgb.LGBMClassifier(num_class=10, learning_rate=0.3, n_jobs=7, n_estimators=300, num_leaves=50, random_state=1312, reg_lambda=0.8)

gbm.fit(Xtrain, ytrain,
        eval_set=[(Xtest, ytest)],
        early_stopping_rounds=5)

[1]	valid_0's multi_logloss: 1.38468
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's multi_logloss: 1.09052
[3]	valid_0's multi_logloss: 0.899727
[4]	valid_0's multi_logloss: 0.766735
[5]	valid_0's multi_logloss: 0.667432
[6]	valid_0's multi_logloss: 0.591289
[7]	valid_0's multi_logloss: 0.530863
[8]	valid_0's multi_logloss: 0.48342
[9]	valid_0's multi_logloss: 0.445269
[10]	valid_0's multi_logloss: 0.413328
[11]	valid_0's multi_logloss: 0.387552
[12]	valid_0's multi_logloss: 0.366013
[13]	valid_0's multi_logloss: 0.347165
[14]	valid_0's multi_logloss: 0.331258
[15]	valid_0's multi_logloss: 0.31744
[16]	valid_0's multi_logloss: 0.30547
[17]	valid_0's multi_logloss: 0.29505
[18]	valid_0's multi_logloss: 0.286046
[19]	valid_0's multi_logloss: 0.278163
[20]	valid_0's multi_logloss: 0.271469
[21]	valid_0's multi_logloss: 0.265607
[22]	valid_0's multi_logloss: 0.260755
[23]	valid_0's multi_logloss: 0.255874
[24]	valid_0's multi_logloss: 0.251208
[25]	valid_0's mul

LGBMClassifier(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
        importance_type='split', learning_rate=0.3, max_depth=-1,
        min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
        n_estimators=300, n_jobs=7, num_class=10, num_leaves=50,
        objective=None, random_state=1312, reg_alpha=0.0, reg_lambda=0.8,
        silent=True, subsample=1.0, subsample_for_bin=200000,
        subsample_freq=0)

In [56]:
y_pred = gbm.predict(Xtest, num_iteration=gbm.best_iteration_)
print('Accuracy:', accuracy_score(ytest, y_pred))

Accuracy: 0.9371212121212121


  if diff:
