# Двухслойный перцептрон

Для работы нужен файл clouds.data.h5. Если нет в папке - запустить 26_image_preprocessing.ipynb

1. Построить двухслойный перцептрон для типа облака Fish
2. Оценить качество по коэффициенту сходства

## Подключение библиотек

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

## Используемые функции

In [2]:
image_x = 525
image_y = 350


def mask_rate(a, x, y):
    b = a // 1400
    return np.round(x*(b*x // 2100) + y*(a%1400) // 1400).astype("uint32")

def calc_mask(px, x=525, y=350):
    p = np.array([int(n) for n in px.split(" ")]).reshape(-1, 2)
    mask = np.zeros(y*x, dtype="uint8")
    for i, m in p:
        mask[mask_rate(i, x, y)-1:mask_rate(m + i, x, y)] = 1
    return mask.reshape(y, x).transpose()

def calc_dice(x):
    dice = 0
    px = x["EncodedPixels"]
    if px != px and x["target"] == 0:
        dice = 1
    elif px == px and x["target"] == 1:
        mask = calc_mask(px).flatten()
        target = np.ones(image_x*image_y, dtype="uint8")
        dice = 2*np.sum(target[mask==1]) / (np.sum(target) + np.sum(mask))
    
    return dice

## Загрузить данные

In [3]:
clouds = pd.read_hdf('../data/out/clouds.data.h5')
print(clouds.head())

    0   1    2    3    4    5    6    7    8    9  ...  183745  183746  \
0   0   0    0    0    0    0    0    0    0    0  ...     102     113   
1  72  43   32   39   49   69   71   47   25   22  ...      38      58   
2  97  98  100  102  105  107  109  110  111  112  ...       0       0   
3  88  87   90   94   94   89   87   87   82   82  ...     166     164   
4  18  19   21   21   21   20   18   17   15   16  ...     125     137   

   183747  183748  183749        Image  \
0     102      96     117  0011165.jpg   
1      83     111     134  002be4f.jpg   
2       0       0       0  0031ae9.jpg   
3     109      70      72  0035239.jpg   
4      93      60      49  003994e.jpg   

                                                Fish  \
0  264918 937 266318 937 267718 937 269118 937 27...   
1  233813 878 235213 878 236613 878 238010 881 23...   
2  3510 690 4910 690 6310 690 7710 690 9110 690 1...   
3                                                NaN   
4  2367966 18 2367985 

## Оставить данные только по Fish

In [4]:
clouds.drop(
    labels=[
        "Image",
        "Flower",
        "Gravel",
        "Sugar",
    ],
    axis=1,
    inplace=True,
)

## Разделение данных

In [5]:
clouds_train, clouds_test = train_test_split(clouds, test_size=0.2)
del clouds
print(clouds_train.head())

        0    1    2    3    4    5    6    7    8   9  ...  183741  183742  \
3686   27   31   37   41   40   35   27   22   34  46  ...      53      63   
1191   32   32   33   36   39   40   35   29   27  30  ...      61      85   
1349   46   81   59   40   86   48    9  118   66   6  ...     180     193   
4767   60   82   72  135  197  194  196  175  117  87  ...      59     133   
2437  131  198  213  177  117   68   57   44   29  32  ...     176     151   

      183743  183744  183745  183746  183747  183748  183749  \
3686      73      78      96      50      37      65      85   
1191      40      25      26      26      27      27      27   
1349     206     209     210     211     208     201     193   
4767     181      75      77     116     122     117      84   
2437      94      42      56      80      54      29      27   

                                                   Fish  
3686                                                NaN  
1191                          

## Двухслойный перцептрон

In [6]:
y = clouds_train["Fish"].notnull().astype("int8")
x = pd.DataFrame(clouds_train).drop(labels=["Fish"], axis=1)
model = MLPClassifier(
    hidden_layer_sizes=(31,),
    max_iter=20,
    activation="logistic",
    verbose=10,
    random_state=1,
    learning_rate_init=.02,
    warm_start=True,
)

In [7]:
for i in range(len(clouds_train) // 100):
    model.partial_fit(x[i:i + 100], y[i:i + 100], classes=[0, 1])

Iteration 1, loss = 0.73990315
Iteration 2, loss = 0.68337871
Iteration 3, loss = 0.68934231
Iteration 4, loss = 0.69334777
Iteration 5, loss = 0.69743538
Iteration 6, loss = 0.69750033
Iteration 7, loss = 0.69853543
Iteration 8, loss = 0.70023779
Iteration 9, loss = 0.70273096
Iteration 10, loss = 0.70604421
Iteration 11, loss = 0.70794807
Iteration 12, loss = 0.70958805
Iteration 13, loss = 0.70953672
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Iteration 14, loss = 0.71154238
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Iteration 15, loss = 0.71462263
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Iteration 16, loss = 0.71780139
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Iteration 17, loss = 0.71646315
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
Iteration 18, l

In [8]:
del x
del y

## Предсказать значения

In [11]:
result = pd.DataFrame({"EncodedPixels": clouds_test["Fish"]})
result["target"] = model.predict(clouds_test.drop(labels=["Fish"], axis=1))
print(result.head())

                                          EncodedPixels  target
2954  208010 312 209410 312 210810 312 212210 312 21...       0
3585                                                NaN       0
24    973171 62 973238 2 973244 2 973249 1 973265 8 ...       0
5425                                                NaN       0
2251                                                NaN       0


## Оценка по Дайсу
Считаем, что область облака - это все изображение

In [12]:
dice = result.apply(calc_dice, axis=1, result_type="expand")
print(f'MLP, Fish: {round(dice.mean(), 3)}')

MLP, Fish: 0.506
