In [133]:
# Зависимости
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

from sklearn.neural_network import MLPRegressor, MLPClassifier

from sklearn.metrics import mean_squared_error, f1_score

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [134]:
# Seed every random number generator used below so that runs are repeatable.
# The seed is derived from a text code and reduced modulo 2**32.
my_code = "nosova"
seed_limit = 1 << 32
code_as_int = int.from_bytes(my_code.encode(), byteorder="little")
my_seed = code_as_int % seed_limit

# NOTE(review): CPython reads PYTHONHASHSEED at interpreter start-up, so setting
# it here presumably only affects child processes — confirm.
os.environ["PYTHONHASHSEED"] = f"{my_seed}"
random.seed(my_seed)
np.random.seed(my_seed)

# Seed TensorFlow (v1 compatibility API) and register a session limited to one
# intra-op and one inter-op thread as the Keras backend session.
tf_v1 = tf.compat.v1
tf_v1.set_random_seed(my_seed)

session_conf = tf_v1.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1,
)
sess = tf_v1.Session(graph=tf_v1.get_default_graph(), config=session_conf)
tf_v1.keras.backend.set_session(sess)

In [135]:
# Load the dataset from a CSV file (the path is relative).
example_data = pd.read_csv("datasets/Fish.csv")

In [136]:
# Show the first rows of the loaded data.
example_data.head()

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.52,4.02
1,Bream,290.0,24.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.73,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.444,5.134


In [137]:
# Size shared by the validation and the test subsets: 20% of all rows, rounded.
holdout_share = 0.2
val_test_size = round(holdout_share * len(example_data))
print(val_test_size)

32


In [138]:
# Build the training, validation and test subsets.
# Both splits cut off the same number of rows and reuse the same random state.
random_state = my_seed
split_options = dict(test_size=val_test_size, random_state=random_state)
train_val, test = train_test_split(example_data, **split_options)
train, val = train_test_split(train_val, **split_options)
print(len(train), len(val), len(test))

95 32 32


In [139]:
# Numeric columns are rescaled with MinMaxScaler; the species column is one-hot
# encoded (the step is named 'ordinal' although it uses OneHotEncoder).
# The transformer is fitted on the training subset only.
num_columns = ['Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width']
ord_columns = ['Species']

numeric_step = ('numerical', MinMaxScaler(), num_columns)
category_step = ('ordinal', OneHotEncoder(), ord_columns)
ct = ColumnTransformer(transformers=[numeric_step, category_step])

ct.fit(train)

ColumnTransformer(transformers=[('numerical', MinMaxScaler(),
                                 ['Weight', 'Length1', 'Length2', 'Length3',
                                  'Height', 'Width']),
                                ('ordinal', OneHotEncoder(), ['Species'])])

In [140]:
# Apply the fitted transformer to every subset and wrap each result in a DataFrame.
sc_train, sc_test, sc_val = (
    pd.DataFrame(ct.transform(subset)) for subset in (train, test, val)
)

In [141]:
# Display the transformed training frame (columns are still positional here).
sc_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.555988,0.533981,0.547273,0.596284,0.964600,0.741247,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.087647,0.262136,0.265455,0.265203,0.206429,0.363442,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.172800,0.320388,0.325455,0.378378,0.635926,0.459235,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.099811,0.271845,0.274545,0.273649,0.268918,0.377396,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.178882,0.343689,0.343636,0.336149,0.390051,0.576469,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,0.105894,0.300971,0.301818,0.298986,0.278648,0.371547,0.0,0.0,1.0,0.0,0.0,0.0,0.0
91,0.143604,0.304854,0.309091,0.358108,0.579145,0.418978,1.0,0.0,0.0,0.0,0.0,0.0,0.0
92,0.413053,0.464078,0.465455,0.513514,0.843745,0.609326,1.0,0.0,0.0,0.0,0.0,0.0,0.0
93,0.057235,0.168932,0.174545,0.175676,0.206660,0.320535,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [142]:
# Assign column names to the transformed frames: the numeric columns keep their
# names and the one-hot columns that follow them are numbered 0..k-1.
# k is taken from the transformed frame instead of being hard-coded, so the
# naming stays valid if the encoder produces a different number of categories.
n_onehot_columns = sc_train.shape[1] - len(num_columns)
column_names = num_columns + list(range(n_onehot_columns))
for scaled_frame in (sc_train, sc_test, sc_val):
    scaled_frame.columns = column_names

In [143]:
# Regression setup: every numeric column except the last one is an input,
# the last numeric column is the target.
*x_labels, y_labels = num_columns
print(x_labels, y_labels, sep="\n")

['Weight', 'Length1', 'Length2', 'Length3', 'Height']
Width


In [144]:
# Select the input columns and the target column from each scaled subset.
x_train, x_test, x_val = sc_train[x_labels], sc_test[x_labels], sc_val[x_labels]
y_train, y_test, y_val = sc_train[y_labels], sc_test[y_labels], sc_val[y_labels]

In [145]:
# Build a neural network for the regression task with scikit-learn
# and show its full parameter set.
reg_settings = {"alpha": 0.0, "batch_size": 16, "epsilon": 1e-07, "max_iter": 50}
reg = MLPRegressor(**reg_settings)
reg.get_params()

{'activation': 'relu',
 'alpha': 0.0,
 'batch_size': 16,
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-07,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 50,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [146]:
# Train the scikit-learn regressor on the training subset.
reg.fit(x_train, y_train)

MLPRegressor(alpha=0.0, batch_size=16, epsilon=1e-07, max_iter=50)

In [147]:
# Check the trained scikit-learn network on the validation subset (MSE).
pred_val = reg.predict(x_val)
mse1 = mean_squared_error(y_true=y_val, y_pred=pred_val)
print(mse1)

0.004930147589487033


In [148]:
# Build the regression network with Keras: a 100-unit ReLU hidden layer over
# 5 inputs and a single linear output, both layers without bias terms.
model = Sequential([
    Dense(100, input_dim=5, activation='relu', use_bias=False),
    Dense(1, activation='linear', use_bias=False),
])
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mean_squared_error'],
)
model.summary()

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_24 (Dense)            (None, 100)               500       
                                                                 
 dense_25 (Dense)            (None, 1)                 100       
                                                                 
Total params: 600
Trainable params: 600
Non-trainable params: 0
_________________________________________________________________


In [149]:
# Train the Keras network for 50 epochs with batches of 16, tracking the
# validation subset; the returned training history is kept in `history`.
history = model.fit(x_train, y_train, validation_data = (x_val, y_val), epochs=50, batch_size=16, verbose=0)

In [150]:
# Check the trained Keras network on the validation subset (MSE).
pred_val2 = model.predict(x_val)
mse2 = mean_squared_error(y_true=y_val, y_pred=pred_val2)
print(mse2)

0.003941470697286984


In [151]:
# Build the same regression network with a dropout layer (rate 0.5) placed
# between the hidden layer and the output layer.
drop = Sequential([
    Dense(100, input_dim=5, activation='relu', use_bias=False),
    Dropout(rate=0.5),
    Dense(1, activation='linear', use_bias=False),
])
drop.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['mean_squared_error'],
)
drop.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_26 (Dense)            (None, 100)               500       
                                                                 
 dropout_6 (Dropout)         (None, 100)               0         
                                                                 
 dense_27 (Dense)            (None, 1)                 100       
                                                                 
Total params: 600
Trainable params: 600
Non-trainable params: 0
_________________________________________________________________


In [152]:
# Train the dropout network with the same settings as the plain Keras network.
# Note: this rebinds `history`, so the previous model's history is no longer reachable by that name.
history = drop.fit(x_train, y_train, validation_data = (x_val, y_val), epochs=50, batch_size=16, verbose=0)

In [153]:
# Check the trained dropout network on the validation subset (MSE).
pred_val3 = drop.predict(x_val)
mse3 = mean_squared_error(y_true=y_val, y_pred=pred_val3)
print(mse3)

0.004592740120805249


In [154]:
# Validation MSE side by side: scikit-learn, Keras, Keras with dropout.
print(mse1, mse2, mse3)

0.004930147589487033 0.003941470697286984 0.004592740120805249


In [155]:
# Evaluate the Keras network without dropout on the test subset (MSE).
pred_test = model.predict(x_test)
mse = mean_squared_error(y_true=y_test, y_pred=pred_test)
print(mse)

0.0042779036710729


In [156]:
# Задание №1 - решение задачи классификации.
# В качестве входных параметров используем все числовые параметры,
# в качестве выходного - единственный категориальный параметр.

In [157]:
# Classification setup: all numeric columns are inputs and the one-hot encoded
# species columns are the target.
x_labels = num_columns
# The target columns are read from the scaled frame (everything that is not a
# numeric column) rather than assuming exactly seven one-hot columns.
y_labels = [col for col in sc_train.columns if col not in num_columns]
print(x_labels)
print(y_labels)

['Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width']
[0, 1, 2, 3, 4, 5, 6]


In [158]:
# Select the input columns and the one-hot target columns from each scaled subset.
x_train, x_test, x_val = sc_train[x_labels], sc_test[x_labels], sc_val[x_labels]
y_train, y_test, y_val = sc_train[y_labels], sc_test[y_labels], sc_val[y_labels]

In [159]:
# Display the one-hot encoded training targets.
y_train

Unnamed: 0,0,1,2,3,4,5,6
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...
90,0.0,0.0,1.0,0.0,0.0,0.0,0.0
91,1.0,0.0,0.0,0.0,0.0,0.0,0.0
92,1.0,0.0,0.0,0.0,0.0,0.0,0.0
93,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [160]:
# Создайте нейронную сеть для решения задачи классификации двумя способами: с помощью sklearn и keras.
# Сравните их эффективность.
# Для keras используйте loss и metrics = 'categorical_crossentropy'.

In [161]:
# scikit-learn: build the classifier with the same settings as the regressor
# above and show its full parameter set.
clas_settings = {"alpha": 0.0, "batch_size": 16, "epsilon": 1e-07, "max_iter": 50}
clas = MLPClassifier(**clas_settings)
clas.get_params()


{'activation': 'relu',
 'alpha': 0.0,
 'batch_size': 16,
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-07,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 50,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [162]:
# Train the scikit-learn classifier on the training subset.
# NOTE(review): y_train is a multi-column one-hot frame; scikit-learn presumably
# treats such a 2-D indicator target as a multilabel problem rather than a
# single-label multiclass one — confirm whether class labels were intended.
clas.fit(x_train, y_train)



MLPClassifier(alpha=0.0, batch_size=16, epsilon=1e-07, max_iter=50)

In [163]:
# Check the scikit-learn classifier on the validation subset.
# `mse1` is kept unchanged because later cells print it.
pred_val = clas.predict(x_val)
mse1 = mean_squared_error(y_val, pred_val)
print(mse1)

# MSE between one-hot targets and hard 0/1 predictions says little about how
# many samples are classified correctly (it cannot tell "no class predicted"
# from "wrong class predicted"), so top-1 accuracy is reported as well:
# the most probable class is compared with the true class of every sample.
y_val_cls = np.asarray(y_val).argmax(axis=1)
acc1 = np.mean(clas.predict_proba(x_val).argmax(axis=1) == y_val_cls)
print('accuracy:', acc1)

0.14285714285714285


In [164]:
# Keras: classifier with a 100-unit ReLU hidden layer over 6 inputs and a
# 7-way softmax output, both layers without bias terms.
model = Sequential([
    Dense(100, input_dim=6, activation='relu', use_bias=False),
    Dense(7, activation='softmax', use_bias=False),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['categorical_crossentropy'],
)
model.summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_28 (Dense)            (None, 100)               600       
                                                                 
 dense_29 (Dense)            (None, 7)                 700       
                                                                 
Total params: 1,300
Trainable params: 1,300
Non-trainable params: 0
_________________________________________________________________


In [165]:
# Train the Keras classifier for 50 epochs with batches of 16, tracking the validation subset.
history = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=50, batch_size=16, verbose=0)
                  

In [166]:
# Check the Keras classifier on the validation subset.
# `mse2` is kept unchanged because later cells print it.
pred_val2 = model.predict(x_val)
mse2 = mean_squared_error(y_val, pred_val2)
print(mse2)

# MSE of softmax probabilities is not a classification score and is not
# comparable with an MSE computed from hard predictions, so top-1 accuracy
# (most probable class vs. true class) is reported as well.
y_val_cls = np.asarray(y_val).argmax(axis=1)
acc2 = np.mean(pred_val2.argmax(axis=1) == y_val_cls)
print('accuracy:', acc2)

0.09108768548162939


In [167]:
# Validation MSE side by side: scikit-learn classifier, Keras classifier.
print(mse1, mse2)

0.14285714285714285 0.09108768548162939


In [168]:
# Задание №2 - использование dropout-слоя.

In [169]:
# Создайте нейронную сеть для решения задачи классификации с помощью keras. Используйте dropout-слой.
# Сравните эффективность нейронных сетей с dropout-слоем и без него.
# Попробуйте найти такие параметры dropout-слоя, чтобы сеть с ним работала лучше, чем без него.

In [170]:
# Keras classifier with a dropout layer (rate 0.1) between the hidden layer
# and the softmax output.
drop = Sequential([
    Dense(100, input_dim=6, activation='relu', use_bias=False),
    Dropout(rate=0.1),
    Dense(7, activation='softmax', use_bias=False),
])
drop.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['categorical_crossentropy'],
)
drop.summary()

Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_30 (Dense)            (None, 100)               600       
                                                                 
 dropout_7 (Dropout)         (None, 100)               0         
                                                                 
 dense_31 (Dense)            (None, 7)                 700       
                                                                 
Total params: 1,300
Trainable params: 1,300
Non-trainable params: 0
_________________________________________________________________


In [171]:
# Train the dropout classifier for 50 epochs with batches of 16, tracking the validation subset.
history = drop.fit(x_train, y_train, validation_data = (x_val, y_val), epochs=50, batch_size=16, verbose=0)

In [172]:
# Check the dropout classifier on the validation subset.
# `mse3` is kept unchanged because a later cell prints it.
pred_val3 = drop.predict(x_val)
mse3 = mean_squared_error(y_val, pred_val3)
print(mse3)

# MSE of softmax probabilities is not a classification score, so top-1
# accuracy (most probable class vs. true class) is reported as well.
y_val_cls = np.asarray(y_val).argmax(axis=1)
acc3 = np.mean(pred_val3.argmax(axis=1) == y_val_cls)
print('accuracy:', acc3)

0.09119040517494871


In [173]:
# Validation MSE of the three classifiers, one per line.
print('sklearn:', mse1, '\nkeras:', mse2, '\nkeras drop:', mse3)

sklearn: 0.14285714285714285 
keras: 0.09108768548162939 
keras drop: 0.09119040517494871


In [174]:
# Check the scikit-learn classifier on the test subset.
pred_test = clas.predict(x_test)
mse = mean_squared_error(y_test, pred_test)
print('sklearn:', mse)

# MSE of hard 0/1 predictions cannot tell "no class predicted" from "wrong
# class predicted", so top-1 accuracy (most probable class vs. true class)
# is reported as well.
y_test_cls = np.asarray(y_test).argmax(axis=1)
acc = np.mean(clas.predict_proba(x_test).argmax(axis=1) == y_test_cls)
print('sklearn accuracy:', acc)

sklearn: 0.14285714285714285


In [175]:
# Check the Keras classifier on the test subset.
pred_test = model.predict(x_test)
mse = mean_squared_error(y_test, pred_test)
print('keras:', mse)

# MSE of softmax probabilities is not a classification score, so top-1
# accuracy (most probable class vs. true class) is reported as well.
y_test_cls = np.asarray(y_test).argmax(axis=1)
acc = np.mean(pred_test.argmax(axis=1) == y_test_cls)
print('keras accuracy:', acc)

keras: 0.08903929283203622


In [176]:
# Check the dropout classifier on the test subset.
pred_test = drop.predict(x_test)
mse = mean_squared_error(y_test, pred_test)
print('keras drop:', mse)

# MSE of softmax probabilities is not a classification score, so top-1
# accuracy (most probable class vs. true class) is reported as well.
y_test_cls = np.asarray(y_test).argmax(axis=1)
acc = np.mean(pred_test.argmax(axis=1) == y_test_cls)
print('keras drop accuracy:', acc)

keras drop: 0.09000731486393983
