In [1]:
# Зависимости
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

from sklearn.neural_network import MLPRegressor, MLPClassifier

from sklearn.metrics import mean_squared_error, f1_score

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [2]:
# Инициализируем все известные генераторы случаынйх чисел / Setting all known random seeds
my_code = "Sattorov"
seed_limit = 2 ** 32
my_seed = int.from_bytes(my_code.encode(), "little") % seed_limit

os.environ['PYTHONHASHSEED']=str(my_seed)

random.seed(my_seed)

np.random.seed(my_seed)

tf.compat.v1.set_random_seed(my_seed)

session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

In [3]:
# Читаем данные из файла
example_data = pd.read_csv("datasets/Fish.csv")

In [4]:
example_data.head()

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.52,4.02
1,Bream,290.0,24.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.73,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.444,5.134


In [5]:
# Определим размер валидационной и тестовой выборок
val_test_size = round(0.2*len(example_data))
print(val_test_size)

32


In [6]:
# Создадим обучающую, валидационную и тестовую выборки
random_state = my_seed
train_val, test = train_test_split(example_data, test_size=val_test_size, random_state=random_state)
train, val = train_test_split(train_val, test_size=val_test_size, random_state=random_state)
print(len(train), len(val), len(test))

95 32 32


In [7]:
# Значения в числовых столбцах преобразуем к отрезку [0,1].
# Для настройки скалировщика используем только обучающую выборку.
num_columns = ['Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width']
ord_columns = ['Species']

ct = ColumnTransformer(transformers=[
    ('numerical', MinMaxScaler(), num_columns), 
    ('ordinal', OneHotEncoder(), ord_columns)])

ct.fit(train)

ColumnTransformer(transformers=[('numerical', MinMaxScaler(),
                                 ['Weight', 'Length1', 'Length2', 'Length3',
                                  'Height', 'Width']),
                                ('ordinal', OneHotEncoder(), ['Species'])])

In [8]:
# Преобразуем значения, тип данных приводим к DataFrame
sc_train = pd.DataFrame(ct.transform(train))
sc_test = pd.DataFrame(ct.transform(test))
sc_val = pd.DataFrame(ct.transform(val))

In [9]:
sc_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.623423,0.653105,0.661355,0.646617,0.592294,0.923164,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,0.071110,0.216274,0.223108,0.242481,0.254588,0.332938,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.435135,0.451820,0.462151,0.520677,0.766725,0.614973,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.479069,0.760171,0.760956,0.759398,0.345449,0.641425,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,0.356681,0.471092,0.482072,0.539474,0.777782,0.670100,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,0.003452,0.059957,0.063745,0.056391,0.031842,0.030853,0.0,0.0,0.0,0.0,0.0,1.0,0.0
91,0.165255,0.316916,0.332669,0.347744,0.372462,0.474341,0.0,0.0,0.0,0.0,0.0,0.0,1.0
92,0.121321,0.274090,0.272908,0.300752,0.329011,0.456365,0.0,0.0,0.0,0.0,1.0,0.0,0.0
93,0.184083,0.340471,0.348606,0.336466,0.382771,0.606052,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [10]:
# Устанавливаем названия столбцов
column_names = num_columns + list(range(7))
sc_train.columns = column_names
sc_test.columns = column_names
sc_val.columns = column_names

In [11]:
# В качестве входных параметров используем первые 5 числовых параметров,
# в качестве выходного - шестой числовой параметр.
x_labels = num_columns[:-1]
y_labels = num_columns[-1]
print(x_labels)
print(y_labels)

['Weight', 'Length1', 'Length2', 'Length3', 'Height']
Width


In [12]:
# Отберем необходимые параметры
x_train = sc_train[x_labels]
x_test = sc_test[x_labels]
x_val = sc_val[x_labels]

y_train = sc_train[y_labels]
y_test = sc_test[y_labels]
y_val = sc_val[y_labels]

In [13]:
# Создадим нейроннную сеть для решения задачи регрессии на базе библиотеки sklearn
reg = MLPRegressor(alpha=0.0, batch_size=16, epsilon=1e-07, max_iter=50)
reg.get_params()

{'activation': 'relu',
 'alpha': 0.0,
 'batch_size': 16,
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-07,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_fun': 15000,
 'max_iter': 50,
 'momentum': 0.9,
 'n_iter_no_change': 10,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [14]:
# Обучим нейронную сеть
reg.fit(x_train, y_train)

MLPRegressor(alpha=0.0, batch_size=16, epsilon=1e-07, max_iter=50)

In [15]:
# Проверим работу обученной нейронной сети на валидационной выборке
pred_val = reg.predict(x_val)
mse1 = mean_squared_error(y_val, pred_val)
print(mse1)

0.004821290692088878


In [16]:
# Создадим нейронную сеть на базе библиотеки keras
model = Sequential()

model.add(Dense(100, input_dim=5, activation='relu', use_bias=False))

model.add(Dense(1, activation='linear', use_bias=False))

model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               500       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 100       
Total params: 600
Trainable params: 600
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Обучим нейронную сеть
history = model.fit(x_train, y_train, validation_data = (x_val, y_val), epochs=50, batch_size=16, verbose=0)

In [18]:
# Проверим работу обученной нейронной сети на валидационной выборке
pred_val2 = model.predict(x_val)
mse2 = mean_squared_error(y_val, pred_val2)
print(mse2)

0.00737163014135683


In [19]:
# Создаем нейронную сеть со слоем дропаута
drop = Sequential()

drop.add(Dense(100, input_dim=5, activation='relu', use_bias=False))

drop.add(Dropout(rate=0.5))

drop.add(Dense(1, activation='linear', use_bias=False))

drop.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])

drop.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 100)               500       
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 100       
Total params: 600
Trainable params: 600
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Обучим нейронную сеть
history = drop.fit(x_train, y_train, validation_data = (x_val, y_val), epochs=50, batch_size=16, verbose=0)

In [21]:
# Проверим работу обученной нейронной сети на валидационной выборке
pred_val3 = drop.predict(x_val)
mse3 = mean_squared_error(y_val, pred_val3)
print(mse3)

0.0076275838703888064


In [22]:
print(mse1, mse2, mse3)

0.004821290692088878 0.00737163014135683 0.0076275838703888064


In [23]:
# Проверяем на тестовой выборке
pred_test = model.predict(x_test)
mse = mean_squared_error(y_test, pred_test)
print(mse)

0.008410453306402754


In [24]:
# Задание №1 - решение задачи классификации.
# В качестве входных параметров используем все числовые параметры,
# в качестве выходного - единственный категориальный параметр.

In [25]:
x_labels = num_columns
y_labels = list(range(7))
print(x_labels)
print(y_labels)

['Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width']
[0, 1, 2, 3, 4, 5, 6]


In [26]:
# Отберем необходимые параметры
x_train = sc_train[x_labels]
x_test = sc_test[x_labels]
x_val = sc_val[x_labels]

y_train = sc_train[y_labels]
y_test = sc_test[y_labels]
y_val = sc_val[y_labels]

In [27]:
# Создайте нейронную сеть для решения задачи классификации двумя способами: с помощью sklearn и keras.
# Сравните их эффективность.
# Для keras используйте loss и metrics = 'categorical_crossentropy'.

In [28]:
#sklearn
clas = MLPClassifier(alpha=0.0, batch_size=16, epsilon=1e-07, max_iter=50)
clas.get_params()
clas.fit(x_train, y_train)
pred_val = clas.predict(x_val)
mse1 = mean_squared_error(y_val, pred_val)
print(mse1)

0.14285714285714285




In [29]:
#keras
model = Sequential()
model.add(Dense(100, input_dim=6, activation='relu', use_bias=False))
model.add(Dense(7, activation='softmax', use_bias=False))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_crossentropy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 100)               600       
_________________________________________________________________
dense_5 (Dense)              (None, 7)                 700       
Total params: 1,300
Trainable params: 1,300
Non-trainable params: 0
_________________________________________________________________


In [30]:
history = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=50, batch_size=16, verbose=0)

In [31]:
pred_val2 = model.predict(x_val)
mse2 = mean_squared_error(y_val, pred_val2)
print(mse2)

0.09364935515953741


In [32]:
print(mse1, mse2)

0.14285714285714285 0.09364935515953741


In [33]:
# Задание №2 - использование dropout-слоя.

In [34]:
# Создайте нейронную сеть для решения задачи классификации с помощью keras. Используйте dropout-слой.
# Сравните эффективность нейронных сетей с dropout-слоем и без него.
# Попробуйте найти такие параметры dropout-слоя, чтобы сеть с ним работала лучше, чем без него.

In [35]:
drop = Sequential()
drop.add(Dense(100, input_dim=6, activation='relu', use_bias=False))
drop.add(Dropout(rate=0.1))
drop.add(Dense(7, activation='softmax', use_bias=False))
drop.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_crossentropy'])
drop.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 100)               600       
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 7)                 700       
Total params: 1,300
Trainable params: 1,300
Non-trainable params: 0
_________________________________________________________________


In [36]:
history = drop.fit(x_train, y_train, validation_data = (x_val, y_val), epochs=50, batch_size=16, verbose=0)

In [37]:
pred_val3 = drop.predict(x_val)
mse3 = mean_squared_error(y_val, pred_val3)
print(mse3)

0.09271638190170563


In [38]:
print('sklearn:', mse1, '\nkeras:', mse2, '\nkeras drop:', mse3)

sklearn: 0.14285714285714285 
keras: 0.09364935515953741 
keras drop: 0.09271638190170563


In [39]:
# Проверка на тестовом выборке
pred_test = clas.predict(x_test)
mse = mean_squared_error(y_test, pred_test)
print('sklearn:', mse)

sklearn: 0.14285714285714285


In [40]:
# Проверка на тестовом выборке
pred_test = model.predict(x_test)
mse = mean_squared_error(y_test, pred_test)
print('keras:', mse)

keras: 0.08103304874156181


In [41]:
# Проверка на тестовом выборке
pred_test = drop.predict(x_test)
mse = mean_squared_error(y_test, pred_test)
print('keras drop:', mse)

keras drop: 0.0798907350402118
