In [1]:
# Зависимости
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.compose import ColumnTransformer

from sklearn.svm import SVR, SVC

from sklearn.metrics import mean_squared_error, f1_score
from sklearn.metrics import mean_squared_error

In [2]:
# Генерируем уникальный seed
my_code = "Волков Н"
seed_limit = 2 ** 32
my_seed = int.from_bytes(my_code.encode(), "little") % seed_limit

In [3]:
# Читаем данные из файла
example_data = pd.read_csv("datasets/Fish.csv")

In [4]:
example_data.head()

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.52,4.02
1,Bream,290.0,24.0,26.3,31.2,12.48,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.73,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.444,5.134


In [5]:
# Определим размер валидационной и тестовой выборок
val_test_size = round(0.2*len(example_data))
print(val_test_size)

32


In [6]:
# Создадим обучающую, валидационную и тестовую выборки
random_state = my_seed
train_val, test = train_test_split(example_data, test_size=val_test_size, random_state=random_state)
train, val = train_test_split(train_val, test_size=val_test_size, random_state=random_state)
print(len(train), len(val), len(test))

95 32 32


In [7]:
# Значения в числовых столбцах преобразуем к отрезку [0,1].
# Для настройки скалировщика используем только обучающую выборку.
num_columns = ['Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width']

ct = ColumnTransformer(transformers=[('numerical', MinMaxScaler(), num_columns)], remainder='passthrough')
ct.fit(train)

ColumnTransformer(remainder='passthrough',
                  transformers=[('numerical', MinMaxScaler(),
                                 ['Weight', 'Length1', 'Length2', 'Length3',
                                  'Height', 'Width'])])

In [8]:
# Преобразуем значения, тип данных приводим к DataFrame
sc_train = pd.DataFrame(ct.transform(train))
sc_test = pd.DataFrame(ct.transform(test))
sc_val = pd.DataFrame(ct.transform(val))

In [9]:
# Устанавливаем названия столбцов
column_names = num_columns + ['Species']
sc_train.columns = column_names
sc_test.columns = column_names
sc_val.columns = column_names

In [10]:
sc_train

Unnamed: 0,Weight,Length1,Length2,Length3,Height,Width,Species
0,0.105894,0.300971,0.301818,0.298986,0.273008,0.371547,Perch
1,0.969588,0.941748,0.938182,0.932432,0.456563,0.718369,Pike
2,0.543823,0.572816,0.574545,0.569257,0.58027,0.870743,Perch
3,0.373517,0.466019,0.474545,0.521959,0.800542,0.596597,Bream
4,0.051153,0.170874,0.169091,0.185811,0.329094,0.22911,Parkki
...,...,...,...,...,...,...,...
90,0.118059,0.436893,0.434545,0.439189,0.222393,0.328146,Pike
91,0.020741,0.12233,0.12,0.121622,0.121104,0.19514,Perch
92,0.309044,0.446602,0.443636,0.425676,0.481537,0.700609,Perch
93,0.072441,0.223301,0.229091,0.231419,0.229623,0.369291,Perch


In [11]:
# Задание №1 - анализ метода опорных векторов в задаче регрессии
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html#sklearn.svm.SVR
# kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'
# Только для kernel = 'poly' : degreeint, default=3

In [12]:
# Выбираем 4 числовых переменных, три их них будут предикторами, одна - зависимой переменной
n = 4
labels = random.sample(num_columns, n)

y_label = labels[0]
x_labels = labels[1:]

print(x_labels)
print(y_label)

['Length1', 'Weight', 'Height']
Length3


In [13]:
# Отберем необходимые параметры
x_train = sc_train[x_labels]
x_test = sc_test[x_labels]
x_val = sc_val[x_labels]

y_train = sc_train[y_label]
y_test = sc_test[y_label]
y_val = sc_val[y_label]

In [14]:
y_train


0     0.298986
1     0.932432
2     0.569257
3     0.521959
4     0.185811
        ...   
90    0.439189
91    0.121622
92    0.425676
93    0.231419
94    0.782095
Name: Length3, Length: 95, dtype: object

In [15]:
# Создайте 4 модели с различными ядрами: 'linear', 'poly', 'rbf', 'sigmoid'.
# Решите получившуюся задачу регрессии с помощью созданных моделей и сравните их эффективность.
# При необходимости применяйте параметр регуляризации C : float, default=1.0
# Укажите, какая модель решает задачу лучше других.
r_model1 = SVR( kernel='linear')
r_model2 = SVR(kernel='poly')
r_model3 = SVR()
r_model4 = SVR(kernel='sigmoid')




In [16]:
r_model1.fit(x_train,y_train)

a=r_model1.predict(x_test)
mse1 = mean_squared_error(y_test, a)
mse1

0.0029010406653519486

In [17]:

r_model2.fit(x_train,y_train)
a=r_model2.predict(x_test)
mse2 = mean_squared_error(y_test, a)
mse2

0.0036444488484717536

In [18]:
r_model3.fit(x_train,y_train)
a=r_model3.predict(x_test)
mse3 = mean_squared_error(y_test, a)
mse3

0.0023120387681056552

In [19]:
r_model4.fit(x_train,y_train)
a=r_model4.predict(x_test)
mse4 = mean_squared_error(y_test, a)
mse4

25.373999314862846

In [20]:
# Задание №2 - анализ метода опорных векторов в задаче классификации
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC
# kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'
# Только для kernel = 'poly' : degreeint, default=3

In [21]:
# Выбираем 2 числовых переменных, которые будут параметрами элементов набора данных
# Метка класса всегда 'Species'
n = 2
x_labels = random.sample(num_columns, n)
y_label = 'Species'

print(x_labels)
print(y_label)

['Height', 'Weight']
Species


In [22]:
# Отберем необходимые параметры
x_train = sc_train[x_labels]
x_test = sc_test[x_labels]
x_val = sc_val[x_labels]

y_train = sc_train[y_label]
y_test = sc_test[y_label]
y_val = sc_val[y_label]

In [23]:
y_train

0      Perch
1       Pike
2      Perch
3      Bream
4     Parkki
       ...  
90      Pike
91     Perch
92     Perch
93     Perch
94      Pike
Name: Species, Length: 95, dtype: object

In [24]:
# Создайте 4 модели с различными ядрами: 'linear', 'poly', 'rbf', 'sigmoid'.
# Решите получившуюся задачу регрессии с помощью созданных моделей и сравните их эффективность.
# При необходимости применяйте параметр регуляризации C : float, default=1.0
# Укажите, какая модель решает задачу лучше других.
c_model1 = SVC(kernel= 'linear',degree=1)
c_model2 = SVC(kernel='poly',degree=0.5)
c_model3 = SVC(degree=0)
c_model4 = SVC(kernel='sigmoid',degree=1)


In [25]:
c_model1.fit(x_train, y_train)
print(c_model1.predict(x_test))

['Perch' 'Perch' 'Perch' 'Perch' 'Bream' 'Perch' 'Perch' 'Perch' 'Perch'
 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Bream' 'Perch' 'Bream' 'Perch'
 'Bream' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Bream' 'Perch'
 'Bream' 'Perch' 'Perch' 'Bream' 'Bream']


In [26]:
c_model2.fit(x_train, y_train)
print(c_model2.predict(x_test))

['Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch'
 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch'
 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Perch'
 'Perch' 'Perch' 'Perch' 'Perch' 'Perch']


In [27]:
c_model3.fit(x_train, y_train)
print(c_model3.predict(x_test))

['Smelt' 'Perch' 'Perch' 'Perch' 'Bream' 'Perch' 'Perch' 'Perch' 'Perch'
 'Perch' 'Perch' 'Perch' 'Perch' 'Perch' 'Bream' 'Perch' 'Bream' 'Perch'
 'Bream' 'Parkki' 'Smelt' 'Perch' 'Parkki' 'Perch' 'Perch' 'Bream' 'Smelt'
 'Bream' 'Perch' 'Smelt' 'Bream' 'Bream']


In [28]:
c_model4.fit(x_train, y_train)
print(c_model4.predict(x_test))

['Bream' 'Bream' 'Perch' 'Perch' 'Perch' 'Bream' 'Perch' 'Perch' 'Bream'
 'Perch' 'Bream' 'Bream' 'Bream' 'Bream' 'Perch' 'Perch' 'Perch' 'Bream'
 'Perch' 'Perch' 'Smelt' 'Perch' 'Perch' 'Bream' 'Bream' 'Perch' 'Smelt'
 'Perch' 'Perch' 'Smelt' 'Perch' 'Perch']
