In [1]:
# Зависимости
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import  KNeighborsClassifier
from sklearn.tree import  DecisionTreeClassifier, plot_tree


from sklearn.metrics import f1_score

import tensorflow as tf
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout

In [2]:
from tensorflow import compat
# Seed every known random number generator so that runs are repeatable.
# The seed is derived from the author string: its encoded bytes are read as a
# little-endian integer and reduced modulo 2**32.
my_code = "Рахматуллаев и Тимуров"
seed_limit = 2 ** 32
my_seed = int.from_bytes(my_code.encode(), "little") % seed_limit

# NOTE(review): this is set after the interpreter has already started, so it
# presumably only influences child processes — confirm it has the intended effect.
os.environ['PYTHONHASHSEED']=str(my_seed)

# Python's built-in `random` module.
random.seed(my_seed)

# NumPy's global generator.
np.random.seed(my_seed)

# TensorFlow, through the v1 compatibility API.
tf.compat.v1.set_random_seed(my_seed)

# Create a v1-compat session limited to one intra-op and one inter-op thread
# on the default graph, and register it as the Keras backend session.
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

In [3]:
# Read the training data from a CSV file (path is relative to the working directory).
train_data = pd.read_csv("datasets/iris_train.csv")

In [4]:
# Preview the first rows of the training data.
train_data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.7,2.6,3.5,1.0,Iris-versicolor
1,5.1,3.8,1.5,0.3,Iris-setosa
2,5.4,3.9,1.3,0.4,Iris-setosa
3,6.5,3.0,5.2,2.0,Iris-virginica
4,6.9,3.1,4.9,1.5,Iris-versicolor


In [5]:
# Size of the validation split: 20% of the training rows, rounded to an integer.
val_size = round(len(train_data) * 0.2)
print(val_size)

24


In [6]:
# Split the data into training and validation subsets.
# The split is seeded with my_seed so it is the same on every run.
random_state = my_seed
train, val = train_test_split(
    train_data,
    test_size=val_size,
    random_state=random_state,
)
print(train.shape[0], val.shape[0])

96 24


In [7]:
# Rescale the numeric columns to the [0, 1] range.
# The scaler is fitted on the training split only; every other column is
# passed through unchanged.
num_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
ord_columns = ['species']

ct = ColumnTransformer(
    transformers=[
        ('numerical', MinMaxScaler(), num_columns),
    ],
    remainder='passthrough',
)

ct.fit(train)

ColumnTransformer(remainder='passthrough',
                  transformers=[('numerical', MinMaxScaler(),
                                 ['sepal_length', 'sepal_width', 'petal_length',
                                  'petal_width'])])

In [8]:
# Apply the fitted transformer to both splits (training first, then validation)
# and wrap each resulting array in a DataFrame.
sc_train, sc_val = (pd.DataFrame(ct.transform(split)) for split in (train, val))

In [9]:
# Restore column names on the transformed frames: scaled numeric columns come
# first, followed by the passed-through column(s).
column_names = [*num_columns, *ord_columns]
sc_train.columns = sc_val.columns = column_names

In [10]:
# Display the scaled training frame.
sc_train

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,0.0,0.416667,0.016949,0.0,Iris-setosa
1,0.323529,0.583333,0.118644,0.041667,Iris-setosa
2,0.205882,0.666667,0.067797,0.041667,Iris-setosa
3,0.176471,0.166667,0.389831,0.375,Iris-versicolor
4,0.058824,0.125,0.050847,0.083333,Iris-setosa
...,...,...,...,...,...
91,0.029412,0.375,0.067797,0.041667,Iris-setosa
92,0.205882,0.583333,0.084746,0.041667,Iris-setosa
93,0.323529,0.416667,0.59322,0.583333,Iris-versicolor
94,0.705882,0.416667,0.711864,0.916667,Iris-virginica


In [11]:
# Separate the scaled frames into feature matrices and label vectors.
#
# The transformer output mixes the scaled floats with the passed-through string
# column, so pandas stores every column of sc_train / sc_val with object dtype.
# Cast the features back to float explicitly so downstream estimators receive a
# genuinely numeric matrix instead of relying on implicit coercion.
x_train = sc_train[num_columns].astype(float)
x_val = sc_val[num_columns].astype(float)

# Labels as flat one-dimensional arrays.
y_train = sc_train[ord_columns].values.flatten()
y_val = sc_val[ord_columns].values.flatten()

In [12]:
# Create a simple k-nearest-neighbours classifier that uses 10 neighbours.
model = KNeighborsClassifier(n_neighbors=10)

In [13]:
# Fit the model on the training features and labels.
model.fit(x_train, y_train)

KNeighborsClassifier(n_neighbors=10)

In [14]:
# Evaluate the fitted k-nearest-neighbours model on the validation split
# using the weighted F1 score.
pred_val = model.predict(x_val)
f1 = f1_score(y_val, pred_val, average='weighted')
print(f1)

0.8746867167919801


In [15]:
# Read the test data from a CSV file (path is relative to the working directory).
test = pd.read_csv("datasets/iris_test.csv")

In [16]:
# Add an empty placeholder 'species' column to the test frame.
# NOTE(review): presumably required because ct was fitted on a frame that
# contained 'species' (passed through as remainder) — confirm.
test['species'] = ''

In [17]:
# Preview the first rows of the test frame.
test.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.4,3.7,1.5,0.2,
1,6.0,3.4,4.5,1.6,
2,5.9,3.0,4.2,1.5,
3,5.7,3.0,4.2,1.2,
4,5.6,2.7,4.2,1.3,


In [18]:
# Transform the test frame with the already-fitted transformer and label the
# resulting columns in one step.
sc_test = pd.DataFrame(ct.transform(test), columns=column_names)

In [19]:
# Feature matrix for the test set.
# sc_test is built from a mixed (float + string) array, so its columns carry
# object dtype; cast the features to float explicitly rather than relying on
# the estimator to coerce them.
x_test = sc_test[num_columns].astype(float)

In [20]:
# Store the model's predicted labels in the 'species' column of the test frame.
test['species'] = model.predict(x_test)

In [21]:
# Preview the test frame with the 'species' column filled in.
test.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.4,3.7,1.5,0.2,Iris-setosa
1,6.0,3.4,4.5,1.6,Iris-versicolor
2,5.9,3.0,4.2,1.5,Iris-versicolor
3,5.7,3.0,4.2,1.2,Iris-versicolor
4,5.6,2.7,4.2,1.3,Iris-versicolor


In [22]:
# Write the test frame to a CSV file without the index column.
test.to_csv('R_T_test_result.csv', index=False)