## Importovanie potrebných knižníc

In [14]:
from sklearn.model_selection import train_test_split
from metrics import accuracy, recall, precision, f1_score
from svm import SVM
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler

## Načítanie datasetu

In [15]:
dataset = pd.read_csv("data/Hotel Reservations.csv")

## Zobrazenie prvých riadkov datasetu

In [16]:
dataset.head()

Unnamed: 0,Booking_ID,no_of_adults,no_of_children,no_of_weekend_nights,no_of_week_nights,type_of_meal_plan,required_car_parking_space,room_type_reserved,lead_time,arrival_year,arrival_month,arrival_date,market_segment_type,repeated_guest,no_of_previous_cancellations,no_of_previous_bookings_not_canceled,avg_price_per_room,no_of_special_requests,booking_status
0,INN00001,2,0,1,2,Meal Plan 1,0,Room_Type 1,224,2017,10,2,Offline,0,0,0,65.0,0,Not_Canceled
1,INN00002,2,0,2,3,Not Selected,0,Room_Type 1,5,2018,11,6,Online,0,0,0,106.68,1,Not_Canceled
2,INN00003,1,0,2,1,Meal Plan 1,0,Room_Type 1,1,2018,2,28,Online,0,0,0,60.0,0,Canceled
3,INN00004,2,0,0,2,Meal Plan 1,0,Room_Type 1,211,2018,5,20,Online,0,0,0,100.0,0,Canceled
4,INN00005,2,0,1,1,Not Selected,0,Room_Type 1,48,2018,4,11,Online,0,0,0,94.5,0,Canceled


## Základné štatistiky datasetu

In [17]:
dataset.describe()

Unnamed: 0,no_of_adults,no_of_children,no_of_weekend_nights,no_of_week_nights,required_car_parking_space,lead_time,arrival_year,arrival_month,arrival_date,repeated_guest,no_of_previous_cancellations,no_of_previous_bookings_not_canceled,avg_price_per_room,no_of_special_requests
count,36275.0,36275.0,36275.0,36275.0,36275.0,36275.0,36275.0,36275.0,36275.0,36275.0,36275.0,36275.0,36275.0,36275.0
mean,1.844962,0.105279,0.810724,2.2043,0.030986,85.232557,2017.820427,7.423653,15.596995,0.025637,0.023349,0.153411,103.423539,0.619655
std,0.518715,0.402648,0.870644,1.410905,0.173281,85.930817,0.383836,3.069894,8.740447,0.158053,0.368331,1.754171,35.089424,0.786236
min,0.0,0.0,0.0,0.0,0.0,0.0,2017.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,1.0,0.0,17.0,2018.0,5.0,8.0,0.0,0.0,0.0,80.3,0.0
50%,2.0,0.0,1.0,2.0,0.0,57.0,2018.0,8.0,16.0,0.0,0.0,0.0,99.45,0.0
75%,2.0,0.0,2.0,3.0,0.0,126.0,2018.0,10.0,23.0,0.0,0.0,0.0,120.0,1.0
max,4.0,10.0,7.0,17.0,1.0,443.0,2018.0,12.0,31.0,1.0,13.0,58.0,540.0,5.0


## Vytvorenie dummy premenných pre kategorické atribúty

In [18]:
meal_plan_dummies = pd.get_dummies(dataset['type_of_meal_plan']).astype(int)
room_type_dummies = pd.get_dummies(dataset['room_type_reserved']).astype(int)
market_segment_dummies = pd.get_dummies(dataset['market_segment_type']).astype(int)

## Odstránenie nepotrebných stĺpcov a spojenie s dummy premennými

In [19]:
dataset.drop(['type_of_meal_plan', 'room_type_reserved', 'market_segment_type', 'Booking_ID', 'arrival_year', 'arrival_month', 'arrival_date'], axis=1, inplace=True)
dataset = pd.concat([dataset, meal_plan_dummies, room_type_dummies, market_segment_dummies], axis=1)

## Zakódovanie cieľového atribútu

In [20]:
dataset['booking_status'] = dataset['booking_status'].replace({'Not_Canceled': 1, 'Canceled': -1})

## Normalizácia datasetu

In [21]:
normalized_dataset = dataset.drop('booking_status', axis=1)

# Min-Max škálovanie
min_max_scaler = MinMaxScaler()
normalized_dataset = min_max_scaler.fit_transform(normalized_dataset)

# Z-score normalizácia (standardizácia)
standard_scaler = StandardScaler()
normalized_dataset = standard_scaler.fit_transform(normalized_dataset)

## Rozdelenie datasetu na vstupné (X) a výstupné (y) premenné


In [22]:
X = normalized_dataset
y = dataset['booking_status'].to_numpy()

## Rozdelenie datasetu na trénovaciu a temporálnu množinu (kombinované validačné a testovacie)


In [23]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)

## Rozdelenie temporálnej množiny na validačnú a testovaciu množinu

In [24]:
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

## Inicializácia modelu SVM s určenými parametrami

In [25]:
model = SVM(C=0, lr=0.00001, verbose=True)

## Trénovanie modelu a získanie vektorov podpory

In [26]:
support_vectors = model.fit(X_train, y_train, X_val, y_val)

Iterácia 1: vahy = [-0.10072    -0.02487833 -0.00803123 -0.01577715 -0.02552096  0.02567623
 -0.11785333  0.02883742  0.00808589  0.01462158 -0.03882506  0.06931642
  0.01731049 -0.0244872   0.00052107 -0.00078574  0.00538049  0.00050159
  0.00076055 -0.00350626  0.00169343 -0.0085228   0.00320869  0.0015858
  0.01921717  0.03093848  0.01046186 -0.02898913], náklad = 1.75564930, Správnosť = 78.03%
Iterácia 11: vahy = [-6.10560000e-01 -4.73678285e-02 -3.40232780e-02 -4.32277261e-02
 -6.94866515e-02  8.25923579e-02 -7.07189689e-01  3.20638483e-03
 -4.19795718e-03 -1.15765067e-02 -1.85648365e-01  4.20736007e-01
  2.36839930e-02 -3.70714628e-02 -8.02050783e-03  2.16608670e-03
 -9.05948790e-04  9.08412064e-03 -2.72281393e-03  2.07009692e-03
  1.13576108e-04 -1.37215428e-02  9.12080955e-03 -6.55161575e-03
 -1.36633780e-03  1.06086634e-02  1.54139218e-01 -1.49713188e-01], náklad = 2.86525475, Správnosť = 79.16%
Iterácia 21: vahy = [-0.69574    -0.0556844  -0.03483709 -0.02993558 -0.05038681  

## Predikcia na testovacej množine

In [27]:
y_pred = model.forward(X_test)

## Výpočet metrík

In [28]:
print(f"Správnosť: {accuracy(y_test, y_pred):.2%}")
print(f"Presnosť: {precision(y_test, y_pred):.2%}")
print(f"Recall: {recall(y_test, y_pred):.2%}")
print(f"F1: {f1_score(y_test, y_pred):.2%}")

Správnosť: 80.10%
Presnosť: 81.82%
Recall: 90.04%
F1: 85.73%
