<h1>Лабораторная работа №6</h1>

In [65]:
import random

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV

In [66]:
# Synthetic survey data: every column is drawn independently at random, so the
# features carry no real signal about the target city.
# The generator is seeded so that Restart & Run All reproduces the same frame
# (and therefore the same metrics and prediction further down).
SEED = 42
N_SAMPLES = 1000  # number of synthetic respondents
random.seed(SEED)

cities = ["Bishkek", "Almaty", "Tashkent"]
vacation_types = ["Beach", "Shopping", "Adventure"]
transport_types = ["Plane", "Car", "Train"]
vacation_targets = ["London", "Paris", "Moscow"]

# Columns are generated one after another in this order; keeping the order
# stable keeps the seeded output stable.
data = {
    "salary": [random.randint(30000, 150000) for _ in range(N_SAMPLES)],
    "city": [random.choice(cities) for _ in range(N_SAMPLES)],
    "age": [random.randint(30, 65) for _ in range(N_SAMPLES)],
    "vacation_prefer": [random.choice(vacation_types) for _ in range(N_SAMPLES)],
    "transport_prefer": [random.choice(transport_types) for _ in range(N_SAMPLES)],
    "target": [random.choice(vacation_targets) for _ in range(N_SAMPLES)],
}

df = pd.DataFrame(data)

print(df)

     salary      city  age vacation_prefer transport_prefer  target
0     35431    Almaty   54           Beach            Plane   Paris
1    142959    Almaty   40           Beach            Train  London
2    133859    Almaty   63       Adventure              Car  Moscow
3    110944   Bishkek   56       Adventure              Car  Moscow
4     63111   Bishkek   64        Shopping            Plane   Paris
..      ...       ...  ...             ...              ...     ...
995  128124    Almaty   40           Beach              Car  London
996  126686   Bishkek   64        Shopping            Plane   Paris
997   92030    Almaty   49           Beach              Car  Moscow
998   70024   Bishkek   44        Shopping            Plane  Moscow
999  108855  Tashkent   48       Adventure              Car  Moscow

[1000 rows x 6 columns]


In [67]:
# One-hot encode every categorical column. Note that "target" is encoded too,
# so the label ends up spread over several "target_*" indicator columns.
categorical_columns = ["city", "vacation_prefer", "transport_prefer", "target"]
df_encoded = pd.get_dummies(df, columns=categorical_columns)
print(df_encoded)

     salary  age  city_Almaty  city_Bishkek  city_Tashkent  \
0     35431   54         True         False          False   
1    142959   40         True         False          False   
2    133859   63         True         False          False   
3    110944   56        False          True          False   
4     63111   64        False          True          False   
..      ...  ...          ...           ...            ...   
995  128124   40         True         False          False   
996  126686   64        False          True          False   
997   92030   49         True         False          False   
998   70024   44        False          True          False   
999  108855   48        False         False           True   

     vacation_prefer_Adventure  vacation_prefer_Beach  \
0                        False                   True   
1                        False                   True   
2                         True                  False   
3                         T

In [68]:
# Every column produced from the one-hot encoded "target" is a label column;
# everything else is a feature.
target_columns = [name for name in df_encoded.columns if name.startswith("target_")]

X = df_encoded.drop(columns=target_columns)
y = df_encoded[target_columns]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [69]:
# Baseline: a random forest with default hyper-parameters and a fixed seed.
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

y_pred = clf.predict(X_test)

# y_test holds one indicator column per target city, so accuracy_score is the
# subset accuracy here: a row counts as correct only if all its columns match.
baseline_accuracy = accuracy_score(y_test, y_pred)
baseline_report = classification_report(y_test, y_pred, zero_division=0)
print("Точность:", baseline_accuracy)
print(baseline_report)

Точность: 0.23
              precision    recall  f1-score   support

           0       0.37      0.21      0.27        71
           1       0.34      0.18      0.23        68
           2       0.35      0.31      0.33        61

   micro avg       0.35      0.23      0.28       200
   macro avg       0.35      0.23      0.28       200
weighted avg       0.35      0.23      0.28       200
 samples avg       0.23      0.23      0.23       200



In [70]:
# Hyper-parameter grid that the cross-validated search below explores exhaustively.
param_grid = {
    "n_estimators": [50, 100, 200],
    "max_depth": [None, 10, 20, 30],
    "min_samples_split": [2, 5, 10],
}

# 5-fold grid search over a seeded random forest, ranked by accuracy.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
)
grid_search.fit(X_train, y_train)

# The best parameter combination, refitted by the search on the whole training set.
best_model = grid_search.best_estimator_
print("Лучшие параметры:", grid_search.best_params_)

# Evaluate the tuned model on the held-out rows.
y_pred_best = best_model.predict(X_test)
tuned_accuracy = accuracy_score(y_test, y_pred_best)
tuned_report = classification_report(y_test, y_pred_best, zero_division=0)
print("Точность после оптимизации:", tuned_accuracy)
print(tuned_report)

Лучшие параметры: {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 100}
Точность после оптимизации: 0.23
              precision    recall  f1-score   support

           0       0.37      0.21      0.27        71
           1       0.34      0.18      0.23        68
           2       0.35      0.31      0.33        61

   micro avg       0.35      0.23      0.28       200
   macro avg       0.35      0.23      0.28       200
weighted avg       0.35      0.23      0.27       200
 samples avg       0.23      0.23      0.23       200



In [71]:
# Describe one hypothetical traveller: random salary and age, living in Bishkek,
# preferring beach vacations and travelling by plane.
sample_features = {
    "salary": random.randint(30000, 150000),
    "city_Bishkek": 1,
    "city_Almaty": 0,
    "city_Tashkent": 0,
    "age": random.randint(30, 65),
    "vacation_prefer_Beach": 1,
    "vacation_prefer_Shopping": 0,
    "vacation_prefer_Adventure": 0,
    "transport_prefer_Plane": 1,
    "transport_prefer_Car": 0,
    "transport_prefer_Train": 0,
}

# Build the row in one step and align it with the training columns: reindex
# puts the features in the order the model was fitted on and fills any column
# not listed above with 0. This keeps numeric dtypes, whereas enlarging an empty
# DataFrame through .loc yields object columns that then need an in-place fillna.
random_sample = pd.DataFrame([sample_features]).reindex(
    columns=X_train.columns,
    fill_value=0,
)

prediction = best_model.predict(random_sample)
class_names = list(y.columns)

# The model outputs one flag per "target_*" column, so a row may contain no
# positive flag at all; report the first positive one when it exists.
predicted_flags = prediction[0].tolist()

if True in predicted_flags:
    predicted_class_index = predicted_flags.index(True)

    predicted_city = class_names[predicted_class_index].replace("target_", "")
    print("Предсказанный город:", predicted_city)
else:
    print("Не найдено подходящего города")

Предсказанный город: Paris
