In [25]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import numpy as np

# Load the dataset from the CSV file.
df = pd.read_csv("/content/dane_lambda_kappa_balans_80plus.csv")

# Derive three 0/1 indicator features from existing columns.
# 1 when `ttl_dni` is strictly greater than 60.
df['czy_wysoki_ttl'] = df['ttl_dni'].gt(60).astype(int)
# 1 when `zapytania_na_sekunde` is strictly greater than 1000.
df['czy_duzo_zapytan'] = df['zapytania_na_sekunde'].gt(1000).astype(int)
# 1 only when both `chmura` and `historia_danych` equal 1.
df['czy_chmura_i_historia'] = (
    df['chmura'].eq(1) & df['historia_danych'].eq(1)
).astype(int)

# Separate the target column `architektura` from the feature columns.
y = df['architektura']
X = df.drop(columns=['architektura'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): the split is not stratified on `y` — consider `stratify=y`
# if the class proportions in train/test need to match.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Columns that are one-hot encoded before reaching the classifier.
kolumny_kat = [
    'historia_danych',
    'typ_przetwarzania',
    'typ_zapytan',
    'skalowalnosc',
    'chmura',
]

# Preprocessing step: one-hot encode the categorical columns (dropping the
# first level of each) and pass every remaining column through unchanged.
# NOTE(review): the encoder uses its default unknown-category handling, so a
# category seen only at predict time would raise — confirm this is intended.
encoder = ColumnTransformer(
    transformers=[
        ('kat', OneHotEncoder(drop='first'), kolumny_kat),
    ],
    remainder='passthrough',
)

# Full model: preprocessing followed by a depth-limited decision tree with
# balanced class weights and a fixed seed.
model = Pipeline(
    steps=[
        ('preprocessing', encoder),
        (
            'clf',
            DecisionTreeClassifier(
                max_depth=7,
                class_weight='balanced',
                random_state=42,
            ),
        ),
    ]
)

# Fit the whole pipeline (preprocessing + classifier) on the training split.
model.fit(X_train, y_train)

# Predict on the held-out split and report the overall accuracy.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Dokładność: {accuracy:.2f}")

# A single accuracy figure hides how each class is handled, and the model is
# trained with class_weight='balanced', so also print the per-class
# precision / recall / F1 breakdown (uses the already-imported
# `classification_report`).
print(classification_report(y_test, y_pred))


Dokładność: 0.79
