### Bez raya

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import LinearSVC, SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
import warnings
from sklearn.exceptions import ConvergenceWarning
from time import time

# Wczytaj dane
df = pd.read_csv("customer_churn_data.csv")
y = df.pop("Churn")
fn_cost = df.pop("FN")
fp_cost = df.pop("FP")

# Podział na zbiór treningowy i testowy
X_train, X_test, y_train, y_test, fp_cost_train, fp_cost_test, fn_cost_train, fn_cost_test = train_test_split(
    df, y, fp_cost, fn_cost, test_size=0.25, random_state=0, stratify=y
)

# Skalowanie
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

warnings.filterwarnings("ignore", category=ConvergenceWarning)

# ---- Linear SVM ----
start = time()
linear_svc = LinearSVC(loss="hinge", max_iter=10_000, class_weight="balanced", random_state=0)
param_grid_linear = {"C": np.linspace(1e-2, 1e2, 100)}

grid_linear = GridSearchCV(linear_svc, param_grid_linear, scoring="f1", cv=5, n_jobs=-1)
grid_linear.fit(X_train, y_train)
linear_time = time() - start

y_pred_linear = grid_linear.predict(X_test)
f1_linear = f1_score(y_test, y_pred_linear)

# ---- Kernel SVM ----
start = time()
kernel_svc = SVC(class_weight="balanced", random_state=0)
param_grid_kernel = {
    "kernel": ["rbf"],
    "C": np.linspace(1e-2, 1e2, 10),
    "gamma": ["scale"] + list(np.linspace(0.1, 10, 10)),
}

grid_kernel = GridSearchCV(kernel_svc, param_grid_kernel, scoring="f1", cv=5, n_jobs=-1)
grid_kernel.fit(X_train, y_train)
kernel_time = time() - start

y_pred_kernel = grid_kernel.predict(X_test)
f1_kernel = f1_score(y_test, y_pred_kernel)

# ---- Wyniki ----
print("=== Wyniki bez raya ===")
print(f"Linear SVM F1-score: {f1_linear:.4f}, czas: {linear_time:.2f} s")
print(f"Kernel SVM F1-score: {f1_kernel:.4f}, czas: {kernel_time:.2f} s")




=== Wyniki bez raya ===
Linear SVM F1-score: 0.5913, czas: 16.02 s
Kernel SVM F1-score: 0.8594, czas: 22.25 s


### Z rayem

In [4]:
import ray
from ray import train
from ray.train import Checkpoint, RunConfig, ScalingConfig
from ray.tune import Tuner
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from time import time

# Start Ray
ray.init(
    ignore_reinit_error=True,
    include_dashboard=False,
    _temp_dir="C:/ray_temp"
)
print("Ray OK")

# Wczytaj dane
df = pd.read_csv("customer_churn_data.csv")
y = df.pop("Churn")
fn_cost = df.pop("FN")
fp_cost = df.pop("FP")

print("Date wczytane")
      
# Podział i skalowanie
X_train, X_test, y_train, y_test, fp_cost_train, fp_cost_test, fn_cost_train, fn_cost_test = train_test_split(
    df, y, fp_cost, fn_cost, test_size=0.25, random_state=0, stratify=y
)
print("Podzial zakoncozny")
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Ray Dataset
import ray.data
ds = ray.data.from_items([{
    "X": x, "y": int(y)
} for x, y in zip(X_train_scaled, y_train)])
print("Dane do raya wczytane)
# Funkcja treningowa
def train_func(config):
    X = np.stack([row["X"] for row in train.get_dataset_shard("train").iter_torch_batches(batch_size=1024, collate_fn=lambda x: x)])
    y = np.array([row["y"] for row in train.get_dataset_shard("train").iter_torch_batches(batch_size=1024, collate_fn=lambda x: x)])
    X = X.reshape(-1, X.shape[-1])

    model = LinearSVC(
        C=config["C"],
        loss="hinge",
        max_iter=10_000,
        class_weight="balanced",
        random_state=0
    )
    model.fit(X, y)
    preds = model.predict(X)
    f1 = f1_score(y, preds)
    train.report({"f1": f1})

# Hiperparametry
search_space = {
    "C": ray.tune.grid_search(list(np.linspace(1e-2, 1e2, 10)))
}

# Konfiguracja
scaling_config = ScalingConfig(num_workers=1, use_gpu=False)

print("zaczynam tunning huperparametryczny")
start = time()
tuner = Tuner(
    train_func,
    param_space=search_space,
    run_config=RunConfig(name="linear_svm_ray"),
    scaling_config=scaling_config,
    tune_config=ray.tune.TuneConfig(metric="f1", mode="max"),
)

results = tuner.fit()
ray_time = time() - start

best_result = results.get_best_result()
print("=== Wyniki z Ray ===")
print(f"Best F1-score (train): {best_result.metrics['f1']:.4f}, czas: {ray_time:.2f} s")

ray.shutdown()

SyntaxError: unterminated string literal (detected at line 33) (276727793.py, line 33)