In [90]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import datetime
import warnings

In [91]:
# Read the pickled object stored in 'strokes_split_data.pkl'.
# NOTE: pickle.load can execute arbitrary code, so only open files from a
# trusted source.
with open('strokes_split_data.pkl', mode='rb') as f:
    df = pickle.load(f)

# Take the two entries used by the rest of the notebook out of the loaded
# mapping (presumably training features and training labels - confirm
# against the notebook that wrote the file).
X_train, Y_train = df["X_train"], df["Y_train"]

In [92]:
' ----------------------Steuerung: verwendete Spalten --------------------'
# Control block: columns listed (uncommented) here are removed from the
# training features. Comment an entry in or out to change the feature set.
drop_columns = [
    # 'gender',
    # 'age',
    # 'hypertension',
    # 'heart_disease',
    # 'Residence_type',
    # 'avg_glucose_level',
    # 'bmi',
    # 'smoking_status',
    'age_above_60',
    'high_glucose',
    'did_smoke',
    'heart_risk',
    'at_least_one_risk',
    'at_least_one_risk_and_high_age',
    'all_risks',
    'risk_sum',
]

# Always derive X_train from the untouched frame stored in `df`. Dropping
# from the already-reduced X_train would raise a KeyError as soon as this
# cell is run a second time, and a column that was dropped once could never
# be brought back by editing the list above.
# Requires `df` to still be the mapping loaded from 'strokes_split_data.pkl'.
X_train = df["X_train"].drop(columns=drop_columns)

In [93]:
' ################# Hier Parameter initiale Modellsteuerung setzen!! ####################'
# Set the parameters that steer the initial model here.
print("----------------------- Modellparameter ----------------------")
classweight_y = 10  # weight given to class 1; class 0 keeps weight 1
gamma = 0.1         # value handed to the SVC `gamma` argument below
' ############# Modellinstanz #########'
# Model instance: every constructor argument is spelled out explicitly and
# collected in one mapping before the estimator is built.
svc_settings = {
    "C": 1.0,
    "kernel": 'rbf',
    "degree": 3,
    "gamma": gamma,
    "coef0": 0.0,
    "shrinking": True,
    "probability": True,
    "tol": 0.001,
    "cache_size": 200,
    "class_weight": {0: 1, 1: classweight_y},
    "verbose": False,
    "max_iter": -1,
    "decision_function_shape": 'ovo',
    "break_ties": False,
    "random_state": 42,
}
svc = SVC(**svc_settings)

----------------------- Modellparameter ----------------------


In [94]:
' ############# scaling ##################'
# One scaler instance for the notebook. It is handed to the pipeline below
# instead of building a second, anonymous StandardScaler next to an unused
# `scaler` variable.
scaler = StandardScaler()
' ######################## Pipeline (Scaler + model) für Gridsearch #################'
' ---------------- jetzt Pipeline mit Scaler und Modell erstellen -----'
# Pipeline: scaling step followed by the SVC. The step names 'scaler' and
# 'svc' are the prefixes used by the "svc__..." keys of the parameter grid.
svc_pipeline = Pipeline([('scaler', scaler), ('svc', svc)])

In [108]:
# ----- Choose the scope of the grid search -----
scope_of_gridsearch = "C_and_gamma_close_to_zero"

# One preset per search region. Each preset holds the class-1 weights, the C
# grid, the gamma grid and the name of the pickle file the results go to.
# NOTE(review): np.arange with a float step can gain or lose its last element
# through rounding; the lengths printed below are the sanity check for that.
grid_search_presets = {
    # Large area of the C/gamma plane.
    "tradeoff_area": {
        "y_weights": list(range(9, 11, 1)),
        # Very fine grids used for the first computation, kept for reference:
        #   C:     np.arange(0.1, 1, 0.05)
        #   gamma: np.arange(0.01, 0.5, 0.001)
        "C_values": np.arange(0.005, 1.05, 0.05),        # coarser C grid
        "gamma_values": np.arange(0.005, 0.305, 0.01),   # coarser gamma grid, second computation
        "file_name": "gridsearch_tradeoff_final.pkl",
    },
    # Small corner around the origin of gamma and C.
    "C_and_gamma_close_to_zero": {
        "y_weights": [11],
        "C_values": np.arange(0.01, 0.1, 0.01),
        "gamma_values": np.arange(0.001, 0.051, 0.001),
        "file_name": "gridsearch_C_and_gamma_close_to_zero.pkl",
    },
    # Band of tiny gamma values over almost the complete C range.
    "only_gamma_close_to_zero": {
        "y_weights": [11],
        "C_values": np.arange(0.1, 1, 0.05),
        "gamma_values": np.arange(0.001, 0.01, 0.001),
        "file_name": "gridsearch_only_gamma_close_to_zero.pkl",
    },
    # Remaining band of tiny C values over the complete gamma range.
    "only_C_close_to_zero": {
        "y_weights": [11],
        "C_values": [0.01, 0.05],
        "gamma_values": np.arange(0.01, 0.5, 0.01),
        "file_name": "gridsearch_only_C_close_to_zero.pkl",
    },
}

# Fail loudly on a misspelled scope. Without this check no branch would run
# and the cell would either stop with a NameError further down or silently
# reuse the grids left over from a previous run of the cell.
if scope_of_gridsearch not in grid_search_presets:
    raise ValueError(
        f"Unknown scope_of_gridsearch {scope_of_gridsearch!r}; "
        f"expected one of {sorted(grid_search_presets)}"
    )

preset = grid_search_presets[scope_of_gridsearch]
y_weights = preset["y_weights"]
# Build the list of class-weight dictionaries: class 0 fixed at 1, class 1 varied.
list_weight_classes = [{0: 1, 1: value} for value in y_weights]
list_of_C_values = preset["C_values"]
list_of_gamma_values = preset["gamma_values"]
grid_search_name = preset["file_name"]

# Grid dimensions; their product is the number of parameter combinations.
print(len(list_weight_classes))
print(len(list_of_C_values))
print(len(list_of_gamma_values))

grid_parameters = {
    "svc__C": list_of_C_values,
    # Relevant: large values produce small regions and vice versa.
    "svc__gamma": list_of_gamma_values,
    "svc__class_weight": list_weight_classes,
}

# 6-fold cross-validated search scored on recall and precision; the final
# estimator is refit using the recall score.
gridsearch = GridSearchCV(
    svc_pipeline,
    grid_parameters,
    scoring=["recall", "precision"],
    cv=6,
    return_train_score=True,
    refit="recall",
)

1
9
50


In [109]:
# Warnungen ignorieren
warnings.filterwarnings("ignore")

In [110]:
# Log the current wall-clock time (the next cell starts the grid-search fit).
time_before_fit = datetime.now()
print(time_before_fit)

2025-01-13 17:22:34.477131


In [111]:
# Run the cross-validated grid search on the training data. This is the
# long-running step of the notebook.
gridsearch.fit(X=X_train, y=Y_train)


KeyboardInterrupt



In [100]:
# Log the current wall-clock time again (this cell follows the grid-search fit).
time_after_fit = datetime.now()
print(time_after_fit)

2025-01-13 17:03:25.684773


In [101]:
# ----- Extract the results from the fitted grid search -----
# Estimator that the grid search refit on the training data.
final_model = gridsearch.best_estimator_

# Display settings: show all rows and all columns of a frame.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Full cross-validation table, one row per parameter combination.
cv_res = pd.DataFrame(gridsearch.cv_results_)

# Keep only the mean train/test scores and the three searched parameters.
important_columns = [
    "mean_test_recall",
    "mean_train_recall",
    'mean_test_precision',
    'mean_train_precision',
    "param_svc__C",
    "param_svc__gamma",
    "param_svc__class_weight",
]
cv_res_important_values = cv_res[important_columns]

# Best mean test recall first.
ergebnisse_sortiert = cv_res_important_values.sort_values(
    by="mean_test_recall",
    ascending=False,
)

In [102]:
# ----- Save the results of the currently selected grid search -----
# Bundle the sorted result table, the refit model and the searched grid.
new_grid_search = dict(
    Ergebnisse=ergebnisse_sortiert,
    final_model=final_model,
    grid_parameters=grid_parameters,
)

# Pack all prepared data into the pickle file chosen for this search scope.
with open(grid_search_name, mode='wb') as pickle_file:
    pickle.dump(new_grid_search, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

# Einzelstücke verkleben

In [113]:
def load_grid_search_results(file_name):
    """Return the result table stored in a saved grid-search pickle.

    Parameters
    ----------
    file_name : str
        Path of a pickle file that holds a mapping with an "Ergebnisse" entry.

    Returns
    -------
    object
        Whatever is stored under the "Ergebnisse" key.

    Raises
    ------
    FileNotFoundError
        If ``file_name`` does not exist.
    KeyError
        If the stored mapping has no "Ergebnisse" entry.
    """
    # NOTE: pickle.load can execute arbitrary code; only open files that were
    # written by a trusted source.
    with open(file_name, 'rb') as f:
        stored_search = pickle.load(f)
    return stored_search["Ergebnisse"]


# ----- Open the results saved for the currently selected grid search -----
# Kept under its own name so it is not overwritten by the loads below.
print(grid_search_name)
ergebnis_neu = load_grid_search_results(grid_search_name)

# ----- Open the saved results of the individual grid searches -----
ergebnis = load_grid_search_results("gridsearch_tradeoff_final.pkl")
ergebnis1 = load_grid_search_results("gridsearch_C_and_gamma_close_to_zero.pkl")
ergebnis2 = load_grid_search_results("gridsearch_only_gamma_close_to_zero.pkl")
ergebnis3 = load_grid_search_results("gridsearch_only_C_close_to_zero.pkl")
# ergebnis1.sort_values(by=["mean_train_recall","mean_test_recall"], ascending=[False, False]).head(3)

gridsearch_C_and_gamma_close_to_zero.pkl


In [114]:
# ----- Append all result tables to one another -----
# NOTE(review): pd.concat keeps each table's own row labels here, so the
# combined index can contain duplicate labels.
ergebnis_combined = pd.concat([ergebnis, ergebnis1, ergebnis2, ergebnis3]).sort_values(
    by=["mean_train_recall", "mean_test_recall"],
    ascending=[False, False],
)

# Set to True to write the merged table to disk.
save_merged_results = False
if save_merged_results:
    # NOTE(review): final_model and grid_parameters are whatever the kernel
    # currently holds, i.e. they belong to a single grid search and not to
    # the merged table - confirm this is intended.
    combined_grid_search = {
        'Ergebnisse': ergebnis_combined,
        'final_model': final_model,
        "grid_parameters": grid_parameters,
    }
    # Use a separate variable for the output file. Rebinding the global
    # `grid_search_name` would make a later re-run of the per-search save
    # cell overwrite the combined file with the results of one search.
    combined_grid_search_name = "gridsearch_complete_tradeoff_area_final.pkl"
    with open(combined_grid_search_name, 'wb') as data:
        # Pack all prepared data.
        pickle.dump(combined_grid_search, data, pickle.HIGHEST_PROTOCOL)