In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

RANDOM_STATE = 42


In [7]:
columns = [
    'age', 'workclass', 'fnlwgt', 'education', 'education-num',
    'marital-status', 'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income'
]

In [8]:
df = pd.read_csv('../data/raw/adult/adult.data', header=None, names=columns, index_col=False, na_values=[' ?','?'])

In [9]:
df_processed = df.dropna().copy()

In [11]:
X = df_processed.drop('income', axis = 1)
y = df_processed['income']

In [14]:
X.select_dtypes(include='object').columns

Index(['workclass', 'education', 'marital-status', 'occupation',
       'relationship', 'race', 'sex', 'native-country'],
      dtype='object')

In [15]:
X_encoded = pd.get_dummies(X, drop_first=True)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=RANDOM_STATE)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=X_train.columns)
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=X_test.columns)

print("--- Trenowanie modelu SVC ---")
svc = SVC(kernel='rbf', probability=True, random_state=RANDOM_STATE)
svc.fit(X_train_scaled, y_train)

y_pred_svc = svc.predict(X_test_scaled)
y_proba_svc = svc.predict_proba(X_test_scaled)[:, 1]

print("\nRaport Klasyfikacji dla SVC:")
print(classification_report(y_test, y_pred_svc))
print("Accuracy dla SVC:", accuracy_score(y_test, y_pred_svc))
print("AUC dla SVC:", roc_auc_score(y_test, y_proba_svc))


--- Trenowanie modelu SVC ---

Raport Klasyfikacji dla SVC:
              precision    recall  f1-score   support

       <=50K       0.87      0.94      0.90      4503
        >50K       0.76      0.58      0.66      1530

    accuracy                           0.85      6033
   macro avg       0.81      0.76      0.78      6033
weighted avg       0.84      0.85      0.84      6033

Accuracy dla SVC: 0.8471738770097795
AUC dla SVC: 0.8952848427845489
