In [2]:
import pandas as pd
df = pd.read_csv('credit_scores.csv')
df = df.drop(columns=["SSN", "NAME", "Customer_ID", "ID"])


In [3]:
X = df.drop(columns=["Credit_Score"])
y = df["Credit_Score"]

In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=1)

In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC


num_features = X.select_dtypes(include=['int64', 'float64']).columns
cat_features = X.select_dtypes(include=['object']).columns
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])
cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])
preprocessor = ColumnTransformer([
    ('num', num_pipeline, num_features),
    ('cat', cat_pipeline, cat_features)
])


In [8]:
from sklearn.model_selection import GridSearchCV

clf_pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', SVC())
])
param_grid = {
    'classifier__kernel': ['linear', 'rbf'],
    'classifier__C': [0.01, 10, 20]
}
grid_search = GridSearchCV(clf_pipeline, param_grid, cv=2)
grid_search.fit(X_train, y_train)


In [None]:
from sklearn.metrics import accuracy_score
import joblib
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of the best model: {accuracy}')
best_model.fit(X, y)
joblib.dump(best_model, 'model.pkl')

Accuracy of the best model: 0.6390302304699191
