In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

# 데이터 로드
train_data = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')

# 필요한 열 선택 및 전처리
df = train_data[['Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare']]
df.loc[:, 'Age'] = df['Age'].fillna(df['Age'].mean())
X = df.drop('Survived', axis=1)
y = df['Survived']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 모델 정의
models = {
    'Logistic Regression': LogisticRegression(max_iter=200),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'Support Vector Machine': SVC(),
    'K-Nearest Neighbors': KNeighborsClassifier()
}

In [3]:
# 모델 학습 및 평가
results = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    results[model_name] = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1 Score': f1_score(y_test, y_pred)
    }

results_df = pd.DataFrame(results).T

results_df

Unnamed: 0,Accuracy,Precision,Recall,F1 Score
Logistic Regression,0.731844,0.770833,0.5,0.606557
Decision Tree,0.653631,0.59375,0.513514,0.550725
Random Forest,0.73743,0.714286,0.608108,0.656934
Support Vector Machine,0.653631,0.75,0.243243,0.367347
K-Nearest Neighbors,0.675978,0.642857,0.486486,0.553846
