In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset
df = pd.read_csv("../d")

# Drop columns that are not used as model features
df = df.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Impute missing values: median for 'Age' and 'Fare', most frequent value
# for 'Embarked'.
# The filled column is assigned back rather than calling
# `df[col].fillna(..., inplace=True)`: that chained in-place form operates on
# a temporary Series, emits a FutureWarning on pandas 2.2+, and leaves `df`
# unchanged once Copy-on-Write is active (the default from pandas 3.0).
# NOTE: the statistics are computed over the whole frame, i.e. before any
# train/test split is made.
df['Age'] = df['Age'].fillna(df['Age'].median())
df['Embarked'] = df['Embarked'].fillna(df['Embarked'].mode()[0])
df['Fare'] = df['Fare'].fillna(df['Fare'].median())

# Encode categorical variables
# 'Sex' -> 0/1; any value other than 'male'/'female' becomes NaN.
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
# One-hot encode 'Embarked', dropping the first level to avoid a redundant column.
df = pd.get_dummies(df, columns=['Embarked'], drop_first=True)

# Engineered features
df['FamilySize'] = df['SibSp'] + df['Parch'] + 1  # family size (the +1 presumably counts the passenger)
df['IsAlone'] = (df['FamilySize'] == 1).astype(int)  # 1 when FamilySize is exactly 1, else 0
df['FarePerPerson'] = df['Fare'] / df['FamilySize']  # fare divided by family size

# Feature matrix and target
X = df.drop(columns=['Survived'])
y = df['Survived']

# Train/test split: hold out 20% of the rows, stratified on y, with a fixed
# seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardization: the scaler is fitted on the training rows only, then the
# same fitted transform is applied to both the training and the test rows.
numeric_cols = ['Age', 'Fare', 'FarePerPerson']
scaler = StandardScaler()
scaler.fit(X_train[numeric_cols])
for part in (X_train, X_test):
    part[numeric_cols] = scaler.transform(part[numeric_cols])

# Hyperparameter search: 5-fold cross-validated grid search over C for an
# L2-penalized logistic regression, scored by accuracy.
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l2'],
}
grid = GridSearchCV(
    estimator=LogisticRegression(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid.fit(X_train, y_train)

# Best model found by the search
best_model = grid.best_estimator_

# Predictions for X_test
y_pred = best_model.predict(X_test)

# Accuracy of the predictions against y_test
accuracy = accuracy_score(y_test, y_pred)
print(f"🔥 정확도: {accuracy:.4f}")

# Detailed evaluation (text classification report)
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)

# Confusion matrix drawn as an annotated heatmap; rows are the actual
# classes and columns are the predicted classes.
conf_matrix = confusion_matrix(y_test, y_pred)
class_labels = ['Not Survived', 'Survived']
ax = sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()
