In [8]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# 라이브러리 로드
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import confusion_matrix, classification_report
import warnings
warnings.filterwarnings('ignore')

In [None]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("fedesoriano/heart-failure-prediction")

print("Path to dataset files:", path)

Path to dataset files: /root/.cache/kagglehub/datasets/fedesoriano/heart-failure-prediction/versions/1


In [None]:
# 데이터 로드
data = pd.read_csv('/content/drive/MyDrive/heart.csv')

In [None]:
data.head()

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0


In [None]:
X=data.drop('HeartDisease',axis=1)
y=data['HeartDisease']

In [None]:
encoder = LabelEncoder()
for column in X.columns:
    if X[column].dtype == 'object':
        X[column] = encoder.fit_transform(X[column])

In [None]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_scaled = pd.DataFrame(X_scaled, columns=X.columns)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42, shuffle=True)

In [None]:
# machine-learning (classification)
model = RandomForestClassifier(n_estimators=100 , criterion = "entropy" , max_depth= 4 ,random_state= 0)
scores = cross_val_score(model, X, y, cv=k_fold, scoring='accuracy')
print("Cross-validation scores:", scores)
print("Mean accuracy:", scores.mean()*100)
print("Standard deviation:", scores.std())

# 학습
model.fit(X_train, y_train)

Cross-validation scores: [0.86956522 0.88586957 0.88043478 0.82513661 0.84699454]
Mean accuracy: 86.1600142551675
Standard deviation: 0.02258363910132184


In [None]:
# 예측
y_predTest=model.predict(X_test)
y_predTrain=model.predict(X_train)

print("\nAccuracy Score:")
print(f"Train Accuracy: {accuracy_score(y_train, y_predTrain)*100:.2f}")
print(f"Test Accuracy: {accuracy_score(y_test, y_predTest)*100:.2f}")


Accuracy Score:
Train Accuracy: 89.24
Test Accuracy: 85.87


In [None]:
# CONFUSION_MATRIX
print("Confusion Matrix (Test):")
print(confusion_matrix(y_test, y_predTest))

Confusion Matrix (Test):
[[67 10]
 [16 91]]


In [None]:
# Report
print("분류 보고서:\n")
print(classification_report(y_test, y_predTest))


Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.87      0.84        77
           1       0.90      0.85      0.88       107

    accuracy                           0.86       184
   macro avg       0.85      0.86      0.86       184
weighted avg       0.86      0.86      0.86       184



In [None]:
# K-Fold Cross Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

In [None]:
accuracy_scores = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index] # Use .iloc for row-based indexing
    y_train, y_test = y.iloc[train_index], y.iloc[test_index] # Use .iloc for row-based indexing

    model = LogisticRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

print("평균 정확도:", np.mean(accuracy_scores))

평균 정확도: 0.8518294131622713


In [None]:
# Bootstrapping

n_iterations = 1000

# 결과 저장을 위한 리스트
accuracy_scores = []

In [None]:
# Bootstrapping 반복
for i in range(n_iterations):
    # 훈련 데이터에서 bootstrapping 샘플 생성
    # replace=True로 설정하여 중복 허용
    indices = np.random.choice(X_train.index, size=len(X_train), replace=True)
    X_boot = X_train.loc[indices]
    y_boot = y_train.loc[indices]

In [None]:
    # 모델 학습
    model = LogisticRegression()
    model.fit(X_boot, y_boot)

In [None]:
    # 테스트 데이터로 예측 및 성능 평가
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

In [None]:
# Bootstrapping 결과 출력
print("평균 정확도:", np.mean(accuracy_scores))
print("정확도 표준 편차:", np.std(accuracy_scores))

# 분류 보고서 출력 (마지막 모델 기준)
print("분류 보고서:\n", classification_report(y_test, y_pred))

평균 정확도: 0.8469945355191257
정확도 표준 편차: 0.0
분류 보고서:
               precision    recall  f1-score   support

           0       0.91      0.77      0.83        91
           1       0.80      0.92      0.86        92

    accuracy                           0.85       183
   macro avg       0.86      0.85      0.85       183
weighted avg       0.86      0.85      0.85       183



In [None]:
cd /content/drive/MyDrive

/content/drive/MyDrive


In [None]:
!git clone https://kyeong-min-kang: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@github.com/kyeong-min-kang/4th_industrial_revolution_-_AI.git

fatal: destination path '4th_industrial_revolution_-_AI' already exists and is not an empty directory.


In [17]:
cd /content/drive/MyDrive/4th_industrial_revolution_-_AI

/content/drive/MyDrive/4th_industrial_revolution_-_AI


In [18]:
!git config --global user.email 'kkm981024@naver.com'
!git config --global user.name 'kyeong-min-kang'

In [19]:
!git add Test_code.ipynb

In [None]:
pwd

'/content/drive/MyDrive/4th_industrial_revolution_-_AI'

In [15]:
!git commit -m test
!git push --help' for details.

On branch main
Your branch is based on 'origin/main', but the upstream is gone.
  (use "git branch --unset-upstream" to fixup)

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   Test_code.ipynb[m

no changes added to commit (use "git add" and/or "git commit -a")
/bin/bash: -c: line 1: unexpected EOF while looking for matching `''
/bin/bash: -c: line 2: syntax error: unexpected end of file
