HW#6 신경망모델을 이용한 분류

In [7]:
# 필요한 라이브러리를 불러옵니다
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score

# Mount Google Drive inside the Colab runtime so files stored there can be
# read through the /content/drive path (force_remount=True asks for a fresh
# mount even if the drive is already mounted).
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Load the shopping dataset from the mounted drive into a DataFrame.
data_path = '/content/drive/MyDrive/Colab Notebooks/shopping_data.csv'
data = pd.read_csv(data_path)

# Make sure the 'Revenue' target is a 64-bit integer column (a boolean
# True/False column, for instance, becomes 1/0).  The dtype is spelled out as
# 'int64' instead of the platform-dependent 'int' alias so that the
# select_dtypes(include=['int64', 'float64']) lookup further down always
# recognises this column, whatever the platform's default integer width is.
if data['Revenue'].dtype != 'int64':
    data['Revenue'] = data['Revenue'].astype('int64')

# Split the predictors into categorical and numeric groups.
categorical_features = ['Month', 'VisitorType']
# Every int64/float64 column except the target counts as numeric.  The target
# is filtered out (rather than list.remove()'d) so this cannot raise
# ValueError when 'Revenue' was not picked up by the dtype filter.
numeric_features = [
    column
    for column in data.select_dtypes(include=['int64', 'float64']).columns
    if column != 'Revenue'
]
# NOTE(review): any column that is neither listed in categorical_features nor
# of dtype int64/float64 (for example a boolean column) is discarded by the
# ColumnTransformer's default remainder='drop' -- confirm that is intended.

# Preprocessing: standardise numeric columns, one-hot encode categorical
# columns (dropping the first level of each to avoid a redundant column).
numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(drop='first')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ],
    # OneHotEncoder produces sparse output; with the default threshold the
    # combined result may become a SciPy sparse matrix depending on the data.
    # A threshold of 0 always yields a dense array, which is what the Keras
    # model is fed directly.
    sparse_threshold=0,
)

# Wrap the preprocessor in a single-step pipeline.
pipeline = Pipeline([('preprocessor', preprocessor)])

# Separate the target from the predictors, then hold out 20% of the rows as a
# test set (fixed random_state keeps the split repeatable).
y = data['Revenue']
X = data.drop(columns='Revenue')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the preprocessing on the training rows only and transform them.
X_train_transformed = pipeline.fit_transform(X_train)

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable from run to run; without
# this the reported accuracy changes on every execution.
# NOTE(review): some GPU kernels may still be non-deterministic -- confirm
# whether bit-exact reproducibility is required.
from tensorflow.keras.utils import set_random_seed
set_random_seed(42)

# Sequential network: 128 ReLU units (first layer, sized to the transformed
# feature count) -> 64 ReLU units -> 1 sigmoid output unit.
n_features = X_train_transformed.shape[1]
model = Sequential([
    Dense(128, activation='relu', input_shape=(n_features,)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked as a metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train for 100 epochs without per-epoch console output.
model.fit(X_train_transformed, y_train, epochs=100, verbose=0)

# Apply the already-fitted preprocessing to the test rows (transform only, no
# refitting, so no test-set statistics leak into the scaler/encoder).
X_test_transformed = pipeline.transform(X_test)

# Predict, then threshold the outputs at 0.5 to obtain 0/1 class labels.
# predict() returns a 2-D array, so the labels are flattened to 1-D to match
# the shape of y_test before scoring.
y_pred = model.predict(X_test_transformed)
y_pred = (y_pred > 0.5).astype(int).ravel()
accuracy = accuracy_score(y_test, y_pred)

# Report the test-set accuracy to four decimal places (message is in Korean:
# "model accuracy").
print(f'모델의 정확도: {accuracy:.4f}')


Mounted at /content/drive
모델의 정확도: 0.8751
