In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout


In [28]:

# 1. 데이터 로드 및 전처리
abalone_data = pd.read_csv('D:GC/abalone.csv')
abalone_data

Unnamed: 0,id,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...,...
4172,4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [29]:
# 'Sex' 열은 범주형이므로 원-핫 인코딩을 수행합니다.
abalone_data = pd.get_dummies(abalone_data, columns=['Sex'], drop_first=True)
# 'id' 열은 의미가 없으므로 삭제합니다.
abalone_data = abalone_data.drop(columns=['id'])


In [34]:
# 특징(X)과 타겟(y) 분리 (Rings가 타겟)
X = abalone_data.drop(columns=['Rings'])
y = abalone_data['Rings']

# 데이터를 훈련(90%), 테스트(10%) 세트로 나눕니다.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# NumPy 배열로 변환 (시퀀스 생성을 위해 필요)
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

X_train = X_train.astype(np.float32)
X_test = X_test.astype(np.float32)
y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)

In [35]:
# 4. 시퀀스 데이터를 생성하기 위한 함수 정의 (n_steps = 10로 설정)
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the sequence
        if end_ix > len(sequence) - 1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)


In [36]:
# 시퀀스 생성 (n_steps=10)
n_steps = 10
X_train_seq, y_train_seq = split_sequence(np.hstack([X_train, y_train[:, None]]), n_steps)
X_test_seq, y_test_seq = split_sequence(np.hstack([X_test, y_test[:, None]]), n_steps)

print(f"훈련 시퀀스 데이터 크기: {X_train_seq.shape}")
print(f"테스트 시퀀스 데이터 크기: {X_test_seq.shape}")

훈련 시퀀스 데이터 크기: (3749, 10, 10)
테스트 시퀀스 데이터 크기: (408, 10, 10)


In [37]:
# 5. CNN + LSTM 모델 구축
model = Sequential()

# 1D CNN Layer (특징 추출)
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_steps, X_train_seq.shape[2])))
model.add(MaxPooling1D(pool_size=1))  # MaxPooling의 크기를 1로 조정하여 차원 축소를 방지

# LSTM Layer (시퀀스 패턴 학습)
model.add(LSTM(50, activation='relu'))

# Fully Connected Layer (회귀 문제이므로 Dense 레이어 사용)
model.add(Dense(50, activation='relu'))

# Output Layer (나이 예측, 회귀이므로 출력 노드는 1)
model.add(Dense(1))

# 6. 모델 컴파일 (회귀 문제이므로 MSE를 손실 함수로 사용)
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# 모델 구조 확인
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [38]:

# 7. 모델 훈련 (검증 데이터를 훈련 데이터의 10%로 설정)
history = model.fit(X_train_seq, y_train_seq, epochs=10, validation_split=0.1)

# 8. 모델 평가
loss, mae = model.evaluate(X_test_seq, y_test_seq)
print(f"테스트 세트 성능 - 손실(MSE): {loss}, 평균 절대 오차(MAE): {mae}")


Epoch 1/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - loss: 9.7760 - mae: 1.6955 - val_loss: 9.6309 - val_mae: 1.7516
Epoch 2/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 9.2724 - mae: 1.7201 - val_loss: 9.6412 - val_mae: 1.8576
Epoch 3/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 9.4250 - mae: 1.7260 - val_loss: 9.6445 - val_mae: 1.6808
Epoch 4/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 9.1135 - mae: 1.6993 - val_loss: 9.6320 - val_mae: 1.7303
Epoch 5/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 9.3849 - mae: 1.7241 - val_loss: 9.6297 - val_mae: 1.7577
Epoch 6/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 9.3195 - mae: 1.7174 - val_loss: 9.6309 - val_mae: 1.7463
Epoch 7/10
[1m106/106[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - 