In [131]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical

In [132]:
def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence)):
        # 현재 패턴의 끝 인덱스를 찾음
        end_ix = i + n_steps
        # 시퀀스 끝을 넘으면 중단
        if end_ix > len(sequence)-1:
            break
        # 입력과 출력 부분을 나눔
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [133]:
# 데이터 로드
df = pd.read_csv("C:/datasets/abalone.csv")

In [134]:
df.isnull().sum()

id                0
Sex               0
Length            0
Diameter          0
Height            0
Whole_weight      0
Shucked_weight    0
Viscera_weight    0
Shell_weight      0
Rings             0
dtype: int64

In [135]:
label_encoder = LabelEncoder()
df['Sex'] = label_encoder.fit_transform(df['Sex'])

In [136]:
df.head()

Unnamed: 0,id,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,0,2,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,1,2,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,2,0,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,3,2,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,4,1,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [137]:
# 특성 및 타겟 설정
X = df.drop(columns=['Rings'])
y = df['Rings']

In [138]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [139]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((3341, 9), (836, 9), (3341,), (836,))

In [140]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [141]:
linear_regressor = LinearRegression()
linear_regressor.fit(X_train, y_train)
y_pred = linear_regressor.predict(X_test)

In [142]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

mse, r2

(4.960275930355892, 0.5417847866710463)

In [143]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, LSTM, Flatten, Dropout
from sklearn.preprocessing import MinMaxScaler

In [144]:
scaler = MinMaxScaler()
scaled_df = scaler.fit_transform(df.drop(['id'], axis=1))

In [145]:
n_steps = 3

In [149]:
X_seq, y_seq = split_sequence(scaled_df, n_steps)

In [150]:
# CNN + LSTM 모델 구성
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_steps, n_features)))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(50, activation='relu', return_sequences=True))
model.add(Flatten())
model.add(Dense(50, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')

model.summary

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


<bound method Model.summary of <Sequential name=sequential_13, built=True>>

In [151]:
X_seq_train, X_seq_test, y_seq_train, y_seq_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

In [152]:
history = model.fit(X_seq_train, y_seq_train, epochs=10, verbose=0, validation_data=(X_seq_test, y_seq_test))

In [156]:
model.evaluate(X_seq_test, y_seq_test, verbose=0)

0.05673278495669365