In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the CSV file
data = pd.read_csv('dataset.csv')

# Column dtypes and non-null counts (info() prints by itself)
data.info()

# Basic summary statistics.
# Only the last expression of a cell is displayed automatically, so
# intermediate results are printed explicitly.
print(data.describe())

# Missing values per column in the raw frame
print(data.isnull().sum())

# Drop rows with missing values into a new frame and verify none remain.
# `df` must be derived from `data`; it does not exist before this line.
df = data.dropna()
df.isnull().sum()

In [None]:
# Pairwise relationships between the selected features, colored by label

pairplot_features = ['mean radius', 'mean texture', 'mean perimeter', 'mean area']
sns.pairplot(df, vars=pairplot_features, hue='label')
plt.show()

In [None]:
# Scatter plot of two features

import matplotlib.pyplot as plt
import numpy as np

# Pull the two columns out first so the plotting call reads as x vs. y
radius_values = df['mean radius']
texture_values = df['mean texture']

plt.scatter(radius_values, texture_values)
plt.xlabel('Mean Radius')
plt.ylabel('Mean Texture')

plt.show()

In [None]:
# Check the column names.
# Only the last expression of a cell is displayed automatically, so
# intermediate results are printed explicitly.
print(df.columns)

# Label counts (numbers) -- computed once and reused for the bar chart below.
# The target column is 'label', the same column the other plots use.
label_counts = df['label'].value_counts()
print(label_counts)

# Label counts (seaborn count plot) on its own figure
count_fig, count_ax = plt.subplots()
sns.countplot(data=df, x="label", ax=count_ax)
count_ax.set_xlabel("Cancer or Not")
count_ax.set_ylabel("Count")
count_ax.set_title("Label")

# Same counts as a pandas bar chart. A separate figure is created because
# Series.plot would otherwise draw onto the current axes (the count plot above).
bar_fig, bar_ax = plt.subplots()
label_counts.plot(kind='bar', ax=bar_ax)

# Title and axis labels for the bar chart
bar_ax.set_title('Label Count')
bar_ax.set_xlabel('Labels')
bar_ax.set_ylabel('Count')

plt.show()

In [None]:
# One-hot encode the features (for string/categorical columns), then split into train/test

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Define features/target here so this cell does not depend on a later cell.
# The target column is 'label', the name used by the plotting cells.
X = data.drop('label', axis=1)
y = data['label']

# NOTE(review): this encodes every column of X, which assumes all features are
# categorical/string -- confirm before using on numeric columns.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
X_encoded = encoder.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=0)

# Any fixed integer random_state (0 here) reproduces the same split on every run;
# random_state=None gives a different split each time.

In [None]:
# Train/test split

from sklearn.model_selection import train_test_split

# Separate the features from the target.
# The target column is 'label', the name used by the plotting cells.
# NOTE(review): `data` is the raw frame; switch to the NaN-dropped frame if
# rows with missing values should be excluded from training.
X = data.drop('label', axis=1)
y = data['label']

# Fixed random_state so the 80/20 split is reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [None]:
# Definition of split_sequence

def split_sequence(sequence, n_steps):
    """Slice a sequence into sliding windows and their next-step targets.

    Each sample pairs ``sequence[i:i + n_steps]`` with the element that
    immediately follows it, ``sequence[i + n_steps]``.

    Args:
        sequence: Indexable, sliceable sequence of observations.
        n_steps: Number of consecutive elements in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays built from the windows and the
        targets. Both are empty when no window has a following element.
    """
    total = len(sequence)
    windows, targets = [], []
    start = 0
    # Keep sliding while the window start and its target index are both in range.
    while start < total and start + n_steps < total:
        target_ix = start + n_steps
        windows.append(sequence[start:target_ix])
        targets.append(sequence[target_ix])
        start += 1
    return np.array(windows), np.array(targets)

# Split the series into (window, next value) pairs.
# NOTE(review): `sequence` and `n_steps` are not defined in the visible cells --
# they must be set before this cell runs.

X, y = split_sequence(sequence=sequence, n_steps=n_steps)

In [None]:
# Model

# Import every Keras name this cell uses so it does not rely on a later cell's imports.
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Dense, LSTM, Dropout

def create_cnn_lstm_model(input_shape, output_shape):
    """Build and compile a small Conv1D + LSTM binary classifier.

    NOTE(review): a later cell defines a function with this same name; once
    that cell runs, it replaces this definition.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Conv1D layer.
        output_shape: Number of units in the final sigmoid Dense layer.

    Returns:
        A Sequential model compiled with the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        # Two convolution + pooling stages feed the recurrent layer
        Conv1D(32, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),
        Conv1D(64, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        LSTM(50, activation='relu'),
        Dense(64, activation='relu'),
        Dense(output_shape, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


In [None]:
# 층 추가한 모델
# Dense 층 조절하는게 제일 좋을 듯

from keras.models import Sequential
from keras.models import Model
from keras.layers import Conv1D, MaxPooling1D, UpSampling1D, Dense, Flatten, Input, LSTM, Dropout

def create_cnn_lstm_model(input_shape, output_shape):
    """Build and compile the deeper Conv1D + LSTM binary classifier.

    The network stacks four Conv1D -> MaxPooling1D -> Dropout stages,
    followed by an LSTM, two hidden Dense layers and a sigmoid output layer.

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Conv1D layer.
        output_shape: Number of units in the final sigmoid Dense layer.

    Returns:
        A Sequential model compiled with the Adam optimizer,
        binary cross-entropy loss and the accuracy metric.
    """
    # (filters, dropout rate) for each convolutional stage, in order
    conv_stages = [(32, 0.2), (64, 0.2), (128, 0.3), (256, 0.3)]

    layers = []
    for stage_ix, (filters, drop_rate) in enumerate(conv_stages):
        if stage_ix == 0:
            # Only the first layer carries the input shape
            conv = Conv1D(filters, kernel_size=3, activation='relu', input_shape=input_shape)
        else:
            conv = Conv1D(filters, kernel_size=3, activation='relu')
        layers.append(conv)
        layers.append(MaxPooling1D(pool_size=2))
        layers.append(Dropout(drop_rate))

    # Recurrent layer over the pooled feature sequence
    layers.append(LSTM(100, activation='relu'))
    layers.append(Dropout(0.2))

    # Fully connected head
    layers.append(Dense(128, activation='relu'))
    layers.append(Dropout(0.2))
    layers.append(Dense(64, activation='relu'))
    layers.append(Dense(output_shape, activation='sigmoid'))

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


# Autoencoder

def create_autoencoder(input_shape):
    """Build and compile a 1-D convolutional autoencoder.

    The encoder is two Conv1D + MaxPooling1D stages (32 then 64 filters); the
    decoder mirrors it with two Conv1D + UpSampling1D stages (64 then 32
    filters). A final sigmoid Conv1D maps back to ``input_shape[1]`` channels.

    Args:
        input_shape: Shape tuple for the Input layer; index 1 is used as the
            number of output channels.

    Returns:
        A Model from the input to the reconstruction, compiled with the Adam
        optimizer and MSE loss.
    """
    inputs = Input(shape=input_shape)
    n_channels = input_shape[1]
    conv_kwargs = dict(kernel_size=3, activation='relu', padding='same')

    # Encoder: the output of the second pooling layer is the compressed representation
    h = inputs
    for filters in (32, 64):
        h = Conv1D(filters, **conv_kwargs)(h)
        h = MaxPooling1D(pool_size=2)(h)

    # Decoder: mirror the encoder, upsampling by 2 at each stage
    for filters in (64, 32):
        h = Conv1D(filters, **conv_kwargs)(h)
        h = UpSampling1D(size=2)(h)

    # Restore the original number of channels
    reconstruction = Conv1D(n_channels, kernel_size=3, activation='sigmoid', padding='same')(h)

    autoencoder = Model(inputs, reconstruction)
    autoencoder.compile(optimizer='adam', loss='mse')

    return autoencoder


# Autoencoder + CNN + LSTM

def create_cnn_lstm_with_autoencoder(input_shape, output_shape):
    """Build an autoencoder and a CNN-LSTM classifier on top of its encoder.

    The autoencoder is created and summarized here but not fitted; it is
    returned so the caller can use it.

    Args:
        input_shape: Input shape used for both the autoencoder and the classifier.
        output_shape: Number of units in the final sigmoid Dense layer.

    Returns:
        A tuple ``(autoencoder, cnn_lstm_model)``; the classifier is compiled
        with the Adam optimizer, binary cross-entropy loss and the accuracy metric.
    """
    autoencoder = create_autoencoder(input_shape)
    autoencoder.summary()

    # Encoder sub-model: from the autoencoder input up to the layer at index 4.
    # NOTE(review): index 4 is expected to be the second pooling layer of
    # create_autoencoder (counting the input layer as index 0) -- confirm.
    bottleneck = autoencoder.layers[4].output
    encoder = Model(inputs=autoencoder.input, outputs=bottleneck)

    # Classifier input is passed through the encoder to get the features
    classifier_input = Input(shape=input_shape)
    h = encoder(classifier_input)

    # Convolution + pooling over the encoded features
    h = Conv1D(64, kernel_size=3, activation='relu')(h)
    h = MaxPooling1D(pool_size=2)(h)
    h = Dropout(0.2)(h)

    # Recurrent layer
    h = LSTM(50, activation='relu')(h)
    h = Dropout(0.2)(h)

    # Dense head
    h = Dense(64, activation='relu')(h)
    predictions = Dense(output_shape, activation='sigmoid')(h)

    cnn_lstm_model = Model(classifier_input, predictions)
    cnn_lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return autoencoder, cnn_lstm_model

In [None]:
# Model training

# (n_steps=100, n_features=1) input and a single output unit for binary classification
input_shape, output_shape = (100, 1), 1

# Build the model and print its layer summary
model = create_cnn_lstm_model(input_shape=input_shape, output_shape=output_shape)
model.summary()

# Train the model once the data has been prepared.
# NOTE(review): X_val / y_val are not defined in the visible cells.
# model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_val, y_val))


In [None]:
# Evaluate model performance on the test split.
# NOTE(review): the fit call in the visible cells is commented out -- make sure
# the model has been trained before relying on these numbers.

test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f'Test Loss: {loss}, Test Accuracy: {accuracy}')