In [1]:
# 다음 데이터 세트를 이용하여 다이아몬드 가격예측(회귀) 

# url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv"
# diamonds = pd.read_csv(url)

# 1. ML (RF, DT, LR) 수행

# 2. Dense layer만 이용 FCNN 구현

# 3. 순환 데이터 변환후 CNN 구현



# 각 단계별로 수행완료후

# 검사받은 이후에 다음 단계 진행

In [16]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
# 1. Load and preprocess the data: read the diamonds CSV from the seaborn-data
#    GitHub repository into a DataFrame.
DIAMONDS_CSV_URL = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/diamonds.csv"
df = pd.read_csv(DIAMONDS_CSV_URL)

In [7]:
# Preview the loaded frame (rich display of the cell's last expression).
df

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.20,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75
...,...,...,...,...,...,...,...,...,...,...
53935,0.72,Ideal,D,SI1,60.8,57.0,2757,5.75,5.76,3.50
53936,0.72,Good,D,SI1,63.1,55.0,2757,5.69,5.75,3.61
53937,0.70,Very Good,D,SI1,62.8,60.0,2757,5.66,5.68,3.56
53938,0.86,Premium,H,SI2,61.0,58.0,2757,6.15,6.12,3.74


In [8]:
# One-hot encode the categorical columns.
# The cell reassigns `df`, so a second run would look for columns that the
# first run already replaced and raise KeyError. Only encode the columns that
# are still present, which makes the cell safe to re-run.
categorical_cols = ['cut', 'color', 'clarity']
cols_to_encode = [col for col in categorical_cols if col in df.columns]
if cols_to_encode:
    df = pd.get_dummies(df, columns=cols_to_encode)

In [9]:
# Separate the regression target (price) from the feature columns.
# NOTE(review): the frame preview shows an 'Unnamed: 0' header; if that is a
# real column rather than the rendered index it would be kept as a feature
# here -- confirm.
target_col = 'price'
y = df[target_col]
X = df.drop(columns=[target_col])

In [10]:
# Min-max scale every feature column.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# that happens further down, so test rows influence the scaling statistics --
# consider fitting on the training rows only.
scaler = MinMaxScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [11]:
# Sliding-window helper used to turn rows into sequence samples
def split_sequence(sequence, n_steps):
    """Cut `sequence` into overlapping windows plus the element that follows each.

    Args:
        sequence: indexable, sliceable sequence (e.g. a NumPy array) whose first
            axis is the step axis.
        n_steps: number of consecutive elements in every window.

    Returns:
        A pair of NumPy arrays ``(X, y)`` where ``X[i]`` is
        ``sequence[i:i + n_steps]`` and ``y[i]`` is ``sequence[i + n_steps]``.
        ``len(sequence) - n_steps`` windows are produced; when that count is
        not positive both arrays are empty.
    """
    n_windows = len(sequence) - n_steps
    windows = [sequence[start:start + n_steps] for start in range(n_windows)]
    followers = [sequence[start + n_steps] for start in range(n_windows)]
    return np.array(windows), np.array(followers)

In [12]:
# 시퀀스 데이터 생성
n_steps = 3
X_seq, y_seq = split_sequence(X_scaled, n_steps)

In [13]:
# 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, 
                                                    test_size=0.2, 
                                                    random_state=42)

In [22]:
# CNN 모델 구축 (회귀)
model = Sequential([
    Conv1D(filters=64, kernel_size=2, activation='relu', 
           input_shape=(n_steps, X.shape[1])),
    MaxPooling1D(pool_size=2),
    Dense(32, activation='relu'),
    Dense(X.shape[1])
])

In [23]:
# Compile the model: Adam optimizer, mean-squared-error loss, MAE reported as
# an extra metric.
compile_kwargs = dict(optimizer='adam', loss='mse', metrics=['mae'])
model.compile(**compile_kwargs)

In [24]:
# Train the model.
# Stop as soon as the validation loss has not improved for a few epochs and
# roll back to the best weights, instead of always running all 100 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                              patience=5,
                                              restore_best_weights=True)
history = model.fit(X_train, y_train, 
                   epochs=100, 
                   batch_size=32,
                   validation_split=0.2,
                   callbacks=[early_stop],
                   verbose=1)

Epoch 1/100
[1m1079/1079[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 0.1019 - mae: 0.2032 - val_loss: 0.0926 - val_mae: 0.1916
Epoch 2/100
[1m1079/1079[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - loss: 0.0926 - mae: 0.1933 - val_loss: 0.0925 - val_mae: 0.1935
Epoch 3/100
[1m1079/1079[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - loss: 0.0926 - mae: 0.1933 - val_loss: 0.0925 - val_mae: 0.1927
Epoch 4/100
[1m1079/1079[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0924 - mae: 0.1930 - val_loss: 0.0925 - val_mae: 0.1922
Epoch 5/100
[1m1079/1079[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0926 - mae: 0.1932 - val_loss: 0.0924 - val_mae: 0.1935
Epoch 6/100
[1m1079/1079[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 0.0925 - mae: 0.1931 - val_loss: 0.0924 - val_mae: 0.1920
Epoch 7/100
[1m1079/1079[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

KeyboardInterrupt: 

In [None]:
# Evaluate the model on the train and test splits.
# sklearn's regression metrics accept only 1-D or 2-D arrays, so reshape the
# network output to the targets' shape before scoring (a no-op when the two
# shapes already agree).
train_predict = model.predict(X_train).reshape(np.shape(y_train))
test_predict = model.predict(X_test).reshape(np.shape(y_test))

# MSE, MAE and R^2 for both splits
train_mse = mean_squared_error(y_train, train_predict)
test_mse = mean_squared_error(y_test, test_predict)
train_mae = mean_absolute_error(y_train, train_predict)
test_mae = mean_absolute_error(y_test, test_predict)
train_r2 = r2_score(y_train, train_predict)
test_r2 = r2_score(y_test, test_predict)

print(f'Train MSE: {train_mse:.4f}')
print(f'Test MSE: {test_mse:.4f}')
print(f'Train MAE: {train_mae:.4f}')
print(f'Test MAE: {test_mae:.4f}')
print(f'Train R2: {train_r2:.4f}')
print(f'Test R2: {test_r2:.4f}')