In [None]:
# # Install TensorFlow
# %pip install tensorflow

# # Install PyTorch (default CPU build)
# %pip install torch torchvision torchaudio

#### 데이터 불러오기

In [None]:
# # Load data 1: original (non-log-transformed) numeric data
# import pandas as pd
# url = "C:\\Users\\Playdata2\\Downloads\\tree_model_preprocessed.csv"
# df = pd.read_csv(url)
# df.head()

In [2]:
# Load data 2: log-transformed numeric data
import pandas as pd

# Relative path to the preprocessed CSV (despite the name, `url` is a local file path)
url = "../../DL_data/dataset/re_log_model_preprocessed.csv"
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,subscription_age,service_failure_count,download_over_limit,churn,contract_type_active,contract_type_expired,contract_type_no_contract,sub_both,sub_movie,sub_none,sub_tv,bill_avg_log,download_avg_log,upload_avg_log
0,11.95,0,0,0,1,0,0,0,0,0,1,3.258097,2.24071,1.193922
1,8.22,0,0,1,0,0,1,0,0,1,0,0.0,0.0,0.0
2,8.91,0,0,1,0,1,0,0,0,0,1,2.833213,2.687847,0.641854
3,6.87,1,0,1,0,0,1,0,0,1,0,3.091042,0.0,0.0
4,6.39,0,0,1,0,0,1,0,0,1,0,0.0,0.0,0.0


In [None]:
# Dataset overview: dimensions, number of non-null cells, and target balance.
print("샘플 수:", df.shape[0])   # first element of shape = rows (samples)
print("특징 수:", df.shape[1])   # second element of shape = columns (includes the 'churn' target)
print("전체 데이터 갯수:", df.count().sum())   # total count of non-null cells over all columns

print(f"데이터 크기: {df.shape[0]}개 샘플, {df.shape[1]}개 특징")

# Class balance of the target.
# The counts are pulled out of the f-string so the inner ['churn'] quotes do not
# collide with the f-string's own quotes (a SyntaxError before Python 3.12).
churn_counts = df['churn'].value_counts()
print(f"클래스 불균형 확인\n{churn_counts}")

# Majority : minority ratio computed from the data instead of a hard-coded literal.
print(f"불균형 비율 ≈ {churn_counts.max() / churn_counts.min():.2f} : 1")

샘플 수: 71892
특징 수: 14
전체 데이터 갯수: 1006488
데이터 크기: 71892개 샘플, 14개 특징
클래스 불균형 확인
churn
1    40049
0    31843
Name: count, dtype: int64
불균형 비율 ≈ 1.26 : 1


#### 데이터 생성

In [10]:
# Build the train/test splits and the scaled feature matrices
# 1. Library imports
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler

# 2. Separate inputs (X) and target (y)
# Besides the target, also drop any 'Unnamed: ...' column. In the preview of df,
# 'Unnamed: 0' holds 0, 1, 2, ... and looks like a row index saved into the CSV,
# so it should not be fed to the model as a feature.
non_feature_cols = ['churn'] + [c for c in df.columns if str(c).startswith('Unnamed')]
X = df.drop(columns=non_feature_cols)
y = df['churn']

# 3. Train/test split (80% / 20%, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. Scaling
# StandardScaler (default), MinMaxScaler (normalization), RobustScaler (many outliers)
scaler = [StandardScaler(), MinMaxScaler(), RobustScaler()]

# Fit on the training split only, then reuse the fitted scaler on the test split.
X_train_scaled = scaler[0].fit_transform(X_train)
# X_train_MMS_scaled = scaler[1].fit_transform(X_train)
# X_train_RS_scaled = scaler[2].fit_transform(X_train)

X_test_scaled = scaler[0].transform(X_test)
# X_test_MMS_scaled = scaler[1].transform(X_test)
# X_test_RS_scaled = scaler[2].transform(X_test)

#### 기존 수치형 데이터와 로그변환 데이터 성능 비교

In [None]:
# 0. 라이브러리 임포트
import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import classification_report

In [None]:
# Model used to compare the original numeric data against the log-transformed data
# Setup: StandardScaler inputs, 2 hidden layers (64-32-1), relu, sigmoid, adam, binary_crossentropy
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report (switch the label when running on the original numeric data)
print("\n=== Log Data ===")
# print(f"\n=== Num Data ===")
print(classification_report(y_test, y_pred))

# Author's note: the log-transformed model was better by 0.1.

#### TensorFlow

In [None]:
# 0. Library imports (the same imports appear, uncommented, in an earlier cell)
# import pandas as pd
# import numpy as np
# from tensorflow import keras
# from tensorflow.keras import layers
# from sklearn.metrics import classification_report

##### 레이어층 2, 은닉층 구조별 성능 출력
```
64 -> 32 -> 1
32 -> 16 -> 1
16 -> 8 -> 1 *
```

In [None]:
# Architecture: 64 -> 32 -> 1
# Setup: StandardScaler inputs, 2 hidden layers, relu, sigmoid, adam, binary_crossentropy
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 64-32-1 ===")
print(classification_report(y_test, y_pred))

In [None]:
# Architecture: 32 -> 16 -> 1
# Setup: StandardScaler inputs, 2 hidden layers, relu, sigmoid, adam, binary_crossentropy
# 1. Define the model
# The hidden widths are 32 and 16 so the network matches this cell's label and
# the "=== 32-16-1 ===" report header printed below.
model = keras.Sequential([
    layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

# 2. Compile
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# 3. Train on the scaled training split
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=16, verbose=2)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(X_test_scaled, y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = (model.predict(X_test_scaled) > 0.5).astype(int)

# 6. Per-class report
print(f"\n=== 32-16-1 ===")
print(classification_report(y_test, y_pred))

In [None]:
# Architecture: 16 -> 8 -> 1
# Setup: StandardScaler inputs, 2 hidden layers, relu, sigmoid, adam, binary_crossentropy
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(8, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 16-8-1 ===")
print(classification_report(y_test, y_pred))

##### 레이어층 3, 뉴런조합별 성능 출력
- 피라미드형
- 늘렸다 줄이기
- 줄였다 늘리기
과적합이 일어날 때 Dropout을 사용하지만,
Dropout을 적용해서 성능 수치가 올라가는 경우도 있다고 하니 Dropout 사용하기

##### 피라미드형

In [None]:
# Pyramid: 128 -> 64 -> 32 -> 1
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(128, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 128 -> 64 -> 32 -> 1 ===")
print(classification_report(y_test, y_pred))

In [None]:
# Pyramid: 64 -> 32 -> 16 -> 1
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 64-32-16-1 ===")
print(classification_report(y_test, y_pred))

In [None]:
# Pyramid: 32 -> 16 -> 8 -> 1
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(32, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(8, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 32-16-8-1 ===")
print(classification_report(y_test, y_pred))

In [None]:
# Pyramid: 16 -> 8 -> 4 -> 1
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(8, activation='relu'))
model.add(layers.Dense(4, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 16-8-4-1 ===")
print(classification_report(y_test, y_pred))

##### 줄였다 늘리기

In [None]:
# Shrink then expand: 64 -> 32 -> 64 -> 1
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 64-32-64-1 ===")
print(classification_report(y_test, y_pred))

In [None]:
# Shrink then expand: 32 -> 16 -> 32 -> 1
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(32, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 32-16-32-1 ===")
print(classification_report(y_test, y_pred))

In [None]:
# Shrink then expand: 16 -> 8 -> 16 -> 1
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(8, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 16-8-16-1 ===")
print(classification_report(y_test, y_pred))

##### 늘렸다 줄이기

In [None]:
# Expand then shrink: 32 -> 64 -> 32 -> 1
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(32, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 32-64-32-1 ===")
print(classification_report(y_test, y_pred))

In [None]:
# Expand then shrink: 16 -> 32 -> 16 -> 1
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 16-32-16-1 ===")
print(classification_report(y_test, y_pred))

In [None]:
# Expand then shrink: 8 -> 16 -> 8 -> 1
# 1. Build the network one layer at a time
model = keras.Sequential()
model.add(layers.Dense(8, activation='relu', input_shape=X_train.shape[1:]))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(8, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

# 2. Compile
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# 3. Train on the scaled training split
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=16,
    epochs=50,
    verbose=2,
)

# 4. Evaluate on the scaled test split
test_loss, test_acc = model.evaluate(x=X_test_scaled, y=y_test)
print(f"\n Test Accuracy: {test_acc:.4f}")

# 5. Predict: threshold the sigmoid output at 0.5 to get 0/1 labels
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# 6. Per-class report
print("\n=== 8-16-8-1 ===")
print(classification_report(y_test, y_pred))