<a href="https://colab.research.google.com/github/jsjj10002/FackVoiceClassfication/blob/main/%EB%AA%A8%EB%8D%B8_%ED%95%99%EC%8A%B5_TPU_%ED%99%98%EA%B2%BD_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# TPU 환경 구축

In [None]:
import tensorflow as tf

# Locate the TPU attached to this runtime. No TPU name is passed, so the
# resolver falls back to its default discovery.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
# Connect this session to the TPU cluster and initialise the TPU system
# before any strategy is created.
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
# `tf.distribute.experimental.TPUStrategy` is a deprecated alias; use the
# stable `tf.distribute.TPUStrategy` symbol instead.
strategy = tf.distribute.TPUStrategy(resolver)




### 필요 라이브러리 호출

In [None]:
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import KFold
from tensorflow.keras.layers import Dropout, Conv1D, MaxPooling1D, Bidirectional, LSTM, Flatten
from tensorflow.keras.regularizers import l1_l2

# mfcc 특징 추출 데이터 불러오기

In [None]:
# Fetch the pre-extracted MFCC feature file from Google Drive with the gdown CLI.
!gdown https://drive.google.com/uc?id=1axGFU6vG3jr742bwVRUW2Y1f2uIILotX

Downloading...
From: https://drive.google.com/uc?id=1axGFU6vG3jr742bwVRUW2Y1f2uIILotX
To: /content/mfcc_df.pkl
100% 16.1M/16.1M [00:00<00:00, 44.1MB/s]


## 데이터 분할

In [None]:
# Load the pre-extracted MFCC feature table.
# NOTE(review): unpickling executes code embedded in the file; only load a
# pickle that comes from a trusted source.
mfcc_df = pd.read_pickle('/content/mfcc_df.pkl')

# The 'category' column assigns every row to one of the predefined splits.
train_df = mfcc_df[mfcc_df['category'] == 'training']
val_df = mfcc_df[mfcc_df['category'] == 'validation']
test_df = mfcc_df[mfcc_df['category'] == 'testing']

print("Training Data Shape:", train_df.shape)
print("Validation Data Shape:", val_df.shape)
print("Testing Data Shape:", test_df.shape)


def _features_and_labels(split_df):
    """Return (features, labels) for one split.

    The per-row 'mfcc' vectors are stacked into a single 2-D array (one row
    per sample); labels are returned as the raw values of the 'label' column.
    """
    features = np.vstack([np.asarray(vec) for vec in split_df['mfcc']])
    return features, split_df['label'].values


X_train, Y_train = _features_and_labels(train_df)
X_val, Y_val = _features_and_labels(val_df)
X_test, Y_test = _features_and_labels(test_df)

# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to validation and test.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Map the labels to integer class indices (fitted on the training labels).
encoder = LabelEncoder()
Y_train = encoder.fit_transform(Y_train)
Y_val = encoder.transform(Y_val)
Y_test = encoder.transform(Y_test)

# One-hot encode the labels for classification. num_classes is pinned to the
# encoder's class count so every split gets the same number of columns even
# if one of them happens not to contain the highest class index.
num_classes = len(encoder.classes_)
Y_train = to_categorical(Y_train, num_classes=num_classes)
Y_val = to_categorical(Y_val, num_classes=num_classes)
Y_test = to_categorical(Y_test, num_classes=num_classes)

print("X_train shape:", X_train.shape)
print("Y_train shape:", Y_train.shape)
print("X_val shape:", X_val.shape)
print("Y_val shape:", Y_val.shape)
print("X_test shape:", X_test.shape)
print("Y_test shape:", Y_test.shape)

# 모델 구축 - CNN, 양방향 LSTM, L1-L2 정규화 이용

In [None]:
def build_advanced_model(input_shape, output_units):
  """Build and compile the Conv1D + bidirectional-LSTM classifier.

  Three Conv1D -> MaxPooling1D -> Dropout stages (64, 128 and 256 filters)
  feed a bidirectional LSTM, whose sequence output is flattened and passed
  through a dense layer to a softmax output. The model is created and
  compiled inside the scope of the module-level `strategy`.

  Args:
    input_shape: value forwarded to the first Conv1D layer as `input_shape`.
    output_units: number of units in the final softmax layer.

  Returns:
    The compiled `Sequential` model.
  """
  def make_regularizer():
    # Every regularised layer gets its own l1_l2 instance.
    return l1_l2(l1=1e-5, l2=1e-4)

  with strategy.scope():
    stack = []
    # Convolution + pooling stages; only the first one declares the input shape.
    for stage, n_filters in enumerate((64, 128, 256)):
      conv_kwargs = dict(kernel_size=5, activation='relu', padding='same',
                         kernel_regularizer=make_regularizer())
      if stage == 0:
        conv_kwargs['input_shape'] = input_shape
      stack.extend([
          Conv1D(n_filters, **conv_kwargs),
          MaxPooling1D(pool_size=2),
          Dropout(0.3),
      ])

    stack.extend([
        # Bidirectional LSTM over the pooled sequence
        Bidirectional(LSTM(128, return_sequences=True, kernel_regularizer=make_regularizer())),
        Dropout(0.3),
        # Flatten the sequence output
        Flatten(),
        # Fully connected layer
        Dense(128, activation='relu', kernel_regularizer=make_regularizer()),
        Dropout(0.5),
        # Output layer
        Dense(output_units, activation='softmax'),
    ])

    network = Sequential(stack)
    # Compile
    network.compile(optimizer=Adam(learning_rate=0.01), loss='categorical_crossentropy', metrics=['accuracy'])
  return network

## 모델 학습 및 검증 - k-fold 교차검증

In [None]:
# Pool the three predefined splits back into a single feature matrix and a
# single label matrix (rows stacked in train, validation, test order).
X, Y = (
    np.concatenate(parts, axis=0)
    for parts in ([X_train, X_val, X_test], [Y_train, Y_val, Y_test])
)
print("X shape:", X.shape)
print("Y shape:", Y.shape)

X shape: (101692, 13)
Y shape: (101692, 2)


In [None]:
# k-fold cross-validation setup
n_splits = 5
input_shape = (X.shape[1], 1)  # (features per sample, 1 channel)
# Y is one-hot encoded, so its column count is the number of classes.
# np.unique(Y) would only ever see the values 0 and 1 and therefore always
# report 2, regardless of how many classes there actually are.
output_units = Y.shape[1]

kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

accuracies = []
for fold_no, (train_index, test_index) in enumerate(kf.split(X), start=1):
    # Fold-local names: the X_train / X_test / Y_train / Y_test arrays that X
    # and Y were built from are left untouched, so earlier cells stay safe to
    # re-run. The trailing channel axis is added explicitly so each batch
    # matches `input_shape` instead of relying on implicit expansion.
    X_fold_train = X[train_index][..., np.newaxis]
    X_fold_test = X[test_index][..., np.newaxis]
    Y_fold_train, Y_fold_test = Y[train_index], Y[test_index]

    # Input pipelines: cache, shuffle and batch the training fold; only batch
    # the held-out fold.
    train_data = tf.data.Dataset.from_tensor_slices((X_fold_train, Y_fold_train))
    train_data = train_data.cache().shuffle(10000).batch(128).prefetch(tf.data.AUTOTUNE)
    test_data = tf.data.Dataset.from_tensor_slices((X_fold_test, Y_fold_test))
    test_data = test_data.batch(128).prefetch(tf.data.AUTOTUNE)

    # Build a fresh model for this fold and train it.
    model = build_advanced_model(input_shape, output_units)
    print(f'Training fold {fold_no}...')
    model.fit(train_data, epochs=20, validation_data=test_data)

    # Score the held-out fold through the same pipeline used for validation.
    _, accuracy = model.evaluate(test_data, verbose=0)
    accuracies.append(accuracy)
    print(f'Score for fold {fold_no}: Accuracy of {accuracy*100:.2f}%')


Training fold 1...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Score for fold 1: Accuracy of 95.47%
Training fold 2...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Score for fold 2: Accuracy of 95.52%
Training fold 3...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Score for fold 3: Accuracy of 95.27%
Training fold 4...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/

## 모델 평가

In [None]:
# Final evaluation: mean and standard deviation (NumPy default, ddof=0) of
# the per-fold accuracies.
fold_scores = np.asarray(accuracies)
average_accuracy = fold_scores.mean()
std_deviation = fold_scores.std()
print(f'Final Mean Accuracy: {average_accuracy*100:.2f}%')
print(f'Standard Deviation of Accuracies: {std_deviation*100:.2f}%')

# Model summary (of whichever model `model` currently refers to).
print("------[model_summary]------")
model.summary()

Final Mean Accuracy: 95.52%
Standard Deviation of Accuracies: 0.20%
------[model_summary]------
Model: "sequential_18"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_54 (Conv1D)          (None, 13, 64)            384       
                                                                 
 max_pooling1d_54 (MaxPooli  (None, 6, 64)             0         
 ng1D)                                                           
                                                                 
 dropout_90 (Dropout)        (None, 6, 64)             0         
                                                                 
 conv1d_55 (Conv1D)          (None, 6, 128)            41088     
                                                                 
 max_pooling1d_55 (MaxPooli  (None, 3, 128)            0         
 ng1D)                                                           
                       

## 모델 저장
Fake or Real Classification model(FoRC)

In [None]:
from google.colab import files

# Write the current `model` to an HDF5 file, then hand that file to the
# browser as a download.
saved_model_path = 'FoR_C_model.h5'
model.save(saved_model_path)
files.download(saved_model_path)

  saving_api.save_model(


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>