In [48]:
import pandas as pd
import os
import sys
from sklearn.decomposition import PCA
import librosa
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.manifold import TSNE
import torchaudio

def pca_plot(dimension, feature_matrix, class_num, width, height):
    # PCA 적용
    n_components = dimension
    pca = PCA(n_components=n_components)
    pca_result = pca.fit_transform(feature_matrix)
    print("Explained Variance Ratio:", pca.explained_variance_ratio_)

    cmap = px.colors.sequential.Viridis  # Viridis 색상 팔레트 사용

    if dimension == 2:
        # DataFrame 생성
        df = pd.DataFrame({'Feature1': pca_result[:, 0], 'Feature2': pca_result[:, 1], 'Class': label_list})

        # 'Class' 열의 값을 리스트로 변환
        df['Class'] = df['Class'].apply(lambda x: x.iloc[0])

        # 2D 산점도 그리기
        fig = px.scatter(df, x='Feature1', y='Feature2', size_max=1, color='Class',
                        title='PCA 2D Scatter Plot', labels={'Feature1': 'Feature 1', 'Feature2': 'Feature 2', 'Class': 'Class'})

        # 레전드 위치 조정
        fig.update_layout(legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1, itemsizing='constant'))

    elif dimension == 3:
        # DataFrame 생성
        df = pd.DataFrame({'Feature1': pca_result[:, 0], 'Feature2': pca_result[:, 1], 'Feature3': pca_result[:, 2], 'Class': label_list})
        df['Class'] = df['Class'].apply(lambda x: x.iloc[0])

        # 3D 산점도 그리기
        fig = px.scatter_3d(df, x='Feature1', y='Feature2', z='Feature3', color='Class',
                            title='PCA 3D Scatter Plot',
                            labels={'Feature1': 'Feature 1', 'Feature2': 'Feature 2', 'Feature3': 'Feature 3', 'Class': 'Class'})

        # 레전드 위치 조정
        fig.update_layout(legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1))

    else:
        print("Wrong dimension")
        return None

    # iplot 사용
    fig.update_layout(width=width, height=height)
    # fig.savefig('pca-%dd.png'%(dimension))
    fig.show()


def tsne_plot(feature_matrix, class_num, width=1000, height=800):
    # t-SNE 적용
    n_components = 2
    tsne = TSNE(n_components=n_components, random_state=42)
    tsne_result = tsne.fit_transform(feature_matrix)
    
    cmap = px.colors.sequential.Viridis  # Viridis 색상 팔레트 사용

    
    # DataFrame 생성
    df = pd.DataFrame({'Feature1': tsne_result[:, 0], 'Feature2': tsne_result[:, 1], 'Class': label_list})

    # 'Class' 열의 값을 리스트로 변환
    df['Class'] = df['Class'].apply(lambda x: x.iloc[0])

    # 2D 산점도 그리기
    fig = px.scatter(df, x='Feature1', y='Feature2', size_max=5, color='Class',
                    title='t-SNE 2D Scatter Plot', labels={'Feature1': 'Feature 1', 'Feature2': 'Feature 2', 'Class': 'Class'})

    # 레전드 위치 조정
    fig.update_layout(legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1, itemsizing='constant'))

    fig.update_layout(width=width, height=height)
    fig.show()


In [49]:
sys.path.append('/home/elicer/project/src/ml')
from transform import Wav2Vec2Extractor

feature_extractor = Wav2Vec2Extractor(**{'model_name': 'Rajaram1996/Hubert_emotion',
                                        'sample_rate': 16000,
                                        'audio_max_ms': 4000}).train_transform
            
def audio_to_feature_vector(audio_path):
    feature_vector = feature_extractor(torchaudio.load(audio_path)[0])
    return feature_vector

label_csv = pd.read_csv('/home/elicer/project/월간 데이콘 음성 감정 인식 AI 경진대회/train.csv')
audio_directory = '/home/elicer/project/월간 데이콘 음성 감정 인식 AI 경진대회/train/'

all_features = []
label_list = []

for filename in label_csv['file_path']:
    audio_path = os.path.join(audio_directory, filename)
    features = audio_to_feature_vector(audio_path)
    all_features.append(features)
    label_list.append(label_csv[label_csv['file_path']==filename]['label'])


# 특징 매트릭스 생성
feature_matrix = np.vstack(all_features)
print('feature_matrix.shape:', feature_matrix.shape)

class_num = 6
pca_plot(2, feature_matrix, class_num, 1200, 800)  # 2차원 산점도
pca_plot(3, feature_matrix, class_num, 1000, 1000)  # 3차원 산점도
tsne_plot(feature_matrix, class_num, width=700, height=700) # t-sne 2차원

feature_matrix.shape: (5001, 4000)
Explained Variance Ratio: [0.03362004 0.03155678]


Explained Variance Ratio: [0.03362004 0.03155675 0.02344172]


In [50]:
from torchaudio import transforms

spec = transforms.MelSpectrogram(
            sample_rate=16000,
            n_mels = 40, 
            n_fft = 512, 
            win_length = 400,
            hop_length = 160,
            f_min = 25,
            f_max = 7500,
            pad = 0,
        )

def audio_to_feature_vector(audio_path):
    x = spec(torchaudio.load(audio_path)[0])
    x = transforms.AmplitudeToDB(top_db=80)(x)
    return x[0].reshape(-1)
from torch.nn.utils.rnn import pad_sequence

label_csv = pd.read_csv('/home/elicer/project/월간 데이콘 음성 감정 인식 AI 경진대회/train.csv')
audio_directory = '/home/elicer/project/월간 데이콘 음성 감정 인식 AI 경진대회/train/'

all_features = []
label_list = []

for filename in label_csv['file_path']:
    audio_path = os.path.join(audio_directory, filename)
    features = audio_to_feature_vector(audio_path)
    all_features.append(features)
    label_list.append(label_csv[label_csv['file_path']==filename]['label'])
# 특징 매트릭스 생성
all_features = pad_sequence(all_features).mT
feature_matrix = np.vstack(all_features)
print('feature_matrix.shape:', feature_matrix.shape)

class_num = 6
pca_plot(2, feature_matrix, class_num, 1200, 800)  # 2차원 산점도
pca_plot(3, feature_matrix, class_num, 1000, 1000)  # 3차원 산점도
tsne_plot(feature_matrix, class_num, width=700, height=700) # t-sne 2차원

feature_matrix.shape: (5001, 19520)
Explained Variance Ratio: [0.21835695 0.10247604]


Explained Variance Ratio: [0.21835694 0.10247592 0.05756751]
