In [1]:
import torch
import clip
import laion_clap
import numpy as np
import librosa
from msclap import CLAP
#from transformers import ClapModel, ClapProcessor

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sanity check: display whether PyTorch can see a CUDA device.
torch.cuda.is_available()

True

In [3]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


In [6]:
# Load the CLIP "ViT-B/32" model onto the selected device. model1 is used later
# for text encoding; preprocess1 is not referenced in the cells shown here.
model1, preprocess1 = clip.load("ViT-B/32", device=device)

In [None]:
# quantization
def int16_to_float32(x):
    """Divide ``x`` by 32767 and return the result cast to float32.

    ``x`` must be a NumPy array (the result of the division needs ``astype``).
    """
    scaled = x / 32767.0
    return scaled.astype(np.float32)


def float32_to_int16(x):
    """Quantize float values to int16.

    Values are first clipped to [-1, 1] so the scaled result cannot overflow
    the int16 range, then multiplied by 32767 and cast to ``np.int16``.
    This mirrors ``int16_to_float32``, which divides by the same constant.

    Args:
        x: NumPy array (or array-like) of floats.

    Returns:
        ``np.ndarray`` of dtype int16 with the same shape as ``x``.
    """
    x = np.clip(x, a_min=-1., a_max=1.)
    # The original stopped after clipping and implicitly returned None.
    return (x * 32767.).astype(np.int16)
    
# Build the non-fusion CLAP module with amodel='HTSAT-base' and load a local
# checkpoint file. (Alternative: construct CLAP_Module(enable_fusion=False) and
# call load_ckpt() with no arguments to download the default pretrained
# checkpoint.)
clap_ckpt_path = '../Models/Laion_CLAP/music_audioset_epoch_15_esc_90.14.patched.pt'
model2 = laion_clap.CLAP_Module(enable_fusion=False, amodel='HTSAT-base')
model2.load_ckpt(clap_ckpt_path)

**CLIP** & **CLAP** text-data processing -> save to .npy

In [None]:
import pandas as pd
import torch
import numpy as np
import pickle

def clap_process_and_add_embedding(csv_path, column_name, save_path):
    """Embed one text column of a CSV with CLIP and CLAP and save the result.

    Every value of ``column_name`` is encoded with the notebook-level CLIP model
    (``model1``) and CLAP model (``model2``). The per-row embeddings are stacked
    into one array of shape ``(2, n_rows, 512)`` (index 0 = CLIP, index 1 =
    CLAP) and written to ``save_path`` in NumPy ``.npy`` format, so it can be
    read back with ``np.load``.

    Rows whose encoding raises are stored as all-NaN in *both* arrays, the
    error is printed, and processing continues.

    Relies on the globals ``clip``, ``model1``, ``model2`` and ``device``.

    Args:
        csv_path: Path of the CSV file to read.
        column_name: Name of the column holding the texts to embed.
        save_path: Destination file for the stacked embedding array. The file
            is opened as given (no extension is appended).

    Returns:
        The loaded DataFrame with two added columns, ``"clip_text_embed"`` and
        ``"clap_text_embed"`` (one list of floats per row), or ``None`` if the
        run failed as a whole (the error is printed in that case).
    """
    embed_dim = 512  # width used for the NaN placeholder rows

    try:
        df = pd.read_csv(csv_path)
        texts = df[column_name].tolist()

        clip_rows = []
        clap_rows = []
        error_count = 0

        for text in texts:
            try:
                with torch.no_grad():
                    tokens = clip.tokenize(text).to(device)
                    clip_embed = model1.encode_text(tokens)

                    # CLAP is given a two-item batch (the text plus an empty
                    # string) and only the first row is kept, giving [1, 512].
                    # NOTE(review): presumably a workaround for single-item
                    # batches in get_text_embedding; confirm.
                    clap_pair = model2.get_text_embedding([text, ""], use_tensor=True)
                    clap_embed = clap_pair[0].unsqueeze(0)

                    # Move to host memory directly; no extra device hop is
                    # needed before .cpu().
                    clip_row = clip_embed.cpu().numpy()
                    clap_row = clap_embed.cpu().numpy()
            except Exception as e:
                # Keep row alignment with the CSV: a failed row becomes NaN in
                # both arrays, shaped (1, embed_dim) like a successful row.
                # NOTE(review): if failures come from over-length texts,
                # clip.tokenize(..., truncate=True) may be worth considering.
                clip_row = np.full((1, embed_dim), np.nan)
                clap_row = np.full((1, embed_dim), np.nan)
                error_count += 1
                print(f"エラーが発生しました: {e}")

            clip_rows.append(clip_row)
            clap_rows.append(clap_row)

        # One (n_rows, 512) matrix per model.
        embedding_array_clip = np.vstack(clip_rows)
        embedding_array_clap = np.vstack(clap_rows)

        df["clip_text_embed"] = embedding_array_clip.tolist()
        df["clap_text_embed"] = embedding_array_clap.tolist()

        # Write a real .npy array instead of a pickled tuple so the file matches
        # its extension and does not require unpickling to read. Saving through
        # the open handle keeps save_path exactly as given.
        stacked = np.stack([embedding_array_clip, embedding_array_clap])
        with open(save_path, 'wb') as f:
            np.save(f, stacked)

        print("結果が保存されました。")
        print(f"ERROR数：{error_count}")

        # Return the augmented frame so the added columns are not thrown away.
        return df

    except Exception as e:
        print(f"エラーが発生しました: {e}")
        return None

# Input CSV, the text column to embed, and the output path for the embeddings.
csv_file_path = "../Datasets/archive/musiccaps-processed.csv"
target_column_name = "aspect_list"
numpy_save_path = "../Datasets/archive/text_embeds.npy"

clap_process_and_add_embedding(
    csv_path=csv_file_path,
    column_name=target_column_name,
    save_path=numpy_save_path,
)


In [4]:
import torch
import numpy as np

# Load the saved embeddings back from disk.
numpy_save_path = "../Datasets/archive/text_embeds.npy"
numpy_array = np.load(numpy_save_path, allow_pickle=True)

# Convert to a PyTorch tensor and report its shape.
tensor = torch.tensor(numpy_array)
print(tensor.shape)

# Show row 6 from each of the two leading slices.
print("最初の要素1:", tensor[0, 6])
print("最初の要素2:", tensor[1, 6])

torch.Size([2, 5521, 512])
最初の要素1: tensor([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, n

In [14]:
# Look at the first row of the first slice, then the same row with a leading
# batch dimension added.
first_row = tensor[0, 0]
first_row_batched = first_row.unsqueeze(0)
print(first_row.shape)
print(first_row_batched)
print(first_row_batched.shape)

torch.Size([512])
tensor([[ 4.7363e-02, -9.1614e-02,  3.0347e-01, -6.8054e-02, -8.0322e-02,
          5.9052e-02,  5.0934e-02,  3.8330e-02,  2.1802e-01, -2.1301e-01,
          2.6947e-02,  1.5976e-02, -4.8370e-02,  3.5254e-01, -1.5356e-01,
          5.7404e-02, -9.7021e-01, -2.4548e-01, -1.2152e-01, -4.8218e-01,
         -3.1543e-01, -5.6610e-03, -1.6345e-01,  4.3121e-02, -1.0571e-01,
          1.5137e-01,  6.2256e-02,  1.0651e-01, -1.2924e-02,  4.2419e-02,
          3.3905e-02,  4.9530e-02,  3.0426e-02, -3.6279e-01,  9.1003e-02,
         -2.1863e-01,  3.6285e-02, -5.9619e-01,  1.5405e-01, -2.0837e-01,
         -4.3750e-01, -3.8257e-01,  1.1530e-01, -1.4148e-01, -2.1545e-01,
          5.7422e-01,  2.5293e-01, -1.9836e-01,  3.0127e-01,  1.6260e-01,
         -5.9937e-02, -3.1714e-01, -2.0447e-01,  1.2671e-01,  2.2388e-01,
          2.0459e-01, -1.4868e-01, -2.7435e-02,  4.6606e-01, -2.6099e-01,
          4.6045e-01, -1.5247e-01, -2.9877e-02, -1.4368e-01,  2.5781e-01,
          1.6077e-01

**Reshaping Data**

In [5]:
import numpy as np
import torch

# Read the saved embeddings file.
data = np.load('../Datasets/archive/text_embeds.npy', allow_pickle=True)

# Convert to a tensor.
tensor_data = torch.tensor(data)

# Swap the first two axes so each row carries both embeddings.
reshaped_tensor = tensor_data.permute(1, 0, 2)  # reorder to [5521, 2, 512]

print(reshaped_tensor.shape)


torch.Size([5521, 2, 512])


**Save Reshaped Data**

In [13]:
# Convert the PyTorch tensor to a NumPy array.
reshaped_array = reshaped_tensor.numpy()

# Save it as an .npy file.
np.save('../Datasets/archive/reshaped_text_embeds.npy', reshaped_array)

**Read Saved Reshaped Data**

In [16]:
# Read the reshaped embeddings. This file is written by np.save from the NumPy
# view of a tensor (a plain numeric array), so pickle support is not needed and
# is left at its safe default (off).
redata = np.load('../Datasets/archive/reshaped_text_embeds.npy')

# Convert to a tensor, then show the array's shape and its first embedding.
tensor_data = torch.tensor(redata)
print(redata.shape)
print(redata[0][0])

(5521, 2, 512)
[ 4.73632812e-02 -9.16137695e-02  3.03466797e-01 -6.80541992e-02
 -8.03222656e-02  5.90515137e-02  5.09338379e-02  3.83300781e-02
  2.18017578e-01 -2.13012695e-01  2.69470215e-02  1.59759521e-02
 -4.83703613e-02  3.52539062e-01 -1.53564453e-01  5.74035645e-02
 -9.70214844e-01 -2.45483398e-01 -1.21520996e-01 -4.82177734e-01
 -3.15429688e-01 -5.66101074e-03 -1.63452148e-01  4.31213379e-02
 -1.05712891e-01  1.51367188e-01  6.22558594e-02  1.06506348e-01
 -1.29241943e-02  4.24194336e-02  3.39050293e-02  4.95300293e-02
  3.04260254e-02 -3.62792969e-01  9.10034180e-02 -2.18627930e-01
  3.62854004e-02 -5.96191406e-01  1.54052734e-01 -2.08374023e-01
 -4.37500000e-01 -3.82568359e-01  1.15295410e-01 -1.41479492e-01
 -2.15454102e-01  5.74218750e-01  2.52929688e-01 -1.98364258e-01
  3.01269531e-01  1.62597656e-01 -5.99365234e-02 -3.17138672e-01
 -2.04467773e-01  1.26708984e-01  2.23876953e-01  2.04589844e-01
 -1.48681641e-01 -2.74353027e-02  4.66064453e-01 -2.60986328e-01
  4.604492