### 경로 설정

In [None]:
import os

# Directory to switch into (hard-coded absolute path; it must already exist,
# otherwise os.chdir below raises).
new_path = '/content/drive/MyDrive/youtube'

# Make it the working directory so the relative paths used by later cells
# (e.g. the "." root passed to FileLister) resolve inside it.
os.chdir(new_path)

# Print the resulting working directory as a sanity check.
print("현재 경로:", os.getcwd())

현재 경로: /content/drive/MyDrive/youtube


### 데이터 다운로드

In [None]:
# !curl data.yt8m.org/download.py | partition=2/video/validate mirror=asia python

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
>> Downloading http://asia.data.yt8m.org/2/video/validate/validateTx.tfrecord 100.4%Succesfully downloaded validate2813.tfrecord 1248421 bytes.
Successfully downloaded validate2813.tfrecord


Downloading: validate2097.tfrecord
>> Downloading http://asia.data.yt8m.org/2/video/validate/validateHZ.tfrecord 100.2%Succesfully downloaded validate2097.tfrecord 1333242 bytes.
Successfully downloaded validate2097.tfrecord


Downloading: validate2435.tfrecord
>> Downloading http://asia.data.yt8m.org/2/video/validate/validateNr.tfrecord 100.4%Succesfully downloaded validate2435.tfrecord 1281382 bytes.
Successfully downloaded validate2435.tfrecord


Downloading: validate1349.tfrecord
>> Downloading http://asia.data.yt8m.org/2/video/validate/validatevV.tfrecord 100.6%Succesfully downloaded validate1349.tfrecord 1319087 bytes.
Successfully downloaded validate1349.tfrecord


Downloading: validate1491.tfrecord
>> Downloading http://asia.data.yt8m.org/2

### 데이터 로드 및 전처리

In [None]:
from torchdata.datapipes.iter import FileLister, FileOpener
# List the files under "." (the current working directory) whose names match "*.tfrecord".
datapipe1 = FileLister(".", "*.tfrecord")
# Open every listed file in binary mode.
datapipe2 = FileOpener(datapipe1, mode="b")
# Chain the TFRecord loader on top; `dp` is the datapipe consumed by the rest of the notebook.
dp = datapipe2.load_from_tfrecord()

In [None]:
# Display the datapipe's repr to check which pipeline stage `dp` is.
dp

TFRecordLoaderIterDataPipe

In [None]:
# Display the concrete class of the datapipe object.
type(dp)

In [None]:
# Create an iterator over the datapipe so individual records can be pulled by hand.
dp_iter = iter(dp)


# Pull the first three records; each next() call raises StopIteration if the
# datapipe is already exhausted.
first_dp = next(dp_iter)
second_dp = next(dp_iter)
third_dp = next(dp_iter)


# Print the three records in full to inspect their structure
# (NOTE(review): this dumps whole tensors, so the output is long).
print("first_dp : ",first_dp)
print("second_dp : ",second_dp)
print("third_dp : ",third_dp)

first_dp :  {'mean_audio': tensor([ 0.9267,  0.4857,  0.8598,  0.3353, -0.1296,  0.0260,  0.7640,  0.5293,
        -1.4497,  0.8693, -0.7343,  0.6396,  0.5051,  0.8306, -0.6718, -0.7527,
        -0.4628, -0.3049, -1.0142,  0.4080,  0.5704, -0.6675, -1.1520, -1.1420,
        -0.9271,  1.2328, -0.3878, -0.4780, -0.3206,  0.2193,  0.4585,  0.0176,
        -0.9125, -0.4352,  0.1248, -0.7745,  0.6508,  0.8498, -0.7571,  0.4497,
         0.3109, -0.8420,  0.1525,  0.3093,  0.5593, -0.8665,  0.8311, -0.2873,
        -0.1012, -0.4112,  0.0880, -0.9859, -0.6037,  0.5276, -0.0525,  0.7347,
         0.6046,  0.6086, -1.1697, -0.1417, -0.5101, -0.5204, -0.3758, -0.6102,
        -1.1584, -0.6985, -0.7198,  0.7707,  0.0710, -0.3478, -0.5245,  0.6103,
        -0.0502,  0.1651,  0.1008,  0.6937, -0.0585, -0.5083, -0.2555,  0.6085,
         0.0367, -0.3302,  0.0837, -0.5465, -0.2298, -0.5779, -1.0859,  0.3489,
         0.4608, -0.1158,  0.1177, -0.1882, -0.6757,  0.1752,  0.5754,  0.6299,
        -0.53

In [None]:
# Multi-hot ("binary") encoding of a set of class indices
def binary_encoding(label, num_classes):
    """Encode a collection of class indices as a multi-hot vector.

    Args:
        label: Class indices that are present. May be a list/tuple/other
            iterable of ints, or a tensor of indices. An empty collection is
            allowed and produces an all-zero vector. Duplicate indices are
            harmless.
        num_classes: Length of the returned vector.

    Returns:
        A float tensor of shape ``(num_classes,)`` holding 1 at every index
        listed in ``label`` and 0 everywhere else.

    Raises:
        IndexError: If an index falls outside the valid range for a vector of
            length ``num_classes``.
    """
    binary_label = torch.zeros(num_classes)  # start with every class switched off
    if not torch.is_tensor(label):
        # Materialize generic iterables so they can be converted in one go.
        label = list(label)
    # One vectorized index assignment instead of a Python-level loop per label.
    indices = torch.as_tensor(label, dtype=torch.long, device=binary_label.device).reshape(-1)
    binary_label[indices] = 1
    return binary_label

Multi-Class Label: [1, 2, 19], Binary Encoded: tensor([0., 1., 1.,  ..., 0., 0., 0.])
Multi-Class Label: [1, 56, 892, 3001], Binary Encoded: tensor([0., 1., 0.,  ..., 0., 0., 0.])
Multi-Class Label: [5, 76, 774], Binary Encoded: tensor([0., 0., 0.,  ..., 0., 0., 0.])


In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
import torchdata


# DataLoader configuration
batch_size = 128
# collate_fn looks up the module-level `num_classes` every time it builds a batch,
# so the name has to exist before the DataLoader is iterated for the first time.
# Keep this equal to the class count used when the model is created.
num_classes = 3862

from torch.utils.data import DataLoader

def collate_fn(batch):
    """Assemble a list of per-video records into one batched dictionary.

    Args:
        batch: Sequence of records; each record is indexed with the keys
            'mean_audio', 'mean_rgb' and 'labels'.

    Returns:
        dict with three stacked tensors (the batch is the leading dimension):
            'mean_audio': stacked 'mean_audio' features,
            'mean_rgb':   stacked 'mean_rgb' features,
            'labels':     stacked multi-hot vectors from ``binary_encoding``.

    Notes:
        Reads the module-level ``num_classes`` at call time, so that name must
        be defined before the DataLoader is iterated.
        ``torch.as_tensor`` is used instead of ``torch.tensor`` because
        re-wrapping an existing tensor with ``torch.tensor`` copy-constructs it
        and emits a UserWarning; ``torch.stack`` already copies the data into a
        new tensor, so no explicit ``clone`` is required.
    """
    collated_batch = {
        'mean_audio': torch.stack([torch.as_tensor(data['mean_audio']).detach() for data in batch]),
        'mean_rgb': torch.stack([torch.as_tensor(data['mean_rgb']).detach() for data in batch]),
        'labels': torch.stack([binary_encoding(data['labels'], num_classes) for data in batch])
    }
    return collated_batch

# Build the DataLoader, handing it our collate_fn so that every batch is
# assembled by that function rather than by the default collation.
dataloader = DataLoader(
    dataset=dp,
    batch_size=batch_size,
    collate_fn=collate_fn,
    shuffle=True,
)




### 모델 생성 및 학습

In [None]:
import torch
import torch.nn as nn

class SimpleMLP(nn.Module):
    """Two-layer perceptron applied to concatenated audio and RGB features.

    The first linear layer expects ``input_size * 2`` input features, i.e. the
    combined width of ``mean_audio`` and ``mean_rgb`` after concatenation must
    equal twice ``input_size``.

    Args:
        input_size: Half of the concatenated feature width.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output scores (one per class).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        fused_width = 2 * input_size
        self.fc1 = nn.Linear(fused_width, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, mean_audio, mean_rgb):
        """Return the raw class scores for a batch.

        Both inputs are joined along dim 1 before passing through
        linear -> ReLU -> linear.
        """
        features = torch.cat([mean_audio, mean_rgb], dim=1)
        hidden = self.relu(self.fc1(features))
        return self.fc2(hidden)

In [None]:
# Walk the whole DataLoader once just to learn how many batches it yields.
cnt = 0
for cnt, batch in enumerate(dataloader, start=1):
    pass

print("배치 수 :", cnt)

  'mean_audio': torch.stack([torch.tensor(data['mean_audio']).clone().detach() for data in batch]),
  'mean_rgb': torch.stack([torch.tensor(data['mean_rgb']).clone().detach() for data in batch]),


배치 수 : 8691


In [None]:
# Model, loss function and optimizer setup
# SimpleMLP builds its first layer with input_size * 2 inputs, so input_size must be
# half of the concatenated (mean_audio + mean_rgb) feature width.
input_size = 576
hidden_size = 4  # width of the hidden layer
num_classes = 3862  # number of classes

model = SimpleMLP(input_size, hidden_size, num_classes)
# The targets are multi-hot vectors (several classes can be active for one video),
# so each class is treated as an independent binary decision. CrossEntropyLoss assumes
# a single class / a probability distribution per sample and is not appropriate here.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training
num_epochs = 10
for epoch in range(num_epochs):
    total_loss = 0
    num_batches = 0  # counted here so the average does not rely on a value from another cell
    for batch in dataloader:
        # Unpack features and multi-hot targets
        mean_audio, mean_rgb, labels = batch['mean_audio'], batch['mean_rgb'], batch['labels']

        # Forward pass + backward pass + parameter update
        optimizer.zero_grad()
        outputs = model(mean_audio, mean_rgb)  # raw logits, one per class
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # Report the mean batch loss for this epoch (guarding against an empty loader)
    print(f"Epoch {epoch+1}, Loss: {total_loss / max(num_batches, 1)}")

  'mean_audio': torch.stack([torch.tensor(data['mean_audio']).clone().detach() for data in batch]),
  'mean_rgb': torch.stack([torch.tensor(data['mean_rgb']).clone().detach() for data in batch]),


Epoch 1, Loss: 13.61286085267265
Epoch 2, Loss: 12.055239590192853
Epoch 3, Loss: 11.804041288703074
Epoch 4, Loss: 11.65611647034306
Epoch 5, Loss: 11.564100341985613
Epoch 6, Loss: 11.503540342030488
Epoch 7, Loss: 11.459672722768735
Epoch 8, Loss: 11.426715806608383
Epoch 9, Loss: 11.400802133437502
Epoch 10, Loss: 11.381092849637515
