In [1]:
import random

import torchvision.models as models
import torch.nn as nn
import pandas as pd
import torch
from torch.utils.data import Dataset
import cv2
import numpy as np
from torch.utils.data import DataLoader
import torch
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import torch
import torch.nn as nn
import torchvision.models as models

In [2]:

import torch.nn.init as init
def init_weights(m):
    if isinstance(m, torch.nn.Conv3d):
        init.xavier_uniform_(m.weight.data)
        if m.bias is not None:
            init.zeros_(m.bias.data)


In [3]:
from models import MobileNetV2

In [4]:
#model.apply(init_weights)

In [5]:
IMG_SIZE = 224
BATCH_SIZE = 64
EPOCHS = 100
SEQ_LENGTH = 30
NUM_WORKERS = 0
pos_weight = torch.tensor([1.5]).cuda()
name = "MobileNetV2len30size224w4V4"


In [6]:
model = MobileNetV2.get_model(sample_size = IMG_SIZE, num_classes =1, in_channels = SEQ_LENGTH)


In [7]:
model.load_state_dict(torch.load(f'{name}.pth', map_location=torch.device('cpu')))


<All keys matched successfully>

In [8]:
#model = nn.DataParallel(model).cuda()
model = model.to(device)

In [9]:
#model.load_state_dict(torch.load(f'{name}.pth'))

In [10]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
])


In [11]:
import torch
import torchvision.transforms as transforms
import random



In [12]:
import torch
import random
import torchvision.transforms as transforms

def augment_video(video_tensor):
    flip = random.random() > 0.5
    rotation_degree = random.uniform(-15, 15)
    grayscale = True  # Всегда делаем изображение черно-белым

    transform = transforms.Compose([
        transforms.RandomRotation(degrees=(rotation_degree, rotation_degree)),
        transforms.Grayscale(num_output_channels=1) if grayscale else transforms.Lambda(lambda x: x)
    ])

    augmented_video = transform(video_tensor)

    if flip:
        augmented_video = torch.flip(augmented_video, [-1])

    return augmented_video


In [13]:
class CustomDataset(Dataset):
    def __init__(self, root_path="", df_path="train.csv", img_size=224, SEQ_LENGTH=15, transform=None):
        self.SEQ_LENGTH = SEQ_LENGTH
        self.root_path = root_path
        self.img_size = img_size
        df = pd.read_csv(df_path)
        self.video_paths = df['path'].tolist()
        self.labels = df['label'].tolist()
        self.transform = transform
        unique_labels = sorted(set(self.labels))
        print(unique_labels)
        self.label_to_idx = {label: idx for idx, label in enumerate(unique_labels)}
        self.idx_to_label = {idx: label for label, idx in self.label_to_idx.items()}

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        cap = cv2.VideoCapture(self.video_paths[idx])
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        frames = []
        if total_frames >= self.SEQ_LENGTH:
            frame_indices = np.linspace(0, total_frames - 1, self.SEQ_LENGTH, dtype=int)
        else:
            frame_indices = np.tile(np.arange(total_frames), self.SEQ_LENGTH // total_frames + 1)[:self.SEQ_LENGTH]

        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                break

            #
            frame_sum = np.sum(frame, axis = 2)
            y_idx, x_idx = np.where(frame_sum>0)
            if len(y_idx) == 0 and len(x_idx) == 0:
                return self.__getitem__(idx-1)
            y_min, y_max = y_idx.min(), y_idx.max()
            x_min, x_max = x_idx.min(), x_idx.max()
            
            cropped_frame = frame[y_min:y_max, x_min:x_max]
            h, w, _ = cropped_frame.shape
            
            if h>w:
                pad = int((h-w)/2)
                padded_frame = np.pad(cropped_frame, ((0,0), (pad,pad), (0,0)))
            else:
                pad = int((w-h)/2)
                padded_frame = np.pad(cropped_frame, ((pad,pad), (0,0), (0,0)))
            
            
            frame_tensor = self.transform(padded_frame).unsqueeze(0)
            frames.append(frame_tensor)

        while len(frames) < self.SEQ_LENGTH:
            frames.append(torch.zeros_like(frames[0]))

        frames_tensor = torch.cat(frames, dim=0)
        frames_tensor = augment_video(frames_tensor)
        #frames_tensor = frames_tensor.squeeze(1)
        #frames_tensor = frames_tensor.permute(1,0,2)
        #print(frames_tensor.shape)
        return frames_tensor, self.label_to_idx[self.labels[idx]]

In [14]:
train_dataset = CustomDataset(df_path="train.csv", transform=transform, SEQ_LENGTH = SEQ_LENGTH)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
test_dataset = CustomDataset(df_path="test.csv", transform=transform,  SEQ_LENGTH = SEQ_LENGTH)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

['not_violence', 'violence']
['not_violence', 'violence']


In [15]:
data = train_dataset[0][0]

In [16]:
data.shape

torch.Size([30, 1, 224, 224])

In [17]:
#model = nn.DataParallel(model).cuda()

criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight).cuda()
optimizer = torch.optim.Adam(model.parameters())


In [None]:
import logging
import torch
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score, precision_score, recall_score
from torch.utils.tensorboard import SummaryWriter  # Добавлено

open(f'{name}.log', 'w').close()
logging.basicConfig(filename=f'{name}.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
best_valid_loss = 0

# Инициализация SummaryWriter
writer = SummaryWriter()

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for i, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.float().to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        labels = labels.unsqueeze(1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        predicted = (outputs > 0.5).float()  # Применяем пороговую функцию для бинарной классификации
        correct += (predicted == labels).sum().item()
        total += labels.size(0)
        if i % 10 == 9:
            precision = precision_score(labels.cpu().numpy(), predicted.cpu().numpy())
            recall = recall_score(labels.cpu().numpy(), predicted.cpu().numpy())
            f1 = f1_score(labels.cpu().numpy(), predicted.cpu().numpy())
            print('[%d, %5d] loss: %.7f accuracy: %.3f Precision: %.3f Recall: %.3f F1: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100, correct / total, precision, recall, f1))
            logging.info('[%d, %5d] loss: %.7f accuracy: %.3f Precision: %.3f Recall: %.3f F1: %.3f' %
                         (epoch + 1, i + 1, running_loss / 100, correct / total, precision, recall, f1))
            # Запись данных в TensorBoard
            writer.add_scalar('Training Loss', running_loss / 100, epoch * len(train_loader) + i)
            writer.add_scalar('Training Accuracy', correct / total, epoch * len(train_loader) + i)
            writer.add_scalar('Training Precision', precision, epoch * len(train_loader) + i)
            writer.add_scalar('Training Recall', recall, epoch * len(train_loader) + i)
            writer.add_scalar('Training F1', f1, epoch * len(train_loader) + i)
            running_loss = 0.0
            correct = 0
            total = 0

    model.eval()

    with torch.no_grad():
        all_predictions = []
        all_labels = []
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            predicted = (outputs > 0.5).float()  # Применяем пороговую функцию для бинарной классификации
            all_predictions.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

        accuracy = accuracy_score(all_labels, all_predictions)
        precision = precision_score(all_labels, all_predictions)
        recall = recall_score(all_labels, all_predictions)
        f1 = f1_score(all_labels, all_predictions)
        confusion = confusion_matrix(all_labels, all_predictions)

        # Запись данных в TensorBoard после валидации
        writer.add_scalar('Validation Accuracy', accuracy, epoch)
        writer.add_scalar('Validation Precision', precision, epoch)
        writer.add_scalar('Validation Recall', recall, epoch)
        writer.add_scalar('Validation F1', f1, epoch)

        # Вывод F1-меры, аккуратности и матрицы ошибок
        logging.info('Accuracy: %.3f' % accuracy)
        logging.info('Precision: %.3f' % precision)
        logging.info('Recall: %.3f' % recall)
        logging.info('F1 Score: %.3f' % f1)
        logging.info('Confusion Matrix:\n %s' % confusion)
        print('Accuracy: %.3f' % accuracy)
        print('Precision: %.3f' % precision)
        print('Recall: %.3f' % recall)
        print('F1 Score: %.3f' % f1)
        print('Confusion Matrix:\n', confusion)

        if f1 > best_valid_loss:
            best_valid_loss = f1
            torch.save(model.state_dict(), f'{name}.pth')
            logging.info('Model saved with accuracy: %.3f' % (accuracy))
            print('Model saved with accuracy: %.3f' % (accuracy))

# Закрытие SummaryWriter
writer.close()

print('Finished Training')


[1,    10] loss: 0.0730140 accuracy: 0.794 Precision: 0.500 Recall: 0.500 F1: 0.500


In [37]:
torch.save(model.state_dict(), f'last{23}.pth')
