In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import os
import cv2
import json
import random
import numpy as np
import joblib
from datetime import timedelta

import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [2]:
class CustomVideoDataset(Dataset):
    def __init__(self, base_path,  video_names, violation_type, annotation_file, fragment_length=50, overlap=100, transform=None, ):
        self.base_path = base_path
        self.video_names = video_names
        self.transform = transform
        self.annotation_file = annotation_file
        self.violation_type = violation_type
        self.target_resolution = (360, 360)
        self.fragment_length = fragment_length
        self.overlap = overlap
        sum_frame = self.sum_frames_in_folder()
        self.len = sum_frame // self.fragment_length + 1

    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        return next(self.get_fragment(idx))

    def get_fragment(self, idx):
        for video_name in self.video_names:
            with open(self.annotation_file, 'r', encoding='utf-8') as f:
                annotations = json.load(f)
            cap = cv2.VideoCapture(os.path.join(self.base_path,video_name))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_rate = cap.get(cv2.CAP_PROP_FPS)


            intervals = []
            for annotation in annotations:
                if annotation["name"] == video_name and self.violation_type == annotation['type']:
                    intervals.append((self.time_to_seconds(annotation["start"]), self.time_to_seconds(annotation['end'])))
            if len(intervals) == 0:
                continue
            labels = np.zeros(total_frames)
            for start_time, end_time in intervals:
                start_frame = int(frame_rate * start_time)
                end_frame = int(frame_rate * end_time)
                labels[start_frame:end_frame+1] = 1

            #if start_frame - 20 >= 0:
                #cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame-20)
            #else:
                #cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            frames = []
            frame_count = 0
            for i in range(total_frames):
                frame_count += 1
                ret, frame = cap.read()
                if not ret:
                    break
                if self.transform:
                    frame = self.transform(frame)
                frame = cv2.resize(frame, self.target_resolution)
                frame = torch.from_numpy(frame).float()
                frames.append(frame)

                if frame_count >= self.fragment_length:
                    video_fragment = torch.stack(frames)
                    yield video_fragment, torch.from_numpy(labels[i+1-frame_count:i+1]).float()
                    frames = []
                    frame_count = 0

            cap.release()

    def count_frames(self, video_path):
        cap = cv2.VideoCapture(video_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        return total_frames

    def sum_frames_in_folder(self):
        total_frames = 0
        for file in self.video_names:
            video_path = os.path.join(self.base_path, file)
            total_frames += self.count_frames(video_path)
        return total_frames

    def time_to_seconds(self, time_str):
        """Преобразует строку формата MM:SS в количество секунд"""
        minutes, seconds = map(int, time_str.split(':'))
        return minutes * 60 + seconds

In [3]:
# Путь к папке с видео
video_dir = '../data/train'

# Путь к файлу разметки
annotation_file = '../data/train/разметка.json'
output_path = "../data/подлезание/"

# Путь к папке, куда будут сохранены последовательности
violation_type = "Подлезание под вагоны стоящего состава"
target_resolution = (640, 480)

# Подготовка данных
#X, y = prepare_dataset(video_dir, annotation_file, violation_type, target_resolution)

In [4]:
vide_names = os.listdir(video_dir)

# Задаем долю для тестовой выборки (например, 20%)
test_ratio = 0.1

# Вычисляем количество элементов для тестовой выборки
num_test = int(len(vide_names) * test_ratio)

# Рандомно выбираем элементы для тестовой выборки
test_set = random.sample(vide_names, num_test)

# Формируем обучающую выборку
train_set = [x for x in vide_names if x not in test_set]

In [5]:
train_dataset = CustomVideoDataset(video_dir, train_set, violation_type, annotation_file)
val_dataset = CustomVideoDataset(video_dir, test_set, violation_type, annotation_file)

In [6]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

In [7]:
"""
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
"""

'\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)\ntrain_dataset = CustomDataset(X_train, y_train)\ntest_dataset = CustomDataset(X_test, y_test)\ntrain_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)\nval_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)\n'

In [8]:
data, lable = next(iter(train_loader))

In [9]:
data.shape

torch.Size([1, 50, 360, 360, 3])

In [10]:
lable.shape

torch.Size([1, 50])

In [11]:
class CNNLSTM(nn.Module):
    def __init__(self, num_classes):
        super(CNNLSTM, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 24, 3),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(24, 48, 3)
            )
        #self.cnn = nn.Sequential(*list(self.cnn.children())[:-2])  # Убираем полносвязный слой

        self.lstm_input_size = self._get_conv_output_size((3, 360, 360))
        self.lstm = nn.LSTM(self.lstm_input_size, 32, batch_first=True)
        self.fc = nn.Linear(32, num_classes)

    def _get_conv_output_size(self, shape):
        o = self.cnn(torch.zeros(1, *shape))
        return int(torch.prod(torch.tensor(o.size())))

    def forward(self, x):
        batch_size, seq_length, h, w, c = x.size()
        x = x.view(batch_size * seq_length, c, h, w)
        x = self.cnn(x)
        x = x.view(batch_size, seq_length, -1)
        x, _ = self.lstm(x)
        x = self.fc(x)
        return x

model = CNNLSTM(num_classes=1)  # Одна выходная нейрона для вероятности нарушения


In [12]:
class EarlyStopping:
    def __init__(self, patience=10, verbose=False, delta=0, path='checkpoint.pt'):
        """
        Args:
            patience (int): Сколько эпох ждать после последнего улучшения метрики
            verbose (bool): Выводить сообщения о каждой проверке (по умолчанию False)
            delta (float): Минимальное изменение для улучшения метрики
            path (str): Путь для сохранения модели
        """
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Сохранение модели, если валидационная потеря уменьшилась'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss


In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

Using device: cuda


In [14]:
# Инициализация модели, функции потерь и оптимизатора
model.to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Параметры ранней остановки
early_stopping = EarlyStopping(patience=10, verbose=True)

# Цикл обучения
# Lists to store metrics for plotting
accuracy_list = []
precision_list = []
recall_list = []
f1_list = []

num_epochs = 1000
for epoch in range(num_epochs):
    # Training
    model.train()
    size = len(train_dataset)
    for batch, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch % 10 == 0:
            loss, current = loss.item(), batch * len(data)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

    # Validation
    model.eval()
    val_loss = 0
    predictions = []
    targets = []
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            val_loss += loss.item()
            predictions.extend(output.argmax(dim=1).tolist())
            targets.extend(target.tolist())

    val_loss = val_loss / len(val_loader)

    # Calculate metrics
    accuracy = accuracy_score(targets, predictions)
    precision = precision_score(targets, predictions)
    recall = recall_score(targets, predictions)
    f1 = f1_score(targets, predictions)

    # Append metrics to lists for plotting
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)

    print(f'Epoch {epoch+1}, Validation Loss: {val_loss:.6f}, Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')

    # Plotting
    plt.figure()
    plt.plot(range(1, epoch+2), accuracy_list, label='Accuracy')
    plt.plot(range(1, epoch+2), precision_list, label='Precision')
    plt.plot(range(1, epoch+2), recall_list, label='Recall')
    plt.plot(range(1, epoch+2), f1_list, label='F1 Score')
    plt.xlabel('Epoch')
    plt.ylabel('Metrics')
    plt.legend()
    plt.show()

    # Early stopping condition
    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping")
        break

# Load the best model weights
model.load_state_dict(torch.load('checkpoint.pt'))

  return F.mse_loss(input, target, reduction=self.reduction)


loss: 0.285660  [    0/ 4527]
loss: 0.001165  [   10/ 4527]
loss: 0.000233  [   20/ 4527]
loss: 0.000286  [   30/ 4527]
loss: 0.000321  [   40/ 4527]
loss: 0.000307  [   50/ 4527]
loss: 0.000290  [   60/ 4527]
loss: 0.000368  [   70/ 4527]
loss: 0.000680  [   80/ 4527]
loss: 0.007860  [   90/ 4527]
loss: 0.000161  [  100/ 4527]
loss: 0.000012  [  110/ 4527]
loss: 0.000096  [  120/ 4527]
loss: 0.000043  [  130/ 4527]
loss: 0.000022  [  140/ 4527]
loss: 0.000015  [  150/ 4527]
loss: 0.000014  [  160/ 4527]
loss: 0.000014  [  170/ 4527]
loss: 0.000014  [  180/ 4527]
loss: 0.000014  [  190/ 4527]
loss: 0.000014  [  200/ 4527]
loss: 0.000014  [  210/ 4527]
loss: 0.000014  [  220/ 4527]
loss: 0.000014  [  230/ 4527]
loss: 0.000014  [  240/ 4527]
loss: 0.000014  [  250/ 4527]
loss: 0.000014  [  260/ 4527]
loss: 0.000014  [  270/ 4527]
loss: 0.000013  [  280/ 4527]
loss: 0.000013  [  290/ 4527]
loss: 0.000013  [  300/ 4527]
loss: 0.000013  [  310/ 4527]
loss: 0.000013  [  320/ 4527]
loss: 0.00

KeyboardInterrupt: 

In [None]:
model.load_state_dict(torch.load('checkpoint.pt'))

In [None]:
import numpy as np

def get_timestamps(predictions, threshold=0.5, frame_rate=30, window_size=1):
    """
    Получение таймкодов нарушений на основе предсказаний модели.

    predictions: np.array, форма [seq_length]
        Предсказанные вероятности наличия нарушения для каждого окна кадров.
    threshold: float
        Пороговое значение для определения нарушения.
    frame_rate: int
        Количество кадров в секунду.
    window_size: int
        Количество кадров в одном окне.

    Возвращает список кортежей (start_time, end_time) для каждого обнаруженного нарушения.
    """
    timestamps = []
    in_violation = False
    start_time = None

    for i, prob in enumerate(predictions):
        if prob >= threshold and not in_violation:
            in_violation = True
            start_time = i * window_size / frame_rate
        elif prob < threshold and in_violation:
            in_violation = False
            end_time = (i + 1) * window_size / frame_rate
            timestamps.append((start_time, end_time))

    # Если нарушение продолжается до конца видео
    if in_violation:
        end_time = len(predictions) * window_size / frame_rate
        timestamps.append((start_time, end_time))

    return timestamps

# Пример предсказаний (для упрощения, генерируем случайные данные)
predictions = np.random.rand(10)  # 10 окон кадров
predictions[2:4] = 0.7  # Пример нарушения
predictions[7:8] = 0.8  # Пример нарушения

# Получение таймкодов нарушений
timestamps = get_timestamps(predictions, threshold=0.5, frame_rate=30, window_size=1)
print(timestamps)  # Ожидаемый выход: [(start_time, end_time), ...]
