In [None]:
import os
from ultralytics import YOLO

def normalize_annotation(annotation_path):
    """Clamp every numeric value in an annotation file to the range [0, 1].

    Each non-blank line is read as ``<class_id> <v1> <v2> ...``. The class id
    is copied through unchanged; every following value is parsed as a float
    and replaced by 0 when negative or by 1 when greater than 1. The file is
    then overwritten in place with the cleaned lines, joined by newlines and
    without a trailing newline.

    Blank or whitespace-only lines are dropped instead of raising
    ``IndexError``.

    Args:
        annotation_path: Path of the annotation file to rewrite in place.

    Raises:
        ValueError: If a value after the class id cannot be parsed as a float.
    """
    with open(annotation_path, 'r') as file:
        lines = file.readlines()

    normalized_lines = []
    for line in lines:
        parts = line.split()
        if not parts:
            # Blank line: there is no class id to keep, so drop it.
            continue
        class_id = parts[0]

        # Out-of-range values become the int literals 0 / 1 (written as "0"
        # and "1"); in-range values keep their float representation.
        coordinates = [
            0 if value < 0 else 1 if value > 1 else value
            for value in map(float, parts[1:])
        ]

        normalized_lines.append(f"{class_id} " + " ".join(map(str, coordinates)))

    with open(annotation_path, 'w') as file:
        file.write("\n".join(normalized_lines))

def normalize_annotations_in_directory(directory):
    """Recursively normalize every ``.txt`` file found under ``directory``.

    Args:
        directory: Root folder to walk; each ``.txt`` file in it or in any
            nested folder is passed to ``normalize_annotation``.
    """
    for current_dir, _subdirs, filenames in os.walk(directory):
        txt_names = (name for name in filenames if name.endswith('.txt'))
        for name in txt_names:
            normalize_annotation(os.path.join(current_dir, name))

# Normalize the annotations of every dataset split
for labels_dir in (
    '/home/jupyter/work/resources/dataset_rr/train/labels',
    '/home/jupyter/work/resources/dataset_rr/valid/labels',
    '/home/jupyter/work/resources/dataset_rr/test/labels',
):
    normalize_annotations_in_directory(labels_dir)

# Load a model with the YOLOv8 architecture
model = YOLO('yolov8.yaml')

# Train the model
model.train(
    data='data.yaml',
    epochs=100,
    imgsz=640,
    batch=32,
    workers=8,
)


In [None]:
import os
from ultralytics import YOLO

# Path to the model saved after the first 20 epochs
model_path = 'runs/detect/train2/weights/best.pt'  # Make sure this path is correct

# Load the model from the saved state
model = YOLO(model_path)

# Train the model
model.train(
    data='data.yaml',
    epochs=40,
    imgsz=640,
    batch=32,
    workers=8,
)

In [None]:
import os
from ultralytics import YOLO

def normalize_annotation(annotation_path):
    """Clamp every numeric value in an annotation file to the range [0, 1].

    Each non-blank line is read as ``<class_id> <v1> <v2> ...``. The class id
    is copied through unchanged; every following value is parsed as a float
    and replaced by 0 when negative or by 1 when greater than 1. The file is
    then overwritten in place with the cleaned lines, joined by newlines and
    without a trailing newline.

    Blank or whitespace-only lines are dropped instead of raising
    ``IndexError``.

    Args:
        annotation_path: Path of the annotation file to rewrite in place.

    Raises:
        ValueError: If a value after the class id cannot be parsed as a float.
    """
    with open(annotation_path, 'r') as file:
        lines = file.readlines()

    normalized_lines = []
    for line in lines:
        parts = line.split()
        if not parts:
            # Blank line: there is no class id to keep, so drop it.
            continue
        class_id = parts[0]

        # Out-of-range values become the int literals 0 / 1 (written as "0"
        # and "1"); in-range values keep their float representation.
        coordinates = [
            0 if value < 0 else 1 if value > 1 else value
            for value in map(float, parts[1:])
        ]

        normalized_lines.append(f"{class_id} " + " ".join(map(str, coordinates)))

    with open(annotation_path, 'w') as file:
        file.write("\n".join(normalized_lines))

def normalize_annotations_in_directory(directory):
    """Recursively normalize every ``.txt`` file found under ``directory``.

    Args:
        directory: Root folder to walk; each ``.txt`` file in it or in any
            nested folder is passed to ``normalize_annotation``.
    """
    for current_dir, _subdirs, filenames in os.walk(directory):
        txt_names = (name for name in filenames if name.endswith('.txt'))
        for name in txt_names:
            normalize_annotation(os.path.join(current_dir, name))

# Normalize the annotations of every dataset split
for labels_dir in (
    '/home/jupyter/work/resources/dataset_rr/train/labels',
    '/home/jupyter/work/resources/dataset_rr/valid/labels',
    '/home/jupyter/work/resources/dataset_rr/test/labels',
):
    normalize_annotations_in_directory(labels_dir)

# Path to the pretrained YOLOv8 model
pretrained_model_path = 'yolov8n.pt'  # Replace with the path to your pretrained model

# Load the pretrained model
model = YOLO(pretrained_model_path)

# Train the model
model.train(
    data='data.yaml',
    epochs=100,  # Total number of epochs
    imgsz=640,   # Image size
    batch=64,    # Batch size; raise it to the largest value your GPU handles without running out of memory
    workers=8,   # Number of worker threads used for data loading
)


In [None]:
%pip install datasets==2.3.2 fsspec==2021.7.0 s3fs==2021.7.0


In [None]:
import os
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch
import json
from sklearn.model_selection import train_test_split

# Read labelled text samples from a directory of per-label sub-folders
def read_data(data_dir):
    """Load text samples laid out as ``data_dir/<label>/*.txt``.

    Every sub-directory of ``data_dir`` is treated as one label and receives
    the next free integer id. For each ``.txt`` file inside it, the first
    line is discarded (presumably a record-count header, judging by how the
    notebook treats it elsewhere) and the remaining lines are concatenated
    and stripped to form the sample text.

    Directory and file names are visited in sorted order, so label ids and
    row order are the same on every run; ``os.listdir`` alone returns entries
    in arbitrary order. An empty file yields an empty-string sample instead
    of raising ``IndexError``.

    Args:
        data_dir: Root directory containing one sub-directory per label.

    Returns:
        A tuple ``(frame, label_to_id)`` where ``frame`` is a DataFrame with
        columns ``'text'`` and ``'label'`` (the integer id) and
        ``label_to_id`` maps each label name to its id.
    """
    data = []
    label_to_id = {}
    for label in sorted(os.listdir(data_dir)):
        label_dir = os.path.join(data_dir, label)
        if not os.path.isdir(label_dir):
            continue
        # Ids are assigned in visiting order: 0, 1, 2, ...
        label_id = label_to_id.setdefault(label, len(label_to_id))
        for file in sorted(os.listdir(label_dir)):
            if not file.endswith('.txt'):
                continue
            with open(os.path.join(label_dir, file), 'r') as f:
                lines = f.readlines()
            # Slicing (rather than indexing lines[0]) keeps empty files safe.
            data.append((''.join(lines[1:]).strip(), label_id))
    return pd.DataFrame(data, columns=['text', 'label']), label_to_id

# Read the data
train_data, train_label_to_id = read_data('/home/jupyter/work/resources/dataset/train/')
# NOTE(review): valid_data / valid_label_to_id are loaded here but not used
# anywhere below; the validation set is carved out of train_data instead.
valid_data, valid_label_to_id = read_data('/home/jupyter/work/resources/dataset/validate')

# Save the label dictionary
os.makedirs('/home/jupyter/work/resources/results', exist_ok=True)
with open('/home/jupyter/work/resources/results/label_to_id.json', 'w') as f:
    json.dump(train_label_to_id, f)

# Split the data into training and validation parts.
# A fixed random_state makes the split (and therefore the reported metrics)
# reproducible across kernel restarts.
train_texts, valid_texts, train_labels, valid_labels = train_test_split(
    train_data['text'], train_data['label'], test_size=0.1, random_state=42
)

# Tokenize the data
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, return_tensors="pt")
valid_encodings = tokenizer(list(valid_texts), truncation=True, padding=True, return_tensors="pt")

# PyTorch dataset pairing encoded inputs with their labels
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset built from an encodings mapping and a label sequence.

    ``encodings`` is a mapping whose values are indexed per sample;
    ``labels`` is a sequence indexed the same way. Each item is a dict with
    one entry per encoding key plus a ``'labels'`` tensor.
    """

    def __init__(self, encodings, labels):
        self.labels = labels
        self.encodings = encodings

    def __len__(self):
        # The number of samples is taken from the label sequence.
        return len(self.labels)

    def __getitem__(self, idx):
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = values[idx]
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Wrap the tokenized texts and their labels in datasets for the Trainer
train_dataset = CustomDataset(train_encodings, list(train_labels))
valid_dataset = CustomDataset(valid_encodings, list(valid_labels))

# Load the BERT model; the classification head gets one output per known label
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(train_label_to_id))

# Configure the training parameters
training_args = TrainingArguments(
    output_dir='/home/jupyter/work/resources/results',
    evaluation_strategy="epoch",  # Author's note: switch evaluation_strategy to eval_strategy (presumably for newer transformers releases; confirm against the installed version)
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",  # Save the model at every epoch
)

# Create the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
)

# Train the model
trainer.train()

# Save the model and tokenizer into the same results directory
model.save_pretrained('/home/jupyter/work/resources/results')
tokenizer.save_pretrained('/home/jupyter/work/resources/results')


In [None]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import json

# Absolute path of the directory holding the saved model, tokenizer and label map
RESULTS_DIR = '/home/jupyter/work/resources/results'

# Load the tokenizer and the model
tokenizer = BertTokenizer.from_pretrained(RESULTS_DIR)
model = BertForSequenceClassification.from_pretrained(RESULTS_DIR)

# Load the label dictionary
with open(f'{RESULTS_DIR}/label_to_id.json', 'r') as f:
    label_to_id = json.load(f)

# Inverse dictionary used to turn predicted ids back into label names
id_to_label = {label_id: label_name for label_name, label_id in label_to_id.items()}

# Predict class probabilities for an input text
def predict(text):
    """Return the softmax class probabilities the model assigns to ``text``.

    The text is tokenized with the module-level ``tokenizer`` (padded,
    truncated to at most 512 tokens, returned as PyTorch tensors) and passed
    to the module-level ``model``. The forward pass runs under
    ``torch.no_grad()`` so no autograd graph is built for inference; the
    returned tensor therefore does not require grad.

    Args:
        text: Input accepted by ``tokenizer``.

    Returns:
        Tensor of probabilities obtained by applying softmax over the last
        dimension of ``outputs.logits``.
    """
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return predictions


# Input data: the first line is treated as a count, the remaining lines as records
input_text = """
2
Model 3|rail|83.95|730.3089599609375|0.0|255.2872314453125|680.51123046875
Model 3|rail|27.05|517.669921875|11.56787109375|144.6182861328125|676.1633911132812

"""

# Prepare the text for the model: drop the first line, keep the rest
lines = input_text.strip().split('\n')
count = lines[0]
text_lines = '\n'.join(lines[1:])

# Prediction
predictions = predict(text_lines)
predicted_label_id = torch.argmax(predictions, dim=1).item()

# Print the predicted label
predicted_label = id_to_label[predicted_label_id]
print(f"Predicted label: {predicted_label}")

# Print the probability of every class
for i, prob in enumerate(predictions[0]):
    label = id_to_label[i]
    print(f"{label}: {prob.item() * 100:.2f}%")


In [None]:
%pip install odfpy

In [31]:

# Path to the script to run
script_path = "/home/jupyter/work/resources/console_yandex/console.py"

# Set __file__ so the script sees its own path
globals()['__file__'] = script_path

# Run the script in the notebook's global namespace.
# NOTE(review): exec runs whatever code the file contains with full access to
# the notebook state; only point script_path at a trusted script.
# The context manager closes the file handle (the bare open(...).read() leaked
# it), and compiling with the real filename makes tracebacks point at the
# script instead of "<string>".
with open(script_path) as _script_file:
    _script_source = _script_file.read()
exec(compile(_script_source, script_path, 'exec'))


/home/jupyter/work/resources/console_yandex

0: 384x640 1 machinery, 9.7ms
Speed: 2.7ms preprocess, 9.7ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)
0.0 ---------------------------

0: 384x640 2 trains, 10.3ms
Speed: 1.8ms preprocess, 10.3ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)
0.0 ---------------------------
0.0 ---------------------------

0: 384x640 3 rails, 9.6ms
Speed: 1.8ms preprocess, 9.6ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)
0.0 ---------------------------
0.0 ---------------------------
0.0 ---------------------------
Predicted label: blizko1

0: 384x640 (no detections), 11.1ms
Speed: 2.6ms preprocess, 11.1ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 trains, 8.8ms
Speed: 1.9ms preprocess, 8.8ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)
0.23356666666666667 ---------------------------
0.23356666666666667 ---------------------------
0.23356666