In [6]:
import os
from PIL import Image

# Directory whose PNG files are checked (edit as needed)
directory = r"D:\URFU\VKR\Ind_pract\dissert\data\classification\class1"

# Walk every directory entry; only names ending in ".png" (case-insensitive)
# are inspected, everything else is left untouched.
for filename in os.listdir(directory):
    if not filename.lower().endswith(".png"):
        continue
    filepath = os.path.join(directory, filename)
    try:
        # verify() raises when Pillow cannot validate the file. The context
        # manager closes the handle before the except branch runs.
        with Image.open(filepath) as img:
            img.verify()
    except Exception as e:
        # Any failure is treated as corruption: report it, then delete the file.
        print(f"Удаляю повреждённый файл: {filename} ({e})")
        os.remove(filepath)

In [None]:
# Импорт библиотек
import numpy as np
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from PIL import Image
from imblearn.over_sampling import SMOTE
import os

# Подготовка данных
def load_images_and_labels(root_dir):
    """Load every image under ``root_dir`` into one tensor with binary labels.

    Each immediate sub-directory of ``root_dir`` is treated as a class: the
    directory named ``class0`` gets label 0 and every other directory gets
    label 1. Each image is converted to RGB, resized so that its shorter side
    is 256 px, centre-cropped to 256x256 and normalised per channel.

    Args:
        root_dir: Path of the directory that contains the class sub-directories.

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a tensor of shape
        ``(N, 3, 256, 256)`` and ``labels`` is a NumPy array of ``N`` ints
        (0 or 1), in the same order as ``images``.

    Raises:
        RuntimeError: If no image files are found under ``root_dir``.
    """
    transform = transforms.Compose([
        # Resize(256) only fixes the shorter edge, so the other edge depends on
        # the aspect ratio. The centre crop gives every tensor the same shape,
        # which torch.stack requires; square inputs are unaffected by the crop.
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    images = []
    labels = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split done later) reproducible.
    for class_dir in sorted(os.listdir(root_dir)):
        class_path = os.path.join(root_dir, class_dir)
        if not os.path.isdir(class_path):
            continue
        label = 0 if class_dir == "class0" else 1
        for img_file in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_file)
            if not os.path.isfile(img_path):
                # Nested folders are not images; skip them instead of crashing.
                continue
            # The context manager closes the underlying file handle right away
            # instead of leaving one open per image until garbage collection.
            with Image.open(img_path) as img:
                tensor = transform(img.convert('RGB'))
            images.append(tensor)
            labels.append(label)

    if not images:
        # torch.stack would raise a RuntimeError with an unhelpful message here.
        raise RuntimeError(f"No image files found under {root_dir!r}")
    return torch.stack(images), np.array(labels)

# Извлечение признаков с помощью ResNet50
def extract_features(images, batch_size=32):
    """Extract ResNet50 feature vectors for a batch of images.

    The ImageNet-pretrained ResNet50 is used with its last child module
    removed, so the output of the preceding pooling stage is returned as the
    feature vector of each image.

    Args:
        images: Tensor of preprocessed images with the batch in dimension 0.
        batch_size: Number of images per forward pass. Processing in chunks
            keeps peak memory bounded instead of pushing the whole dataset
            through the network at once.

    Returns:
        A 2-D NumPy array with one row of features per input image. The batch
        dimension is always kept, also for a single image.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    if hasattr(models, "ResNet50_Weights"):
        # Newer torchvision: `pretrained=True` is deprecated; IMAGENET1K_V1 is
        # the weight set that flag used to select.
        backbone = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
    else:
        # Older torchvision without the weights enum.
        backbone = models.resnet50(pretrained=True)
    # Drop the last child module (the classification layer).
    backbone = torch.nn.Sequential(*list(backbone.children())[:-1])
    backbone.eval()

    chunks = []
    with torch.no_grad():
        # max(..., 1) keeps a single forward pass for an empty input so that
        # torch.cat below never receives an empty list.
        for start in range(0, max(len(images), 1), batch_size):
            out = backbone(images[start:start + batch_size])
            # flatten(…, 1) keeps dimension 0; squeeze() would collapse it
            # when only one image is passed and return a 1-D vector.
            chunks.append(torch.flatten(out, 1))
    return torch.cat(chunks).numpy()


In [8]:

# Load the data
# NOTE: hard-coded absolute Windows path; edit it to point at the dataset root
# (the folder that contains the class sub-directories).
root_dir = r"D:\URFU\VKR\Ind_pract\dissert\data\classification" 
# Read every image under root_dir into one tensor plus an array of 0/1 labels.
images, labels = load_images_and_labels(root_dir)
# Run the images through the ResNet50 backbone to get the feature matrix.
features = extract_features(images)


KeyboardInterrupt: 

In [None]:

# Split the data: 80/20, stratified by label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, stratify=labels, random_state=42
)
# Oversample the training split only, so the test split stays untouched.
# SMOTE draws random synthetic samples; seeding it (same seed as the split)
# makes the resampled training set, and therefore the trained model,
# reproducible across runs.
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X_train, y_train)


In [None]:

# XGBoost training configuration: hyper-parameters are collected in one
# mapping and unpacked into the classifier constructor.
xgb_params = {
    "objective": 'binary:logistic',
    "n_estimators": 100,
    "max_depth": 5,
    "learning_rate": 0.1,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "reg_alpha": 0.5,
    "eval_metric": 'logloss',
    "use_label_encoder": False,
}
model = XGBClassifier(**xgb_params)


In [None]:

# Train the classifier on the SMOTE-resampled training split.
model.fit(X_resampled, y_resampled)


In [None]:

# Model evaluation on the held-out test split
y_pred = model.predict(X_test)
# Column 1 of predict_proba is the probability of the positive class (label 1).
y_proba = model.predict_proba(X_test)[:, 1]

# Compute each metric once, then report all of them with four decimals.
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_proba)

print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC-AUC: {roc_auc:.4f}")
