In [None]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [1]:
import torch
import torchvision.transforms as transforms
from torchvision.models import vit_b_16, ViT_B_16_Weights
from timm.models import create_model


from torch.utils.data import DataLoader, random_split, Dataset
from torchvision.datasets import ImageFolder
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import os
from torchvision.io import read_image
from torchvision.transforms import Compose, Resize, ToTensor
from PIL import Image

In [2]:

train_dir = "/kaggle/input//trees-dataset/vkr"
valid_dir = "/kaggle/input//trees-dataset/vkr"
test = "test"
train_data=  pd.read_csv('/kaggle/input/trees-dataset/vkr/new_train.csv')
valid_data = pd.read_csv('/kaggle/input//trees-dataset/vkr/new_valid.csv')


train_data.class_num = train_data.class_num.astype(str)
valid_data.class_num = valid_data.class_num.astype(str)

In [3]:
def check_files_existence(df, directory, column):
    invalid_files = []
    for filepath in df[column]:
        if not os.path.exists(os.path.join(directory, filepath)):
            invalid_files.append(filepath)
    return invalid_files

In [6]:
invalid_train_files = check_files_existence(train_data, train_dir, 'id')
invalid_valid_files = check_files_existence(valid_data, valid_dir, 'id')

In [5]:
def replace_extension_train(row):
    if row['id'] in invalid_train_files:
        return row['id'].replace('.jpg', '.JPG')
    else:
        return row['id']

def replace_extension_valid(row):
    if row['id'] in invalid_valid_files:
        return row['id'].replace('.jpg', '.JPG')
    else:
        return row['id']

# Применение функции к DataFrame
train_data['id'] = train_data.apply(replace_extension_train, axis=1)
valid_data['id'] = valid_data.apply(replace_extension_valid, axis=1)

In [7]:
print("Недействительные файлы в обучающем наборе:", invalid_train_files)
print("Недействительные файлы в валидационном наборе:", invalid_valid_files)

Недействительные файлы в обучающем наборе: []
Недействительные файлы в валидационном наборе: []


In [8]:
train_data_df = train_data.copy()
valid_data_df = valid_data.copy()

In [9]:
train_img_dir = '/kaggle/input/trees-dataset/vkr/'
val_img_dir = '/kaggle/input/trees-dataset/vkr/'


def read_image_pil(img_path):
    with Image.open(img_path).convert('RGB') as img:
        return ToTensor()(img)

class TreesDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None):
        self.img_labels = annotations_file
        self.transform = transform
        self.img_dir = img_dir

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = self.img_labels.iloc[idx, 0]
        image = read_image_pil(self.img_dir + img_path)  
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        label = int(label)
        return image, torch.tensor(label)

# transform = transforms.Compose([
#     transforms.RandomResizedCrop(224),
#     transforms.RandomHorizontalFlip(),
#     # transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])

# transform = transforms.Compose([
#     transforms.RandomResizedCrop(224),
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomRotation(15),
# ])
# transform = transforms.Compose([
#     transforms.RandomResizedCrop(224),
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomRotation(15),
#     transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1),
#     transforms.RandomGrayscale(p=0.2),
#     transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])


transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.RandomGrayscale(p=0.2),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# transform = transforms.Compose([
#     transforms.RandomResizedCrop(224),
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomRotation(15),
#     transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1),
#     # transforms.ToTensor(),
#     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# ])

train_data = TreesDataset(train_data_df, train_img_dir, transform=transform)
valid_data = TreesDataset(valid_data_df, val_img_dir, transform=transform)

train_loader = DataLoader(train_data, batch_size=32)
valid_loader = DataLoader(valid_data, batch_size=32)

model = create_model(
    'deit_base_patch16_224',
    pretrained=True,
    num_classes=39
)

# model = vit_b_16(pretrained=True)
# for param in model.parameters():
#     param.requires_grad = False  

for param in model.parameters():
    param.requires_grad = False 


# for param in model.head.parameters():
#     param.requires_grad = True

for param in model.blocks[-1].parameters():  
    param.requires_grad = True


# optimizer = torch.optim.AdamW([
#     {'params': model.head.parameters(), 'lr': 1e-3},
#     {'params': model.blocks[-1].parameters(), 'lr': 1e-4}
# ], lr=1e-4, weight_decay=0.01)
# for param in model.parameters():
#     param.requires_grad = True

# for layer in model.blocks[:-3]:  
#     for param in layer.parameters():
#         param.requires_grad = False


# original_head_layer = model.heads.head
# model.heads = nn.Sequential(nn.Linear(in_features=original_head_layer.in_features,
#                                       out_features=39,
#                                       bias=True))

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW([
    {'params': model.head.parameters(), 'lr': 0.001},
    {'params': model.blocks.parameters(), 'lr': 0.0001}
], lr=0.00001, weight_decay=0.01)


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

In [12]:
def train(model, criterion, optimizer, train_loader, valid_loader, epochs=25):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        print("gpu")
    else:
        print("cpu")
    model.to(device)
    for epoch in range(epochs):
        model.train()
        running_loss = 0
        total = 0
        correct = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}")

        model.eval()
        valid_loss = 0
        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                valid_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print(f"Validation Loss: {valid_loss/len(valid_loader)}")
        print(f'Accuracy of the network on the validation images: {100 * correct / total}%')

train(model, criterion, optimizer, train_loader, valid_loader)

model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

gpu




Epoch 1, Loss: 3.7360418664998023




Validation Loss: 3.359498014816871
Accuracy of the network on the validation images: 16.160388821385176%
Epoch 2, Loss: 3.3249988350374946
Validation Loss: 2.987804862169119
Accuracy of the network on the validation images: 21.992709599027947%
Epoch 3, Loss: 2.972170729061653
Validation Loss: 2.7137078092648435
Accuracy of the network on the validation images: 27.095990279465372%
Epoch 4, Loss: 2.6996807908189706
Validation Loss: 2.54224621791106
Accuracy of the network on the validation images: 31.591737545565007%
Epoch 5, Loss: 2.474110644439171
Validation Loss: 2.333836243702815
Accuracy of the network on the validation images: 35.47995139732685%
Epoch 6, Loss: 2.2900569500594306
Validation Loss: 2.2216145396232605
Accuracy of the network on the validation images: 39.611178614823814%
Epoch 7, Loss: 2.14720317413067
Validation Loss: 2.0738272070884705
Accuracy of the network on the validation images: 43.377885783718106%
Epoch 8, Loss: 1.966378643594939
Validation Loss: 2.050225134079

In [14]:
torch.save(model.state_dict(), 'model_weights.pth')

In [27]:
def process_image(image_path):
    image = Image.open(image_path)

    transformation = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 
    ])

    processed_image = transformation(image).unsqueeze(0).to(device)

    return processed_image

In [25]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Используется устройство:", device)

model.to(device)

Используется устройство: cuda


VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(

model

In [15]:
model = torch.load('/kaggle/input/models-trees/full_model.pth')
print("Модель успешно загружена.")
model.eval()

Модель успешно загружена.


VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(

In [40]:
import torch.nn.functional as F
def predict(image_path):
    image = process_image(image_path) 
    output = model(image)
    probabilities = F.softmax(output, dim=1) 
    probabilities = probabilities.squeeze().tolist()

    class_probabilities = {i: prob for i, prob in enumerate(probabilities)}

    return class_probabilities


In [41]:
predict('/kaggle/input/klentree/.jpg')

{0: 0.19968588650226593,
 1: 0.0007737760315649211,
 2: 0.07776502519845963,
 3: 0.0009698724024929106,
 4: 0.002414742251858115,
 5: 0.002116408199071884,
 6: 0.0028157804626971483,
 7: 0.0010858316672965884,
 8: 0.0015513594262301922,
 9: 0.0016245472943410277,
 10: 0.000620123406406492,
 11: 0.000667078304104507,
 12: 0.003822344122454524,
 13: 0.001060699112713337,
 14: 0.627441942691803,
 15: 0.000508010620251298,
 16: 0.0005058939568698406,
 17: 0.0014427549904212356,
 18: 0.004695465322583914,
 19: 0.0010829318780452013,
 20: 0.020313667133450508,
 21: 0.0022266958840191364,
 22: 0.002123433630913496,
 23: 0.003813183633610606,
 24: 0.0018579134484753013,
 25: 0.0016312720254063606,
 26: 0.0009638582123443484,
 27: 0.0033224120270460844,
 28: 0.001822005258873105,
 29: 0.0033516166731715202,
 30: 0.010313984006643295,
 31: 0.0015764555428177118,
 32: 0.008202550932765007,
 33: 0.0003408501506783068,
 34: 0.0006209814455360174,
 35: 0.0008583302260376513,
 36: 0.00129049655515700

In [23]:
import torch.nn.functional as F
import torch
import torchvision.transforms as transforms


class Predictor:
    def __init__(self, model_path):
        self.model_path = model_path
        
        self.model = torch.load(self.model_path)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(device)
        self.model.eval()
        
    def process_image(self, image_path):
        image = Image.open(image_path)

        transformation = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 
        ])

        processed_image = transformation(image).unsqueeze(0).to(device)

        return processed_image
    
    
    def predict(self, image_path):
        image = process_image(self, image_path) 
        output = self.model(image)
        probabilities = F.softmax(output, dim=1) 
        probabilities = probabilities.squeeze().tolist()

        class_probabilities = {i: prob for i, prob in enumerate(probabilities)}

        return class_probabilities

In [24]:
predictor = Predictor('/kaggle/input/models-trees/full_model.pth')

In [25]:
predictor.predict('/kaggle/input/klentree/.jpg')

NameError: name 'process_image' is not defined