In [7]:
import torch
import torch.nn as nn
from torchvision.models.resnet import *
import numpy as np
import pandas as pd
import os
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
from tqdm import tqdm

In [16]:
# The only value meant to be tuned here is `epoch`: epoch = x selects the
# checkpoint that was saved after the x-th training epoch.
root = ''
epoch = 45
train_file, test_file = (os.path.join(root, csv_name) for csv_name in ('train.csv', 'test.csv'))
net_path = os.path.join('models', 'model_epoch_{}.pth'.format(epoch))
save_file = './submission.csv'

In [9]:
def pre_process(df, dictionary=None):
    """
    Convert the ``label`` column of ``df`` between class names and integer ids.

    Encoding (``dictionary`` is None, used on the training frame):
        every distinct label is numbered 0, 1, 2, ... in order of first
        appearance, the ``label`` column is replaced by those numbers, and a
        two-way lookup table holding both ``name -> id`` and ``id -> name``
        entries is built.

    Decoding (``dictionary`` is given, used on predicted ids):
        every ``label`` value that is a key of ``dictionary`` whose mapped value
        is not an ``int`` (the ``id -> name`` half of the table) is replaced by
        that mapped value; any other value is left as it is.

    The column is rebuilt in a single ``Series.map`` pass and assigned back as a
    whole. This avoids writing values of a different type into an existing
    column through ``.loc`` (implicit upcasting that recent pandas versions
    deprecate), avoids one full column scan per class, and guarantees that a
    value is translated at most once.

    Args:
        df: DataFrame with a ``label`` column. It is modified in place.
        dictionary: optional two-way lookup table as returned by the encoding
            call.

    Returns:
        ``df`` when ``dictionary`` is given, otherwise ``(df, dictionary)``.
    """
    if dictionary is not None:
        # Keep only the id -> name half; entries whose value is an int are the
        # name -> id half and must not be applied when decoding.
        id_to_name = {
            key: value
            for key, value in dictionary.items()
            if not isinstance(value, int)
        }
        df['label'] = df['label'].map(lambda value: id_to_name.get(value, value))
        return df

    # Build the two-way table in order of first appearance of each label.
    dictionary = {}
    for class_id, name in enumerate(df['label'].unique()):
        dictionary[name] = class_id
        dictionary[class_id] = name

    df['label'] = df['label'].map(lambda value: dictionary.get(value, value))
    return df, dictionary

class LeavesDataset(Dataset):
    """
    Dataset over leaf images stored as ``<number>.jpg`` inside one directory.

    Training item ``i`` is file ``<i>.jpg``. The test images are numbered
    directly after the training ones, so test item ``i`` is file
    ``<NUM_TRAIN + i>.jpg``.

    In ``'train'`` mode items are ``(image_tensor, label)`` pairs and a random
    horizontal flip is applied before the tensor conversion; in ``'test'`` mode
    items are the image tensor only.
    """

    # Dataset sizes. NUM_TRAIN doubles as the file-number offset of the first
    # test image.
    NUM_TRAIN = 18353
    NUM_TEST = 8800

    def __init__(self, file_path, df=None, mode='train'):
        """
        Args:
            file_path: directory that contains the ``<number>.jpg`` files.
            df: DataFrame whose second column holds the integer labels of the
                training images, in file-number order. Required in ``'train'``
                mode, unused in ``'test'`` mode.
            mode: ``'train'`` or ``'test'``.

        Raises:
            ValueError: if ``mode`` is not supported, or if ``mode='train'`` is
                requested without ``df``.
        """
        # Validate first so that misuse fails here with a clear message instead
        # of later inside __getitem__.
        if mode not in ('train', 'test'):
            raise ValueError("mode must be 'train' or 'test', got %r" % (mode,))
        if mode == 'train' and df is None:
            raise ValueError("df with the labels is required when mode='train'")

        self.file_path = file_path
        self.mode = mode

        steps = [transforms.ToTensor()]
        if mode == 'train':
            self.img_label = np.zeros(self.NUM_TRAIN, dtype=np.int64)
            self.img_label[:] = df.iloc[:, 1].values
            # Augmentation runs on the PIL image, i.e. before ToTensor.
            steps.insert(0, transforms.RandomHorizontalFlip(p=0.5))

        self.transform = transforms.Compose(steps)

    def __len__(self):
        """Return the number of items for the current mode."""
        if self.mode == 'train':
            return self.NUM_TRAIN
        return self.NUM_TEST

    def __getitem__(self, index):
        """Load item ``index``: ``(image, label)`` in train mode, ``image`` in test mode."""
        # Test files are numbered after the training files.
        file_number = index + self.NUM_TRAIN if self.mode == 'test' else index
        img_path = os.path.join(self.file_path, str(file_number) + '.jpg')

        # Force three channels so grayscale / RGBA / CMYK files still yield a
        # 3-channel tensor, and close the file as soon as the pixels are read.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')

        # Image transform (augmentation)
        img = self.transform(img)

        if self.mode == 'test':
            return img
        return img, self.img_label[index]

def set_parameter_requires_grad(model, feature_extracting):
    """
    Optionally freeze a model so that its existing weights are not trained.

    When ``feature_extracting`` is truthy, ``requires_grad`` is set to ``False``
    on every parameter yielded by ``model.parameters()``. Otherwise the model is
    left untouched. Nothing is returned; the model is modified in place.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

def res_model(num_classes, feature_extract = False, pretrained=True):
    """
    Build a ResNet-50 whose final fully connected layer outputs ``num_classes`` values.

    Args:
        num_classes: output size of the replacement ``fc`` layer.
        feature_extract: forwarded to ``set_parameter_requires_grad``; when
            truthy the backbone parameters are frozen before the new ``fc``
            layer is attached, so only that new layer keeps ``requires_grad``.
        pretrained: forwarded to ``resnet50(pretrained=...)``.

    Returns:
        The modified ResNet-50 module.
    """
    backbone = resnet50(pretrained=pretrained)
    set_parameter_requires_grad(backbone, feature_extract)

    # Swap the classification head for one sized to this task.
    backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
    return backbone

def test(test_iter, device, dictionary):
    predict = []
    
    for X in tqdm(test_iter):
        output = net(X.to(device))
        predict.extend(torch.argmax(output, dim=-1).cpu().numpy().tolist())

    pred = []
    for id in predict:
        pred.append(dictionary[id])

    return pred

In [17]:
if __name__ == '__main__':

    # Device: use the first GPU when CUDA is available, otherwise the CPU.
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

    # Network: the checkpoint loaded below replaces every weight, so the
    # ImageNet weights are not requested (pretrained=False avoids a pointless
    # download). map_location lets a checkpoint saved on a GPU load on `device`.
    num_classes = 176
    net = res_model(num_classes, pretrained=False)
    net.to(device)
    net.load_state_dict(torch.load(net_path, map_location=device))
    # Inference must run in eval mode; otherwise BatchNorm normalises with the
    # statistics of each batch and predictions depend on batch composition.
    net.eval()

    # The training labels are only needed to rebuild the id <-> name table.
    train_label = pd.read_csv(train_file)
    train_label, dictionary = pre_process(train_label)
    test_df = pd.read_csv(test_file)

    test_set = LeavesDataset('images', mode='test')
    test_iter = DataLoader(test_set, shuffle=False, batch_size=8, num_workers=5)
    predict = test(test_iter, device, dictionary)

    # Assigning the list directly raises if the number of predictions does not
    # match the number of rows, instead of silently padding with NaN.
    test_df['label'] = predict
    submission = test_df[['image', 'label']]
    submission.to_csv(save_file, index=False)
    print("Done!!!!!!!!!!!!!!!!!!!!!!!!!!!")

100%|██████████| 1100/1100 [00:30<00:00, 36.14it/s]

Done!!!!!!!!!!!!!!!!!!!!!!!!!!!



