# 试题说明

## 任务说明
基于Caltech数据集的图像分类，Caltech101包含102个类，每种类别大约40到800个图像，训练集总计7999图像。本次试题需要图片为输入，通过课程学习的分类方法（支持向量机、深度神经网络、卷积神经网络等）从中识别该图像属于哪一个类别。


## 数据说明
images下存储所有的训练+测试图像，trian.txt中存储用于训练图像路径和对应标注，图片路径+\t+标签，test.txt中存储测试图像。




## 提交答案
考试提交，需要提交模型代码项目版本和结果文件。结果文件为TXT文件格式，命名为result.txt，文件内的字段需要按照指定格式写入。
结果文件要求：
1,每一行为： 图像名\t标签  101_0073.jpg\t13
2.输出结果应检查是否为1145行数据，否则成绩无效。
3.输出结果文件命名为result.txt，一行一个数据，样例如下：




In [1]:
import zipfile
import os
def unzip_data(src_path,target_path):
    # 解压原始数据集，将src_path路径下的zip包解压至target_path目录下
    if(not os.path.isdir(target_path)):
        z = zipfile.ZipFile(src_path, 'r')
        z.extractall(path=target_path)
        z.close()
unzip_data('data/data146107/dataset.zip','data/data')

In [2]:
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import paddle
import paddle.nn as nn
import paddle.vision.transforms as T

In [3]:
import os 
import sys
import pickle
from paddle.vision.transforms import ToTensor
import paddle.nn.functional as F

In [12]:
with open("./data/train.txt") as f:
    lines=f.readlines()
    print(len(lines))
    t=open("./data/t.txt","w")
    e=open("./data/e.txt","w")
    for i in range(len(lines)//10):
        e.write(lines[i])
    for i in range(len(lines)//10,len(lines)):
        t.write(lines[i])
    t.close()
    e.close()

7999


In [4]:
img_size=224
class Dataset(paddle.io.Dataset):
    def __init__(self, mode='t'):
        self.data = []
        with open(f'./data/{mode}.txt') as f:
            lines = f.readlines()
            np.random.shuffle(lines)
            for line in lines:
                info = line.strip().split('\t')
                # print(info)
                if len(info) > 1:
                    self.data.append([info[0].strip(), info[1].strip()])            
    def __getitem__(self, index):
        image_file, label = self.data[index]
        img = Image.open("./data/images/"+image_file) 
        img = img.resize((img_size, img_size), Image.ANTIALIAS)
        img = np.array(img).astype('float32')
        # img = img[:,:,:]
        # print(img.shape)
        if len(img.shape)==2:
            img = np.stack((img,)*3, axis=-1)
        img = img.transpose((2, 0, 1))[:3,:,:]     #读出来的图像是rgb,rgb,rbg..., 转置为 rrr...,ggg...,bbb...
        # print(img.shape)
        img = img[:,:,:]/255.0
        # if img.size!=img_size*img_size*3:
        #     print('error-----------------------',img.size,img.shape)
        return img, np.array(label, dtype='int64')
    def __len__(self):
        return len(self.data)

In [4]:
train_dataset = Dataset(mode='t')
eval_dataset = Dataset(mode='e')
print('train大小：', train_dataset.__len__())
print('eval大小：', eval_dataset.__len__())

train大小： 7200
eval大小： 799


In [5]:
for data, label in train_dataset:
    print(np.array(data).shape,label)
    break

(3, 224, 224) 60


In [6]:
model = paddle.vision.models.resnet152(pretrained=True,num_classes=102)
paddle.summary(model,(1,3,224,224))

W0901 16:32:53.208151  4054 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.2, Runtime API Version: 10.1
W0901 16:32:53.212443  4054 device_context.cc:465] device: 0, cuDNN Version: 7.6.


-------------------------------------------------------------------------------
   Layer (type)         Input Shape          Output Shape         Param #    
     Conv2D-1        [[1, 3, 224, 224]]   [1, 64, 112, 112]        9,408     
   BatchNorm2D-1    [[1, 64, 112, 112]]   [1, 64, 112, 112]         256      
      ReLU-1        [[1, 64, 112, 112]]   [1, 64, 112, 112]          0       
    MaxPool2D-1     [[1, 64, 112, 112]]    [1, 64, 56, 56]           0       
     Conv2D-3        [[1, 64, 56, 56]]     [1, 64, 56, 56]         4,096     
   BatchNorm2D-3     [[1, 64, 56, 56]]     [1, 64, 56, 56]          256      
      ReLU-2         [[1, 256, 56, 56]]    [1, 256, 56, 56]          0       
     Conv2D-4        [[1, 64, 56, 56]]     [1, 64, 56, 56]        36,864     
   BatchNorm2D-4     [[1, 64, 56, 56]]     [1, 64, 56, 56]          256      
     Conv2D-5        [[1, 64, 56, 56]]     [1, 256, 56, 56]       16,384     
   BatchNorm2D-5     [[1, 256, 56, 56]]    [1, 256, 56, 56]   

{'total_params': 58504230, 'trainable_params': 58201382}

In [7]:
para_state_dict = paddle.load("resnet152")
model.set_state_dict(para_state_dict)

In [8]:
batch_size=32
lr=0.001
opt = paddle.optimizer.Adam(learning_rate=lr,
                            parameters=model.parameters())
train_loader = paddle.io.DataLoader(train_dataset,
                                    shuffle=True,
                                    batch_size=batch_size)
valid_loader = paddle.io.DataLoader(eval_dataset, batch_size=batch_size)

batch_num=len(train_loader())

In [10]:
epoch_num=10
model.train()
for epoch in range(epoch_num):
    sum_loss=0
    sum_acc=0
    for batch_id, data in enumerate(train_loader()):
        x_data = data[0]
        y_data = paddle.to_tensor(data[1])
        y_data = paddle.unsqueeze(y_data, 1)

        logits = model(x_data)
        loss = F.cross_entropy(logits, y_data)
        sum_loss+=loss
        # print("train_pass:{},batch_id:{},train_loss:{}".format(epoch,batch_id,loss.numpy()[0]))
        acc = paddle.metric.accuracy(logits,y_data)#计算精度
        sum_acc+=acc
        # if batch_id%1==0:
        #     Batch = Batch +1
        #     Batchs.append(Batch)
        #     all_train_loss.append(loss.numpy()[0])
        #     all_train_accs.append(acc.numpy()[0])
        #     print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(epoch,batch_id,loss.numpy(),acc.numpy()))
        loss.backward()
        opt.step()
        opt.clear_grad() #opt.clear_grad()来重置梯度
    print("train_pass:{},train_loss:{},train_acc:{}".format(epoch,sum_loss.numpy()[0]/batch_num,sum_acc.numpy()[0]/batch_num))

  "When training, we now always track global mean and variance.")


train_pass:0,train_loss:0.5921084933810764,train_acc:0.8281944444444445
train_pass:1,train_loss:0.4142721896701389,train_acc:0.87875
train_pass:2,train_loss:0.37762613932291667,train_acc:0.8881944444444444
train_pass:3,train_loss:0.2226786634657118,train_acc:0.9366666666666666
train_pass:4,train_loss:0.12912510342068143,train_acc:0.9645833333333333
train_pass:5,train_loss:0.15866658528645833,train_acc:0.9531944444444445
train_pass:6,train_loss:0.14302341037326388,train_acc:0.9558333333333333
train_pass:7,train_loss:0.10099985758463542,train_acc:0.9709722222222222
train_pass:8,train_loss:0.12325160556369358,train_acc:0.96375
train_pass:9,train_loss:0.10758376227484809,train_acc:0.9670833333333333


In [11]:
paddle.save(model.state_dict(),'resnet152')

In [12]:
model.eval()
accs = []
for batch_id,data in enumerate(valid_loader()):#测试集
    x_data = data[0]
    y_data = paddle.to_tensor(data[1])
    y_data = paddle.unsqueeze(y_data, 1)
    logits = model(x_data)
    acc = paddle.metric.accuracy(logits, y_data)
    accs.append(acc.numpy())
avg_acc = np.mean(accs)
print("test_acc{}".format(avg_acc))

test_acc0.7508870959281921


In [None]:
with open(f'./data/test.txt') as f:
    res_file=open("result.txt","w")
    lines = f.readlines()
    for line in lines:
        image_file = line.strip()
        # image_file="92.jpg"
        img = Image.open("./data/images/"+image_file) 
        img = img.resize((img_size, img_size), Image.ANTIALIAS)
        img = np.array(img).astype('float32')
        # img = img[:,:,:]
        # print(img.shape)
        if len(img.shape)==2:
            img = np.stack((img,)*3, axis=-1)
        img = img.transpose((2, 0, 1))[:3,:,:]     #读出来的图像是rgb,rgb,rbg..., 转置为 rrr...,ggg...,bbb...
        # print(img.shape)
        img = img[:,:,:]/255.0
        out=model(paddle.to_tensor(img.reshape([1,3,224,224])))
        print(paddle.argmax(out).numpy()[0])
        res_file.write("{}\t{}\n".format(image_file,paddle.argmax(out).numpy()[0]))
    res_file.close()