# データセットの取得
German Traffic Sign Recognition Benchmark (GTSRB)

In [None]:
!wget http://benchmark.ini.rub.de/Dataset/GTSRB_Final_Training_Images.zip

In [None]:
!unzip GTSRB_Final_Training_Images.zip

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import csv
from PIL import Image

rootpath = 'GTSRB/Final_Training/Images'

images = []  # cropped and resized (32x32) sign images as numpy arrays
labels = []  # label of the image stored at the same index in `images`

# Walk the 43 class directories (00000 ... 00042). Each directory is expected to
# hold its images plus a ';'-separated annotation file named GT-<class>.csv.
for c in range(0, 43):
    class_dir = format(c, '05d')
    prefix = rootpath + '/' + class_dir + '/'
    # `with` guarantees the annotation file is closed even when an image fails to
    # load; newline='' is what the csv module asks for on files given to csv.reader.
    with open(prefix + 'GT-' + class_dir + '.csv', newline='') as gtFile:
        gtReader = csv.reader(gtFile, delimiter=';')
        next(gtReader)  # discard the first (header) row
        for row in gtReader:
            # row[0]: image file name, row[3:7]: crop box handed to Image.crop as
            # (left, upper, right, lower), row[7]: integer label.
            with Image.open(prefix + row[0]) as image:
                crop_image = image.crop((int(row[3]), int(row[4]), int(row[5]), int(row[6])))
                resize_image = crop_image.resize((32, 32))
                # Convert while the source image is still open, then let `with`
                # release the underlying file handle.
                images.append(np.asarray(resize_image))
            labels.append(int(row[7]))

In [None]:
# Sanity check: show the array shape of the loaded images, then of the labels.
for loaded in (images, labels):
    print(np.array(loaded).shape)

# データの分割
学習用 (train) とテスト用 (test) に 9:1 の割合で分割

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples as the test set.
# random_state pins the shuffle so that "Restart & Run All" reproduces the same
# split (and therefore the same pickle files written further down).
# stratify keeps every class present in both subsets in proportion to its size.
all_images = np.array(images)
all_labels = np.array(labels)
train_data, test_data, train_label, test_label = train_test_split(
    all_images, all_labels, test_size=0.1, random_state=42, stratify=all_labels)

In [None]:
# Report the shape of each split: image arrays first, then label arrays.
for caption, split in (
    ('train size:', train_data),
    ('test size:', test_data),
    ('train label size:', train_label),
    ('test label size:', test_label),
):
    print(caption, split.shape)

# データの確認

In [None]:
!wget https://raw.githubusercontent.com/schiyoda/Self-Driving-Car/master/signnames.csv

In [None]:
import csv

# Collect the second column of signnames.csv in file order, so signnames[i] is the
# name found on the i-th data row (the first row is kept separately in `header`).
# `with` closes the file even if parsing raises; newline='' is what the csv module
# asks for on files given to csv.reader.
with open('signnames.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    header = next(reader)
    signnames = [row[1] for row in reader]

In [None]:
# Show one training example per class on a 9x5 grid. Only the first 43 panels get
# an image; the remaining two stay empty. Every panel has its axes hidden.
fig, axs = plt.subplots(9, 5, figsize=(15, 30))
fig.subplots_adjust(hspace=.1, wspace=.1)
axs = axs.ravel()
for class_id, panel in enumerate(axs):
    if class_id < 43:
        # Index of the first training sample carrying this label.
        first_match = np.where(train_label == class_id)[0][0]
        panel.imshow(train_data[first_match])
        panel.set_title(signnames[class_id])
    panel.axis('off')

# データを pickle に変換してICOSに保管

In [None]:
import pickle

# Bundle each split as a (data, label) tuple and serialise it to its own pickle file.
train_tpl = (train_data, train_label)
test_tpl = (test_data, test_label)

for pkl_name, payload in (('training_data.pkl', train_tpl),
                          ('test_data.pkl', test_tpl)):
    with open(pkl_name, 'wb') as f:
        pickle.dump(payload, f)

保管先のICOSのcredentialを指定

In [None]:
from ibm_botocore.client import Config
import ibm_boto3

# Service credentials of the target Cloud Object Storage instance. The values are
# masked placeholders: fill them in locally and never commit real secrets.
# Only "apikey" and "resource_instance_id" are read by the client set-up below.
cos_credentials = {
    "apikey": "*************",
    "endpoints": "*************",
    "iam_apikey_description": "*************",
    "iam_apikey_name": "*************",
    "iam_role_crn": "*************",
    "iam_serviceid_crn": "*************",
    "resource_instance_id": "*************",
}

# NOTE(review): these look like legacy Bluemix/SoftLayer host names - confirm they
# are still the right endpoints for the target account/region.
auth_endpoint = 'https://iam.bluemix.net/oidc/token'
service_endpoint = 'https://s3-api.us-geo.objectstorage.softlayer.net'

# Keyword arguments for the S3-compatible client, gathered in one place.
client_options = dict(
    ibm_api_key_id=cos_credentials['apikey'],
    ibm_service_instance_id=cos_credentials['resource_instance_id'],
    ibm_auth_endpoint=auth_endpoint,
    config=Config(signature_version='oauth'),
    endpoint_url=service_endpoint,
)
cos = ibm_boto3.client('s3', **client_options)

保管先のICOSのbucketを指定

In [None]:
# Name of the destination bucket (placeholder - replace with a real bucket name).
backet = 'xxx'

# Upload both pickle files; each object key is the same as the local file name.
for pkl_name in ('training_data.pkl', 'test_data.pkl'):
    cos.upload_file(Filename=pkl_name, Bucket=backet, Key=pkl_name)