In [5]:
# Fetch the training data from GitHub.
# The repository is cloned into ./lemon_dataset, then its .git directory is
# deleted so only the working-tree files remain.
!git clone https://github.com/robotduinom/lemon_dataset
!rm -rf lemon_dataset/.git

Cloning into 'lemon_dataset'...
remote: Enumerating objects: 53, done.
remote: Counting objects: 100% (24/24), done.
remote: Compressing objects: 100% (21/21), done.
remote: Total 53 (delta 4), reused 18 (delta 3), pack-reused 29
Unpacking objects: 100% (53/53), done.
Checking connectivity... done.


In [6]:
# Extracting the rar archive requires unrar, so build a Docker image for extraction.
# The image is built from the current directory and tagged "unrar".
!docker build -t unrar .

Sending build context to Docker daemon  81.97MB
Step 1/3 : FROM ubuntu:latest
 ---> 6b7dfa7e8fdb
Step 2/3 : WORKDIR /tmp/work
 ---> Using cache
 ---> ede8452840ba
Step 3/3 : RUN apt-get update && apt-get -y install unrar && rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> 4d9e457d289d
Successfully built 4d9e457d289d
Successfully tagged unrar:latest


In [7]:
# Extract the rar archive - the files are extracted into the data folder.
# The current directory is mounted at /tmp/work in the container, and the unrar
# log is redirected to unrar.out instead of being printed in full.
!docker run --rm -t -v ${PWD}:/tmp/work unrar:latest unrar x ./lemon_dataset/docs/data.rar > unrar.out
# Show only the beginning and end of the log, then delete the temporary log file.
!head unrar.out && echo "......." && tail unrar.out && rm unrar.out


UNRAR 6.11 beta 1 freeware      Copyright (c) 1993-2022 Alexander Roshal


Extracting from ./lemon_dataset/docs/data.rar

Creating    data                                                      OK
Creating    data/bad_quality                                          OK
Extracting  data/bad_quality/bad_quality_0.jpg                           0  OK 
Extracting  data/bad_quality/bad_quality_1.jpg                           0  OK 
.......
Extracting  data/good_quality/good_quality_991.jpg                      99  OK 
Extracting  data/good_quality/good_quality_992.jpg                      99  OK 
Extracting  data/good_quality/good_quality_993.jpg                      99  OK 
Extracting  data/good_quality/good_quality_994.jpg                      99  OK 
Extracting  data/good_quality/good_quality_995.jpg                      99  OK 
Extracting  data/good_quality/good_quality_996.jpg                      99  OK 
Extracting  data/good_quality/good_quality_997.jpg                      99  OK 
Ext

In [8]:
# Upload the extracted images to Object Storage.
import glob, base64
import params, oci_client
from oci.exceptions import ServiceError
from oci.object_storage import UploadManager, ObjectStorageClient
from oci.object_storage.models import CreateBucketDetails

# oci_client is a project-local helper; presumably it returns a configured
# ObjectStorageClient (see the type comment) -- verify against oci_client.py.
os_client = oci_client.get(ObjectStorageClient) # type: ObjectStorageClient

# Create the bucket only when it does not exist yet.
# Only an HTTP 404 from get_bucket is treated as "bucket missing"; any other
# failure (authentication, networking, throttling, ...) is re-raised instead of
# being silently turned into a create_bucket attempt.
try:
    os_client.get_bucket(params.os_namespace, params.os_bucket)
except ServiceError as err:
    if err.status != 404:
        raise
    print(f'Creating bucket: {params.os_bucket}')
    create_bucket_details = CreateBucketDetails(
        name = params.os_bucket,
        compartment_id = params.compartment_id,
        public_access_type = CreateBucketDetails.PUBLIC_ACCESS_TYPE_NO_PUBLIC_ACCESS,
        storage_tier = CreateBucketDetails.STORAGE_TIER_STANDARD
    )
    os_client.create_bucket(params.os_namespace, create_bucket_details)

# Upload every extracted JPEG to the bucket.
# The local relative path (e.g. data/<folder>/<name>.jpg) is reused as the object name.
upload_manager = UploadManager(os_client)
files = glob.glob("data/**/*.jpg")
print(f'Uploading {len(files)} files to the bucket "{params.os_bucket}", it will take some time...')
for count, file in enumerate(files, start=1):
    upload_manager.upload_file(params.os_namespace, params.os_bucket, file, file, content_type="image/jpeg")
    # Print one progress dot per 25 uploaded files.
    if count % 25 == 0:
        print('.', end='')
print('\ndone.')

Creating bucket: lemon_dataset
Uploading 2528 files to the bucket "lemon_dataset", it will take some time...
.....................................................................................................
done.
