# 1. モデルの準備

In [1]:
# Query the NVIDIA driver to see whether a GPU is attached to this runtime.
!nvidia-smi

NVIDIA-SMI has failed because it couldn't communicate with the NVIDIA driver. Make sure that the latest NVIDIA driver is installed and running.



In [2]:
import itertools
import os

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import random
import os

from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.metrics import roc_auc_score

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow._api.v2 import image

import time

def set_seed(seed=1):
    """Seed every random number generator this notebook relies on.

    Applies the same seed to Python's built-in ``random`` module, NumPy's
    global generator and TensorFlow's global seed, and exports it as the
    ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Seed value shared by all generators (default ``1``).
    """
    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so this
    # assignment presumably only affects processes launched afterwards.
    os.environ["PYTHONHASHSEED"] = str(seed)
    # Built-in random module.
    random.seed(seed)
    # NumPy global generator.
    np.random.seed(seed)
    # TensorFlow global seed.
    tf.random.set_seed(seed)
# Fix all random seeds before anything else runs.
set_seed(1)

# Record the library versions and the devices TensorFlow can see, so the
# environment that produced the results below is documented in the output.
print('TF version:', tf.__version__)
print('Hub version:', hub.__version__)
print('Physical devices:', tf.config.list_physical_devices())

TF version: 2.7.0
Hub version: 0.12.0
Phsical devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


# 2. データセットの準備

In [3]:
# Mount Google Drive at /content/drive; the data, saved models and submission
# files used below all live under that mount point (Colab-only import).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Load the headerless, tab-separated sample submission. Its second column
# (label 1) is overwritten with predicted scores by the inference loop.
sab_path = "/content/drive/MyDrive/techno_pro/submit/sample_submit.tsv"
sab_df = pd.read_csv(sab_path, sep="\t", header=None)
sab_df.head()

Unnamed: 0,0,1
0,test_0000,0
1,test_0001,0
2,test_0002,0
3,test_0003,0
4,test_0004,0


In [9]:
# Side length (pixels) of the square model input. The author noted 224 and 384
# as candidate values (NOTE(review): presumably tied to the model variant — confirm).
image_size = 224
# One image per batch: the inference loop reads a single score per batch.
batch_size = 1

In [10]:
# Root directory of the test images. flow_from_directory treats each
# sub-directory as a class; the recorded output reports a single class.
data_dir = "/content/drive/MyDrive/techno_pro/data/test/"

# Pixel rescaling applied by the ImageDataGenerator.
datagen_kwargs = dict(rescale=1./255)
# Resize/batching options passed to flow_from_directory.
dataflow_kwargs = dict(target_size=(image_size, image_size),
                       batch_size=batch_size,
                       interpolation="bilinear")

# NOTE(review): the rescale + per-sample centering/std-normalisation presumably
# mirrors the preprocessing used when the model was trained — confirm.
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    samplewise_center=True,
    samplewise_std_normalization=True,
    **datagen_kwargs)

# shuffle=False keeps the files in a fixed order, which the inference loop
# relies on to match batch i with row i of the submission frame.
# class_mode='binary' is kept so the generator still yields (x, y) pairs, as
# the inference loop unpacks both values.
# `subset="training"` was dropped: no validation_split is configured, so it
# selected every file anyway and only suggested a train/validation split
# that does not exist.
test_generator = test_datagen.flow_from_directory(
    data_dir, shuffle=False, class_mode='binary', **dataflow_kwargs)

Found 2300 images belonging to 1 classes.


# 3. 保存済みモデルの使用

In [7]:
# Directory name of the SavedModel to evaluate. The trailing "@param" comment
# is the Colab form annotation listing the selectable models.
model_path = '1641367273_efficientnetv2-b0_32batch_8epoch' # @param ['1641311414_efficientnetv2-b0_32batch_5epoch', '1641313096_efficientnetv2-s_16batch_5epoch', '1641364557_efficientnetv2-s_16batch_5epoch', '1641367273_efficientnetv2-b0_32batch_8epoch']
# Use the absolute Drive mount path like every other cell in this notebook, so
# loading does not depend on the kernel's current working directory.
export_path = '/content/drive/MyDrive/techno_pro/saved_model/' + model_path
reloaded = tf.keras.models.load_model(export_path)

In [11]:
# Score every test image and time each predict() call.
# The loop assumes one image per batch and an unshuffled generator, so that
# batch i corresponds to row i of sab_df.
num_images = sab_df.shape[0]
if test_generator.samples != num_images:
  # The Keras iterator starts over once it runs out of files, so a count
  # mismatch would silently duplicate or drop predictions.
  raise ValueError(
      "sab_df has %d rows but the generator found %d images"
      % (num_images, test_generator.samples))

# Rewind the iterator so re-running this cell (e.g. after an interrupted run)
# always starts from the first image.
test_generator.reset()

time_list = []   # per-image predict() duration in seconds
score_list = []  # per-image predicted score, in generator order
for i, (x, y) in enumerate(test_generator):
  # perf_counter is a monotonic, high-resolution clock meant for intervals.
  start = time.perf_counter()
  prediction_score = reloaded.predict(x)
  end = time.perf_counter()
  time_list.append(end-start)
  score_list.append(float(prediction_score[0][0]))
  if ((i+1)%100)==0:
    print(i+1," images are passed.")
  # The generator never stops on its own; leave once every row is scored.
  if ((i+1)==num_images):
    break
print("total: ",i+1," images.")

# Replace the placeholder column in one assignment instead of writing floats
# element by element into an integer column.
sab_df[1] = score_list

100  images are passed.
200  images are passed.
300  images are passed.
400  images are passed.
500  images are passed.
600  images are passed.
700  images are passed.
800  images are passed.
900  images are passed.
1000  images are passed.
1100  images are passed.
1200  images are passed.
1300  images are passed.
1400  images are passed.
1500  images are passed.
1600  images are passed.
1700  images are passed.
1800  images are passed.
1900  images are passed.
2000  images are passed.
2100  images are passed.
2200  images are passed.
2300  images are passed.
total:  2300  images.


In [12]:
# Inference-speed analysis: show the first 20 per-image timings, then the
# total and mean over all images and over all but the first five images
# (treated as warm-up).
print(time_list[:20])

total_time = sum(time_list)
print("sum  Inference time(s): ", total_time)
print("mean Inference time(s): ", total_time/len(time_list))

steady_times = time_list[5:]
steady_total = sum(steady_times)
print("sum  Inference time(s) Except warm up: ", steady_total)
print("mean Inference time(s) Except warm up: ", steady_total/len(steady_times))

[0.9291918277740479, 0.09042072296142578, 0.09307980537414551, 0.09058427810668945, 0.09247422218322754, 0.09027791023254395, 0.09222793579101562, 0.09096074104309082, 0.09542346000671387, 0.08962798118591309, 0.09578299522399902, 0.0884251594543457, 0.08983564376831055, 0.0888063907623291, 0.09282064437866211, 0.09754538536071777, 0.09033679962158203, 0.0924837589263916, 0.09730720520019531, 0.08949875831604004]
sum  Inference time(s):  216.98134064674377
mean Inference time(s):  0.09433971332467121
sum  Inference time(s) Except warm up:  215.68558979034424
mean Inference time(s) Except warm up:  0.09398064914611949


In [13]:
# Preview the first rows now that column 1 holds the predicted scores.
sab_df.head()

Unnamed: 0,0,1
0,test_0000,1.142304e-06
1,test_0001,3.371104e-06
2,test_0002,2.931375e-06
3,test_0003,7.215784e-07
4,test_0004,3.557398e-06


In [14]:
# Preview the last rows to confirm the final images were scored as well.
sab_df.tail()

Unnamed: 0,0,1
2295,test_2295,5e-06
2296,test_2296,2e-06
2297,test_2297,2.3e-05
2298,test_2298,4e-06
2299,test_2299,1e-06


In [15]:
# Save the scored submission as a tab-separated file named after the model,
# without the index and without a header row (header=False is the documented
# way to suppress the header; None only worked because it is falsy).
sab_df.to_csv('/content/drive/MyDrive/techno_pro/submit/'+model_path+'.tsv', sep='\t', index=False, header=False)

In [16]:
# Echo the path the submission file was written to.
'/content/drive/MyDrive/techno_pro/submit/'+model_path+'.tsv'

'/content/drive/MyDrive/techno_pro/submit/1641367273_efficientnetv2-b0_32batch_8epoch.tsv'

In [None]:
# Check: read the file for the current model_path back and preview it.
temp_path = f'/content/drive/MyDrive/techno_pro/submit/{model_path}.tsv'
temp_df = pd.read_csv(temp_path, sep='\t', header=None)
temp_df.head()

In [None]:
# Load a previously saved submission for comparison.
# NOTE: this rebinds the global model_path, so re-running the save cell after
# this one would write under this model's name.
model_path = '1641364557_efficientnetv2-s_16batch_5epoch' # @param ['1641311414_efficientnetv2-b0_32batch_5epoch', '1641313096_efficientnetv2-s_16batch_5epoch', '1641364557_efficientnetv2-s_16batch_5epoch']
temp_path = f'/content/drive/MyDrive/techno_pro/submit/{model_path}.tsv'
temp_df = pd.read_csv(temp_path, sep='\t', header=None)
temp_df.head()

In [None]:
# Scatter the predicted score of every row in temp_df against its row
# position. The x range follows the frame length instead of a hard-coded 2300,
# so the plot still works when the number of rows differs.
plt.figure(figsize=(20,10))
plt.scatter(range(len(temp_df)), temp_df[1])
plt.xlabel("row index")
plt.ylabel("predicted score")
plt.show()

In [None]:
# Shape of the subset of temp_df whose predicted score exceeds 0.01.
temp_df.loc[temp_df[1] > 0.01].shape

In [None]:
# Load a second previously saved submission for comparison.
# NOTE: this rebinds the global model_path, so re-running the save cell after
# this one would write under this model's name.
model_path = '1641313096_efficientnetv2-s_16batch_5epoch' # @param ['1641311414_efficientnetv2-b0_32batch_5epoch', '1641313096_efficientnetv2-s_16batch_5epoch']
temp_path = f'/content/drive/MyDrive/techno_pro/submit/{model_path}.tsv'
temp_df2 = pd.read_csv(temp_path, sep='\t', header=None)
temp_df2.head()

In [None]:
# Scatter the predicted score of every row in temp_df2 against its row
# position. The x range follows the frame length instead of a hard-coded 2300,
# so the plot still works when the number of rows differs.
plt.figure(figsize=(20,10))
plt.scatter(range(len(temp_df2)), temp_df2[1])
plt.xlabel("row index")
plt.ylabel("predicted score")
plt.show()

In [None]:
# Row and column count of the second loaded submission.
temp_df2.shape

In [None]:
# Shape of the subset of temp_df2 whose predicted score exceeds 0.01.
temp_df2.loc[temp_df2[1] > 0.01].shape

In [None]:
# Overlay both submissions on one figure, each on its own y-axis, so the
# scores can be compared even when their scales differ.
fig, ax1 = plt.subplots(figsize=(20,10))
ax2 = ax1.twinx()
# Main body of the twin-axis plot. The x ranges follow each frame's length
# rather than a hard-coded 2300.
ax1.scatter(range(len(temp_df)), temp_df[1],
        color="#5555ff", alpha=0.5, label="temp1")
ax2.scatter(range(len(temp_df2)), temp_df2[1],
        color="#ff5555", alpha=0.5, label="temp2")
ax1.set_xlabel("row index")
ax1.set_ylabel("temp1 score")
ax2.set_ylabel("temp2 score")
# The label= values above are only displayed once a legend is drawn; a
# figure-level legend collects the entries from both axes.
fig.legend()
plt.show()

In [None]:
# Scatter the element-wise mean of the two submissions' scores. The x range
# follows the length of the averaged series instead of a hard-coded 2300.
ensemble_mean = (temp_df[1]+temp_df2[1])/2
plt.figure(figsize=(20,10))
plt.scatter(range(len(ensemble_mean)), ensemble_mean)
plt.xlabel("row index")
plt.ylabel("mean predicted score")
plt.show()

In [None]:
# Average the two submissions' scores (temp_df3 is a Series, not a frame) and
# show the shape of the entries whose mean score exceeds 0.01.
temp_df3 = (temp_df[1] + temp_df2[1]) / 2
temp_df3.loc[temp_df3 > 0.01].shape