<a href="https://colab.research.google.com/github/s1250103/Public_mori-lab/blob/confirm_label_noize_for_cm_data/eras/confirm_label/examine_KTH_datasets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#環境設定

In [295]:
%tensorflow_version 2.x

## import
# file dealing
import os
from os import path

import shutil
from google.colab import drive
import datetime
# data dealing
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
# process deasing
import gc
from time import sleep

# machine learning (back)
import tensorflow as tf
from tensorflow.keras.utils import plot_model
from tensorflow.keras import layers, models, initializers, callbacks

# machine learning
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.utils.np_utils import to_categorical

import json
from collections import OrderedDict
import pprint
import re
import requests

# 実験条件（外乱）を定める

In [296]:
LABEL_NOISE_RATE = 0
TEST_DATA_RATE = 0.25

#学習条件を定める

##学習手法の仕様

In [297]:
seed = 20201218
np.random.seed(seed)
tf.random.set_seed(seed)

learningDict = {
    "optimizer" : {
        "this.optimizer" : "sgd",
        "learning_rate" : 1e-5,
        "momentum" : 0,
        "decay" : 0.1,
        "nesterov" : False
    },
    "theWay" : {
        "batch_size" : 4,
        "epochs" : 1024,
    },
    "compared_losses" : [
                        #  tf.keras.losses.CategoricalCrossentropy(),
                        #  tf.keras.losses.MeanSquaredError(), 
                         tf.keras.losses.MeanAbsoluteError(),
                        #  tf.keras.losses.SquaredHinge()               
    ]
}

def compile_optimizer():
  # 最適化処理 (adamのみ対応)
  if learningDict["optimizer"]["this.optimizer"] == "adam":
    optimizer = keras.optimizers.Adam(
        lr=learningDict["optimizer"]["learning_rate"],
        beta_1=learningDict["optimizer"]["beta_1"],
        beta_2=learningDict["optimizer"]["beta_2"])
    print("adam is used as a optimizer")

  elif learningDict["optimizer"]["this.optimizer"] == "Nadam":
    optimizer = keras.optimizers.Nadam(
        lr=learningDict["optimizer"]["learning_rate"],
        beta_1=learningDict["optimizer"]["beta_1"],
        beta_2=learningDict["optimizer"]["beta_2"],
        epsilon=None, 
        schedule_decay=0.4)
    print("Nadam is used as a optimizer")

  elif learningDict["optimizer"]["this.optimizer"] == "sgd":
    optimizer = keras.optimizers.SGD(
        lr=learningDict["optimizer"]["learning_rate"],
        momentum=learningDict["optimizer"]["momentum"],
        decay=learningDict["optimizer"]["decay"],
        nesterov=learningDict["optimizer"]["nesterov"]) 
    print("sgd is used as a optimizer")
  else:
    print("error")
  
  return optimizer

## モデルの仕様（ニューラルネットワーク）
<ul>
  <li>入力層(フレームサイズ, フレームの高さ, フレームの横幅, RGB情報) </li>
  <li>出力層(予測値) </li>
  <li> 中間層 
    <ol>
      <li>conv0</li>
      <li>pool0</li>
      <li>conv1</li>
      <li>pool1</li>
      <li>dence0</li>
  </li>
</ui>

In [298]:
def make_model(video_format):
  # モデル作成
  model = models.Sequential()
  # 入力層
  model.add(
      layers.Reshape(
          (video_format.FRAME_SIZE,
          video_format.HEIGHT,
          video_format.WIDTH,
          video_format.COLORinfo),
          input_shape=(video_format.FRAME_SIZE * video_format.HEIGHT * video_format.WIDTH * video_format.COLORinfo,),
          name='Input_Layer' )
  )
  # 畳み込み0
  model.add(
      layers.Conv3D(
          filters=32,
          kernel_size=(3, 3, 3),
          strides=(1, 1, 1),
          padding='same',
          activation='relu',
          name='conv0'))
  # pool0
  model.add(
      layers.MaxPooling3D(pool_size=(2, 2, 2), name='pool0'))

  # 畳み込み1
  model.add(
      layers.Conv3D(
          filters=32,
          kernel_size=(3, 3, 3),
          strides=(1, 1, 1),
          padding='same',
          activation='relu',
          name='conv1'))
  # pool1
  model.add(
      layers.MaxPooling3D(pool_size=(2, 2, 2), name='pool1'))

  # dence
  model.add(
      layers.Dense(1024,
        activation='relu',
        name='dence0' ),
  )
  # 出力層
  model.add(
      layers.Dense(4, activation='softmax', name='WATERSUPPLY')
  )
  return model


##データの仕様

In [299]:
## フォーマットの設定
class video_format:
  # 想定された入力CMデータの仕様
  playtime = "15秒"
  displaysize = "(any, any, RGB)"
  videoformat = "any"
  # モデルが扱うCMデータ(上のようなデータは、下のように変換される)
  HEIGHT = 45
  WIDTH = 80
  FRAME_SIZE = 30
  COLORinfo = 3 # "RGB"
  FPS = "2 (FRAME_SIZE / playtime)" # 定義ではなく上から計算される値

# 学習データの用意

In [300]:
## gdrive 接続
if not path.exists('/content/drive'):
  drive.mount('/content/drive')
else:
  print("Already confirm")

## colab テンポラリディレクトリの作成
desk = '/content/desk'
if not os.path.exists(desk):
  os.mkdir(desk)
os.chdir(desk)
print("Created at /content/desk")

Already confirm
Created at /content/desk


In [335]:
learning_data_path = "/content/drive/MyDrive/colab/cleaned_detasets/KTH"

In [336]:
if path.isdir(learning_data_path):
  print("actually exist the", learning_data_path)
  for each_data in os.listdir(learning_data_path):
    if re.match(r"Data.*\.npz", each_data):
      print("________|------------ reading [", each_data, "] as learning data.")
      learning_data_np = np.load(path.join(learning_data_path, each_data))
    elif re.match(r"Label.*\.npz", each_data):
      print("________|------------ reading [", each_data, "] as label data.")
      label_data_np = np.load(path.join(learning_data_path, each_data))
    else:
      print("no such path")
else:
  print("no such path")


actually exist the /content/drive/MyDrive/colab/cleaned_detasets/KTH
________|------------ reading [ Data_of_KTH.npz ] as learning data.
________|------------ reading [ Label_of_KTH.npz ] as label data.
no such path


## 訓練データとテストデータとで分割

In [337]:
learning_data = []
label_data = []
for i in learning_data_np.files:
  learning_data.append(learning_data_np[i])
for i in label_data_np.files:
  label_data.append(label_data_np[i])

# learning_data = np.array(learning_data, dtype=object)
# label_data = np.array(label_data)

In [338]:
learning_data[0]

array([0.5529412 , 0.5529412 , 0.5529412 , ..., 0.47843137, 0.47843137,
       0.47843137], dtype=float32)

In [333]:
a = np.array(learning_data)
a.shape

  """Entry point for launching an IPython kernel.


(400,)

In [312]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(learning_data, label_data, random_state=20200120, train_size=(1-TEST_DATA_RATE))

In [313]:
type(X_train)

numpy.ndarray

## テストデータに意図的なノイズを加える(実験のために)

In [314]:
import random
def rand_ints_nodup(a, b, k):
  ns = []
  while len(ns) < k:
    n = random.randint(a, b)
    if not n in ns:
      ns.append(n)
  return ns
def changed_number(original_num, set_min, set_max):
  while True:
    tmpRndVal = random.randint(set_min, set_max)
    if original_num != tmpRndVal:
      return tmpRndVal

In [315]:
changed_label_number_for_experiment = int(len(Y_test) * LABEL_NOISE_RATE)
change_points = rand_ints_nodup(0, len(Y_test)-1, changed_label_number_for_experiment )
print("change map:", sorted(change_points))
print("the size:", len(change_points))

set_min = np.min(Y_test)
set_max = np.max(Y_test)
for i in range(len(Y_test)):
  if i in change_points:
    print("No.", i, ", original number is", Y_test[i], end=" -> ")
    Y_test[i] = changed_number(Y_test[i], set_min, set_max)
    print("changed number is", Y_test[i])
  else:
    print("error")
    break
  

change map: []
the size: 0
error


## 適切な形に処理

# 学習開始

In [319]:
X_train

(300,)

In [317]:
X_train = np.asarray(X_train).astype(np.float32)

ValueError: ignored

In [316]:
obj_video_format = video_format()

histories = []
for i, each_loss in enumerate(learningDict["compared_losses"]):
  print(each_loss)
  # print(re.search(r))

  # モデル構築
  model = make_model(obj_video_format)
  model.compile(
        optimizer=compile_optimizer(),
        loss=each_loss,
        metrics=['acc'])
  # 実行

  history = model.fit(
        X_train, Y_train,
        # a, b,
        # validation_data=(X_test, Y_test),
        batch_size=learningDict["theWay"]["batch_size"],
        epochs=10
        # verbose=0
        )
  histories.append(history)
  print("Complete.")

<tensorflow.python.keras.losses.MeanAbsoluteError object at 0x7f8663fd7b38>
sgd is used as a optimizer


ValueError: ignored

In [178]:
  sleep(10)
  del model
  keras.backend.clear_session()
  gc.collect()

5744

In [176]:
learningDict["theWay"]["epochs"]

1024

In [172]:
m = re.search(r'\d+', s)

print(m.group())

[<tensorflow.python.keras.losses.MeanAbsoluteError at 0x7f8668403358>]