<a href="https://colab.research.google.com/github/oorora67/AI-HandsOn/blob/master/AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Google Drive　マウント

In [0]:
# Mount Google Drive at /content/gdrive; the later cells read and write
# their data under '/content/gdrive/My Drive/AI-HandsOn/'.
from google.colab import drive
drive.mount('/content/gdrive')

# **画像収集スクリプト**


In [0]:
import json
import os
import sys
import urllib
from IPython.display import Image,display_jpeg

from bs4 import BeautifulSoup
import requests
# Keyword of the images to collect. main() uses it both as the search query
# and as the name of the sub-folder the images are saved into.
Search_Name = 'カレー'
# Maximum number of images to collect (passed to Google.search as `maximum`).
Get_Number = 10

class Google:
    """Small Google image-search scraper built on a ``requests`` session.

    Result pages are fetched one at a time and image URLs are read from the
    JSON metadata embedded in elements matching ``.rg_meta.notranslate``.

    NOTE(review): this depends on the markup of Google's result page;
    confirm the selector and the ``'ou'`` key still match what is served.
    """

    def __init__(self):
        """Create the HTTP session and set a browser-like User-Agent."""
        self.GOOGLE_SEARCH_URL = 'https://www.google.co.jp/search'
        self.session = requests.session()
        self.session.headers.update(
            {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0'})

    def search(self, keyword, maximum):
        """Return up to ``maximum`` image URLs found for ``keyword``.

        Args:
            keyword: Search term.
            maximum: Upper bound on the number of URLs returned.

        Returns:
            A list of image URL strings (possibly shorter than ``maximum``).
        """
        print('begin searching', keyword)
        query = self.query_gen(keyword)
        return self.image_search(query, maximum)

    def query_gen(self, keyword):
        """Yield search URLs for ``keyword``, one per result page, endlessly.

        The page index is sent as the ``ijn`` parameter and starts at 0.
        """
        # Imported here because a bare `import urllib` does not guarantee
        # that the `urllib.parse` submodule has been loaded.
        from urllib.parse import urlencode

        page = 0
        while True:
            params = urlencode({
                'q': keyword,
                'tbm': 'isch',
                'ijn': str(page)})

            yield self.GOOGLE_SEARCH_URL + '?' + params
            page += 1

    def image_search(self, query_gen, maximum):
        """Collect image URLs page by page until ``maximum`` are found.

        Args:
            query_gen: Iterator of search-page URLs (see ``query_gen``).
            maximum: Upper bound on the number of URLs returned. Zero or a
                negative value returns an empty list without any request.

        Returns:
            A list of at most ``maximum`` image URL strings.
        """
        result = []
        # Checking the quota *before* each request avoids fetching an extra
        # page when the previous one filled the quota exactly.
        while len(result) < maximum:
            html = self.session.get(next(query_gen)).text
            soup = BeautifulSoup(html, 'lxml')
            elements = soup.select('.rg_meta.notranslate')
            image_urls = [json.loads(e.get_text())['ou'] for e in elements]

            if not image_urls:
                print('-> no more images')
                break

            # Take only as many URLs as are still missing.
            result += image_urls[:maximum - len(result)]

        print('-> found', str(len(result)), 'images')
        return result


def main():
    """Search for ``Search_Name`` images and download them to Google Drive.

    Up to ``Get_Number`` images are saved as ``0001.jpg``, ``0002.jpg``, ...
    in a sub-folder named after the keyword, and each saved image is
    displayed inline. Failures are counted and reported at the end instead
    of aborting the run.
    """
    # Imported here because a bare `import urllib` does not guarantee that
    # the `urllib.request` submodule has been loaded.
    import urllib.request

    google = Google()
    # save location
    name = Search_Name
    data_dir = '/content/gdrive/My Drive/AI-HandsOn/data/'
    save_dir = os.path.join(data_dir, name)
    os.makedirs(save_dir, exist_ok=True)  # also creates data_dir if missing

    # search image
    result = google.search(name, maximum=Get_Number)

    # download
    download_error = []
    for number, url in enumerate(result, start=1):
        label = str(number).zfill(4)
        file_path = os.path.join(save_dir, label + '.jpg')
        print('-> downloading image', label)
        try:
            urllib.request.urlretrieve(url, file_path)
            display_jpeg(Image(file_path))
        except Exception as err:
            # Best effort: record the failure and go on with the next image.
            # `Exception` (not a bare except) lets Ctrl-C still interrupt.
            print('--> could not download image', label, repr(err))
            download_error.append(number)

    print('complete download')
    print('├─ download', len(result)-len(download_error), 'images')
    print('└─ could not download', len(download_error), 'images', download_error)


# Run the image collection when this code is executed directly
# (not when it is imported as a module).
if __name__ == '__main__':
    main()

# **取得した画像から学習用データセットを作成する**

In [0]:
# Dataset creation script
# Converts the collected .jpg files to 24-bit RGB and saves them as NumPy arrays
 
#from sklearn import cross_validation
#from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from PIL import Image
import os, sys, glob
import numpy as np
 
# Location of the image folders (one sub-folder per category)
data_path ="/content/gdrive/My Drive/AI-HandsOn/data"
# The category labels are the sub-folder names, sorted so the class order is stable
files = sorted(os.listdir(data_path))
files_list = [f for f in files if os.path.isdir(os.path.join(data_path, f))]
print("path",data_path)
print("List:",files_list) 
categories = files_list
# Destination of the converted data
os.makedirs('/content/gdrive/My Drive/AI-HandsOn/data_set', exist_ok=True)
npy_file = "/content/gdrive/My Drive/AI-HandsOn/data_set/data_set.npy"

# Make sure every category directory exists
for t in categories:
    if not os.path.exists(data_path + "/" + t):
        print("ディレクトリが存在しません：", data_path + "/" + t)
        quit()

nb_classes = len(categories)
image_size = 128  # edge length (pixels) of the square every image is resized to
pixels = image_size * image_size * 3    # 3 channels because the images are RGB

# Load the images and convert them to NumPy arrays
X = []  # image data
Y = []  # one-hot label data
for idx, cat in enumerate(categories):
    label = [0] * nb_classes
    label[idx] = 1
    image_dir = data_path + "/" + cat
    # Only files with the ".jpg" extension are used
    image_paths = glob.glob(image_dir + "/*.jpg")
    for i, f in enumerate(image_paths):
        try:
            # The context manager closes the file handle once the pixels are copied
            with Image.open(f) as img:
                data = np.asarray(img.convert("RGB").resize((image_size, image_size)))
        except OSError as err:
            # The collection step saves whatever it downloads as .jpg, so a file
            # may not be a readable image; skip it instead of aborting the run.
            print("\nskipped unreadable image:", f, err)
            continue
        X.append(data)
        Y.append(label)
        # Progress display: multiply before the integer division so the
        # percentage actually advances (i // n * 100 was always 0)
        print("\r",cat, ":", (i + 1) * 100 // len(image_paths),"%",end="")
    print("\r",cat, ": 100%")

X = np.array(X)
Y = np.array(Y)

# Split into training data and test data
X_train, X_test, y_train, y_test = train_test_split(X, Y)
xy = (X_train, X_test, y_train, y_test)
# The four arrays have different shapes, so they are stored as a 1-D object
# array. Filling it element by element avoids relying on NumPy's implicit
# ragged-array conversion, which newer NumPy versions reject. The file must
# be read back with np.load(..., allow_pickle=True) on NumPy >= 1.16.3.
bundle = np.empty(len(xy), dtype=object)
for k, part in enumerate(xy):
    bundle[k] = part
np.save(npy_file, bundle)

print("ok,", len(Y), ",ファイル名：", npy_file)

# **取得した画像を使って学習を行う**

In [0]:
!pip install numpy==1.16.2

In [0]:
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.contrib.tpu.python.tpu import keras_support
from tensorflow.keras.layers import Input, Conv2D,MaxPooling2D, BatchNormalization, Activation, AveragePooling2D, Dense, Dropout, Flatten
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.utils import to_categorical
import os
# Kerasで構築したCNNモデルを用いて学習処理
import numpy as np
 
# Location of the image folders (one sub-folder per category)
data_path ="/content/gdrive/My Drive/AI-HandsOn/data"
# The category labels are the sub-folder names, sorted so the class order is stable
files = sorted(os.listdir(data_path))
files_list = [f for f in files if os.path.isdir(os.path.join(data_path, f))]
print("path",data_path)
print("List:",files_list) 
categories = files_list

os.makedirs('/content/gdrive/My Drive/AI-HandsOn/trained_data', exist_ok=True)
hdf5_file = "/content/gdrive/My Drive/AI-HandsOn/trained_data/trained_data.hdf5"
npy_file = "/content/gdrive/My Drive/AI-HandsOn/data_set/data_set.npy"


nb_classes = len(categories)
image_size = 128
print('nb_classes',nb_classes)
# Stop if a trained model already exists, so it is never overwritten silently
if os.path.exists(hdf5_file):
    print("既に学習済みモデルが存在します：", hdf5_file)
    print("新たに学習を行う場合は，上記ファイル名を変更ないしは削除してください。")
    quit()

# Load the data. The file holds an object array of four differently shaped
# arrays, which NumPy >= 1.16.3 only reads when allow_pickle=True is given.
X_train, X_test, y_train, y_test = np.load(npy_file, allow_pickle=True)
# Normalize the pixel values
X_train = X_train.astype("float") / 256
X_test  = X_test.astype("float")  / 256
print('X_train shape:', X_train.shape)

# Build the CNN model
model = Sequential()
model.add(Conv2D(32, (3, 3),padding = 'same',input_shape = X_train.shape[1:]))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
# NOTE(review): no activation follows this convolution - confirm this is intended.
model.add(Conv2D(64, (3, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes)) # one output unit per category
model.add(Activation('softmax'))
# The labels are one-hot vectors and the output is a softmax, so the matching
# loss is categorical cross-entropy. With binary cross-entropy the 'accuracy'
# metric is computed element-wise and over-states the real accuracy.
model.compile(loss='categorical_crossentropy',optimizer='rmsprop',metrics=['accuracy'])


# Train the model
model.fit(X_train, y_train, epochs=40, batch_size=64)

# Save the model weights
model.save_weights(hdf5_file, save_format="h5")
print("新しい学習モデルを保存しました：", hdf5_file)

# Evaluate the model on the held-out test data
score = model.evaluate(X_test, y_test)
print('loss=', score[0])
print('accuracy=', score[1])


# **学習した結果を使って画像を判定する**