In [None]:
import numpy as np 
import pandas as pd
import random
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

import tensorflow as tf

import warnings
warnings.filterwarnings('ignore')


In [None]:
# Image shape and training hyper-parameters used by the cells below.
IMG_SIZE, CHANNELS = 224, 3
BATCH_SIZE, EPOCHS = 16, 10
SEED = 2021

# Dataset locations: root folder plus its train/ and test/ image folders.
DATA_DIR = '../input/petfinder-pawpularity-score/'
TRAIN_DIR = f'{DATA_DIR}train/'
TEST_DIR = f'{DATA_DIR}test/'

In [None]:
# Choose a tf.distribute strategy: try to attach to a TPU first and fall back
# to the default strategy (plus XLA auto-clustering) when that raises ValueError.
# NOTE(review): none of the cells visible in this notebook build a model under
# `strategy.scope()`, so the strategy is selected but not applied — confirm intent.
tpu = None
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # tf.distribute.TPUStrategy replaces the deprecated
    # tf.distribute.experimental.TPUStrategy alias.
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:
    # Enable XLA
    tf.config.optimizer.set_jit(enabled="autoclustering")
    strategy = tf.distribute.get_strategy()

In [None]:
# Read the training table and turn every image Id into the path of its JPEG.
sample_df = pd.read_csv(DATA_DIR + 'train.csv')
sample_df['Id'] = [f'{TRAIN_DIR}{image_id}.jpg' for image_id in sample_df['Id']]

# Pawpularity divided by 100: the label used when training the feature model.
sample_df['target_value'] = sample_df['Pawpularity'].div(100.)

# Dog / cat classification



In [None]:
import os

# Root of the cat/dog dataset and its two split folders.
base_dir = '../input/catvsdog/cats_and_dogs_filtered'
train_dir, validation_dir = (
    os.path.join(base_dir, split) for split in ('train', 'validation')
)

# Cat / dog image folders used for training.
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, label) for label in ('cats', 'dogs')
)
print(train_cats_dir, train_dogs_dir, sep='\n')

# Cat / dog image folders used for validation.
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, label) for label in ('cats', 'dogs')
)
print(validation_cats_dir, validation_dogs_dir, sep='\n')

In [None]:
# File names found in each training class folder; preview the first five of each.
train_cat_fnames, train_dog_fnames = os.listdir(train_cats_dir), os.listdir(train_dogs_dir)

for fnames in (train_cat_fnames, train_dog_fnames):
    print(fnames[:5])

In [None]:
import tensorflow as tf

# Binary cat/dog classifier on 150x150x3 inputs: three stages of
# (Conv2D, Conv2D, MaxPooling2D) with 16, 32 and 64 filters, followed by
# Flatten -> Dense(512, relu) -> Dense(1, sigmoid).
layers = tf.keras.layers
stack = []
for filters in (16, 32, 64):
    # Only the very first layer declares the input shape.
    first_layer_kwargs = {} if stack else {'input_shape': (150, 150, 3)}
    stack.append(layers.Conv2D(filters, (3,3), activation='relu', **first_layer_kwargs))
    stack.append(layers.Conv2D(filters, (3,3), activation='relu'))
    stack.append(layers.MaxPooling2D(2,2))
stack.append(layers.Flatten())
stack.append(layers.Dense(512, activation='relu'))
stack.append(layers.Dense(1, activation='sigmoid'))

model = tf.keras.models.Sequential(stack)

model.summary()

In [None]:
from tensorflow.keras.optimizers import RMSprop

# Binary cross-entropy matches the single sigmoid output of the classifier.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and is rejected by newer Keras optimizers.
model.compile(optimizer=RMSprop(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

학습 데이터 전처리 (개/고양이 분류)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# Both generators only rescale pixel values by 1/255; no augmentation is set.
train_datagen = ImageDataGenerator(rescale=1.0/255.)
test_datagen = ImageDataGenerator(rescale=1.0/255.)

# Options shared by the two directory iterators: batches of 20 images resized
# to 150x150, with a single binary label per image.
flow_options = dict(batch_size=20, class_mode='binary', target_size=(150, 150))

train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = test_datagen.flow_from_directory(validation_dir, **flow_options)

In [None]:
# Train the cat/dog classifier: 100 epochs of 100 training steps each, with
# 50 validation steps per epoch; verbose=2 prints one line per epoch.
fit_options = dict(steps_per_epoch=100, epochs=100, validation_steps=50, verbose=2)
history = model.fit(train_generator,
                    validation_data=validation_generator,
                    **fit_options)

In [None]:
# Persist the trained cat/dog classifier (HDF5); later cells reload it by this name.
model.save(filepath='dc_classfication_model.h5')

In [None]:
from tensorflow.keras.models import load_model

# Reload the saved cat/dog classifier into the global `model` and show its layers.
model = load_model(filepath='dc_classfication_model.h5')
model.summary()

모델 적용

In [None]:
def load_image(image_path, img_size=150, rescale=1.0 / 255.):
    """Read one JPEG and return it as a batch of one image for the cat/dog classifier.

    Args:
        image_path: Path of the JPEG file to read.
        img_size: Side length in pixels of the padded square output. Defaults
            to 150, the input size the classifier is built with.
        rescale: Factor applied to the pixel values. Defaults to 1/255, the
            same rescaling the classifier's training generators apply; pass
            1.0 to keep the raw pixel range.

    Returns:
        Array of shape (1, img_size, img_size, CHANNELS).
    """
    raw = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(raw, channels=CHANNELS)
    img = tf.image.resize_with_pad(img, img_size, img_size)
    # The classifier was trained on inputs rescaled by 1/255; without the same
    # rescaling here, inference inputs would not match what it was fit on.
    img = img * rescale
    # Prepend the batch axis expected by model.predict.
    return np.reshape(img, [-1, img_size, img_size, CHANNELS])

In [None]:
def test_predict(filepath):
    """Score a single image with the global `model`.

    Args:
        filepath: Path of the image, forwarded to `load_image`.

    Returns:
        The model's output for that image multiplied by 100, as a Python float.
    """
    local_image = load_image(filepath)
    # Called once per image, so keep the per-call progress bar out of the output.
    prediction = model.predict(local_image, verbose=0)
    # Pick the scalar explicitly: float() on an array with ndim > 0 is
    # deprecated in recent NumPy releases.
    return float(np.ravel(prediction)[0]) * 100

In [None]:
def predictions_over_image(filepath):
    """Return the `test_predict` score of every path in `filepath`, in order, as a list."""
    return [test_predict(image_path) for image_path in filepath]

In [None]:
# Score every training image path with the currently loaded model.
sample_feature = predictions_over_image(filepath=sample_df['Id'])

In [None]:
# Wrap the scores in a Series named 'dog_cat' so it can become a dataframe column.
sample_feature = pd.Series(data=sample_feature, name='dog_cat')

In [None]:
# Display the 'dog_cat' score Series.
sample_feature

CSV 데이터(sample_df)에 dog_cat 열 추가하기 — 값이 0에 가까울수록 고양이, 100에 가까울수록 개

In [None]:
# Add the classifier score as a column named after the Series ('dog_cat'),
# aligned on the index. assign() overwrites an existing column of that name,
# so re-running this cell does not append duplicate columns the way
# pd.concat(axis=1) would.
sample_df = sample_df.assign(**{sample_feature.name: sample_feature})

In [None]:
# Display the training table, now including the 'dog_cat' column.
sample_df

In [None]:
# Shuffle the rows reproducibly, then keep the first 80% for training and the
# remaining rows for validation.
sample_df = shuffle(sample_df, random_state=SEED)
train_size = int(0.8 * len(sample_df))
train_df, validation_df = sample_df.iloc[:train_size], sample_df.iloc[train_size:]
train_df.head()

In [None]:
# Build (image path, target_value) datasets for the two splits.
training_data, validation_data = (
    tf.data.Dataset.from_tensor_slices((split_df['Id'].values, split_df['target_value'].values))
    for split_df in (train_df, validation_df)
)

In [None]:
def load_image_and_label_from_path(image_path, label):
    """Load the JPEG at `image_path` as a padded IMG_SIZE x IMG_SIZE image.

    The `label` is returned unchanged next to the image so the function can be
    mapped over a dataset of (path, label) pairs.
    """
    decoded = tf.image.decode_jpeg(tf.io.read_file(image_path), channels=CHANNELS)
    return tf.image.resize_with_pad(decoded, IMG_SIZE, IMG_SIZE), label

# AUTOTUNE lets tf.data choose the value dynamically; it is used below for
# num_parallel_calls and later for the prefetch buffer size.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Replace the paths of the train and validation datasets with loaded images.
training_data, validation_data = (
    dataset.map(load_image_and_label_from_path, num_parallel_calls=AUTOTUNE)
    for dataset in (training_data, validation_data)
)

In [None]:
# Cut the train and validation data into batches for training.
# Only the training split is shuffled: shuffling validation data does not
# change its metrics, and it makes model.predict() return rows in a different
# order than validation_df.
training_data_batches = training_data.shuffle(buffer_size=1000).batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)
validation_data_batches = validation_data.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

In [None]:
# Feature model: EfficientNetB0 backbone (no top, weights read from a local
# file) followed by Dropout -> Flatten -> Dense(1, sigmoid).
effnet = tf.keras.applications.EfficientNetB0(
    include_top=False,
    classes=None,
    weights="../input/efficientnet-keras-noisystudent-weights-b0b7/noisystudent/noisy.student.notop-b0.h5",
    input_shape=(IMG_SIZE, IMG_SIZE, CHANNELS),
)
# The backbone is fine-tuned together with the new head.
effnet.trainable = True

model = tf.keras.Sequential()
for layer in (
    effnet,
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, activation='sigmoid'),
):
    model.add(layer)
model.summary()

In [None]:
# Train with binary cross-entropy on the 0-1 target and track RMSE as 'rmse'.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer='adam',
    metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')],
)

In [None]:
# Display the repr of the batched training dataset.
training_data_batches

In [None]:
# Save the weights of the epoch with the lowest validation loss while training.
# NOTE(review): the '.h0' suffix looks like a typo for '.h5'; it is kept as-is
# because the suffix affects the format the weights are written in — confirm.
best_weights_path = 'feature_model.h0'
checkpoint = tf.keras.callbacks.ModelCheckpoint(best_weights_path,
                                                verbose=1,
                                                monitor='val_loss',
                                                mode='min',
                                                save_weights_only=True,
                                                save_best_only=True)
model.fit(training_data_batches, epochs=EPOCHS, callbacks=[checkpoint],
          validation_data=validation_data_batches, verbose=1)

# After fit() the model holds the weights of the LAST epoch. Restore the best
# checkpoint so the cells below use the weights that were selected on
# validation loss; otherwise the checkpoint is written but never used.
restore_status = model.load_weights(best_weights_path)

In [None]:
def make_tabular_data(df, features, drop_features=('index', 'Id', 'Pawpularity')):
    """Append model outputs to a dataframe as a 'features' column.

    Args:
        df: Source dataframe. Its index is reset; the old index becomes an
            'index' column, which the default `drop_features` removes again.
        features: One value per row of `df`, in row order. Any array-like is
            accepted and flattened (e.g. an (n, 1) prediction array).
        drop_features: Column name, or iterable of column names, to drop from
            the result.

    Returns:
        A new dataframe with a fresh 0..n-1 index, the remaining columns of
        `df` and a trailing 'features' column. `df` itself is not modified.

    Raises:
        ValueError: If the number of feature values differs from the number of
            rows. A mismatch would otherwise be padded with NaN silently.
    """
    flat_features = np.asarray(features).reshape(-1)
    if len(flat_features) != len(df):
        raise ValueError(
            f'features has {len(flat_features)} values but df has {len(df)} rows'
        )
    # A tuple passed to DataFrame.drop would be read as ONE (multi-level)
    # label, so always hand it a list.
    if isinstance(drop_features, str):
        columns_to_drop = [drop_features]
    else:
        columns_to_drop = list(drop_features)
    feature_column = pd.Series(flat_features, name='features')
    tabular = pd.concat([df.reset_index(), feature_column], axis=1)
    return tabular.drop(columns=columns_to_drop)

In [None]:
# Predict on freshly batched, UNSHUFFLED pipelines. training_data_batches is
# built with .shuffle(), so predicting on it returns scores in a random order
# that no longer lines up row-by-row with train_df when make_tabular_data
# concatenates them. training_data / validation_data keep the dataframe order.
train_features = model.predict(training_data.batch(BATCH_SIZE))
valid_features = model.predict(validation_data.batch(BATCH_SIZE))

tabular_train = make_tabular_data(train_df, train_features)
tabular_valid = make_tabular_data(validation_df, valid_features)

In [None]:
# Separate the regression target ('target_value') from the predictor columns
# for both splits.
X_train, y_train = tabular_train.drop(columns=['target_value']), tabular_train['target_value']
X_valid, y_valid = tabular_valid.drop(columns=['target_value']), tabular_valid['target_value']

In [None]:
# Display the validation predictors that are fed to the random forest.
X_valid 

In [None]:
# Fit a random forest on the tabular predictors. The forest is seeded with the
# notebook-wide SEED so that re-running the cell on the same data gives the
# same model; without random_state every run differs.
rf_model = RandomForestRegressor(random_state=SEED)
rf_model.fit(X_train, y_train)

In [None]:
# Validation RMSE of the random forest, on the 0-1 `target_value` scale.
# Arguments are given in (y_true, y_pred) order; the value is the same either way.
np.sqrt(mean_squared_error(y_valid, rf_model.predict(X_valid)))

In [None]:
# Persist the trained feature model (HDF5); it is reloaded by this name for the test set.
model.save(filepath='regressor.h5')

In [None]:
# Read the test table and turn every image Id into the path of its JPEG.
test_df = pd.read_csv(DATA_DIR + 'test.csv')
test_df['Id'] = [f'{TEST_DIR}{image_id}.jpg' for image_id in test_df['Id']]
# Placeholder column; it is dropped again before the random forest predicts.
test_df['Pawpularity'] = 0

# Summary
print('test_df: {}'.format(test_df.shape))
test_df.head()

In [None]:
# Switch the global `model` back to the saved cat/dog classifier.
model = load_model(filepath='dc_classfication_model.h5')
model.summary()


In [None]:
def load_image(image_path, img_size=150, rescale=1.0 / 255.):
    """Read one JPEG and return it as a batch of one image for the cat/dog classifier.

    Args:
        image_path: Path of the JPEG file to read.
        img_size: Side length in pixels of the padded square output. Defaults
            to 150, the input size the classifier is built with.
        rescale: Factor applied to the pixel values. Defaults to 1/255, the
            same rescaling the classifier's training generators apply; pass
            1.0 to keep the raw pixel range.

    Returns:
        Array of shape (1, img_size, img_size, CHANNELS).
    """
    raw = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(raw, channels=CHANNELS)
    img = tf.image.resize_with_pad(img, img_size, img_size)
    # The classifier was trained on inputs rescaled by 1/255; without the same
    # rescaling here, inference inputs would not match what it was fit on.
    img = img * rescale
    # Prepend the batch axis expected by model.predict.
    return np.reshape(img, [-1, img_size, img_size, CHANNELS])
def test_predict(filepath):
    """Score a single image with the global `model`.

    Args:
        filepath: Path of the image, forwarded to `load_image`.

    Returns:
        The model's output for that image multiplied by 100, as a Python float.
    """
    local_image = load_image(filepath)
    # Called once per image, so keep the per-call progress bar out of the output.
    prediction = model.predict(local_image, verbose=0)
    # Pick the scalar explicitly: float() on an array with ndim > 0 is
    # deprecated in recent NumPy releases.
    return float(np.ravel(prediction)[0]) * 100
def predictions_over_image(filepath):
    """Return the `test_predict` score of every path in `filepath`, in order, as a list."""
    return [test_predict(image_path) for image_path in filepath]

In [None]:
# Classifier score of every test image, as a Series named 'dog_cat'.
test_feature = pd.Series(predictions_over_image(test_df['Id']), name='dog_cat')

In [None]:
# Add the classifier score as a column named after the Series ('dog_cat'),
# aligned on the index. assign() overwrites an existing column of that name,
# so re-running this cell does not append duplicate columns the way
# pd.concat(axis=1) would.
test_df = test_df.assign(**{test_feature.name: test_feature})

In [None]:
# Display the test table, now including the 'dog_cat' column.
test_df

In [None]:
# Switch the global `model` to the saved feature model.
model = load_model(filepath='regressor.h5')
model.summary()

In [None]:
def load_image(image_path):
    """Read one JPEG and return it as a (1, 224, 224, CHANNELS) array.

    The image is decoded with CHANNELS channels and resized with padding to
    224x224; pixel values are not rescaled.
    """
    side = 224
    decoded = tf.image.decode_jpeg(tf.io.read_file(image_path), channels=CHANNELS)
    padded = tf.image.resize_with_pad(decoded, side, side)
    # Prepend the batch axis expected by model.predict.
    return np.reshape(padded, [-1, side, side, CHANNELS])
def test_predict(filepath):
    """Score a single image with the global `model`.

    Args:
        filepath: Path of the image, forwarded to `load_image`.

    Returns:
        The model's output for that image multiplied by 100, as a Python float.
    """
    local_image = load_image(filepath)
    # Called once per image, so keep the per-call progress bar out of the output.
    prediction = model.predict(local_image, verbose=0)
    # Pick the scalar explicitly: float() on an array with ndim > 0 is
    # deprecated in recent NumPy releases.
    return float(np.ravel(prediction)[0]) * 100
def predictions_over_image(filepath):
    """Return the `test_predict` score of every path in `filepath`, in order, as a list."""
    return [test_predict(image_path) for image_path in filepath]

In [None]:
# Feature-model score of every test image. predictions_over_image returns the
# scores multiplied by 100, whereas the 'features' column the random forest was
# trained on comes straight from model.predict (sigmoid outputs, 0-1). Divide
# by 100 so train and test 'features' are on the same scale.
test_feature = pd.Series(predictions_over_image(test_df['Id']), name='features') / 100.
tabular_test = pd.concat([test_df.reset_index(), test_feature], axis=1)
tabular_test = tabular_test.drop(['index', 'Id', 'Pawpularity'], axis=1)

In [None]:
# Inspect the test features; only the last expression (tabular_test) is
# rendered as the cell output.
test_feature
tabular_test

In [None]:
# Empty submission frame. The columns are passed as a list: a set has no
# defined order, and newer pandas versions reject a set here.
submission = pd.DataFrame(columns=["Id", "Pawpularity"])
submission

In [None]:
# Recover the bare image id from each path: take the last '/'-separated
# component and keep the text before its first '.'.
submission['Id'] = test_df['Id'].str.split('/').str[-1].str.split('.').str[0]
submission['Id']

In [None]:
# Random-forest prediction for every test row, on the 0-1 target scale the
# forest was trained on.
submission['Pawpularity'] = rf_model.predict(X=tabular_test)

In [None]:
# Build the final submission in one step with a fixed column order
# (Id, Pawpularity). Passing a set as `columns` gives no defined order and is
# rejected by newer pandas versions.
submission = pd.DataFrame({
    # Bare image id: last path component, text before its first '.'.
    'Id': test_df['Id'].map(lambda path: path.split('/')[-1].split('.')[0]),
    # The forest predicts Pawpularity / 100; scale back to the 0-100 range.
    'Pawpularity': rf_model.predict(tabular_test) * 100,
})
submission.head()

In [None]:
# Write the submission file without the dataframe index.
submission.to_csv(path_or_buf='submission.csv', index=False)