## Configurar kaggle



# Copiar o arquivo kaggle.json na pasta sample_data

In [2]:
! pip install kaggle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
! mkdir ~/.kaggle

In [4]:
%cd sample_data


/content/sample_data


In [5]:
! cp kaggle.json ~/.kaggle

In [6]:
! chmod 600 ~/.kaggle/kaggle.json

In [7]:
! kaggle datasets download gpiosenka/100-bird-species


Downloading 100-bird-species.zip to /content/sample_data
 99% 1.48G/1.49G [00:22<00:00, 94.0MB/s]
100% 1.49G/1.49G [00:22<00:00, 71.3MB/s]


In [8]:
!unzip 100-bird-species.zip

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
  inflating: train/VERMILION FLYCATHER/102.jpg  
  inflating: train/VERMILION FLYCATHER/103.jpg  
  inflating: train/VERMILION FLYCATHER/104.jpg  
  inflating: train/VERMILION FLYCATHER/105.jpg  
  inflating: train/VERMILION FLYCATHER/106.jpg  
  inflating: train/VERMILION FLYCATHER/107.jpg  
  inflating: train/VERMILION FLYCATHER/108.jpg  
  inflating: train/VERMILION FLYCATHER/109.jpg  
  inflating: train/VERMILION FLYCATHER/110.jpg  
  inflating: train/VERMILION FLYCATHER/111.jpg  
  inflating: train/VERMILION FLYCATHER/112.jpg  
  inflating: train/VERMILION FLYCATHER/113.jpg  
  inflating: train/VERMILION FLYCATHER/114.jpg  
  inflating: train/VERMILION FLYCATHER/115.jpg  
  inflating: train/VERMILION FLYCATHER/116.jpg  
  inflating: train/VERMILION FLYCATHER/117.jpg  
  inflating: train/VERMILION FLYCATHER/118.jpg  
  inflating: train/VERMILION FLYCATHER/119.jpg  
  inflating: train/VERMILION FLYCATHER/120.jp

In [9]:
!pip install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow_addons
  Downloading tensorflow_addons-0.17.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 13.9 MB/s 
Installing collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.17.0


## Imports

In [10]:
import numpy as np
np.random.seed(11)

import pandas as pd 
import tensorflow as tf
import tensorflow_addons as tfa
import os
from tensorflow import keras


from tensorflow.keras import layers, optimizers, losses, metrics, callbacks, initializers
from tensorflow.keras import Sequential, Model, Input

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline

import cv2
from PIL import Image

## Preparo das imagens

In [11]:
train="/content/sample_data/train"
test="/content/sample_data/test"
valid="/content/sample_data/valid"

TRAIN = "/content/sample_data/train"
VALID = "/content/sample_data/test"
TEST = "/content/sample_data/valid"

In [12]:
from tensorflow.keras import layers

data_augmentation=keras.Sequential([
    layers.RandomFlip('vertical'),
    layers.RandomRotation(0.3,fill_mode='nearest'),
    # layers.Rescaling(scale=1.0/255)
],name='Data_Augmentation_Layer')

In [13]:
train_datagen = ImageDataGenerator(rescale=1/255,
                  horizontal_flip=True,
                  zoom_range=0.3
                  )
train_datagen


train_generator = train_datagen.flow_from_directory(
               directory=TRAIN,
               batch_size=64,
               shuffle=True,
               class_mode="categorical",
               target_size=(224, 224))
datagen= ImageDataGenerator(rescale=1./255)

    

valid_generator=datagen.flow_from_directory(
    directory=VALID,
    batch_size=64,
    shuffle=True,
    class_mode="categorical",
    target_size=(224, 224))

test_generator=datagen.flow_from_directory(
    directory=TEST,
    batch_size=64,
    shuffle=False,
    class_mode="categorical",
    target_size=(224, 224))



Found 58388 images belonging to 400 classes.
Found 2000 images belonging to 400 classes.
Found 2000 images belonging to 400 classes.


## Criando o modelo

In [16]:
from tensorflow.keras.applications.xception import Xception

base_model = Xception(include_top = False, input_shape = (224,224,3), weights = 'imagenet')
base_model.treinable = False






x = layers.GlobalMaxPooling2D()(base_model.output)
x = layers.Dense(256, 'relu', kernel_initializer='he_normal')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(400, activation='softmax', name= 'outputs')(x)

model = Model(inputs= base_model.input, outputs= [outputs])

reducer = callbacks.ReduceLROnPlateau(
    monitor='val_loss', 
    patience=1, 
    verbose=1, 
    factor=0.1)

model.compile(optimizer=optimizers.Adam(0.001), 
              metrics=[metrics.CategoricalAccuracy(name='accuracy'), tfa.metrics.F1Score(400), metrics.TopKCategoricalAccuracy(k=5)], 
              loss=losses.CategoricalCrossentropy(label_smoothing=0.1))

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 block1_conv1 (Conv2D)          (None, 111, 111, 32  864         ['input_2[0][0]']                
                                )                                                                 
                                                                                                  
 block1_conv1_bn (BatchNormaliz  (None, 111, 111, 32  128        ['block1_conv1[0][0]']           
 ation)                         )                                                           

## Treinando o modelo

In [None]:
history = model.fit(train_generator,
                    validation_data=valid_generator,
                    callbacks= reducer,
                    epochs=20)

Epoch 1/20

## Printando os resultados 

In [None]:
def plot_loss_curves(history):
    
    '''
      returns seperate loss curves for training and validation metrics
    '''
    train_loss=history.history['loss']
    val_loss=history.history['val_loss']

    train_accuracy=history.history['accuracy']
    val_accuracy=history.history['val_accuracy']

    epochs=range(1,len(history.history['loss'])+1)
    plt.figure(figsize=(20,7))
  # plot loss data
    plt.subplot(1,2,1)
    plt.plot(epochs,train_loss,label="training_loss")
    plt.plot(epochs,val_loss,label="validation_loss")
    plt.title("Loss curves")
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.legend()
  # plt.show()

  # plot accuracy data
    plt.subplot(1,2,2)
    plt.plot(epochs,train_accuracy,label="training_acc")
    plt.plot(epochs,val_accuracy,label="validation_acc")
    plt.title("Accuracy curves")
    plt.xlabel('epochs')
    plt.ylabel('Accuracy')
    plt.legend()

In [None]:
plot_loss_curves(history)

In [None]:
acc = [0.] + history.history['accuracy']
val_acc = [0.] + history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.ylabel('Cross Entropy')
plt.title('Training and Validation Loss')
plt.xlabel('epoch')
plt.show()

In [None]:
base_model = model.layers[1]
base_model.trainable = True

for  layer in base_model.layers[:-20]:
    layer.trainable = False

model.compile(optimizer = optimizers.Adam(0.0001), 
              metrics=[metrics.CategoricalAccuracy(name='accuracy'), tfa.metrics.F1Score(400), metrics.TopKCategoricalAccuracy(k=5)], 
              loss=losses.CategoricalCrossentropy(label_smoothing=0.1))

checkpoint = callbacks.ModelCheckpoint(
    filepath='birds_fine.h5',
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode = 'max')

checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)
history_fine = model.fit(train_generator,
                         epochs=30,
                         initial_epoch=history.epoch[-1],
                         validation_data=valid_generator,
                         callbacks= [cp_callback])

In [None]:
initial_epochs = 30

acc += history_fine.history['accuracy']
val_acc += history_fine.history['val_accuracy']

loss += history_fine.history['loss']
val_loss += history_fine.history['val_loss']

plt.figure(figsize=(8, 8))
plt.subplot(2, 1, 1)
plt.plot(acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.plot([initial_epochs-1,initial_epochs-1], plt.ylim(), label='Start Fine Tuning')
plt.legend(loc='lower right')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.plot([initial_epochs-1,initial_epochs-1], plt.ylim(), label='Start Fine Tuning')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.ylabel('Cross Entropy')
plt.xlabel('epoch')
plt.show()

In [None]:
model.evaluate(test_generator)

In [None]:
# Evaluate the model
evaluete = model.evaluate(test_generator, verbose=2)
loss=evaluete[0]
acc= evaluete[1]
matrixDeDecisão = evaluete[2]
top_k_categorical_accuracy= evaluete[3]


In [None]:
# Loads the weights
model.load_weights(checkpoint_path)

# Re-evaluate the model
loss,acc,matrix,top_acc=model.evaluate(test_generator, verbose=2)


In [None]:
print('Restored model, accuracy: {:5.2f}%'.format(100 * acc))


In [None]:
train="/content/sample_data/train"
test="/content/sample_data/test"
valid="/content/sample_data/valid"

In [None]:
import os
import tensorflow as tf
import glob
import pathlib
from tensorflow import keras
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.layers import Dense,Conv2D,Flatten
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input

In [None]:
def process(data):
    path=pathlib.Path(data)#converting the string to path
    filepaths=list(path.glob(r"*/*.jpg"))#Going through all the subpaths 
    labels=list(map(lambda x: os.path.split(os.path.split(x)[0])[1],filepaths))#Separating the label from filepath and storing it
    df1=pd.Series(filepaths,name='filepaths').astype(str)
    df2=pd.Series(labels,name='labels')
    df=pd.concat([df1,df2],axis=1)#Making the dataframe
    return df

In [None]:
df_train=process(train)
df_test=process(test)
df_valid=process(valid)

In [None]:
train_generator=ImageDataGenerator( preprocessing_function=preprocess_input)
test_generator=ImageDataGenerator( preprocessing_function=preprocess_input)
valid_generator=ImageDataGenerator( preprocessing_function=preprocess_input)

In [None]:
train_image=train_generator.flow_from_dataframe(dataframe=df_train,
                                                x_col='filepaths',
                                                y_col='labels',
                                                target_size=(224,224),
                                                batch_size=64,
                                                subset='training',
                                                random_seed=42)

test_image = test_generator.flow_from_dataframe(
    dataframe=df_test,
    x_col='filepaths',
    y_col='labels',
    target_size=(224,224),
    batch_size=32
)

valid_image = test_generator.flow_from_dataframe(
    dataframe=df_valid,
    x_col='filepaths',
    y_col='labels',
    subset='training',
    target_size=(224,224),
    batch_size=32)

In [None]:
classes=train_image.classes


In [None]:
images = []
labels= []
predict_label = []
for i in range(1,5):
   images, labels = test_image[i]
preds = model.predict(images)
fig,axes=plt.subplots(nrows=2,ncols=4,figsize=(20,20))
dic={i:ax for i,ax in enumerate(axes.flat)}
for i in range(0,5):
    label = np.argmax(labels[i])
    pred = np.argmax(preds[i])
    image = images[i]
    dic[i].set_title("real label: " + str(classes[label]) + " v.s " + "predictedd lable: " + str(classes[pred]))
    dic[i].imshow(image)
plt.tight_layout()    
plt.show()

In [None]:
$