In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [13]:
batch_size = 32
epochs = 10
img_height = 224
img_width = 224

In [7]:

datagen = ImageDataGenerator(rescale=1/255, validation_split=0.2)

train_generator = datagen.flow_from_directory('animals',
                                               target_size=(img_height, img_width), 
                                               batch_size=batch_size, 
                                               class_mode='categorical', 
                                               subset='training')

validation_generator = datagen.flow_from_directory('animals', 
                                                    target_size=(img_height, img_width),
                                                    batch_size=batch_size,
                                                    class_mode='categorical',
                                                    subset='validation')

Found 2400 images belonging to 3 classes.
Found 600 images belonging to 3 classes.


In [9]:
# Criando CNN do zero com suas layers

model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', 
                        input_shape=(img_height, img_width, 3)))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D(2, 2))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(3, activation='softmax'))

model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 111, 111, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_4 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_3 (MaxPoolin  (None, 54, 54, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_5 (Conv2D)           (None, 52, 52, 64)        36928     
                                                                 
 flatten_1 (Flatten)         (None, 173056)           

In [11]:
model.compile(loss='categorical_crossentropy', 
              optimizer=optimizers.RMSprop(learning_rate=1e-4), 
              metrics=['acc'])

In [2]:
# Teste config GPU
from tensorflow.python.client import device_lib
import tensorflow as tf
print(tf.config.list_physical_devices('GPU'))

[]


In [14]:
model.fit(train_generator, epochs=epochs, validation_data=validation_generator)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2d3c5497fa0>

In [17]:
from sklearn.metrics import classification_report, confusion_matrix

Y_pred = model.predict(validation_generator)
y_pred = np.argmax(Y_pred, axis=1)

print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes, y_pred))
print('Classification Report')
target_names = ['cats', 'dogs', 'panda']
print(classification_report(validation_generator.classes, y_pred, target_names=target_names))



Confusion Matrix
[[72 71 57]
 [67 68 65]
 [55 78 67]]
Classification Report
              precision    recall  f1-score   support

        cats       0.37      0.36      0.37       200
        dogs       0.31      0.34      0.33       200
       panda       0.35      0.34      0.34       200

    accuracy                           0.34       600
   macro avg       0.35      0.34      0.35       600
weighted avg       0.35      0.34      0.35       600



In [3]:
# importando dados Musical_Instruments_reviews.csv
import pandas as pd
df = pd.read_csv('Musical_Instruments_reviews.csv')

df.head()

Unnamed: 0,reviewerID,asin,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime
0,A2IBPI20UZIR0U,1384719342,"cassandra tu ""Yeah, well, that's just like, u...","[0, 0]","Not much to write about here, but it does exac...",5.0,good,1393545600,"02 28, 2014"
1,A14VAT5EAX3D9S,1384719342,Jake,"[13, 14]",The product does exactly as it should and is q...,5.0,Jake,1363392000,"03 16, 2013"
2,A195EZSQDW3E21,1384719342,"Rick Bennette ""Rick Bennette""","[1, 1]",The primary job of this device is to block the...,5.0,It Does The Job Well,1377648000,"08 28, 2013"
3,A2C00NNG1ZQQG2,1384719342,"RustyBill ""Sunday Rocker""","[0, 0]",Nice windscreen protects my MXL mic and preven...,5.0,GOOD WINDSCREEN FOR THE MONEY,1392336000,"02 14, 2014"
4,A94QU4C90B1AX,1384719342,SEAN MASLANKA,"[0, 0]",This pop filter is great. It looks and perform...,5.0,No more pops when I record my vocals.,1392940800,"02 21, 2014"


In [4]:
del df['reviewTime']
del df['reviewerID']
del df['asin']
del df['reviewerName']
del df['unixReviewTime']
del df['helpful']
df.overall.value_counts()

5.0    6938
4.0    2084
3.0     772
2.0     250
1.0     217
Name: overall, dtype: int64

In [None]:
from nltk.stem import WordNetLemmatizer


In [5]:
def sentiment_rating(rating):
    # Replacing ratings of 1,2,3 with 0 (not good) and 4,5 with 1 (good)
    if(int(rating) == 1 or int(rating) == 2 or int(rating) == 3):
        return 0
    else: 
        return 1
df.overall = df.overall.apply(sentiment_rating) 

In [12]:
from nltk.corpus import stopwords
import string

stop = set(stopwords.words('english'))
punctuation = list(string.punctuation)
stop.update(punctuation)

In [15]:
from nltk.corpus import wordnet

def get_simple_pos(tag):
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN

In [25]:
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
lemmatizer = WordNetLemmatizer()
def lemmatize_words(text):
    final_text = []
    for i in text.split():
        if i.strip().lower() not in stop:
            pos = pos_tag([i.strip()])
            word = lemmatizer.lemmatize(i.strip(),get_simple_pos(pos[0][1]))
            final_text.append(word.lower())
    return " ".join(final_text)

In [23]:
df['text'] = df['reviewText'] + ' ' + df['summary']
df

Unnamed: 0,reviewText,overall,summary,text
0,"Not much to write about here, but it does exac...",1,good,"Not much to write about here, but it does exac..."
1,The product does exactly as it should and is q...,1,Jake,The product does exactly as it should and is q...
2,The primary job of this device is to block the...,1,It Does The Job Well,The primary job of this device is to block the...
3,Nice windscreen protects my MXL mic and preven...,1,GOOD WINDSCREEN FOR THE MONEY,Nice windscreen protects my MXL mic and preven...
4,This pop filter is great. It looks and perform...,1,No more pops when I record my vocals.,This pop filter is great. It looks and perform...
...,...,...,...,...
10256,"Great, just as expected. Thank to all.",1,Five Stars,"Great, just as expected. Thank to all. Five S..."
10257,I've been thinking about trying the Nanoweb st...,1,"Long life, and for some players, a good econom...",I've been thinking about trying the Nanoweb st...
10258,I have tried coated strings in the past ( incl...,1,Good for coated.,I have tried coated strings in the past ( incl...
10259,"Well, MADE by Elixir and DEVELOPED with Taylor...",1,Taylor Made,"Well, MADE by Elixir and DEVELOPED with Taylor..."


In [26]:
df.text = df.text.apply(lemmatize_words)

LookupError: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('averaged_perceptron_tagger')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtaggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle[0m

  Searched in:
    - 'C:\\Users\\Pedro/nltk_data'
    - 'c:\\Users\\Pedro\\anaconda3\\envs\\pycaret\\nltk_data'
    - 'c:\\Users\\Pedro\\anaconda3\\envs\\pycaret\\share\\nltk_data'
    - 'c:\\Users\\Pedro\\anaconda3\\envs\\pycaret\\lib\\nltk_data'
    - 'C:\\Users\\Pedro\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
**********************************************************************
