# NSFW Content Moderator

### Imports

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

In [2]:
from dataholder import DataHolder
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

### Create DataHolder Object from Training Dataset

In [3]:
# Location of the training images. Set the NSFW_TRAIN_DIR environment variable
# to point at your own copy of the dataset; the fallback is the path used when
# this notebook was first written, so existing setups keep working.
TRAIN_DIR = os.environ.get(
    'NSFW_TRAIN_DIR',
    '/Users/chase/Documents/Repositories/nsfw_data_scraper/data/train',
)

# DataHolder is a project-local helper; later cells read `data.data_dir` from it.
data = DataHolder(TRAIN_DIR)

Total images: 107142
----- drawings: 13165
----- hentai: 2627
----- porn: 45278
----- sexy: 13617
----- neutral: 32455


In [5]:
# Input-pipeline settings; later cells reuse these names.
batch_size = 32
img_height, img_width = 180, 180

# Training subset: the loader holds out 20% of the files for validation and
# yields the rest, resized to (img_height, img_width) and grouped into batches.
data.train_set = tf.keras.preprocessing.image_dataset_from_directory(
    data.data_dir,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    validation_split=0.2,
    subset="training",
    seed=123,
)

Found 107142 files belonging to 5 classes.
Using 85714 files for training.


In [6]:
"""
The current dataset is split into a train and test already, so the validation split is unnecessary
"""
# Validation subset: uses the same directory, split fraction and seed as the
# training cell, but requests the held-out "validation" portion.
data.val_set = tf.keras.preprocessing.image_dataset_from_directory(
    data.data_dir,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    validation_split=0.2,
    subset="validation",
    seed=123,
)

Found 107142 files belonging to 5 classes.
Using 21428 files for validation.


### Performance Configuration

In [7]:
# Let tf.data choose the prefetch buffer size at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# NOTE(review): cache() without a filename keeps the dataset in memory; with
# this many 180x180 images that may be very large -- confirm it fits, or pass a
# cache file path.
# NOTE(review): this cell reassigns data.train_set / data.val_set from
# themselves, so re-running it without re-running the loading cells stacks
# another cache/shuffle/prefetch stage on top.
data.train_set = (
    data.train_set
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)
data.val_set = (
    data.val_set
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

### Standardize the Data

In [None]:
# Sanity check of the rescaling step on a single batch. The model built later
# has its own Rescaling layer, so `normalized_set` is only used for inspection here.
normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)


def _rescale_batch(images, labels):
    """Multiply the image batch by 1/255 and pass the labels through unchanged."""
    return normalization_layer(images), labels


normalized_set = data.train_set.map(_rescale_batch)
image_batch, labels_batch = next(iter(normalized_set))

# Print the min and max of the first image in the batch
# (expected to lie within [0, 1] if the raw pixels are 0-255 -- TODO confirm).
first_image = image_batch[0]
print(np.min(first_image), np.max(first_image))

### Create the Model

In [None]:
# Number of output units; the dataset loader reported 5 classes.
classes = 5

# Three convolution + max-pooling stages with 16, 32 and 64 filters.
feature_extractor = []
for n_filters in (16, 32, 64):
    feature_extractor.append(
        layers.Conv2D(n_filters, 3, padding='same', activation='relu')
    )
    feature_extractor.append(layers.MaxPooling2D())

model = Sequential([
    # Rescale pixel values by 1/255 inside the model, so raw images can be fed in.
    layers.experimental.preprocessing.Rescaling(
        1./255, input_shape=(img_height, img_width, 3)
    ),
    *feature_extractor,
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # No activation on the final layer: it outputs raw logits.
    layers.Dense(classes),
])

### Compile the Model

In [None]:
# The final Dense layer has no activation, so the loss is told to expect logits.
# Sparse categorical cross-entropy takes integer class labels rather than one-hot vectors.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture and parameter counts.
model.summary()

### Train the Model

In [None]:
# Number of passes over the training set; the plotting cell reuses `epochs`.
epochs = 10

# Fit on the training dataset and evaluate on the validation dataset after each
# epoch. `history.history` holds the per-epoch metrics.
history = model.fit(
    data.train_set,
    epochs=epochs,
    validation_data=data.val_set,
)

### Plot Results

In [None]:
# Per-epoch metrics recorded by model.fit() in the training cell.
accuracy = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Size the x-axis from the recorded history rather than the requested epoch
# count, so the curves still line up if training ended before `epochs` epochs.
epochs_range = range(len(accuracy))

# Two panels side by side: accuracy on the left, loss on the right.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(8, 8))

ax_acc.plot(epochs_range, accuracy, label="Training Accuracy")
ax_acc.plot(epochs_range, val_acc, label="Validation Accuracy")
ax_acc.set_xlabel("Epoch")
ax_acc.set_ylabel("Accuracy")
ax_acc.legend(loc="lower right")
ax_acc.set_title('Training and Validation Accuracy')

ax_loss.plot(epochs_range, loss, label="Training Loss")
ax_loss.plot(epochs_range, val_loss, label="Validation Loss")
ax_loss.set_xlabel("Epoch")
ax_loss.set_ylabel("Loss")
# The loss curves carry labels too, so this panel needs its own legend.
ax_loss.legend(loc="upper right")
ax_loss.set_title("Training and Validation Loss")

# Keep axis labels and titles of the two panels from overlapping.
fig.tight_layout()
plt.show()

### Overfitting