In [72]:
import argparse
import sys
import praw
import glob
import os
import errno
import time
import shutil
import uuid

import tensorflow as tf
import numpy as np

from PIL import Image
from skimage import transform
from keras.models import load_model
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

physical_devices = tf.config.list_physical_devices('GPU') 
for gpu_instance in physical_devices: 
    tf.config.experimental.set_memory_growth(gpu_instance, True)

timestr = time.strftime("%Y%m%d-%H%M%S")
class_names = ['Gore', 'None', 'Pornographic', 'Racy']

REDDIT_NAME = "yuri_jp"
MAX_NUM_FILES = 1000
ACTIVE_SAFETY_NET_MODEL_PATH = "nvme_drive/Active Models/SafetyNet"
ACTIVE_SYMBOL_NET_MODEL_PATH = "nvme_drive/Active Models/SymbolNet"
SCRIPT_LOCATION = "scraper-scripts/reddit-scraper/scraper.py"
OUTPUT_DIRECTORY = "nvme_drive/scraped_files/" + timestr + "-reddit"
PREDICT_DIRECTORY = "nvme_drive/scraped_files/PREDICTED_" + timestr + "-reddit"

if not os.path.isdir(OUTPUT_DIRECTORY):
    print("Creating folder: " + OUTPUT_DIRECTORY)
    os.makedirs(OUTPUT_DIRECTORY)

if not os.path.isdir(PREDICT_DIRECTORY):
    print("Creating folder: " + PREDICT_DIRECTORY)
    os.makedirs(PREDICT_DIRECTORY)

COMMAND = "python3 "  + SCRIPT_LOCATION + " -l " + str(MAX_NUM_FILES) + " -p day -d " + OUTPUT_DIRECTORY + " " + REDDIT_NAME
print("Executable script is: \n" + COMMAND)

Creating folder: nvme_drive/scraped_files/20220818-230726-reddit
Creating folder: nvme_drive/scraped_files/PREDICTED_20220818-230726-reddit
Executable script is: 
python3 scraper-scripts/reddit-scraper/scraper.py -l 1000 -p day -d nvme_drive/scraped_files/20220818-230726-reddit yuri_jp


In [73]:
os.system(COMMAND)

Call this function with 'time_filter' as a keyword argument.
  hot_subreddit = reddit.subreddit(args.subreddit).top(args.period,
[gallery-dl][debug] Version 1.22.4
[gallery-dl][debug] Python 3.9.13 - Linux-5.18.10-76051810-generic-x86_64-with-glibc2.35
[gallery-dl][debug] requests 2.28.1 - urllib3 1.26.11
[1/24] https://i.redd.it/ihmrrv5jcdi91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/ihmrrv5jcdi91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/ihmrrv5jcdi91.jpg'
[urllib3.connectionpool][debug] Starting new HTTPS connection (1): i.redd.it:443
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /ihmrrv5jcdi91.jpg HTTP/1.1" 200 862974


nvme_drive/scraped_files/20220818-230726-reddit/reddit/ihmrrv5jcdi91.jpg


[2/24] https://pbs.twimg.com/media/FaSDT2gaAAEjzQ2.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://pbs.twimg.com/media/FaSDT2gaAAEjzQ2.jpg'
[twitter][debug] Using TwitterImageExtractor for 'https://pbs.twimg.com/media/FaSDT2gaAAEjzQ2.jpg'
[urllib3.connectionpool][debug] Starting new HTTPS connection (1): pbs.twimg.com:443
[urllib3.connectionpool][debug] https://pbs.twimg.com:443 "GET /media/FaSDT2gaAAEjzQ2?format=jpg&name=orig HTTP/1.1" 200 499971


nvme_drive/scraped_files/20220818-230726-reddit/twitter/FaSDT2gaAAEjzQ2.jpg
nvme_drive/scraped_files/20220818-230726-reddit/reddit/s8s5fsicjei91.jpg


[3/24] https://i.redd.it/s8s5fsicjei91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/s8s5fsicjei91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/s8s5fsicjei91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /s8s5fsicjei91.jpg HTTP/1.1" 200 478656
[4/24] https://i.redd.it/6ysmk6c4rci91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/6ysmk6c4rci91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/6ysmk6c4rci91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /6ysmk6c4rci91.jpg HTTP/1.1" 200 1995360


nvme_drive/scraped_files/20220818-230726-reddit/reddit/6ysmk6c4rci91.jpg


[5/24] https://i.redd.it/qlr44rot1di91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/qlr44rot1di91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/qlr44rot1di91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /qlr44rot1di91.jpg HTTP/1.1" 200 1045662


nvme_drive/scraped_files/20220818-230726-reddit/reddit/qlr44rot1di91.jpg
nvme_drive/scraped_files/20220818-230726-reddit/reddit/i97pu0x9mgi91.jpg


[6/24] https://i.redd.it/i97pu0x9mgi91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/i97pu0x9mgi91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/i97pu0x9mgi91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /i97pu0x9mgi91.jpg HTTP/1.1" 200 387723
[7/24] https://pbs.twimg.com/media/FaLQsdJagAMxEA8.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://pbs.twimg.com/media/FaLQsdJagAMxEA8.jpg'
[twitter][debug] Using TwitterImageExtractor for 'https://pbs.twimg.com/media/FaLQsdJagAMxEA8.jpg'
[urllib3.connectionpool][debug] https://pbs.twimg.com:443 "GET /media/FaLQsdJagAMxEA8?format=jpg&name=orig HTTP/1.1" 200 359140


nvme_drive/scraped_files/20220818-230726-reddit/twitter/FaLQsdJagAMxEA8.jpg


[8/24] https://pbs.twimg.com/media/FaFvrRAUUAAY2k3.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://pbs.twimg.com/media/FaFvrRAUUAAY2k3.jpg'
[twitter][debug] Using TwitterImageExtractor for 'https://pbs.twimg.com/media/FaFvrRAUUAAY2k3.jpg'
[urllib3.connectionpool][debug] https://pbs.twimg.com:443 "GET /media/FaFvrRAUUAAY2k3?format=jpg&name=orig HTTP/1.1" 200 556941


nvme_drive/scraped_files/20220818-230726-reddit/twitter/FaFvrRAUUAAY2k3.jpg


[9/24] https://i.redd.it/2pdkd8t5qfi91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/2pdkd8t5qfi91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/2pdkd8t5qfi91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /2pdkd8t5qfi91.jpg HTTP/1.1" 200 886764


nvme_drive/scraped_files/20220818-230726-reddit/reddit/2pdkd8t5qfi91.jpg
nvme_drive/scraped_files/20220818-230726-reddit/reddit/96oxqsh8ndi91.jpg


[10/24] https://i.redd.it/96oxqsh8ndi91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/96oxqsh8ndi91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/96oxqsh8ndi91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /96oxqsh8ndi91.jpg HTTP/1.1" 200 484757
[11/24] https://i.redd.it/6dndu2np5ci91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/6dndu2np5ci91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/6dndu2np5ci91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /6dndu2np5ci91.jpg HTTP/1.1" 200 782565


nvme_drive/scraped_files/20220818-230726-reddit/reddit/6dndu2np5ci91.jpg
nvme_drive/scraped_files/20220818-230726-reddit/reddit/rjv4a4kkbgi91.jpg


[12/24] https://i.redd.it/rjv4a4kkbgi91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/rjv4a4kkbgi91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/rjv4a4kkbgi91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /rjv4a4kkbgi91.jpg HTTP/1.1" 200 712377
[13/24] https://i.redd.it/u7dbhw2eihi91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/u7dbhw2eihi91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/u7dbhw2eihi91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /u7dbhw2eihi91.jpg HTTP/1.1" 200 933564


nvme_drive/scraped_files/20220818-230726-reddit/reddit/u7dbhw2eihi91.jpg
nvme_drive/scraped_files/20220818-230726-reddit/reddit/9cauyhav0gi91.jpg


[14/24] https://i.redd.it/9cauyhav0gi91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/9cauyhav0gi91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/9cauyhav0gi91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /9cauyhav0gi91.jpg HTTP/1.1" 200 628796
[15/24] https://i.redd.it/z107mvzegci91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/z107mvzegci91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/z107mvzegci91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /z107mvzegci91.jpg HTTP/1.1" 200 1001021


nvme_drive/scraped_files/20220818-230726-reddit/reddit/z107mvzegci91.jpg


[16/24] https://twitter.com/mangatimekirara/status/1559726832450420736
[gallery-dl][debug] Starting DownloadJob for 'https://twitter.com/mangatimekirara/status/1559726832450420736'
[twitter][debug] Using TwitterTweetExtractor for 'https://twitter.com/mangatimekirara/status/1559726832450420736'
[urllib3.connectionpool][debug] Starting new HTTPS connection (1): twitter.com:443
[urllib3.connectionpool][debug] https://twitter.com:443 "GET /i/api/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail?variables=%7B%22focalTweetId%22%3A%221559726832450420736%22%2C%22with_rux_injections%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withQuickPromoteEligibilityTweetFields%22%3Atrue%2C%22withBirdwatchNotes%22%3Afalse%2C%22includePromotedContent%22%3Afalse%2C%22withSuperFollowsUserFields%22%3Atrue%2C%22withBirdwatchPivots%22%3Afalse%2C%22withDownvotePerspective%22%3Afalse%2C%22withReactionsMetadata%22%3Afalse%2C%22withReactionsPerspective%22%3Afalse%2C%22withSuperFollowsTweetFields%22%3Atrue%2C%22withClientEve

nvme_drive/scraped_files/20220818-230726-reddit/twitter/mangatimekirara/1559726832450420736_1.jpg
nvme_drive/scraped_files/20220818-230726-reddit/twitter/FaWokHXaUAAlu1J.jpg


[17/24] https://pbs.twimg.com/media/FaWokHXaUAAlu1J.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://pbs.twimg.com/media/FaWokHXaUAAlu1J.jpg'
[twitter][debug] Using TwitterImageExtractor for 'https://pbs.twimg.com/media/FaWokHXaUAAlu1J.jpg'
[urllib3.connectionpool][debug] https://pbs.twimg.com:443 "GET /media/FaWokHXaUAAlu1J?format=jpg&name=orig HTTP/1.1" 200 545951
[18/24] https://i.redd.it/ljd1xrfzwgi91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/ljd1xrfzwgi91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/ljd1xrfzwgi91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /ljd1xrfzwgi91.jpg HTTP/1.1" 200 572671


nvme_drive/scraped_files/20220818-230726-reddit/reddit/ljd1xrfzwgi91.jpg
nvme_drive/scraped_files/20220818-230726-reddit/reddit/elvlenzm8ei91.jpg


[19/24] https://i.redd.it/elvlenzm8ei91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/elvlenzm8ei91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/elvlenzm8ei91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /elvlenzm8ei91.jpg HTTP/1.1" 200 217700
[20/24] https://twitter.com/_ugaigusuri/status/1556681553802973184
[gallery-dl][debug] Starting DownloadJob for 'https://twitter.com/_ugaigusuri/status/1556681553802973184'
[twitter][debug] Using TwitterTweetExtractor for 'https://twitter.com/_ugaigusuri/status/1556681553802973184'
[urllib3.connectionpool][debug] https://twitter.com:443 "GET /i/api/graphql/ItejhtHVxU7ksltgMmyaLA/TweetDetail?variables=%7B%22focalTweetId%22%3A%221556681553802973184%22%2C%22with_rux_injections%22%3Afalse%2C%22withCommunity%22%3Atrue%2C%22withQuickPromoteEligibilityTweetFields%22%3Atrue%2C%22withBirdwatchNotes%22%3Afalse%2C%22includePromotedContent%22%3Afalse%2C%22withSuperFollowsUserFields%22%3Atrue%

nvme_drive/scraped_files/20220818-230726-reddit/twitter/_ugaigusuri/1556681553802973184_1.jpg


[urllib3.connectionpool][debug] https://pbs.twimg.com:443 "GET /media/FZpxnrtUcAA0rNA?format=jpg&name=orig HTTP/1.1" 200 433664


nvme_drive/scraped_files/20220818-230726-reddit/twitter/_ugaigusuri/1556681553802973184_2.jpg


[urllib3.connectionpool][debug] https://pbs.twimg.com:443 "GET /media/FZpxnruUcAEnD3z?format=jpg&name=orig HTTP/1.1" 200 507668


nvme_drive/scraped_files/20220818-230726-reddit/twitter/_ugaigusuri/1556681553802973184_3.jpg
nvme_drive/scraped_files/20220818-230726-reddit/reddit/mylwvje3thi91.jpg


[21/24] https://i.redd.it/mylwvje3thi91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/mylwvje3thi91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/mylwvje3thi91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /mylwvje3thi91.jpg HTTP/1.1" 200 521520
[22/24] https://i.redd.it/8zkmwxso7hi91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/8zkmwxso7hi91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/8zkmwxso7hi91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /8zkmwxso7hi91.jpg HTTP/1.1" 200 192593


nvme_drive/scraped_files/20220818-230726-reddit/reddit/8zkmwxso7hi91.jpg


[23/24] https://pbs.twimg.com/media/FaNziC6UIAEe1i2.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://pbs.twimg.com/media/FaNziC6UIAEe1i2.jpg'
[twitter][debug] Using TwitterImageExtractor for 'https://pbs.twimg.com/media/FaNziC6UIAEe1i2.jpg'
[urllib3.connectionpool][debug] https://pbs.twimg.com:443 "GET /media/FaNziC6UIAEe1i2?format=jpg&name=orig HTTP/1.1" 200 254771


nvme_drive/scraped_files/20220818-230726-reddit/twitter/FaNziC6UIAEe1i2.jpg


[24/24] https://i.redd.it/0sy0cjwwzii91.jpg
[gallery-dl][debug] Starting DownloadJob for 'https://i.redd.it/0sy0cjwwzii91.jpg'
[reddit][debug] Using RedditImageExtractor for 'https://i.redd.it/0sy0cjwwzii91.jpg'
[urllib3.connectionpool][debug] https://i.redd.it:443 "GET /0sy0cjwwzii91.jpg HTTP/1.1" 200 809316


nvme_drive/scraped_files/20220818-230726-reddit/reddit/0sy0cjwwzii91.jpg


0

In [74]:
model = load_model(ACTIVE_SAFETY_NET_MODEL_PATH)

In [75]:
files = glob.glob(OUTPUT_DIRECTORY + '/**/*.jpg', recursive=True)
file_count = files.count

resize_and_rescale = tf.keras.Sequential([
  layers.Resizing(240, 240, crop_to_aspect_ratio=True),
  layers.Rescaling(1./255)
])

def load(filename):
    print("Loading " + filename)
    img = tf.keras.utils.load_img(filename, target_size = (240, 240))
    img_array = tf.keras.utils.img_to_array(img)
    img_array = tf.expand_dims(img_array, axis=0) # Create a batch
    img_array = tf.keras.applications.resnet50.preprocess_input(img_array)
    return img_array


for file in files:
    image = load(file)
    predictions = model.predict(image)
    score = tf.nn.softmax(predictions[0])
    class_name = class_names[np.argmax(score)];
    print("Image belongs to {} with a {:.2f} percent confidence."
    .format(class_name, 100 * np.max(score)))
    new_directory = PREDICT_DIRECTORY + "/" + class_name

    if not os.path.isdir(new_directory):
        print("Could not find " + new_directory + " creating it now.")
        os.makedirs(new_directory)

    file_name = os.path.basename(file)
    new_path = new_directory + "/" + file_name
    shutil.move(file, new_path)


Loading nvme_drive/scraped_files/20220818-230726-reddit/reddit/i97pu0x9mgi91.jpg
Image belongs to None with a 47.54 percent confidence.
Could not find nvme_drive/scraped_files/PREDICTED_20220818-230726-reddit/None creating it now.
Loading nvme_drive/scraped_files/20220818-230726-reddit/reddit/96oxqsh8ndi91.jpg
Image belongs to None with a 47.54 percent confidence.
Loading nvme_drive/scraped_files/20220818-230726-reddit/reddit/z107mvzegci91.jpg
Image belongs to None with a 47.54 percent confidence.
Loading nvme_drive/scraped_files/20220818-230726-reddit/reddit/ihmrrv5jcdi91.jpg
Image belongs to None with a 47.54 percent confidence.
Loading nvme_drive/scraped_files/20220818-230726-reddit/reddit/elvlenzm8ei91.jpg
Image belongs to None with a 47.54 percent confidence.
Loading nvme_drive/scraped_files/20220818-230726-reddit/reddit/s8s5fsicjei91.jpg
Image belongs to None with a 47.54 percent confidence.
Loading nvme_drive/scraped_files/20220818-230726-reddit/reddit/2pdkd8t5qfi91.jpg
Image be