In [6]:
import json
import requests
import tensorflow as tf
import numpy as np 
import cv2
from urllib.request import urlopen
import matplotlib.pyplot as plt
from PIL import Image
import os

In [2]:
# Simple parser for the 'games.json' file of the Steam games dataset.

def get(collumn):
    """Return the values of one column of the Steam games dataset.

    Downloads ``games.json``, parses it, and collects the ``collumn`` field of
    every app record in the order the records appear in the parsed mapping.
    Every call downloads and parses the whole file again.

    Args:
        collumn: Key to read from each app record, e.g. ``'header_image'``.

    Returns:
        list: One value per app record.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server stops responding.
        KeyError: If an app record has no ``collumn`` key.
    """
    url = "https://huggingface.co/datasets/FronkonGames/steam-games-dataset/resolve/main/games.json?download=true"
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=60)
    # Fail loudly on an error status instead of trying to parse an error page as JSON.
    response.raise_for_status()
    dataset = response.json()

    # The parsed file maps each app id to its record; only the records are needed.
    return [record[collumn] for record in dataset.values()]

In [None]:
# Image size (in pixels) handed to the dataset loader and the model further down.
img_width = 460
img_height = 215

# Fetch both columns (header-image URL first, then owner range) and pair them row by row.
url_owner_pairs = list(zip(get('header_image'), get('estimated_owners')))

images = [pair[0] for pair in url_owner_pairs]
est_owners = [pair[1] for pair in url_owner_pairs]

# Echo every pairing so the result can be checked by eye.
for image, players in url_owner_pairs:
    print(image + ": " + players)

Expected output:

https://cdn.akamai.steamstatic.com/steam/apps/20200/header.jpg?t=1640121033: 0 - 20000
https://cdn.akamai.steamstatic.com/steam/apps/655370/header.jpg?t=1617500526: 0 - 20000
https://cdn.akamai.steamstatic.com/steam/apps/1732930/header.jpg?t=1637149386: 0 - 20000
https://cdn.akamai.steamstatic.com/steam/apps/1355720/header.jpg?t=1639875115: 0 - 20000
https://cdn.akamai.steamstatic.com/steam/apps/1139950/header.jpg?t=1595003825: 0 - 20000
https://cdn.akamai.steamstatic.com/steam/apps/1469160/header.jpg?t=1617078164: 50000 - 100000

...
etc

In [None]:
# Build a tf.data pipeline whose elements are the header-image URLs collected in `images`.
dataset = tf.data.Dataset.from_tensor_slices(images)

def get(url):
    """Download one image and return it as an RGB pixel array.

    NOTE: this reuses the name ``get`` and therefore replaces the earlier
    ``get(collumn)`` dataset helper once this cell has been run.

    Args:
        url: Object holding the image URL as UTF-8 bytes behind ``.numpy()``
            (this is what ``read_image_from_url`` passes in through
            ``tf.py_function``).

    Returns:
        The image decoded by OpenCV in colour mode, converted from OpenCV's
        BGR channel order to RGB.

    Raises:
        ValueError: If the downloaded bytes cannot be decoded as an image.
    """
    address = url.numpy().decode("utf-8")
    # A timeout keeps one unresponsive host from stalling the whole pipeline.
    with urlopen(address, timeout=30) as request:
        # View the payload as uint8 without the extra bytearray copy.
        raw = np.frombuffer(request.read(), dtype=np.uint8)
    img = cv2.imdecode(raw, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode signals undecodable data by returning None; report which URL
        # was bad instead of letting cvtColor fail with an unrelated message.
        raise ValueError("Could not decode image downloaded from " + address)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def read_image_from_url(url):
    """Wrap the Python-side ``get`` downloader in a TensorFlow op that yields uint8 data."""
    return tf.py_function(func=get, inp=[url], Tout=tf.uint8)


# Turn every URL element into its downloaded image.
dataset_images = dataset.map(read_image_from_url)

# Iterating triggers the downloads; each decoded image tensor is printed in turn.
for d in dataset_images:
    print(d)


In [None]:
def download_image(url, folder, index=None):
    """Download one image into ``dataset/<folder>/<number>.jpg``.

    Args:
        url: Address of the image to download.
        folder: Name of the sub-folder of ``dataset`` to save into.
        index: Number used as the file name. When omitted, the notebook-level
            counter ``i`` is used, which is what existing callers rely on.

    Returns:
        None. On a non-200 response nothing is written and the status code is
        printed instead.
    """
    target_dir = os.path.join("dataset", folder)

    # makedirs also creates the parent "dataset" directory, which a plain
    # mkdir cannot do on a fresh run, and tolerates an existing folder.
    os.makedirs(target_dir, exist_ok=True)

    # Send GET request; the timeout keeps one dead link from blocking the loop.
    response = requests.get(url, timeout=30)

    # Save the image
    if response.status_code == 200:
        file_number = i if index is None else index
        with open(os.path.join(target_dir, str(file_number) + ".jpg"), "wb") as f:
            f.write(response.content)
    else:
        print(response.status_code)

# `download_image` names each file after the module-level counter `i`,
# so `i` has to hold the current position while the call is running.
i = 0
for position, (image, catogories) in enumerate(zip(images, est_owners)):
    i = position
    download_image(image, catogories)
    print(i)
    i = position + 1

In [None]:
# Settings shared by both subsets: the same split fraction and seed are used
# for the training and the validation call.
split_options = dict(
    validation_split=0.3,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=32,
)

train_data = tf.keras.utils.image_dataset_from_directory(
    "dataset", subset="training", **split_options
)
test_data = tf.keras.utils.image_dataset_from_directory(
    "dataset", subset="validation", **split_options
)

# Class names as reported by the training dataset.
class_name = train_data.class_names
print(class_name)

In [None]:
# Peek at a single batch to check the image and label tensor shapes.
for image_batch, label_batch in train_data.take(1):
    print(image_batch.shape)
    print(label_batch.shape)

In [None]:
# One output logit per class reported by the training dataset, instead of a
# hard-coded count that silently goes stale when the set of class folders changes.
num_classes = len(class_name)

# Small CNN: pixel rescaling, three conv/pool stages, then a dense classifier head.
model = tf.keras.Sequential([
  tf.keras.layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
  tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  # No activation here: the loss below is configured with from_logits=True.
  tf.keras.layers.Dense(num_classes)
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model.summary()

In [None]:
epochs = 10

# Train on the training subset and evaluate on the held-out subset after every epoch;
# the value returned by fit() is kept in `x`.
x = model.fit(
    train_data,
    epochs=epochs,
    validation_data=test_data,
)

Epoch 1/10
285/285 [==============================] - 150s 521ms/step - loss: 1.2986 - accuracy: 0.6707 - val_loss: 1.2551 - val_accuracy: 0.6703

Epoch 2/10
285/285 [==============================] - 143s 502ms/step - loss: 1.2055 - accuracy: 0.6726 - val_loss: 1.2788 - val_accuracy: 0.6678

Epoch 3/10
 52/285 [====>.........................] - ETA: 1:46 - loss: 1.0531 - accuracy: 0.6965

 The computer got too hot, so training had to be stopped partway through epoch 3.