In [1]:
!pip install praw
!pip install psutil

Collecting praw
  Downloading praw-7.8.1-py3-none-any.whl.metadata (9.4 kB)
Collecting prawcore<3,>=2.4 (from praw)
  Downloading prawcore-2.4.0-py3-none-any.whl.metadata (5.0 kB)
Downloading praw-7.8.1-py3-none-any.whl (189 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.3/189.3 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading prawcore-2.4.0-py3-none-any.whl (17 kB)
Installing collected packages: prawcore, praw
Successfully installed praw-7.8.1 prawcore-2.4.0


## Imports

In [2]:
import praw
import os
import requests
import pandas as pd
import time
import json
from io import BytesIO
from PIL import Image
from pathlib import Path
from praw.models import MoreComments
from huggingface_hub import HfApi, HfFolder
from datasets import Dataset, Features, Value, Array3D, Sequence
from datasets import Image as DsImage
import numpy as np
from tqdm.notebook import tqdm
import psutil

## Configuration

In [None]:
REDDIT_CLIENT_ID = ""
REDDIT_CLIENT_SECRET = ""
REDDIT_USER_AGENT = "reddit-roomstyle-scraper:v1.0 (by /u/pawmyer)"
SUBREDDIT = "malelivingspace"
POST_LIMIT = 10_000
SLEEP = 1.2
CHECKPOINT_EVERY = 100
SAVE_PATH = "./temp/reddit_malelivingspace_scraped.parquet"
HF_REPO_ID = "partzel/reddit-malelivingspace-scraped"
HF_TOKEN = ""

In [4]:
reddit = praw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent=REDDIT_USER_AGENT
)

In [5]:
Path("temp").mkdir(exist_ok=True)
seen_ids = set()
data = []

## Scraping

Checkpointing

In [6]:
MAX_SIZE = (512, 512)

def fetch_image_bitmap(url):
    try:
        resp = requests.get(url, stream=True, timeout=5)
        resp.raise_for_status()
        img = Image.open(BytesIO(resp.content)).convert("RGB")
        img.thumbnail(MAX_SIZE)
        return img
    except Exception as e:
        print(f"[IMAGE ERROR] {url}: {e}")
        return None

In [None]:
counter = 0

batch_size = 20
batch_id = 0
batch = []
for submission in tqdm(reddit.subreddit(SUBREDDIT).new(limit=POST_LIMIT)):
    # Monitor RAM usage for Kernel problems
    print(f"RAM used: {psutil.virtual_memory().percent}%")
    
    time.sleep(SLEEP)
    if submission.id in seen_ids:
        continue

    # Image URLs
    image_urls = []
    if submission.url.endswith(('.jpg', '.jpeg', '.png')):
        image_urls = [submission.url]
    elif getattr(submission, 'is_gallery', False):
        try:
            for item in submission.media_metadata.values():
                url = item['s']['u'].replace("&amp;", "&")
                image_urls.append(url)
        except Exception:
            pass

    if not image_urls:
        continue

    # Download images
    images = [fetch_image_bitmap(url) for url in image_urls]
    images = [np.array(img).tolist() for img in images if img is not None]

    if not images:
        continue

    # Get top comments
    try:
        submission.comments.replace_more(limit=0)
        top_comments = [c.body for c in submission.comments[:5] if isinstance(c, praw.models.Comment)]
    except Exception as e:
        print(f"[COMMENT FAIL] {submission.id}: {e}")
        top_comments = []

    batch.append({
        "post_id": submission.id,
        "title": submission.title,
        "top_comments": top_comments,
        "images": images  # list of RGB arrays
    })

    seen_ids.add(submission.id)
    counter += 1

    # Checkpoint to Parquet
    features = Features({
        "post_id": Value("string"),
        "title": Value("string"),
        "top_comments": Sequence(Value("string")),
        "images": Sequence(DsImage())
    })
    
    if len(batch) == batch_size:
        ds = Dataset.from_list(batch, features=features)
        
        # Push as a new split or with a custom config
        ds.push_to_hub(HF_REPO_ID, token=HF_TOKEN, split=f"batch_{batch_id}")
        print(f"Pushed batch {batch_id}")
        
        batch.clear()
        batch_id += 1


0it [00:00, ?it/s]

RAM used: 3.4%
RAM used: 3.4%
RAM used: 3.7%
RAM used: 3.9%
RAM used: 3.9%
RAM used: 4.0%
RAM used: 4.1%
RAM used: 4.3%




RAM used: 4.4%
RAM used: 5.1%
RAM used: 5.5%
RAM used: 5.7%
RAM used: 5.8%
RAM used: 5.9%
RAM used: 5.9%
RAM used: 6.0%
RAM used: 6.8%
RAM used: 6.9%
RAM used: 7.8%
RAM used: 7.9%




Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/393 [00:00<?, ?B/s]

Pushed batch 0
RAM used: 4.7%
RAM used: 4.4%
RAM used: 4.4%
RAM used: 4.4%
RAM used: 4.4%
RAM used: 4.5%
RAM used: 4.5%
RAM used: 4.5%
RAM used: 5.2%
RAM used: 5.2%
RAM used: 5.6%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 6.0%
RAM used: 6.2%
RAM used: 6.6%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.3%
RAM used: 7.6%
RAM used: 7.8%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/504 [00:00<?, ?B/s]

Pushed batch 1
RAM used: 5.0%
RAM used: 4.5%
RAM used: 4.8%
RAM used: 5.0%
RAM used: 5.0%
RAM used: 5.3%
RAM used: 5.6%
RAM used: 6.0%
RAM used: 6.1%
RAM used: 6.4%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.6%
RAM used: 6.7%
RAM used: 6.9%
RAM used: 7.2%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.5%
RAM used: 7.7%
RAM used: 7.7%
RAM used: 7.7%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/613 [00:00<?, ?B/s]

Pushed batch 2
RAM used: 5.1%
RAM used: 4.9%
RAM used: 4.8%
RAM used: 4.9%
RAM used: 5.1%
RAM used: 5.4%
RAM used: 5.7%
RAM used: 5.8%
RAM used: 6.1%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.5%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.7%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.3%
RAM used: 7.4%
RAM used: 7.5%
RAM used: 7.5%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/722 [00:00<?, ?B/s]

Pushed batch 3
RAM used: 5.4%
RAM used: 5.0%
RAM used: 4.9%
RAM used: 5.0%
RAM used: 5.1%
RAM used: 5.1%
RAM used: 5.1%
RAM used: 5.4%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 6.0%
RAM used: 6.1%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.7%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.3%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.7%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/831 [00:00<?, ?B/s]

Pushed batch 4
RAM used: 5.3%
RAM used: 5.0%
RAM used: 5.0%
RAM used: 5.0%
RAM used: 5.0%
RAM used: 5.1%
RAM used: 5.3%
RAM used: 5.4%
RAM used: 5.6%
RAM used: 5.7%
RAM used: 6.3%
RAM used: 6.7%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.7%
RAM used: 8.0%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/940 [00:00<?, ?B/s]

Pushed batch 5
RAM used: 5.4%
RAM used: 5.1%
RAM used: 5.3%
RAM used: 5.1%
RAM used: 5.1%
RAM used: 5.1%
RAM used: 5.4%
RAM used: 5.4%
RAM used: 5.6%
RAM used: 5.7%
RAM used: 6.2%
RAM used: 6.4%
RAM used: 6.6%
RAM used: 6.8%
RAM used: 7.2%
RAM used: 7.5%
RAM used: 7.7%
RAM used: 7.7%
RAM used: 8.1%
RAM used: 8.1%
RAM used: 8.5%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

Pushed batch 6
RAM used: 6.1%
RAM used: 5.3%
RAM used: 5.3%
RAM used: 5.3%
RAM used: 5.3%
RAM used: 5.3%
RAM used: 5.2%
RAM used: 5.3%
RAM used: 5.3%
RAM used: 5.5%
RAM used: 5.5%
RAM used: 5.9%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.5%
RAM used: 7.3%
RAM used: 7.5%
RAM used: 7.4%
RAM used: 7.7%
RAM used: 7.8%
RAM used: 7.9%
RAM used: 9.1%
RAM used: 9.3%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/1.16k [00:00<?, ?B/s]

Pushed batch 7
RAM used: 5.8%
RAM used: 5.7%
RAM used: 5.6%
RAM used: 5.5%
RAM used: 5.6%
RAM used: 5.6%
RAM used: 5.6%
RAM used: 5.6%
RAM used: 5.5%
RAM used: 5.6%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 6.2%
RAM used: 6.4%
RAM used: 6.4%
RAM used: 6.7%
RAM used: 7.0%
RAM used: 7.2%
RAM used: 7.5%
RAM used: 7.8%
RAM used: 7.9%
RAM used: 8.1%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

Pushed batch 8
RAM used: 5.8%
RAM used: 5.8%
RAM used: 5.7%
RAM used: 5.6%
RAM used: 5.6%
RAM used: 5.6%
RAM used: 5.5%
RAM used: 5.7%
RAM used: 5.7%
RAM used: 5.5%
RAM used: 5.7%
RAM used: 5.7%
RAM used: 5.7%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.4%
RAM used: 6.7%
RAM used: 6.8%
RAM used: 7.0%
RAM used: 7.0%
RAM used: 7.3%
RAM used: 7.7%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

Pushed batch 9
RAM used: 6.1%
RAM used: 5.9%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 6.0%
RAM used: 5.9%
RAM used: 6.2%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.6%
RAM used: 6.5%
RAM used: 6.9%
RAM used: 7.0%
RAM used: 7.0%
RAM used: 7.7%
RAM used: 7.8%
RAM used: 8.0%
RAM used: 8.1%
RAM used: 8.1%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/1.49k [00:00<?, ?B/s]

Pushed batch 10
RAM used: 6.0%
RAM used: 6.0%
RAM used: 5.7%
RAM used: 5.7%
RAM used: 5.8%
RAM used: 5.6%
RAM used: 5.7%
RAM used: 5.8%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.5%
RAM used: 6.6%
RAM used: 6.8%
RAM used: 6.9%
RAM used: 7.3%
RAM used: 7.5%
RAM used: 7.8%
RAM used: 7.9%
RAM used: 7.9%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/1.60k [00:00<?, ?B/s]

Pushed batch 11
RAM used: 5.9%
RAM used: 5.8%
RAM used: 5.7%
RAM used: 5.8%
RAM used: 5.7%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 6.2%
RAM used: 6.4%
RAM used: 6.5%
RAM used: 6.7%
RAM used: 6.9%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.6%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/1.71k [00:00<?, ?B/s]

Pushed batch 12
RAM used: 6.1%
RAM used: 5.9%
RAM used: 5.9%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 5.8%
RAM used: 5.9%
RAM used: 5.9%
RAM used: 5.8%
RAM used: 6.0%
RAM used: 6.0%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.4%
RAM used: 6.4%
RAM used: 6.7%
RAM used: 6.9%
RAM used: 6.9%
RAM used: 7.3%
RAM used: 7.4%
RAM used: 7.6%
RAM used: 7.9%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/1.82k [00:00<?, ?B/s]

Pushed batch 13
RAM used: 6.1%
RAM used: 6.1%
RAM used: 6.0%
RAM used: 5.9%
RAM used: 5.9%
RAM used: 5.9%
RAM used: 6.0%
RAM used: 5.9%
RAM used: 5.9%
RAM used: 6.0%
RAM used: 6.1%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.5%
RAM used: 6.7%
RAM used: 6.9%
RAM used: 7.4%
RAM used: 7.6%
RAM used: 7.7%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/1.94k [00:00<?, ?B/s]

Pushed batch 14
RAM used: 6.1%
RAM used: 6.0%
RAM used: 6.0%
RAM used: 5.8%
RAM used: 5.9%
RAM used: 5.9%
RAM used: 5.9%
RAM used: 5.9%
RAM used: 6.2%
RAM used: 6.8%
RAM used: 6.9%
RAM used: 7.4%
RAM used: 7.7%
RAM used: 7.7%
RAM used: 7.7%
RAM used: 8.1%
RAM used: 8.2%
RAM used: 8.2%
RAM used: 8.2%
RAM used: 8.2%
RAM used: 8.4%
RAM used: 8.5%
RAM used: 8.8%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/2.05k [00:00<?, ?B/s]

Pushed batch 15
RAM used: 6.7%
RAM used: 6.1%
RAM used: 6.1%
RAM used: 6.0%
RAM used: 6.0%
RAM used: 6.0%
RAM used: 6.1%
RAM used: 6.0%
RAM used: 6.0%
RAM used: 6.1%
RAM used: 6.0%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.6%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.6%
RAM used: 7.7%
RAM used: 7.9%
RAM used: 8.0%
RAM used: 8.0%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/2.16k [00:00<?, ?B/s]

Pushed batch 16
RAM used: 6.7%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.2%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.4%
RAM used: 6.6%
RAM used: 6.8%
RAM used: 6.9%
RAM used: 7.1%
RAM used: 7.4%
RAM used: 7.5%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/2.27k [00:00<?, ?B/s]

Pushed batch 17
RAM used: 6.5%
RAM used: 6.3%
RAM used: 6.4%
RAM used: 6.3%
RAM used: 6.2%
RAM used: 6.2%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.2%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.7%
RAM used: 7.0%
RAM used: 7.3%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.7%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/2.38k [00:00<?, ?B/s]

Pushed batch 18
RAM used: 6.5%
RAM used: 6.3%
RAM used: 6.2%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.4%
RAM used: 6.4%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.9%
RAM used: 6.8%
RAM used: 7.1%
RAM used: 7.5%
RAM used: 7.7%
RAM used: 7.8%
RAM used: 7.8%
RAM used: 7.9%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/2.50k [00:00<?, ?B/s]

Pushed batch 19
RAM used: 6.7%
RAM used: 6.3%
RAM used: 6.2%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.2%
RAM used: 6.3%
RAM used: 6.3%
RAM used: 6.4%
RAM used: 6.6%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 7.3%
RAM used: 7.7%
RAM used: 8.2%
RAM used: 8.3%
RAM used: 8.3%
RAM used: 8.5%
RAM used: 9.2%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/2.61k [00:00<?, ?B/s]

Pushed batch 20
RAM used: 6.7%
RAM used: 6.6%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.6%
RAM used: 6.4%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.9%
RAM used: 7.0%
RAM used: 7.4%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 8.2%
RAM used: 8.6%
RAM used: 8.9%
RAM used: 9.0%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/2.72k [00:00<?, ?B/s]

Pushed batch 21
RAM used: 7.0%
RAM used: 6.7%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.7%
RAM used: 6.9%
RAM used: 6.8%
RAM used: 6.9%
RAM used: 7.0%
RAM used: 7.1%
RAM used: 7.4%
RAM used: 7.7%
RAM used: 7.9%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/2.83k [00:00<?, ?B/s]

Pushed batch 22
RAM used: 7.7%
RAM used: 6.6%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.5%
RAM used: 6.6%
RAM used: 6.7%
RAM used: 6.7%
RAM used: 6.7%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.7%
RAM used: 6.7%
RAM used: 6.7%
RAM used: 6.7%
RAM used: 6.8%
RAM used: 7.0%
RAM used: 7.1%
RAM used: 7.3%
RAM used: 7.2%
RAM used: 7.3%
RAM used: 7.8%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/2.94k [00:00<?, ?B/s]

Pushed batch 23
RAM used: 6.9%
RAM used: 6.7%
RAM used: 6.7%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.6%
RAM used: 6.7%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 7.3%
RAM used: 7.4%
RAM used: 7.7%
RAM used: 7.8%
RAM used: 7.9%
RAM used: 8.3%
RAM used: 8.6%
RAM used: 8.9%
RAM used: 8.9%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/3.06k [00:00<?, ?B/s]

Pushed batch 24
RAM used: 7.2%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.7%
RAM used: 6.7%
RAM used: 6.7%
RAM used: 6.7%
RAM used: 6.7%
RAM used: 6.7%
RAM used: 6.6%
RAM used: 6.8%
RAM used: 6.9%
RAM used: 7.3%
RAM used: 7.1%
RAM used: 7.4%
RAM used: 7.8%
RAM used: 7.9%
RAM used: 7.9%
RAM used: 8.0%
RAM used: 8.0%
RAM used: 8.2%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/3.17k [00:00<?, ?B/s]

Pushed batch 25
RAM used: 7.3%
RAM used: 7.3%
RAM used: 6.9%
RAM used: 6.9%
RAM used: 6.9%
RAM used: 6.9%
RAM used: 6.9%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.7%
[IMAGE ERROR] https://preview.redd.it/32utzsf1cnue1.png?width=4032&format=png&auto=webp&s=dbe112e42e6f0b22361673b20db03beb8a1fa585: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
RAM used: 6.9%
RAM used: 6.8%
RAM used: 7.3%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.6%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/3.28k [00:00<?, ?B/s]

Pushed batch 26
RAM used: 7.1%
RAM used: 7.0%
RAM used: 6.9%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 6.7%
RAM used: 6.8%
RAM used: 6.7%
RAM used: 6.8%
RAM used: 6.8%
RAM used: 7.0%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.3%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.7%
RAM used: 7.7%
RAM used: 8.0%
RAM used: 8.0%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/3.39k [00:00<?, ?B/s]

Pushed batch 27
RAM used: 7.0%
RAM used: 6.8%
RAM used: 6.7%
RAM used: 6.9%
RAM used: 6.9%
RAM used: 7.0%
RAM used: 7.5%
RAM used: 7.9%
RAM used: 7.9%
RAM used: 7.9%
RAM used: 8.2%
RAM used: 8.3%
RAM used: 8.3%
RAM used: 8.3%
RAM used: 8.4%
RAM used: 8.6%
RAM used: 9.4%
RAM used: 9.5%
RAM used: 9.6%
RAM used: 9.9%
RAM used: 10.4%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/3.50k [00:00<?, ?B/s]

Pushed batch 28
RAM used: 7.2%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.4%
RAM used: 7.6%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/3.62k [00:00<?, ?B/s]

Pushed batch 29
RAM used: 7.1%
RAM used: 7.0%
RAM used: 7.1%
RAM used: 7.0%
RAM used: 7.1%
RAM used: 7.0%
RAM used: 7.0%
RAM used: 7.0%
RAM used: 7.0%
RAM used: 7.0%
RAM used: 7.0%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.0%
RAM used: 7.0%
RAM used: 7.1%
RAM used: 7.3%
RAM used: 7.3%
RAM used: 7.3%
RAM used: 7.3%
RAM used: 7.5%
RAM used: 7.7%
RAM used: 8.3%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/3.73k [00:00<?, ?B/s]

Pushed batch 30
RAM used: 7.8%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.4%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.3%
RAM used: 7.8%
RAM used: 8.0%
RAM used: 8.2%
RAM used: 8.8%
[IMAGE ERROR] https://preview.redd.it/vyh8o467s2ue1.jpg?width=5712&format=pjpg&auto=webp&s=2bb839a023cd23e435bcbfa4d1afe26c1e10ce5e: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
RAM used: 8.8%
RAM used: 9.1%
RAM used: 9.2%
RAM used: 9.8%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/3.84k [00:00<?, ?B/s]

Pushed batch 31
RAM used: 7.6%
RAM used: 7.3%
RAM used: 7.3%
RAM used: 7.3%
RAM used: 7.2%
RAM used: 7.3%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.3%
RAM used: 7.2%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.4%
RAM used: 7.4%
[IMAGE ERROR] https://preview.redd.it/qckxl75xrxte1.jpg?width=5472&format=pjpg&auto=webp&s=00b8151809f066e52c041aadea0fd98f8d2ea979: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
[IMAGE ERROR] https://preview.redd.it/l46e565xrxte1.jpg?width=5472&format=pjpg&auto=webp&s=61f5eb43b189d4a7d941186376a4e5d97d2f06e5: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
RAM used: 7.4%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.7%
RAM used: 8.1%
RAM used: 8.5%
RAM used: 8.3%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/3.95k [00:00<?, ?B/s]

Pushed batch 32
RAM used: 7.4%
RAM used: 7.2%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.1%
RAM used: 7.2%
RAM used: 7.2%
RAM used: 7.4%
RAM used: 7.5%
[IMAGE ERROR] https://preview.redd.it/e6ifevduqpte1.jpg?width=5712&format=pjpg&auto=webp&s=1e5046edd446da7db01f7c0967433664601dc6f1: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
RAM used: 7.8%
RAM used: 8.1%
RAM used: 8.2%
RAM used: 8.8%
RAM used: 9.6%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/4.06k [00:00<?, ?B/s]

Pushed batch 33
RAM used: 8.0%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.3%
RAM used: 7.3%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.6%
RAM used: 7.8%
RAM used: 7.8%
[IMAGE ERROR] https://preview.redd.it/79s1u0e6knte1.jpg?width=4284&format=pjpg&auto=webp&s=275f9393ec7831dea5716700c4978ae3a9f3e3f7: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
[IMAGE ERROR] https://preview.redd.it/hghmx0e6knte1.jpg?width=4284&format=pjpg&auto=webp&s=b730e51325864046dcb17159780fa0be0aba1ad1: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
RAM used: 8.0%
RAM used: 8.3%
RAM used: 8.5%
RAM used: 8.7%
RAM used: 9.0%
RAM used: 9.1%
RAM used: 9.1%
RAM used: 9.3%
RAM used: 9.3%
RAM used: 9.3%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/4.17k [00:00<?, ?B/s]

Pushed batch 34
RAM used: 7.6%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.3%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.3%
RAM used: 7.3%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.8%
RAM used: 8.1%
RAM used: 8.2%
[IMAGE ERROR] https://preview.redd.it/8w3xy2qo2ete1.png?width=4916&format=png&auto=webp&s=ebd87ee211ba916e3b4b45c8aa855e750e7d7529: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
[IMAGE ERROR] https://preview.redd.it/wdxy7w7o2ete1.png?width=4916&format=png&auto=webp&s=819de897b69708de0f26a2326e8216f08e841546: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
RAM used: 8.2%
RAM used: 8.3%
RAM used: 8.4%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/4.29k [00:00<?, ?B/s]

Pushed batch 35
RAM used: 7.8%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
[IMAGE ERROR] https://preview.redd.it/h7726gw3rate1.jpg?width=4595&format=pjpg&auto=webp&s=215e5af0887666b3eb7711148c97f17d0cbb376e: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
RAM used: 7.3%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.3%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 8.0%
RAM used: 8.2%
RAM used: 8.2%
RAM used: 8.5%
RAM used: 8.5%
RAM used: 8.6%
RAM used: 8.8%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/4.40k [00:00<?, ?B/s]

Pushed batch 36
RAM used: 8.1%
RAM used: 7.7%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 8.0%
RAM used: 8.2%
RAM used: 8.2%
RAM used: 8.5%
RAM used: 8.6%
RAM used: 8.6%
RAM used: 8.6%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/4.51k [00:00<?, ?B/s]

Pushed batch 37
RAM used: 8.0%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.8%
RAM used: 7.9%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/4.62k [00:00<?, ?B/s]

Pushed batch 38
RAM used: 7.8%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.7%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.5%
[IMAGE ERROR] https://preview.redd.it/7xg9ebejrqse1.jpg?width=4284&format=pjpg&auto=webp&s=80ccf277a20b658ec054d15f8dc9f8f5ef385640: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
RAM used: 7.6%
RAM used: 7.8%
RAM used: 7.8%
RAM used: 8.3%
RAM used: 8.3%
RAM used: 8.6%
RAM used: 8.6%
RAM used: 8.8%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/4.74k [00:00<?, ?B/s]

Pushed batch 39
RAM used: 7.9%
RAM used: 7.8%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.8%
RAM used: 7.8%
RAM used: 7.7%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.7%
RAM used: 8.0%
RAM used: 7.9%
RAM used: 8.2%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/4.85k [00:00<?, ?B/s]

Pushed batch 40
RAM used: 7.8%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.5%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.4%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
[IMAGE ERROR] https://preview.redd.it/txbvwnjyfise1.jpg?width=8000&format=pjpg&auto=webp&s=c9ea3b2a441bfa70e244c176a43da2bd1c6a560b: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
[IMAGE ERROR] https://preview.redd.it/j4nxaijyfise1.jpg?width=8000&format=pjpg&auto=webp&s=5b0f1772d74e573707c1a0c6c5c6b01436d40e43: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
[IMAGE ERROR] https://preview.redd.it/8c38ptjyfise1.jpg?width=8000&format=pjpg&auto=webp&s=b585d27550de257b0767cb2044139543d4e56cab: HTTPSConnectionPool(host='preview.redd.it', port=443): Read timed out. (read timeout=5)
[IMAGE ERROR] https://preview.redd.it/0sl8gzjyfise1.jpg?width=6000&format=pjpg&auto=webp&s=e30ed242970b23fb20e0df7d1615cc7e9ef

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/4.96k [00:00<?, ?B/s]

Pushed batch 41
RAM used: 7.9%
RAM used: 7.9%
RAM used: 7.8%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.6%
RAM used: 7.7%
RAM used: 7.7%
RAM used: 7.7%
RAM used: 7.6%
RAM used: 7.7%
RAM used: 7.6%
RAM used: 7.5%
RAM used: 7.6%
RAM used: 8.0%
RAM used: 8.2%


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Map:   0%|          | 0/20 [00:00<?, ? examples/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/5.07k [00:00<?, ?B/s]

Pushed batch 42
RAM used: 7.9%
RAM used: 7.6%
