In [18]:
# from dotenv import load_dotenv
import os

from google.colab import userdata
# HF_TOKEN = userdata.get('HF_TOKEN')
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

# load_dotenv()
# huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [3]:
from google.colab import drive
import os
import zipfile
from tqdm import tqdm
# from dotenv import load_dotenv
from openai import OpenAI
# from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

In [11]:
# ========== Mount Google Drive ==========
drive.mount('/content/drive')

# ========== Path Configuration ==========
# Update these paths according to your Google Drive structure
DRIVE_BASE = '/content/drive/MyDrive/'
ZIP_PATH = "./data/elmundo_chunked_es_page1_40years.zip"
EXTRACT_DIR = '/tmp/extracted'  # Using tmp for faster I/O
OUTPUT_DIR = os.path.join(DRIVE_BASE, 'cleaned_articles1')

# Create directories
os.makedirs(EXTRACT_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
ZIP_PATH = "./data/elmundo_chunked_es_page1_40years.zip"
EXTRACT_DIR = '/tmp/extracted'  # Using tmp for faster I/O
os.makedirs(EXTRACT_DIR, exist_ok=True)

In [6]:
# ========== File Extraction ==========
def extract_files():
    with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
        # Extract nested structure
        for file in zip_ref.namelist():
            if file.endswith('.txt'):
                zip_ref.extract(file, EXTRACT_DIR)
    print("*" * 50)
    print(f"Extracted files to: {EXTRACT_DIR}")

1. Extract zip file
2. open the folder
3. For each file in folder,
    read the content and extract the text
    <!-- chunk the text into 1000 words -->
    <!-- pass chunks to the model so it can fix the spelling -->
    translate the corrected text to english
    <!-- add the file name and the corrected text to a dictionary -->
    save the corrected text to a new file
4. Save the dictionary to a pkl file
5.


In [19]:
from openai import OpenAI
client = OpenAI(
    api_key=OPENAI_API_KEY
)

def correct_with_openai(text, filename, just_text = True, max_completion_tokens = 2048, temperature = 1, top_p = 1, frequency_penalty=0, presence_penalty=0,**kwargs):
  response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
      {
        "role": "user",
        "content": [
          {
            "type": "text",
            "text": f"Eres un experto en documentos históricos de Puerto Rico. El texto en español son noticias del siglo XX y contiene muchos errores a causa del OCR. Descifra el contenido y tradúcelo al inglés:\n1. Preserva nombres propios (ej: Mayagüez, Caguas)\n2. Ignora el \"header\" (ej:\n```EL MUNDO\nPRONOSTICOS DEL TIEMPO PARA LA ISLA, HOY: Mayormente nublado, con aguaceros dispersos temprano en la mafiana. EN SAN JUAN. AYER: Temperatura máxima. 80; mínima, 77. Presión barométrica al nivel del mar, a las 4:80 de la tarde. 38.88 pulgadas de mercurio. No hay indicios de disturbio tropical.\n40 páginas 5/\nDIARIO DE LA MARANA\nAÑO XXVIII\nEntered aa second clsss matter, Post Office, San Juan, P. R.)```\n3. Ignora los anuncios\n4. Solo mantén contenido relacionado a Puerto Rico (especialmente sobre ciudades, locaciones o eventos históricos)\n5. Traduce el texto a inglés. Solo mantén los datos mas importantes\n6.  Lista las ciudades o locaciones de Puerto Rico mencionadas\n7. Escribe solo en texto (no uses **negrillas** ni *itálicas* ni nada en markdown)\n8. return it as a JSON object with two fields:\n    - 'metadata': un diccionario con la siguiente informacion: 'filename' (nombre del articulo), 'date' (fecha del articulo), 'locations' (lista de las ciudades o locaciones de Puerto Rico mencionadas).\n    - 'text': the corrected and summarized text in English.\n8. No digas nada mas ni preguntes más. El nombre del articulo es {filename}. Usa el siguiente texto: {text}"
          }
        ]
      }
    ],
    response_format={
      "type": "json_object"
    },
    temperature=temperature,
    max_completion_tokens=max_completion_tokens,
    top_p=top_p,
    frequency_penalty=frequency_penalty,
    presence_penalty=presence_penalty,
    **kwargs
  )
  if just_text:
    return response.choices[0].message.content

  return response

In [8]:
from datetime import datetime
import pickle as pkl

def save_progress(data, filename="all_docs.pkl"):
    """ Save the current state of data to Google Drive. """
    save_path = os.path.join(OUTPUT_DIR, filename)

    with open(save_path, 'wb') as f:
        pkl.dump(data, f)

    print(f"Progress saved at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} to {save_path}")

In [9]:
PROGRESS_FILE = os.path.join(OUTPUT_DIR, "processed_files.log")

def get_processed_files():
    if os.path.exists(PROGRESS_FILE):
        with open(PROGRESS_FILE, 'r') as f:
            return set(f.read().splitlines())
    return set()

def update_progress(filename):
    with open(PROGRESS_FILE, 'a') as f:
        f.write(f"{filename}\n")

In [26]:
# ========== Processing Pipeline ==========
import json
import pickle as pkl
from langchain.docstore.document import Document
import time

# Save progress every 15 minutes
interval_minutes = 15

def process_files():
    extract_files()

    all_docs = [] # for storing all the documents

    # Track when the last save occurred
    last_save_time = time.time()
    processed = get_processed_files()

    # Get all text files from nested directory
    base_dir = os.path.join(EXTRACT_DIR, "elmundo_chunked_es_page1_40years")
    txt_files = [f for f in os.listdir(base_dir) if f.endswith('.txt')]

    for filename in tqdm(txt_files, desc="Processing files"):

        if filename in processed:
            # Skip already processed files
            continue

        input_path = os.path.join(base_dir, filename)
        output_path = os.path.join(OUTPUT_DIR, f"cleaned_{filename}")

        with open(input_path, 'r', encoding='utf-8', errors='ignore') as f: # open current text file
            raw_text = f.read()

        try:
            # gets gpt-4o-mini JSON object with 'metadata' and 'text' fields:
            json_object = json.loads(correct_with_openai(raw_text, filename))  # OpenAI version

            cleaned_text = json_object['text']  # get the text from the gpt-4o-mini model

            with open(output_path, 'w', encoding='utf-8') as f: # save text on google drive
                f.write(cleaned_text)

            print(f"Processed: {filename} -> Saved to Drive")

            doc = Document(                           # convert text to a langchain text object (for use on Chroma later)
                page_content=json_object['text'],
                metadata=json_object['metadata']
            )
            all_docs.append(doc)                      # append docs to list

            # Update the processed log
            update_progress(filename)

        except Exception as e:
            print(f"Error processing {filename}: {str(e)}")
            continue

        current_time = time.time()
        if (current_time - last_save_time) >= (interval_minutes * 60):
            save_progress(all_docs)
            last_save_time = current_time  # Update the last save time

    # Save all_docs as pkl file
    with open(os.path.join(OUTPUT_DIR, "all_docs.pkl"), 'wb') as f:
        pkl.dump(all_docs, f)

    with open("all_docs.pkl", 'wb') as f:
        pkl.dump(all_docs, f)

    return all_docs

In [27]:
all_docs = process_files()

**************************************************
Extracted files to: /tmp/extracted


Processing files:   0%|          | 1/1668 [00:06<2:55:24,  6.31s/it]

Processed: 19220527_1.txt -> Saved to Drive


Processing files:   0%|          | 2/1668 [00:12<2:59:48,  6.48s/it]

Processed: 19470118_1.txt -> Saved to Drive


Processing files:   0%|          | 3/1668 [00:16<2:29:31,  5.39s/it]

Processed: 19291228_1.txt -> Saved to Drive


Processing files:   0%|          | 4/1668 [00:22<2:27:25,  5.32s/it]

Processed: 19300705_1.txt -> Saved to Drive


Processing files:   0%|          | 5/1668 [00:26<2:16:25,  4.92s/it]

Processed: 19440722_1.txt -> Saved to Drive


Processing files:   0%|          | 6/1668 [00:31<2:18:04,  4.98s/it]

Processed: 19211119_1.txt -> Saved to Drive


Processing files:   0%|          | 7/1668 [00:59<5:42:01, 12.35s/it]

Processed: 19250808_1.txt -> Saved to Drive


Processing files:   0%|          | 8/1668 [01:05<4:52:51, 10.59s/it]

Processed: 19450616_1.txt -> Saved to Drive


Processing files:   1%|          | 9/1668 [01:10<4:03:06,  8.79s/it]

Processed: 19210305_1.txt -> Saved to Drive


Processing files:   1%|          | 10/1668 [01:16<3:37:51,  7.88s/it]

Processed: 19501111_1.txt -> Saved to Drive


Processing files:   1%|          | 11/1668 [01:23<3:32:58,  7.71s/it]

Processed: 19430821_1.txt -> Saved to Drive


Processing files:   1%|          | 12/1668 [01:29<3:15:22,  7.08s/it]

Processed: 19321008_1.txt -> Saved to Drive


Processing files:   1%|          | 13/1668 [01:34<2:59:46,  6.52s/it]

Processed: 19321203_1.txt -> Saved to Drive


Processing files:   1%|          | 14/1668 [01:42<3:08:01,  6.82s/it]

Processed: 19480717_1.txt -> Saved to Drive


Processing files:   1%|          | 15/1668 [01:49<3:14:54,  7.07s/it]

Processed: 19310912_1.txt -> Saved to Drive


Processing files:   1%|          | 16/1668 [01:54<2:53:40,  6.31s/it]

Processed: 19320430_1.txt -> Saved to Drive


Processing files:   1%|          | 17/1668 [02:00<2:50:40,  6.20s/it]

Processed: 19210910_1.txt -> Saved to Drive


Processing files:   1%|          | 18/1668 [02:05<2:37:21,  5.72s/it]

Processed: 19320813_1.txt -> Saved to Drive


Processing files:   1%|          | 19/1668 [02:15<3:14:53,  7.09s/it]

Processed: 19351221_1.txt -> Saved to Drive


Processing files:   1%|          | 20/1668 [02:22<3:16:10,  7.14s/it]

Processed: 19431120_1.txt -> Saved to Drive


Processing files:   1%|▏         | 21/1668 [02:27<2:55:59,  6.41s/it]

Processed: 19340929_1.txt -> Saved to Drive


Processing files:   1%|▏         | 22/1668 [02:32<2:45:47,  6.04s/it]

Processed: 19450922_1.txt -> Saved to Drive


Processing files:   1%|▏         | 23/1668 [02:36<2:32:58,  5.58s/it]

Processed: 19231222_1.txt -> Saved to Drive


Processing files:   1%|▏         | 24/1668 [02:42<2:33:11,  5.59s/it]

Processed: 19511215_1.txt -> Saved to Drive


Processing files:   1%|▏         | 25/1668 [02:48<2:39:29,  5.82s/it]

Processed: 19410503_1.txt -> Saved to Drive


Processing files:   2%|▏         | 26/1668 [02:55<2:45:31,  6.05s/it]

Processed: 19410315_1.txt -> Saved to Drive


Processing files:   2%|▏         | 27/1668 [03:00<2:35:47,  5.70s/it]

Processed: 19450317_1.txt -> Saved to Drive


Processing files:   2%|▏         | 28/1668 [03:06<2:41:28,  5.91s/it]

Processed: 19240913_1.txt -> Saved to Drive


Processing files:   2%|▏         | 29/1668 [03:14<2:52:14,  6.31s/it]

Processed: 19410118_1.txt -> Saved to Drive


Processing files:   2%|▏         | 30/1668 [03:22<3:10:35,  6.98s/it]

Processed: 19440129_1.txt -> Saved to Drive


Processing files:   2%|▏         | 31/1668 [03:29<3:13:03,  7.08s/it]

Processed: 19410607_1.txt -> Saved to Drive


Processing files:   2%|▏         | 32/1668 [03:35<2:59:47,  6.59s/it]

Processed: 19450929_1.txt -> Saved to Drive


Processing files:   2%|▏         | 33/1668 [03:42<3:00:29,  6.62s/it]

Processed: 19200306_1.txt -> Saved to Drive


Processing files:   2%|▏         | 34/1668 [03:45<2:36:34,  5.75s/it]

Processed: 19420103_1.txt -> Saved to Drive


Processing files:   2%|▏         | 35/1668 [03:53<2:51:01,  6.28s/it]

Processed: 19460518_1.txt -> Saved to Drive


Processing files:   2%|▏         | 36/1668 [03:58<2:43:28,  6.01s/it]

Processed: 19430109_1.txt -> Saved to Drive


Processing files:   2%|▏         | 37/1668 [04:04<2:39:21,  5.86s/it]

Processed: 19341208_1.txt -> Saved to Drive


Processing files:   2%|▏         | 38/1668 [04:09<2:36:03,  5.74s/it]

Processed: 19340203_1.txt -> Saved to Drive


Processing files:   2%|▏         | 39/1668 [04:14<2:31:59,  5.60s/it]

Processed: 19400622_1.txt -> Saved to Drive


Processing files:   2%|▏         | 40/1668 [04:20<2:28:32,  5.47s/it]

Processed: 19301101_1.txt -> Saved to Drive


Processing files:   2%|▏         | 41/1668 [04:27<2:40:38,  5.92s/it]

Processed: 19500902_1.txt -> Saved to Drive


Processing files:   3%|▎         | 42/1668 [04:37<3:21:21,  7.43s/it]

Processed: 19440923_1.txt -> Saved to Drive


Processing files:   3%|▎         | 43/1668 [04:44<3:12:13,  7.10s/it]

Processed: 19220318_1.txt -> Saved to Drive


Processing files:   3%|▎         | 44/1668 [04:49<2:56:54,  6.54s/it]

Processed: 19400309_1.txt -> Saved to Drive


Processing files:   3%|▎         | 45/1668 [04:54<2:40:11,  5.92s/it]

Processed: 19320213_1.txt -> Saved to Drive


Processing files:   3%|▎         | 46/1668 [05:02<2:58:12,  6.59s/it]

Processed: 19510714_1.txt -> Saved to Drive


Processing files:   3%|▎         | 47/1668 [05:10<3:12:31,  7.13s/it]

Processed: 19311024_1.txt -> Saved to Drive


Processing files:   3%|▎         | 48/1668 [05:20<3:36:21,  8.01s/it]

Processed: 19460921_1.txt -> Saved to Drive


Processing files:   3%|▎         | 49/1668 [05:26<3:22:00,  7.49s/it]

Processed: 19380910_1.txt -> Saved to Drive


Processing files:   3%|▎         | 50/1668 [05:35<3:33:11,  7.91s/it]

Processed: 19420509_1.txt -> Saved to Drive


Processing files:   3%|▎         | 51/1668 [05:42<3:24:15,  7.58s/it]

Processed: 19321126_1.txt -> Saved to Drive


Processing files:   3%|▎         | 52/1668 [05:52<3:43:51,  8.31s/it]

Processed: 19470301_1.txt -> Saved to Drive


Processing files:   3%|▎         | 53/1668 [06:01<3:50:46,  8.57s/it]

Processed: 19450901_1.txt -> Saved to Drive


Processing files:   3%|▎         | 54/1668 [06:04<3:06:38,  6.94s/it]

Processed: 19280609_1.txt -> Saved to Drive


Processing files:   3%|▎         | 55/1668 [06:10<2:53:35,  6.46s/it]

Processed: 19270723_1.txt -> Saved to Drive


Processing files:   3%|▎         | 56/1668 [06:14<2:35:43,  5.80s/it]

Processed: 19240816_1.txt -> Saved to Drive


Processing files:   3%|▎         | 57/1668 [06:18<2:22:57,  5.32s/it]

Processed: 19490305_1.txt -> Saved to Drive


Processing files:   3%|▎         | 58/1668 [06:23<2:20:59,  5.25s/it]

Processed: 19420829_1.txt -> Saved to Drive


Processing files:   4%|▎         | 59/1668 [06:29<2:26:48,  5.47s/it]

Processed: 19361024_1.txt -> Saved to Drive


Processing files:   4%|▎         | 60/1668 [06:35<2:29:39,  5.58s/it]

Processed: 19471206_1.txt -> Saved to Drive


Processing files:   4%|▎         | 61/1668 [06:42<2:36:01,  5.83s/it]

Processed: 19520517_1.txt -> Saved to Drive


Processing files:   4%|▎         | 62/1668 [06:49<2:49:29,  6.33s/it]

Processed: 19470524_1.txt -> Saved to Drive


Processing files:   4%|▍         | 63/1668 [06:53<2:33:15,  5.73s/it]

Processed: 19321015_1.txt -> Saved to Drive


Processing files:   4%|▍         | 64/1668 [07:07<3:39:15,  8.20s/it]

Processed: 19440916_1.txt -> Saved to Drive


Processing files:   4%|▍         | 65/1668 [07:17<3:48:46,  8.56s/it]

Processed: 19411122_1.txt -> Saved to Drive


Processing files:   4%|▍         | 66/1668 [07:21<3:14:59,  7.30s/it]

Processed: 19520202_1.txt -> Saved to Drive


Processing files:   4%|▍         | 67/1668 [07:29<3:22:18,  7.58s/it]

Processed: 19480605_1.txt -> Saved to Drive


Processing files:   4%|▍         | 68/1668 [07:34<3:01:38,  6.81s/it]

Processed: 19330401_1.txt -> Saved to Drive


Processing files:   4%|▍         | 69/1668 [07:41<2:59:09,  6.72s/it]

Processed: 19301108_1.txt -> Saved to Drive


Processing files:   4%|▍         | 70/1668 [07:45<2:41:57,  6.08s/it]

Processed: 19230602_1.txt -> Saved to Drive


Processing files:   4%|▍         | 71/1668 [07:51<2:35:38,  5.85s/it]

Processed: 19210806_1.txt -> Saved to Drive


Processing files:   4%|▍         | 72/1668 [07:56<2:27:01,  5.53s/it]

Processed: 19281215_1.txt -> Saved to Drive


Processing files:   4%|▍         | 73/1668 [08:02<2:33:16,  5.77s/it]

Processed: 19240531_1.txt -> Saved to Drive


Processing files:   4%|▍         | 74/1668 [08:08<2:35:01,  5.84s/it]

Processed: 19461123_1.txt -> Saved to Drive


Processing files:   4%|▍         | 75/1668 [08:12<2:21:13,  5.32s/it]

Processed: 19261218_1.txt -> Saved to Drive


Processing files:   5%|▍         | 76/1668 [08:17<2:18:21,  5.21s/it]

Processed: 19310207_1.txt -> Saved to Drive


Processing files:   5%|▍         | 77/1668 [08:22<2:16:22,  5.14s/it]

Processed: 19310718_1.txt -> Saved to Drive


Processing files:   5%|▍         | 78/1668 [08:32<2:56:48,  6.67s/it]

Processed: 19461207_1.txt -> Saved to Drive


Processing files:   5%|▍         | 79/1668 [08:37<2:42:17,  6.13s/it]

Processed: 19330527_1.txt -> Saved to Drive


Processing files:   5%|▍         | 80/1668 [08:42<2:35:35,  5.88s/it]

Processed: 19280107_1.txt -> Saved to Drive


Processing files:   5%|▍         | 81/1668 [08:47<2:22:30,  5.39s/it]

Processed: 19431218_1.txt -> Saved to Drive


Processing files:   5%|▍         | 82/1668 [08:58<3:07:37,  7.10s/it]

Processed: 19460525_1.txt -> Saved to Drive


Processing files:   5%|▍         | 83/1668 [09:04<3:04:26,  6.98s/it]

Processed: 19300614_1.txt -> Saved to Drive


Processing files:   5%|▌         | 84/1668 [09:10<2:50:00,  6.44s/it]

Processed: 19301220_1.txt -> Saved to Drive


Processing files:   5%|▌         | 85/1668 [09:14<2:37:48,  5.98s/it]

Processed: 19380129_1.txt -> Saved to Drive


Processing files:   5%|▌         | 86/1668 [09:31<3:58:33,  9.05s/it]

Processed: 19510303_1.txt -> Saved to Drive


Processing files:   5%|▌         | 87/1668 [09:38<3:46:48,  8.61s/it]

Processed: 19201106_1.txt -> Saved to Drive


Processing files:   5%|▌         | 88/1668 [09:43<3:17:14,  7.49s/it]

Processed: 19220422_1.txt -> Saved to Drive


Processing files:   5%|▌         | 89/1668 [09:52<3:24:17,  7.76s/it]

Processed: 19480228_1.txt -> Saved to Drive


Processing files:   5%|▌         | 90/1668 [10:00<3:31:11,  8.03s/it]

Processed: 19380618_1.txt -> Saved to Drive


Processing files:   5%|▌         | 91/1668 [10:09<3:36:19,  8.23s/it]

Processed: 19491217_1.txt -> Saved to Drive


Processing files:   6%|▌         | 92/1668 [10:13<3:05:04,  7.05s/it]

Processed: 19440701_1.txt -> Saved to Drive


Processing files:   6%|▌         | 93/1668 [10:17<2:41:54,  6.17s/it]

Processed: 19281229_1.txt -> Saved to Drive


Processing files:   6%|▌         | 94/1668 [10:22<2:29:23,  5.69s/it]

Processed: 19420815_1.txt -> Saved to Drive


Processing files:   6%|▌         | 95/1668 [10:32<3:01:59,  6.94s/it]

Processed: 19460810_1.txt -> Saved to Drive


Processing files:   6%|▌         | 96/1668 [10:38<2:53:50,  6.63s/it]

Processed: 19420801_1.txt -> Saved to Drive


Processing files:   6%|▌         | 97/1668 [10:48<3:19:41,  7.63s/it]

Processed: 19510505_1.txt -> Saved to Drive


Processing files:   6%|▌         | 98/1668 [10:51<2:49:59,  6.50s/it]

Processed: 19280204_1.txt -> Saved to Drive


Processing files:   6%|▌         | 99/1668 [10:59<3:00:51,  6.92s/it]

Processed: 19401019_1.txt -> Saved to Drive


Processing files:   6%|▌         | 100/1668 [11:04<2:40:21,  6.14s/it]

Processed: 19481016_1.txt -> Saved to Drive


Processing files:   6%|▌         | 101/1668 [11:08<2:29:29,  5.72s/it]

Processed: 19300816_1.txt -> Saved to Drive


Processing files:   6%|▌         | 102/1668 [11:14<2:31:21,  5.80s/it]

Processed: 19391104_1.txt -> Saved to Drive


Processing files:   6%|▌         | 103/1668 [11:20<2:27:08,  5.64s/it]

Processed: 19270514_1.txt -> Saved to Drive


Processing files:   6%|▌         | 104/1668 [11:28<2:44:52,  6.33s/it]

Processed: 19200207_1.txt -> Saved to Drive


Processing files:   6%|▋         | 105/1668 [11:36<2:58:38,  6.86s/it]

Processed: 19460309_1.txt -> Saved to Drive


Processing files:   6%|▋         | 106/1668 [11:46<3:26:43,  7.94s/it]

Processed: 19511013_1.txt -> Saved to Drive


Processing files:   6%|▋         | 107/1668 [11:54<3:28:41,  8.02s/it]

Processed: 19350420_1.txt -> Saved to Drive


Processing files:   6%|▋         | 108/1668 [11:58<2:58:12,  6.85s/it]

Processed: 19220812_1.txt -> Saved to Drive


Processing files:   7%|▋         | 109/1668 [12:02<2:35:08,  5.97s/it]

Processed: 19360502_1.txt -> Saved to Drive


Processing files:   7%|▋         | 110/1668 [12:07<2:25:07,  5.59s/it]

Processed: 19380430_1.txt -> Saved to Drive


Processing files:   7%|▋         | 111/1668 [12:15<2:44:58,  6.36s/it]

Processed: 19310613_1.txt -> Saved to Drive


Processing files:   7%|▋         | 112/1668 [12:26<3:15:49,  7.55s/it]

Processed: 19400518_1.txt -> Saved to Drive


Processing files:   7%|▋         | 113/1668 [12:48<5:12:57, 12.08s/it]

Processed: 19470426_1.txt -> Saved to Drive


Processing files:   7%|▋         | 114/1668 [12:53<4:19:00, 10.00s/it]

Processed: 19380409_1.txt -> Saved to Drive


Processing files:   7%|▋         | 115/1668 [13:00<3:52:06,  8.97s/it]

Processed: 19340609_1.txt -> Saved to Drive


Processing files:   7%|▋         | 116/1668 [13:05<3:25:06,  7.93s/it]

Processed: 19290209_1.txt -> Saved to Drive


Processing files:   7%|▋         | 117/1668 [13:10<2:58:44,  6.91s/it]

Processed: 19330819_1.txt -> Saved to Drive


Processing files:   7%|▋         | 118/1668 [13:19<3:14:34,  7.53s/it]

Processed: 19450113_1.txt -> Saved to Drive


Processing files:   7%|▋         | 119/1668 [13:26<3:09:10,  7.33s/it]

Processed: 19230106_1.txt -> Saved to Drive


Processing files:   7%|▋         | 120/1668 [13:31<2:53:00,  6.71s/it]

Processed: 19380212_1.txt -> Saved to Drive


Processing files:   7%|▋         | 121/1668 [13:38<2:57:34,  6.89s/it]

Processed: 19420307_1.txt -> Saved to Drive


Processing files:   7%|▋         | 122/1668 [13:43<2:41:54,  6.28s/it]

Processed: 19410719_1.txt -> Saved to Drive


Processing files:   7%|▋         | 123/1668 [13:49<2:34:19,  5.99s/it]

Processed: 19401102_1.txt -> Saved to Drive


Processing files:   7%|▋         | 124/1668 [14:07<4:13:31,  9.85s/it]

Processed: 19280811_1.txt -> Saved to Drive


Processing files:   7%|▋         | 125/1668 [14:15<3:57:45,  9.25s/it]

Processed: 19370501_1.txt -> Saved to Drive


Processing files:   8%|▊         | 126/1668 [14:22<3:39:41,  8.55s/it]

Processed: 19231013_1.txt -> Saved to Drive


Processing files:   8%|▊         | 127/1668 [14:26<3:05:30,  7.22s/it]

Processed: 19361226_1.txt -> Saved to Drive


Processing files:   8%|▊         | 128/1668 [14:31<2:47:33,  6.53s/it]

Processed: 19370925_1.txt -> Saved to Drive


Processing files:   8%|▊         | 129/1668 [14:37<2:39:46,  6.23s/it]

Processed: 19270326_1.txt -> Saved to Drive


Processing files:   8%|▊         | 130/1668 [14:43<2:36:19,  6.10s/it]

Processed: 19451020_1.txt -> Saved to Drive


Processing files:   8%|▊         | 131/1668 [14:53<3:13:15,  7.54s/it]

Processed: 19231027_1.txt -> Saved to Drive


Processing files:   8%|▊         | 132/1668 [15:01<3:11:49,  7.49s/it]

Processed: 19320910_1.txt -> Saved to Drive
Progress saved at 2025-02-11 17:56:07 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:   8%|▊         | 133/1668 [15:06<2:54:10,  6.81s/it]

Processed: 19490625_1.txt -> Saved to Drive


Processing files:   8%|▊         | 134/1668 [15:18<3:31:52,  8.29s/it]

Processed: 19220408_1.txt -> Saved to Drive


Processing files:   8%|▊         | 135/1668 [15:32<4:18:29, 10.12s/it]

Processed: 19320625_1.txt -> Saved to Drive


Processing files:   8%|▊         | 136/1668 [15:39<3:54:29,  9.18s/it]

Processed: 19360718_1.txt -> Saved to Drive


Processing files:   8%|▊         | 137/1668 [15:43<3:11:55,  7.52s/it]

Processed: 19430206_1.txt -> Saved to Drive


Processing files:   8%|▊         | 138/1668 [15:51<3:14:44,  7.64s/it]

Processed: 19480403_1.txt -> Saved to Drive


Processing files:   8%|▊         | 139/1668 [16:14<5:13:03, 12.28s/it]

Processed: 19511229_1.txt -> Saved to Drive


Processing files:   8%|▊         | 140/1668 [16:24<4:55:10, 11.59s/it]

Processed: 19451103_1.txt -> Saved to Drive


Processing files:   8%|▊         | 141/1668 [16:43<5:55:40, 13.98s/it]

Processed: 19470503_1.txt -> Saved to Drive


Processing files:   9%|▊         | 142/1668 [16:50<4:59:20, 11.77s/it]

Processed: 19231215_1.txt -> Saved to Drive


Processing files:   9%|▊         | 143/1668 [16:57<4:21:47, 10.30s/it]

Processed: 19340407_1.txt -> Saved to Drive


Processing files:   9%|▊         | 144/1668 [17:03<3:52:21,  9.15s/it]

Processed: 19410726_1.txt -> Saved to Drive


Processing files:   9%|▊         | 145/1668 [17:08<3:20:15,  7.89s/it]

Processed: 19480214_1.txt -> Saved to Drive


Processing files:   9%|▉         | 146/1668 [17:14<3:03:03,  7.22s/it]

Processed: 19340922_1.txt -> Saved to Drive


Processing files:   9%|▉         | 147/1668 [17:29<4:02:16,  9.56s/it]

Processed: 19461012_1.txt -> Saved to Drive


Processing files:   9%|▉         | 148/1668 [17:35<3:36:46,  8.56s/it]

Processed: 19430828_1.txt -> Saved to Drive


Processing files:   9%|▉         | 149/1668 [17:39<3:04:08,  7.27s/it]

Processed: 19480724_1.txt -> Saved to Drive


Processing files:   9%|▉         | 150/1668 [17:45<2:49:01,  6.68s/it]

Processed: 19411115_1.txt -> Saved to Drive


Processing files:   9%|▉         | 151/1668 [17:48<2:25:32,  5.76s/it]

Processed: 19220715_1.txt -> Saved to Drive


Processing files:   9%|▉         | 152/1668 [17:54<2:21:20,  5.59s/it]

Processed: 19210402_1.txt -> Saved to Drive


Processing files:   9%|▉         | 153/1668 [17:59<2:20:29,  5.56s/it]

Processed: 19220722_1.txt -> Saved to Drive


Processing files:   9%|▉         | 154/1668 [18:08<2:44:38,  6.52s/it]

Processed: 19360222_1.txt -> Saved to Drive


Processing files:   9%|▉         | 155/1668 [18:14<2:41:11,  6.39s/it]

Processed: 19250418_1.txt -> Saved to Drive


Processing files:   9%|▉         | 156/1668 [18:26<3:23:37,  8.08s/it]

Processed: 19510519_1.txt -> Saved to Drive


Processing files:   9%|▉         | 157/1668 [20:04<14:44:27, 35.12s/it]

Processed: 19450609_1.txt -> Saved to Drive


Processing files:   9%|▉         | 158/1668 [20:09<10:52:10, 25.91s/it]

Processed: 19381126_1.txt -> Saved to Drive


Processing files:  10%|▉         | 159/1668 [20:13<8:08:44, 19.43s/it] 

Processed: 19320611_1.txt -> Saved to Drive


Processing files:  10%|▉         | 160/1668 [20:19<6:26:10, 15.36s/it]

Processed: 19330218_1.txt -> Saved to Drive


Processing files:  10%|▉         | 161/1668 [20:23<5:02:07, 12.03s/it]

Processed: 19401214_1.txt -> Saved to Drive


Processing files:  10%|▉         | 162/1668 [20:30<4:24:15, 10.53s/it]

Processed: 19260109_1.txt -> Saved to Drive


Processing files:  10%|▉         | 163/1668 [20:36<3:51:12,  9.22s/it]

Processed: 19490430_1.txt -> Saved to Drive


Processing files:  10%|▉         | 164/1668 [20:47<4:02:24,  9.67s/it]

Processed: 19200228_1.txt -> Saved to Drive


Processing files:  10%|▉         | 165/1668 [21:11<5:48:35, 13.92s/it]

Processed: 19500506_1.txt -> Saved to Drive


Processing files:  10%|▉         | 166/1668 [21:14<4:30:23, 10.80s/it]

Processed: 19350511_1.txt -> Saved to Drive


Processing files:  10%|█         | 167/1668 [21:20<3:49:55,  9.19s/it]

Processed: 19380806_1.txt -> Saved to Drive


Processing files:  10%|█         | 168/1668 [21:26<3:29:44,  8.39s/it]

Processed: 19230804_1.txt -> Saved to Drive


Processing files:  10%|█         | 169/1668 [21:31<3:00:04,  7.21s/it]

Processed: 19351005_1.txt -> Saved to Drive


Processing files:  10%|█         | 170/1668 [21:36<2:45:49,  6.64s/it]

Processed: 19330708_1.txt -> Saved to Drive


Processing files:  10%|█         | 171/1668 [21:42<2:41:14,  6.46s/it]

Processed: 19240412_1.txt -> Saved to Drive


Processing files:  10%|█         | 172/1668 [21:47<2:33:27,  6.15s/it]

Processed: 19300510_1.txt -> Saved to Drive


Processing files:  10%|█         | 173/1668 [21:52<2:25:06,  5.82s/it]

Processed: 19500114_1.txt -> Saved to Drive


Processing files:  10%|█         | 174/1668 [21:57<2:14:57,  5.42s/it]

Processed: 19401123_1.txt -> Saved to Drive


Processing files:  10%|█         | 175/1668 [22:01<2:04:30,  5.00s/it]

Processed: 19230414_1.txt -> Saved to Drive


Processing files:  11%|█         | 176/1668 [22:13<2:55:09,  7.04s/it]

Processed: 19450106_1.txt -> Saved to Drive


Processing files:  11%|█         | 177/1668 [22:18<2:38:46,  6.39s/it]

Processed: 19271210_1.txt -> Saved to Drive


Processing files:  11%|█         | 178/1668 [22:36<4:05:11,  9.87s/it]

Processed: 19371225_1.txt -> Saved to Drive


Processing files:  11%|█         | 179/1668 [22:41<3:31:22,  8.52s/it]

Processed: 19481218_1.txt -> Saved to Drive


Processing files:  11%|█         | 180/1668 [22:47<3:12:56,  7.78s/it]

Processed: 19211029_1.txt -> Saved to Drive


Processing files:  11%|█         | 181/1668 [22:53<3:00:55,  7.30s/it]

Processed: 19330701_1.txt -> Saved to Drive


Processing files:  11%|█         | 182/1668 [22:58<2:43:13,  6.59s/it]

Processed: 19391209_1.txt -> Saved to Drive


Processing files:  11%|█         | 183/1668 [23:04<2:40:45,  6.50s/it]

Processed: 19311128_1.txt -> Saved to Drive


Processing files:  11%|█         | 184/1668 [23:10<2:35:28,  6.29s/it]

Processed: 19491105_1.txt -> Saved to Drive


Processing files:  11%|█         | 185/1668 [23:18<2:42:37,  6.58s/it]

Processed: 19211231_1.txt -> Saved to Drive


Processing files:  11%|█         | 186/1668 [23:22<2:25:32,  5.89s/it]

Processed: 19450526_1.txt -> Saved to Drive


Processing files:  11%|█         | 187/1668 [23:27<2:20:35,  5.70s/it]

Processed: 19321022_1.txt -> Saved to Drive


Processing files:  11%|█▏        | 188/1668 [23:30<2:00:42,  4.89s/it]

Processed: 19361212_1.txt -> Saved to Drive


Processing files:  11%|█▏        | 189/1668 [23:40<2:36:47,  6.36s/it]

Processed: 19240119_1.txt -> Saved to Drive


Processing files:  11%|█▏        | 190/1668 [23:45<2:28:19,  6.02s/it]

Processed: 19200807_1.txt -> Saved to Drive


Processing files:  11%|█▏        | 191/1668 [23:50<2:20:47,  5.72s/it]

Processed: 19230113_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 192/1668 [24:01<2:55:28,  7.13s/it]

Processed: 19491203_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 193/1668 [24:09<3:05:26,  7.54s/it]

Processed: 19240223_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 194/1668 [24:16<2:57:19,  7.22s/it]

Processed: 19510113_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 195/1668 [24:20<2:38:57,  6.48s/it]

Processed: 19220325_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 196/1668 [24:25<2:22:46,  5.82s/it]

Processed: 19370403_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 197/1668 [24:34<2:47:02,  6.81s/it]

Processed: 19471004_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 198/1668 [24:39<2:38:34,  6.47s/it]

Processed: 19350831_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 199/1668 [24:43<2:16:19,  5.57s/it]

Processed: 19320423_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 200/1668 [24:49<2:20:14,  5.73s/it]

Processed: 19380514_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 201/1668 [24:53<2:07:22,  5.21s/it]

Processed: 19470705_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 202/1668 [24:59<2:12:23,  5.42s/it]

Processed: 19330422_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 203/1668 [25:04<2:13:06,  5.45s/it]

Processed: 19210129_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 204/1668 [25:09<2:07:12,  5.21s/it]

Processed: 19281117_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 205/1668 [26:46<13:21:20, 32.86s/it]

Processed: 19230407_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 206/1668 [26:51<9:57:11, 24.51s/it] 

Processed: 19241025_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 207/1668 [26:55<7:23:15, 18.20s/it]

Processed: 19391007_1.txt -> Saved to Drive


Processing files:  12%|█▏        | 208/1668 [27:02<6:00:54, 14.83s/it]

Processed: 19471129_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 209/1668 [27:06<4:44:49, 11.71s/it]

Processed: 19310314_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 210/1668 [27:12<4:00:04,  9.88s/it]

Processed: 19470308_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 211/1668 [27:18<3:31:22,  8.70s/it]

Processed: 19421219_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 212/1668 [28:55<14:11:29, 35.09s/it]

Processed: 19350608_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 213/1668 [28:58<10:18:33, 25.51s/it]

Processed: 19280114_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 214/1668 [29:04<8:01:31, 19.87s/it] 

Processed: 19320416_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 215/1668 [29:11<6:24:07, 15.86s/it]

Processed: 19300201_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 216/1668 [29:18<5:23:24, 13.36s/it]

Processed: 19490618_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 217/1668 [29:23<4:19:14, 10.72s/it]

Processed: 19381210_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 218/1668 [29:31<4:02:15, 10.02s/it]

Processed: 19490917_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 219/1668 [29:36<3:24:10,  8.45s/it]

Processed: 19420124_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 220/1668 [29:46<3:33:15,  8.84s/it]

Processed: 19201016_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 221/1668 [29:51<3:04:59,  7.67s/it]

Processed: 19210820_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 222/1668 [29:56<2:48:53,  7.01s/it]

Processed: 19340519_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 223/1668 [30:00<2:23:30,  5.96s/it]

Processed: 19400727_1.txt -> Saved to Drive


Processing files:  13%|█▎        | 224/1668 [30:05<2:17:51,  5.73s/it]

Processed: 19450630_1.txt -> Saved to Drive
Progress saved at 2025-02-11 18:11:11 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:  13%|█▎        | 225/1668 [30:10<2:15:35,  5.64s/it]

Processed: 19260206_1.txt -> Saved to Drive


Processing files:  14%|█▎        | 226/1668 [30:15<2:05:44,  5.23s/it]

Processed: 19501216_1.txt -> Saved to Drive


Processing files:  14%|█▎        | 227/1668 [30:19<1:58:21,  4.93s/it]

Processed: 19200904_1.txt -> Saved to Drive


Processing files:  14%|█▎        | 228/1668 [30:23<1:51:16,  4.64s/it]

Processed: 19400113_1.txt -> Saved to Drive


Processing files:  14%|█▎        | 229/1668 [30:28<1:51:04,  4.63s/it]

Processed: 19200821_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 230/1668 [30:33<1:55:01,  4.80s/it]

Processed: 19440708_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 231/1668 [30:38<2:01:56,  5.09s/it]

Processed: 19431030_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 232/1668 [30:43<2:01:01,  5.06s/it]

Processed: 19510915_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 233/1668 [30:49<2:07:07,  5.31s/it]

Processed: 19210723_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 234/1668 [30:55<2:07:36,  5.34s/it]

Processed: 19380730_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 235/1668 [30:59<2:02:48,  5.14s/it]

Processed: 19200529_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 236/1668 [31:05<2:05:18,  5.25s/it]

Processed: 19490326_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 237/1668 [31:17<2:57:00,  7.42s/it]

Processed: 19210423_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 238/1668 [31:24<2:48:30,  7.07s/it]

Processed: 19260116_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 239/1668 [31:27<2:24:49,  6.08s/it]

Processed: 19321224_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 240/1668 [31:34<2:29:10,  6.27s/it]

Processed: 19450505_1.txt -> Saved to Drive


Processing files:  14%|█▍        | 241/1668 [31:40<2:23:49,  6.05s/it]

Processed: 19371204_1.txt -> Saved to Drive


Processing files:  15%|█▍        | 242/1668 [31:48<2:36:40,  6.59s/it]

Processed: 19420822_1.txt -> Saved to Drive


Processing files:  15%|█▍        | 243/1668 [31:52<2:21:27,  5.96s/it]

Processed: 19310711_1.txt -> Saved to Drive


Processing files:  15%|█▍        | 244/1668 [31:59<2:25:42,  6.14s/it]

Processed: 19361205_1.txt -> Saved to Drive


Processing files:  15%|█▍        | 245/1668 [32:04<2:22:50,  6.02s/it]

Processed: 19411206_1.txt -> Saved to Drive


Processing files:  15%|█▍        | 246/1668 [32:09<2:15:42,  5.73s/it]

Processed: 19241129_1.txt -> Saved to Drive


Processing files:  15%|█▍        | 247/1668 [32:18<2:35:56,  6.58s/it]

Processed: 19350209_1.txt -> Saved to Drive


Processing files:  15%|█▍        | 248/1668 [32:23<2:24:56,  6.12s/it]

Processed: 19301206_1.txt -> Saved to Drive


Processing files:  15%|█▍        | 249/1668 [32:37<3:17:39,  8.36s/it]

Processed: 19230825_1.txt -> Saved to Drive


Processing files:  15%|█▍        | 250/1668 [32:41<2:50:54,  7.23s/it]

Processed: 19290720_1.txt -> Saved to Drive


Processing files:  15%|█▌        | 251/1668 [32:46<2:36:06,  6.61s/it]

Processed: 19450303_1.txt -> Saved to Drive


Processing files:  15%|█▌        | 252/1668 [32:54<2:42:56,  6.90s/it]

Processed: 19270709_1.txt -> Saved to Drive


Processing files:  15%|█▌        | 253/1668 [32:59<2:28:04,  6.28s/it]

Processed: 19450324_1.txt -> Saved to Drive


Processing files:  15%|█▌        | 254/1668 [33:08<2:51:34,  7.28s/it]

Processed: 19251107_1.txt -> Saved to Drive


Processing files:  15%|█▌        | 255/1668 [33:16<2:53:12,  7.36s/it]

Processed: 19270528_1.txt -> Saved to Drive


Processing files:  15%|█▌        | 256/1668 [33:23<2:48:34,  7.16s/it]

Processed: 19330204_1.txt -> Saved to Drive


Processing files:  15%|█▌        | 257/1668 [33:32<3:04:11,  7.83s/it]

Processed: 19200221_1.txt -> Saved to Drive


Processing files:  15%|█▌        | 258/1668 [33:39<2:56:46,  7.52s/it]

Processed: 19370123_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 259/1668 [33:43<2:32:58,  6.51s/it]

Processed: 19360314_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 260/1668 [33:50<2:39:20,  6.79s/it]

Processed: 19440909_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 261/1668 [33:54<2:16:44,  5.83s/it]

Processed: 19381022_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 262/1668 [33:58<2:04:42,  5.32s/it]

Processed: 19510331_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 263/1668 [34:03<2:03:56,  5.29s/it]

Processed: 19200417_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 264/1668 [34:10<2:13:32,  5.71s/it]

Processed: 19451006_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 265/1668 [34:15<2:05:15,  5.36s/it]

Processed: 19321119_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 266/1668 [34:20<2:06:46,  5.43s/it]

Processed: 19200327_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 267/1668 [34:25<2:00:38,  5.17s/it]

Processed: 19460330_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 268/1668 [34:32<2:16:11,  5.84s/it]

Processed: 19310228_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 269/1668 [34:39<2:21:24,  6.06s/it]

Processed: 19500617_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 270/1668 [34:43<2:10:45,  5.61s/it]

Processed: 19331223_1.txt -> Saved to Drive


Processing files:  16%|█▌        | 271/1668 [34:48<2:05:52,  5.41s/it]

Processed: 19260814_1.txt -> Saved to Drive


Processing files:  16%|█▋        | 272/1668 [35:17<4:48:30, 12.40s/it]

Processed: 19460914_1.txt -> Saved to Drive


Processing files:  16%|█▋        | 273/1668 [35:22<4:00:36, 10.35s/it]

Processed: 19411004_1.txt -> Saved to Drive


Processing files:  16%|█▋        | 274/1668 [35:27<3:22:13,  8.70s/it]

Processed: 19340825_1.txt -> Saved to Drive


Processing files:  16%|█▋        | 275/1668 [35:31<2:45:17,  7.12s/it]

Processed: 19240322_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 276/1668 [35:35<2:28:21,  6.39s/it]

Processed: 19330805_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 277/1668 [35:43<2:33:31,  6.62s/it]

Processed: 19291026_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 278/1668 [35:51<2:43:31,  7.06s/it]

Processed: 19370612_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 279/1668 [35:58<2:44:56,  7.13s/it]

Processed: 19440422_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 280/1668 [36:04<2:38:12,  6.84s/it]

Processed: 19390204_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 281/1668 [36:09<2:23:33,  6.21s/it]

Processed: 19361114_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 282/1668 [36:15<2:19:26,  6.04s/it]

Processed: 19270625_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 283/1668 [36:36<4:07:52, 10.74s/it]

Processed: 19201002_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 284/1668 [36:41<3:28:18,  9.03s/it]

Processed: 19280519_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 285/1668 [36:47<3:04:36,  8.01s/it]

Processed: 19380604_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 286/1668 [36:51<2:38:49,  6.90s/it]

Processed: 19271022_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 287/1668 [36:58<2:34:46,  6.72s/it]

Processed: 19300726_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 288/1668 [37:05<2:37:02,  6.83s/it]

Processed: 19390318_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 289/1668 [37:12<2:37:57,  6.87s/it]

Processed: 19230303_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 290/1668 [37:17<2:25:41,  6.34s/it]

Processed: 19290413_1.txt -> Saved to Drive


Processing files:  17%|█▋        | 291/1668 [37:21<2:14:47,  5.87s/it]

Processed: 19501007_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 292/1668 [37:28<2:19:53,  6.10s/it]

Processed: 19260508_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 293/1668 [37:34<2:16:58,  5.98s/it]

Processed: 19350413_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 294/1668 [37:41<2:25:52,  6.37s/it]

Processed: 19401221_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 295/1668 [37:46<2:12:39,  5.80s/it]

Processed: 19380115_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 296/1668 [37:50<2:01:02,  5.29s/it]

Processed: 19241220_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 297/1668 [37:55<2:03:13,  5.39s/it]

Processed: 19460831_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 298/1668 [37:59<1:53:11,  4.96s/it]

Processed: 19310905_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 299/1668 [38:05<1:56:06,  5.09s/it]

Processed: 19350706_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 300/1668 [38:15<2:31:45,  6.66s/it]

Processed: 19510811_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 301/1668 [38:36<4:09:18, 10.94s/it]

Processed: 19460803_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 302/1668 [38:44<3:52:09, 10.20s/it]

Processed: 19520524_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 303/1668 [38:58<4:12:50, 11.11s/it]

Processed: 19430320_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 304/1668 [39:03<3:37:04,  9.55s/it]

Processed: 19291109_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 305/1668 [39:13<3:34:24,  9.44s/it]

Processed: 19341006_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 306/1668 [39:19<3:15:09,  8.60s/it]

Processed: 19250228_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 307/1668 [39:27<3:08:35,  8.31s/it]

Processed: 19491126_1.txt -> Saved to Drive


Processing files:  18%|█▊        | 308/1668 [39:32<2:46:22,  7.34s/it]

Processed: 19270319_1.txt -> Saved to Drive


Processing files:  19%|█▊        | 309/1668 [39:38<2:38:22,  6.99s/it]

Processed: 19370220_1.txt -> Saved to Drive


Processing files:  19%|█▊        | 310/1668 [39:55<3:42:23,  9.83s/it]

Processed: 19470104_1.txt -> Saved to Drive


Processing files:  19%|█▊        | 311/1668 [40:00<3:12:38,  8.52s/it]

Processed: 19220909_1.txt -> Saved to Drive


Processing files:  19%|█▊        | 312/1668 [40:08<3:05:46,  8.22s/it]

Processed: 19421205_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 313/1668 [40:12<2:40:11,  7.09s/it]

Processed: 19280414_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 314/1668 [40:17<2:27:30,  6.54s/it]

Processed: 19411220_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 315/1668 [40:46<4:56:02, 13.13s/it]

Processed: 19450210_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 316/1668 [40:51<4:01:04, 10.70s/it]

Processed: 19351109_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 317/1668 [40:56<3:21:22,  8.94s/it]

Processed: 19381224_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 318/1668 [41:05<3:21:13,  8.94s/it]

Processed: 19300809_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 319/1668 [41:09<2:49:23,  7.53s/it]

Processed: 19401130_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 320/1668 [41:14<2:32:47,  6.80s/it]

Processed: 19291214_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 321/1668 [41:21<2:36:08,  6.95s/it]

Processed: 19370918_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 322/1668 [41:31<2:57:20,  7.91s/it]

Processed: 19240906_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 323/1668 [41:36<2:31:49,  6.77s/it]

Processed: 19490730_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 324/1668 [41:41<2:20:54,  6.29s/it]

Processed: 19301213_1.txt -> Saved to Drive


Processing files:  19%|█▉        | 325/1668 [41:51<2:47:10,  7.47s/it]

Processed: 19330225_1.txt -> Saved to Drive


Processing files:  20%|█▉        | 326/1668 [42:03<3:16:59,  8.81s/it]

Processed: 19210618_1.txt -> Saved to Drive


Processing files:  20%|█▉        | 327/1668 [42:29<5:13:02, 14.01s/it]

Processed: 19420919_1.txt -> Saved to Drive


Processing files:  20%|█▉        | 328/1668 [42:33<4:04:30, 10.95s/it]

Processed: 19411018_1.txt -> Saved to Drive


Processing files:  20%|█▉        | 329/1668 [42:37<3:19:22,  8.93s/it]

Processed: 19430508_1.txt -> Saved to Drive


Processing files:  20%|█▉        | 330/1668 [42:42<2:49:57,  7.62s/it]

Processed: 19341013_1.txt -> Saved to Drive


Processing files:  20%|█▉        | 331/1668 [43:03<4:19:14, 11.63s/it]

Processed: 19420321_1.txt -> Saved to Drive


Processing files:  20%|█▉        | 332/1668 [43:09<3:44:11, 10.07s/it]

Processed: 19490604_1.txt -> Saved to Drive


Processing files:  20%|█▉        | 333/1668 [43:14<3:10:18,  8.55s/it]

Processed: 19400323_1.txt -> Saved to Drive


Processing files:  20%|██        | 334/1668 [43:22<3:03:54,  8.27s/it]

Processed: 19340324_1.txt -> Saved to Drive


Processing files:  20%|██        | 335/1668 [43:28<2:50:25,  7.67s/it]

Processed: 19400406_1.txt -> Saved to Drive


Processing files:  20%|██        | 336/1668 [43:36<2:53:49,  7.83s/it]

Processed: 19320507_1.txt -> Saved to Drive


Processing files:  20%|██        | 337/1668 [43:42<2:38:52,  7.16s/it]

Processed: 19440513_1.txt -> Saved to Drive


Processing files:  20%|██        | 338/1668 [43:49<2:39:43,  7.21s/it]

Processed: 19410329_1.txt -> Saved to Drive


Processing files:  20%|██        | 339/1668 [43:54<2:23:49,  6.49s/it]

Processed: 19350907_1.txt -> Saved to Drive


Processing files:  20%|██        | 340/1668 [43:58<2:10:14,  5.88s/it]

Processed: 19240607_1.txt -> Saved to Drive


Processing files:  20%|██        | 341/1668 [44:09<2:38:56,  7.19s/it]

Processed: 19461026_1.txt -> Saved to Drive


Processing files:  21%|██        | 342/1668 [44:19<2:58:16,  8.07s/it]

Processed: 19401116_1.txt -> Saved to Drive


Processing files:  21%|██        | 343/1668 [44:24<2:42:30,  7.36s/it]

Processed: 19330121_1.txt -> Saved to Drive


Processing files:  21%|██        | 344/1668 [44:30<2:32:28,  6.91s/it]

Processed: 19370515_1.txt -> Saved to Drive


Processing files:  21%|██        | 345/1668 [44:34<2:11:55,  5.98s/it]

Processed: 19280512_1.txt -> Saved to Drive


Processing files:  21%|██        | 346/1668 [44:39<2:07:59,  5.81s/it]

Processed: 19350622_1.txt -> Saved to Drive


Processing files:  21%|██        | 347/1668 [44:47<2:15:58,  6.18s/it]

Processed: 19460119_1.txt -> Saved to Drive


Processing files:  21%|██        | 348/1668 [44:51<2:02:27,  5.57s/it]

Processed: 19360118_1.txt -> Saved to Drive


Processing files:  21%|██        | 349/1668 [44:56<2:00:14,  5.47s/it]

Processed: 19320604_1.txt -> Saved to Drive


Processing files:  21%|██        | 350/1668 [45:01<1:55:31,  5.26s/it]

Processed: 19370102_1.txt -> Saved to Drive


Processing files:  21%|██        | 351/1668 [45:08<2:07:55,  5.83s/it]

Processed: 19410215_1.txt -> Saved to Drive
Progress saved at 2025-02-11 18:26:14 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:  21%|██        | 352/1668 [45:12<2:00:06,  5.48s/it]

Processed: 19310815_1.txt -> Saved to Drive


Processing files:  21%|██        | 353/1668 [45:17<1:53:55,  5.20s/it]

Processed: 19341124_1.txt -> Saved to Drive


Processing files:  21%|██        | 354/1668 [45:21<1:47:56,  4.93s/it]

Processed: 19340310_1.txt -> Saved to Drive


Processing files:  21%|██▏       | 355/1668 [45:26<1:43:30,  4.73s/it]

Processed: 19230310_1.txt -> Saved to Drive


Processing files:  21%|██▏       | 356/1668 [45:30<1:41:45,  4.65s/it]

Processed: 19330603_1.txt -> Saved to Drive


Processing files:  21%|██▏       | 357/1668 [45:34<1:34:16,  4.31s/it]

Processed: 19280121_1.txt -> Saved to Drive


Processing files:  21%|██▏       | 358/1668 [45:46<2:30:01,  6.87s/it]

Processed: 19491015_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 359/1668 [45:57<2:52:56,  7.93s/it]

Processed: 19310822_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 360/1668 [46:17<4:14:20, 11.67s/it]

Processed: 19451201_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 361/1668 [46:27<4:00:37, 11.05s/it]

Processed: 19320924_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 362/1668 [46:36<3:45:04, 10.34s/it]

Processed: 19450519_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 363/1668 [46:40<3:07:07,  8.60s/it]

Processed: 19391223_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 364/1668 [46:45<2:40:04,  7.37s/it]

Processed: 19380528_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 365/1668 [46:47<2:10:16,  6.00s/it]

Processed: 19410920_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 366/1668 [46:53<2:08:28,  5.92s/it]

Processed: 19351130_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 367/1668 [46:57<1:57:07,  5.40s/it]

Processed: 19400713_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 368/1668 [47:06<2:16:25,  6.30s/it]

Processed: 19460413_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 369/1668 [47:14<2:31:52,  7.01s/it]

Processed: 19300503_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 370/1668 [47:20<2:21:04,  6.52s/it]

Processed: 19370227_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 371/1668 [47:27<2:24:29,  6.68s/it]

Processed: 19240524_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 372/1668 [47:33<2:21:42,  6.56s/it]

Processed: 19490723_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 373/1668 [47:37<2:07:04,  5.89s/it]

Processed: 19421031_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 374/1668 [47:42<1:58:48,  5.51s/it]

Processed: 19280804_1.txt -> Saved to Drive


Processing files:  22%|██▏       | 375/1668 [47:50<2:14:15,  6.23s/it]

Processed: 19450728_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 376/1668 [47:54<1:59:27,  5.55s/it]

Processed: 19300329_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 377/1668 [47:59<1:59:29,  5.55s/it]

Processed: 19320326_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 378/1668 [48:05<1:56:38,  5.43s/it]

Processed: 19380219_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 379/1668 [48:10<1:54:43,  5.34s/it]

Processed: 19380305_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 380/1668 [48:19<2:18:35,  6.46s/it]

Processed: 19410531_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 381/1668 [48:24<2:10:18,  6.08s/it]

Processed: 19250509_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 382/1668 [48:30<2:08:16,  5.98s/it]

Processed: 19450414_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 383/1668 [48:37<2:14:07,  6.26s/it]

Processed: 19250725_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 384/1668 [48:42<2:06:33,  5.91s/it]

Processed: 19261204_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 385/1668 [48:46<1:56:26,  5.45s/it]

Processed: 19230908_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 386/1668 [48:51<1:52:50,  5.28s/it]

Processed: 19380226_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 387/1668 [48:59<2:10:09,  6.10s/it]

Processed: 19251121_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 388/1668 [49:06<2:17:09,  6.43s/it]

Processed: 19240719_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 389/1668 [49:13<2:18:45,  6.51s/it]

Processed: 19380709_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 390/1668 [49:19<2:18:43,  6.51s/it]

Processed: 19270219_1.txt -> Saved to Drive


Processing files:  23%|██▎       | 391/1668 [49:25<2:10:57,  6.15s/it]

Processed: 19300531_1.txt -> Saved to Drive


Processing files:  24%|██▎       | 392/1668 [49:30<2:04:54,  5.87s/it]

Processed: 19220826_1.txt -> Saved to Drive


Processing files:  24%|██▎       | 393/1668 [49:35<2:02:35,  5.77s/it]

Processed: 19250110_1.txt -> Saved to Drive


Processing files:  24%|██▎       | 394/1668 [49:43<2:14:54,  6.35s/it]

Processed: 19200612_1.txt -> Saved to Drive


Processing files:  24%|██▎       | 395/1668 [49:48<2:07:44,  6.02s/it]

Processed: 19290824_1.txt -> Saved to Drive


Processing files:  24%|██▎       | 396/1668 [49:54<2:06:59,  5.99s/it]

Processed: 19470816_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 397/1668 [50:03<2:26:34,  6.92s/it]

Processed: 19360912_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 398/1668 [50:07<2:07:34,  6.03s/it]

Processed: 19371002_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 399/1668 [50:12<1:55:24,  5.46s/it]

Processed: 19350601_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 400/1668 [50:18<2:01:56,  5.77s/it]

Processed: 19470726_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 401/1668 [50:22<1:48:38,  5.14s/it]

Processed: 19250718_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 402/1668 [50:27<1:47:09,  5.08s/it]

Processed: 19461228_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 403/1668 [50:32<1:50:47,  5.25s/it]

Processed: 19440520_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 404/1668 [50:41<2:13:06,  6.32s/it]

Processed: 19511124_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 405/1668 [50:47<2:11:11,  6.23s/it]

Processed: 19221007_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 406/1668 [50:52<2:03:38,  5.88s/it]

Processed: 19411025_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 407/1668 [50:57<1:54:23,  5.44s/it]

Processed: 19230714_1.txt -> Saved to Drive


Processing files:  24%|██▍       | 408/1668 [51:03<2:01:45,  5.80s/it]

Processed: 19330415_1.txt -> Saved to Drive


Processing files:  25%|██▍       | 409/1668 [51:09<2:00:59,  5.77s/it]

Processed: 19510224_1.txt -> Saved to Drive


Processing files:  25%|██▍       | 410/1668 [51:14<1:56:33,  5.56s/it]

Processed: 19400706_1.txt -> Saved to Drive


Processing files:  25%|██▍       | 411/1668 [51:18<1:49:10,  5.21s/it]

Processed: 19340602_1.txt -> Saved to Drive


Processing files:  25%|██▍       | 412/1668 [51:24<1:52:14,  5.36s/it]

Processed: 19430807_1.txt -> Saved to Drive


Processing files:  25%|██▍       | 413/1668 [51:28<1:44:01,  4.97s/it]

Processed: 19340303_1.txt -> Saved to Drive


Processing files:  25%|██▍       | 414/1668 [51:35<1:52:43,  5.39s/it]

Processed: 19340113_1.txt -> Saved to Drive


Processing files:  25%|██▍       | 415/1668 [51:38<1:41:07,  4.84s/it]

Processed: 19360822_1.txt -> Saved to Drive


Processing files:  25%|██▍       | 416/1668 [51:45<1:53:54,  5.46s/it]

Processed: 19330722_1.txt -> Saved to Drive


Processing files:  25%|██▌       | 417/1668 [51:49<1:46:38,  5.11s/it]

Processed: 19311212_1.txt -> Saved to Drive


Processing files:  25%|██▌       | 418/1668 [51:56<1:59:10,  5.72s/it]

Processed: 19470830_1.txt -> Saved to Drive


Processing files:  25%|██▌       | 419/1668 [52:02<2:00:24,  5.78s/it]

Processed: 19430612_1.txt -> Saved to Drive


Processing files:  25%|██▌       | 420/1668 [52:07<1:52:23,  5.40s/it]

Processed: 19371016_1.txt -> Saved to Drive


Processing files:  25%|██▌       | 421/1668 [52:11<1:42:40,  4.94s/it]

Processed: 19280407_1.txt -> Saved to Drive


Processing files:  25%|██▌       | 422/1668 [52:16<1:43:53,  5.00s/it]

Processed: 19210528_1.txt -> Saved to Drive


Processing files:  25%|██▌       | 423/1668 [52:22<1:48:10,  5.21s/it]

Processed: 19520705_1.txt -> Saved to Drive


Processing files:  25%|██▌       | 424/1668 [52:28<1:55:27,  5.57s/it]

Processed: 19501021_1.txt -> Saved to Drive


Processing files:  25%|██▌       | 425/1668 [52:33<1:50:24,  5.33s/it]

Processed: 19400210_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 426/1668 [52:39<1:56:06,  5.61s/it]

Processed: 19510818_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 427/1668 [52:46<2:05:02,  6.05s/it]

Processed: 19341222_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 428/1668 [53:06<3:28:33, 10.09s/it]

Processed: 19260807_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 429/1668 [53:10<2:53:33,  8.41s/it]

Processed: 19230421_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 430/1668 [53:19<2:53:32,  8.41s/it]

Processed: 19480522_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 431/1668 [53:23<2:27:32,  7.16s/it]

Processed: 19500408_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 432/1668 [53:59<5:24:45, 15.76s/it]

Processed: 19451117_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 433/1668 [54:02<4:10:36, 12.18s/it]

Processed: 19330826_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 434/1668 [54:08<3:30:38, 10.24s/it]

Processed: 19490716_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 435/1668 [54:13<2:59:04,  8.71s/it]

Processed: 19510630_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 436/1668 [54:18<2:34:31,  7.53s/it]

Processed: 19340616_1.txt -> Saved to Drive


Processing files:  26%|██▌       | 437/1668 [54:51<5:10:50, 15.15s/it]

Processed: 19480828_1.txt -> Saved to Drive


Processing files:  26%|██▋       | 438/1668 [54:56<4:05:26, 11.97s/it]

Processed: 19281006_1.txt -> Saved to Drive


Processing files:  26%|██▋       | 439/1668 [55:01<3:24:22,  9.98s/it]

Processed: 19421121_1.txt -> Saved to Drive


Processing files:  26%|██▋       | 440/1668 [55:07<3:00:46,  8.83s/it]

Processed: 19400615_1.txt -> Saved to Drive


Processing files:  26%|██▋       | 441/1668 [55:16<3:01:19,  8.87s/it]

Processed: 19430522_1.txt -> Saved to Drive


Processing files:  26%|██▋       | 442/1668 [55:24<2:57:50,  8.70s/it]

Processed: 19380924_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 443/1668 [55:28<2:26:56,  7.20s/it]

Processed: 19271231_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 444/1668 [55:33<2:13:29,  6.54s/it]

Processed: 19390218_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 445/1668 [55:38<2:01:02,  5.94s/it]

Processed: 19280818_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 446/1668 [56:02<3:52:33, 11.42s/it]

Processed: 19390812_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 447/1668 [56:08<3:23:57, 10.02s/it]

Processed: 19490611_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 448/1668 [56:19<3:27:02, 10.18s/it]

Processed: 19361219_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 449/1668 [56:25<2:59:57,  8.86s/it]

Processed: 19271112_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 450/1668 [56:36<3:12:52,  9.50s/it]

Processed: 19470222_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 451/1668 [56:43<2:56:21,  8.69s/it]

Processed: 19280218_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 452/1668 [56:49<2:42:27,  8.02s/it]

Processed: 19220701_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 453/1668 [56:54<2:21:53,  7.01s/it]

Processed: 19231103_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 454/1668 [56:59<2:13:34,  6.60s/it]

Processed: 19270611_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 455/1668 [57:07<2:21:40,  7.01s/it]

Processed: 19520315_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 456/1668 [57:16<2:33:12,  7.58s/it]

Processed: 19301115_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 457/1668 [57:21<2:16:57,  6.79s/it]

Processed: 19390422_1.txt -> Saved to Drive


Processing files:  27%|██▋       | 458/1668 [57:30<2:28:10,  7.35s/it]

Processed: 19461109_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 459/1668 [57:35<2:17:46,  6.84s/it]

Processed: 19250328_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 460/1668 [57:39<1:58:28,  5.88s/it]

Processed: 19371120_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 461/1668 [57:45<1:56:52,  5.81s/it]

Processed: 19221111_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 462/1668 [57:50<1:55:10,  5.73s/it]

Processed: 19240927_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 463/1668 [57:56<1:53:20,  5.64s/it]

Processed: 19240920_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 464/1668 [58:01<1:49:33,  5.46s/it]

Processed: 19290223_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 465/1668 [58:05<1:42:12,  5.10s/it]

Processed: 19331118_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 466/1668 [58:11<1:47:39,  5.37s/it]

Processed: 19280908_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 467/1668 [58:15<1:36:32,  4.82s/it]

Processed: 19220930_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 468/1668 [58:20<1:40:21,  5.02s/it]

Processed: 19230203_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 469/1668 [58:24<1:32:48,  4.64s/it]

Processed: 19380507_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 470/1668 [58:29<1:35:03,  4.76s/it]

Processed: 19340818_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 471/1668 [58:33<1:32:50,  4.65s/it]

Processed: 19271029_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 472/1668 [58:40<1:44:30,  5.24s/it]

Processed: 19280616_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 473/1668 [58:52<2:22:38,  7.16s/it]

Processed: 19480417_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 474/1668 [59:02<2:41:42,  8.13s/it]

Processed: 19380521_1.txt -> Saved to Drive


Processing files:  28%|██▊       | 475/1668 [59:07<2:24:06,  7.25s/it]

Processed: 19501125_1.txt -> Saved to Drive


Processing files:  29%|██▊       | 476/1668 [59:12<2:08:26,  6.47s/it]

Processed: 19410823_1.txt -> Saved to Drive


Processing files:  29%|██▊       | 477/1668 [59:18<2:09:22,  6.52s/it]

Processed: 19270416_1.txt -> Saved to Drive


Processing files:  29%|██▊       | 478/1668 [59:25<2:08:57,  6.50s/it]

Processed: 19410712_1.txt -> Saved to Drive


Processing files:  29%|██▊       | 479/1668 [59:30<1:58:22,  5.97s/it]

Processed: 19270910_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 480/1668 [59:34<1:47:43,  5.44s/it]

Processed: 19320102_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 481/1668 [59:41<1:56:39,  5.90s/it]

Processed: 19311017_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 482/1668 [59:46<1:54:53,  5.81s/it]

Processed: 19440603_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 483/1668 [59:53<1:56:47,  5.91s/it]

Processed: 19200626_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 484/1668 [1:00:01<2:09:08,  6.54s/it]

Processed: 19230428_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 485/1668 [1:00:06<2:05:22,  6.36s/it]

Processed: 19451208_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 486/1668 [1:00:13<2:03:48,  6.28s/it]

Processed: 19440902_1.txt -> Saved to Drive
Progress saved at 2025-02-11 18:41:19 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:  29%|██▉       | 487/1668 [1:00:19<2:03:38,  6.28s/it]

Processed: 19480410_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 488/1668 [1:00:23<1:52:55,  5.74s/it]

Processed: 19271119_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 489/1668 [1:00:28<1:46:54,  5.44s/it]

Processed: 19340210_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 490/1668 [1:00:34<1:51:42,  5.69s/it]

Processed: 19430306_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 491/1668 [1:00:40<1:54:03,  5.81s/it]

Processed: 19201113_1.txt -> Saved to Drive


Processing files:  29%|██▉       | 492/1668 [1:00:45<1:45:59,  5.41s/it]

Processed: 19410419_1.txt -> Saved to Drive


Processing files:  30%|██▉       | 493/1668 [1:00:49<1:37:51,  5.00s/it]

Processed: 19320130_1.txt -> Saved to Drive


Processing files:  30%|██▉       | 494/1668 [1:00:55<1:43:11,  5.27s/it]

Processed: 19460223_1.txt -> Saved to Drive


Processing files:  30%|██▉       | 495/1668 [1:01:00<1:41:20,  5.18s/it]

Processed: 19440212_1.txt -> Saved to Drive


Processing files:  30%|██▉       | 496/1668 [1:01:06<1:44:26,  5.35s/it]

Processed: 19400907_1.txt -> Saved to Drive


Processing files:  30%|██▉       | 497/1668 [1:01:11<1:43:33,  5.31s/it]

Processed: 19301011_1.txt -> Saved to Drive


Processing files:  30%|██▉       | 498/1668 [1:01:17<1:47:01,  5.49s/it]

Processed: 19250905_1.txt -> Saved to Drive


Processing files:  30%|██▉       | 499/1668 [1:01:22<1:43:17,  5.30s/it]

Processed: 19420228_1.txt -> Saved to Drive


Processing files:  30%|██▉       | 500/1668 [1:01:26<1:36:32,  4.96s/it]

Processed: 19400413_1.txt -> Saved to Drive


Processing files:  30%|███       | 501/1668 [1:01:31<1:35:36,  4.92s/it]

Processed: 19420627_1.txt -> Saved to Drive


Processing files:  30%|███       | 502/1668 [1:01:34<1:26:20,  4.44s/it]

Processed: 19290601_1.txt -> Saved to Drive


Processing files:  30%|███       | 503/1668 [1:01:39<1:28:44,  4.57s/it]

Processed: 19350323_1.txt -> Saved to Drive


Processing files:  30%|███       | 504/1668 [1:01:45<1:37:44,  5.04s/it]

Processed: 19331125_1.txt -> Saved to Drive


Processing files:  30%|███       | 505/1668 [1:01:52<1:52:20,  5.80s/it]

Processed: 19460406_1.txt -> Saved to Drive


Processing files:  30%|███       | 506/1668 [1:01:57<1:46:57,  5.52s/it]

Processed: 19271203_1.txt -> Saved to Drive


Processing files:  30%|███       | 507/1668 [1:02:02<1:43:22,  5.34s/it]

Processed: 19430313_1.txt -> Saved to Drive


Processing files:  30%|███       | 508/1668 [1:02:12<2:11:24,  6.80s/it]

Processed: 19240216_1.txt -> Saved to Drive


Processing files:  31%|███       | 509/1668 [1:02:19<2:09:07,  6.68s/it]

Processed: 19490226_1.txt -> Saved to Drive


Processing files:  31%|███       | 510/1668 [1:02:25<2:03:04,  6.38s/it]

Processed: 19330909_1.txt -> Saved to Drive


Processing files:  31%|███       | 511/1668 [1:02:29<1:50:41,  5.74s/it]

Processed: 19290202_1.txt -> Saved to Drive


Processing files:  31%|███       | 512/1668 [1:02:33<1:44:01,  5.40s/it]

Processed: 19480103_1.txt -> Saved to Drive


Processing files:  31%|███       | 513/1668 [1:02:39<1:46:52,  5.55s/it]

Processed: 19390923_1.txt -> Saved to Drive


Processing files:  31%|███       | 514/1668 [1:02:43<1:35:42,  4.98s/it]

Processed: 19371030_1.txt -> Saved to Drive


Processing files:  31%|███       | 515/1668 [1:02:47<1:29:54,  4.68s/it]

Processed: 19420620_1.txt -> Saved to Drive


Processing files:  31%|███       | 516/1668 [1:02:52<1:31:06,  4.74s/it]

Processed: 19350302_1.txt -> Saved to Drive


Processing files:  31%|███       | 517/1668 [1:02:56<1:26:30,  4.51s/it]

Processed: 19330114_1.txt -> Saved to Drive


Processing files:  31%|███       | 518/1668 [1:03:00<1:25:31,  4.46s/it]

Processed: 19440304_1.txt -> Saved to Drive


Processing files:  31%|███       | 519/1668 [1:03:06<1:33:44,  4.89s/it]

Processed: 19380716_1.txt -> Saved to Drive


Processing files:  31%|███       | 520/1668 [1:03:13<1:47:08,  5.60s/it]

Processed: 19500225_1.txt -> Saved to Drive


Processing files:  31%|███       | 521/1668 [1:03:16<1:31:11,  4.77s/it]

Processed: 19381029_1.txt -> Saved to Drive


Processing files:  31%|███▏      | 522/1668 [1:03:23<1:41:06,  5.29s/it]

Processed: 19460323_1.txt -> Saved to Drive


Processing files:  31%|███▏      | 523/1668 [1:03:26<1:31:37,  4.80s/it]

Processed: 19280211_1.txt -> Saved to Drive


Processing files:  31%|███▏      | 524/1668 [1:03:32<1:37:33,  5.12s/it]

Processed: 19351214_1.txt -> Saved to Drive


Processing files:  31%|███▏      | 525/1668 [1:03:36<1:31:48,  4.82s/it]

Processed: 19500218_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 526/1668 [1:03:47<2:06:35,  6.65s/it]

Processed: 19470802_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 527/1668 [1:03:55<2:11:31,  6.92s/it]

Processed: 19200814_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 528/1668 [1:04:01<2:06:27,  6.66s/it]

Processed: 19290330_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 529/1668 [1:04:07<2:06:40,  6.67s/it]

Processed: 19480918_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 530/1668 [1:04:12<1:56:57,  6.17s/it]

Processed: 19440812_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 531/1668 [1:04:18<1:51:20,  5.88s/it]

Processed: 19500603_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 532/1668 [1:04:28<2:17:38,  7.27s/it]

Processed: 19200619_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 533/1668 [1:04:34<2:11:40,  6.96s/it]

Processed: 19491029_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 534/1668 [1:04:40<2:02:51,  6.50s/it]

Processed: 19430529_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 535/1668 [1:04:45<1:55:09,  6.10s/it]

Processed: 19261106_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 536/1668 [1:04:49<1:43:21,  5.48s/it]

Processed: 19410426_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 537/1668 [1:04:52<1:27:06,  4.62s/it]

Processed: 19390114_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 538/1668 [1:04:56<1:25:04,  4.52s/it]

Processed: 19380108_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 539/1668 [1:05:02<1:35:44,  5.09s/it]

Processed: 19220204_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 540/1668 [1:05:08<1:40:42,  5.36s/it]

Processed: 19320702_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 541/1668 [1:05:12<1:32:09,  4.91s/it]

Processed: 19251212_1.txt -> Saved to Drive


Processing files:  32%|███▏      | 542/1668 [1:05:17<1:31:15,  4.86s/it]

Processed: 19430619_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 543/1668 [1:05:22<1:31:57,  4.90s/it]

Processed: 19320806_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 544/1668 [1:05:29<1:42:07,  5.45s/it]

Processed: 19320716_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 545/1668 [1:05:34<1:44:09,  5.56s/it]

Processed: 19381015_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 546/1668 [1:05:42<1:52:18,  6.01s/it]

Processed: 19460316_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 547/1668 [1:05:47<1:51:20,  5.96s/it]

Processed: 19200731_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 548/1668 [1:05:51<1:40:22,  5.38s/it]

Processed: 19420530_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 549/1668 [1:05:57<1:43:04,  5.53s/it]

Processed: 19320402_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 550/1668 [1:06:03<1:45:35,  5.67s/it]

Processed: 19491008_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 551/1668 [1:06:10<1:48:53,  5.85s/it]

Processed: 19360905_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 552/1668 [1:06:18<2:05:37,  6.75s/it]

Processed: 19400504_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 553/1668 [1:06:23<1:55:05,  6.19s/it]

Processed: 19511208_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 554/1668 [1:06:30<1:56:10,  6.26s/it]

Processed: 19251226_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 555/1668 [1:06:35<1:51:08,  5.99s/it]

Processed: 19520628_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 556/1668 [1:06:39<1:41:01,  5.45s/it]

Processed: 19360321_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 557/1668 [1:06:45<1:40:56,  5.45s/it]

Processed: 19350810_1.txt -> Saved to Drive


Processing files:  33%|███▎      | 558/1668 [1:06:53<1:55:48,  6.26s/it]

Processed: 19470712_1.txt -> Saved to Drive


Processing files:  34%|███▎      | 559/1668 [1:06:57<1:46:16,  5.75s/it]

Processed: 19510210_1.txt -> Saved to Drive


Processing files:  34%|███▎      | 560/1668 [1:07:04<1:49:53,  5.95s/it]

Processed: 19250912_1.txt -> Saved to Drive


Processing files:  34%|███▎      | 561/1668 [1:07:10<1:52:37,  6.10s/it]

Processed: 19350330_1.txt -> Saved to Drive


Processing files:  34%|███▎      | 562/1668 [1:07:15<1:42:35,  5.57s/it]

Processed: 19500729_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 563/1668 [1:07:20<1:41:53,  5.53s/it]

Processed: 19501104_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 564/1668 [1:07:26<1:45:15,  5.72s/it]

Processed: 19420314_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 565/1668 [1:07:32<1:42:56,  5.60s/it]

Processed: 19230609_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 566/1668 [1:07:36<1:37:05,  5.29s/it]

Processed: 19240105_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 567/1668 [1:07:41<1:33:16,  5.08s/it]

Processed: 19311003_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 568/1668 [1:07:45<1:29:34,  4.89s/it]

Processed: 19501223_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 569/1668 [1:07:51<1:34:29,  5.16s/it]

Processed: 19420516_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 570/1668 [1:07:56<1:32:13,  5.04s/it]

Processed: 19410510_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 571/1668 [1:08:02<1:39:51,  5.46s/it]

Processed: 19491112_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 572/1668 [1:08:07<1:36:59,  5.31s/it]

Processed: 19390916_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 573/1668 [1:08:13<1:38:09,  5.38s/it]

Processed: 19510922_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 574/1668 [1:08:19<1:44:44,  5.74s/it]

Processed: 19520419_1.txt -> Saved to Drive


Processing files:  34%|███▍      | 575/1668 [1:08:26<1:47:58,  5.93s/it]

Processed: 19211001_1.txt -> Saved to Drive


Processing files:  35%|███▍      | 576/1668 [1:08:30<1:40:38,  5.53s/it]

Processed: 19200918_1.txt -> Saved to Drive


Processing files:  35%|███▍      | 577/1668 [1:08:35<1:35:34,  5.26s/it]

Processed: 19320305_1.txt -> Saved to Drive


Processing files:  35%|███▍      | 578/1668 [1:08:39<1:31:21,  5.03s/it]

Processed: 19371009_1.txt -> Saved to Drive


Processing files:  35%|███▍      | 579/1668 [1:08:53<2:17:11,  7.56s/it]

Processed: 19471011_1.txt -> Saved to Drive


Processing files:  35%|███▍      | 580/1668 [1:08:59<2:09:18,  7.13s/it]

Processed: 19390715_1.txt -> Saved to Drive


Processing files:  35%|███▍      | 581/1668 [1:09:07<2:16:20,  7.53s/it]

Processed: 19210924_1.txt -> Saved to Drive


Processing files:  35%|███▍      | 582/1668 [1:09:11<1:54:39,  6.34s/it]

Processed: 19200925_1.txt -> Saved to Drive


Processing files:  35%|███▍      | 583/1668 [1:09:16<1:50:05,  6.09s/it]

Processed: 19440226_1.txt -> Saved to Drive


Processing files:  35%|███▌      | 584/1668 [1:09:20<1:38:04,  5.43s/it]

Processed: 19361128_1.txt -> Saved to Drive


Processing files:  35%|███▌      | 585/1668 [1:09:25<1:36:16,  5.33s/it]

Processed: 19350615_1.txt -> Saved to Drive


Processing files:  35%|███▌      | 586/1668 [1:09:32<1:44:08,  5.77s/it]

Processed: 19410927_1.txt -> Saved to Drive


Processing files:  35%|███▌      | 587/1668 [1:09:37<1:39:51,  5.54s/it]

Processed: 19390415_1.txt -> Saved to Drive


Processing files:  35%|███▌      | 588/1668 [1:09:44<1:43:58,  5.78s/it]

Processed: 19340915_1.txt -> Saved to Drive


Processing files:  35%|███▌      | 589/1668 [1:09:47<1:31:14,  5.07s/it]

Processed: 19300405_1.txt -> Saved to Drive


Processing files:  35%|███▌      | 590/1668 [1:09:51<1:26:05,  4.79s/it]

Processed: 19490402_1.txt -> Saved to Drive


Processing files:  35%|███▌      | 591/1668 [1:10:06<2:22:19,  7.93s/it]

Processed: 19490205_1.txt -> Saved to Drive


Processing files:  35%|███▌      | 592/1668 [1:10:10<2:01:32,  6.78s/it]

Processed: 19250425_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 593/1668 [1:10:14<1:43:39,  5.79s/it]

Processed: 19290817_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 594/1668 [1:10:18<1:35:56,  5.36s/it]

Processed: 19350202_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 595/1668 [1:10:23<1:33:27,  5.23s/it]

Processed: 19440429_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 596/1668 [1:10:27<1:26:13,  4.83s/it]

Processed: 19360328_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 597/1668 [1:10:32<1:26:10,  4.83s/it]

Processed: 19451229_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 598/1668 [1:10:37<1:26:54,  4.87s/it]

Processed: 19520607_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 599/1668 [1:10:44<1:40:46,  5.66s/it]

Processed: 19441216_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 600/1668 [1:10:49<1:35:00,  5.34s/it]

Processed: 19361010_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 601/1668 [1:12:33<10:23:01, 35.03s/it]

Processed: 19290406_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 602/1668 [1:12:41<7:57:26, 26.87s/it] 

Processed: 19220708_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 603/1668 [1:12:54<6:43:33, 22.74s/it]

Processed: 19480327_1.txt -> Saved to Drive


Processing files:  36%|███▌      | 604/1668 [1:13:03<5:26:47, 18.43s/it]

Processed: 19330311_1.txt -> Saved to Drive


Processing files:  36%|███▋      | 605/1668 [1:13:09<4:21:46, 14.78s/it]

Processed: 19220617_1.txt -> Saved to Drive


Processing files:  36%|███▋      | 606/1668 [1:13:23<4:16:55, 14.52s/it]

Processed: 19460216_1.txt -> Saved to Drive


Processing files:  36%|███▋      | 607/1668 [1:13:27<3:20:23, 11.33s/it]

Processed: 19220128_1.txt -> Saved to Drive


Processing files:  36%|███▋      | 608/1668 [1:13:35<3:01:44, 10.29s/it]

Processed: 19400831_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 609/1668 [1:13:40<2:37:21,  8.92s/it]

Processed: 19340428_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 610/1668 [1:13:45<2:13:41,  7.58s/it]

Processed: 19400127_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 611/1668 [1:14:06<3:23:54, 11.57s/it]

Processed: 19441118_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 612/1668 [1:14:10<2:47:29,  9.52s/it]

Processed: 19510310_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 613/1668 [1:14:15<2:20:07,  7.97s/it]

Processed: 19450707_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 614/1668 [1:14:19<2:00:49,  6.88s/it]

Processed: 19360229_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 615/1668 [1:14:24<1:48:19,  6.17s/it]

Processed: 19361031_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 616/1668 [1:14:29<1:46:12,  6.06s/it]

Processed: 19421128_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 617/1668 [1:14:35<1:42:53,  5.87s/it]

Processed: 19480313_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 618/1668 [1:14:42<1:51:54,  6.39s/it]

Processed: 19400330_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 619/1668 [1:14:48<1:47:39,  6.16s/it]

Processed: 19300426_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 620/1668 [1:14:52<1:36:07,  5.50s/it]

Processed: 19331104_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 621/1668 [1:14:57<1:35:51,  5.49s/it]

Processed: 19340224_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 622/1668 [1:15:03<1:34:35,  5.43s/it]

Processed: 19201030_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 623/1668 [1:15:08<1:33:40,  5.38s/it]

Processed: 19260710_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 624/1668 [1:15:12<1:25:28,  4.91s/it]

Processed: 19480925_1.txt -> Saved to Drive


Processing files:  37%|███▋      | 625/1668 [1:15:15<1:15:13,  4.33s/it]

Processed: 19270730_1.txt -> Saved to Drive
Progress saved at 2025-02-11 18:56:21 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:  38%|███▊      | 626/1668 [1:15:20<1:21:57,  4.72s/it]

Processed: 19220610_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 627/1668 [1:15:27<1:31:51,  5.29s/it]

Processed: 19350928_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 628/1668 [1:15:34<1:39:33,  5.74s/it]

Processed: 19331007_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 629/1668 [1:15:39<1:38:00,  5.66s/it]

Processed: 19270423_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 630/1668 [1:15:58<2:44:15,  9.50s/it]

Processed: 19420523_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 631/1668 [1:16:01<2:12:55,  7.69s/it]

Processed: 19290608_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 632/1668 [1:16:05<1:53:06,  6.55s/it]

Processed: 19291005_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 633/1668 [1:16:12<1:52:28,  6.52s/it]

Processed: 19210319_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 634/1668 [1:16:20<2:02:12,  7.09s/it]

Processed: 19380903_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 635/1668 [1:16:24<1:46:24,  6.18s/it]

Processed: 19301018_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 636/1668 [1:16:28<1:36:07,  5.59s/it]

Processed: 19330513_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 637/1668 [1:16:35<1:42:48,  5.98s/it]

Processed: 19470628_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 638/1668 [1:16:41<1:40:54,  5.88s/it]

Processed: 19440318_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 639/1668 [1:16:49<1:52:25,  6.55s/it]

Processed: 19300419_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 640/1668 [1:16:55<1:49:56,  6.42s/it]

Processed: 19230512_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 641/1668 [1:17:02<1:51:57,  6.54s/it]

Processed: 19251031_1.txt -> Saved to Drive


Processing files:  38%|███▊      | 642/1668 [1:17:07<1:43:36,  6.06s/it]

Processed: 19440617_1.txt -> Saved to Drive


Processing files:  39%|███▊      | 643/1668 [1:17:11<1:35:24,  5.58s/it]

Processed: 19460615_1.txt -> Saved to Drive


Processing files:  39%|███▊      | 644/1668 [1:17:23<2:07:54,  7.49s/it]

Processed: 19270917_1.txt -> Saved to Drive


Processing files:  39%|███▊      | 645/1668 [1:17:28<1:55:56,  6.80s/it]

Processed: 19511201_1.txt -> Saved to Drive


Processing files:  39%|███▊      | 646/1668 [1:17:35<1:54:02,  6.70s/it]

Processed: 19210212_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 647/1668 [1:17:41<1:53:24,  6.66s/it]

Processed: 19370605_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 648/1668 [1:17:51<2:06:25,  7.44s/it]

Processed: 19340728_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 649/1668 [1:17:55<1:51:12,  6.55s/it]

Processed: 19460713_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 650/1668 [1:18:10<2:34:58,  9.13s/it]

Processed: 19200522_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 651/1668 [1:18:16<2:16:58,  8.08s/it]

Processed: 19250801_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 652/1668 [1:18:21<2:00:14,  7.10s/it]

Processed: 19410621_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 653/1668 [1:18:27<1:54:41,  6.78s/it]

Processed: 19430626_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 654/1668 [1:18:30<1:34:32,  5.59s/it]

Processed: 19391021_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 655/1668 [1:18:34<1:30:00,  5.33s/it]

Processed: 19270827_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 656/1668 [1:18:43<1:45:08,  6.23s/it]

Processed: 19510324_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 657/1668 [1:18:47<1:35:53,  5.69s/it]

Processed: 19480731_1.txt -> Saved to Drive


Processing files:  39%|███▉      | 658/1668 [1:18:51<1:24:41,  5.03s/it]

Processed: 19370522_1.txt -> Saved to Drive


Processing files:  40%|███▉      | 659/1668 [1:18:55<1:19:55,  4.75s/it]

Processed: 19360926_1.txt -> Saved to Drive


Processing files:  40%|███▉      | 660/1668 [1:19:01<1:25:29,  5.09s/it]

Processed: 19330902_1.txt -> Saved to Drive


Processing files:  40%|███▉      | 661/1668 [1:19:05<1:22:17,  4.90s/it]

Processed: 19301129_1.txt -> Saved to Drive


Processing files:  40%|███▉      | 662/1668 [1:19:09<1:19:36,  4.75s/it]

Processed: 19401228_1.txt -> Saved to Drive


Processing files:  40%|███▉      | 663/1668 [1:19:17<1:35:34,  5.71s/it]

Processed: 19290511_1.txt -> Saved to Drive


Processing files:  40%|███▉      | 664/1668 [1:19:24<1:38:40,  5.90s/it]

Processed: 19370327_1.txt -> Saved to Drive


Processing files:  40%|███▉      | 665/1668 [1:19:29<1:34:40,  5.66s/it]

Processed: 19260529_1.txt -> Saved to Drive


Processing files:  40%|███▉      | 666/1668 [1:19:34<1:30:52,  5.44s/it]

Processed: 19350406_1.txt -> Saved to Drive


Processing files:  40%|███▉      | 667/1668 [1:19:40<1:34:00,  5.64s/it]

Processed: 19450818_1.txt -> Saved to Drive


Processing files:  40%|████      | 668/1668 [1:19:44<1:28:24,  5.30s/it]

Processed: 19330304_1.txt -> Saved to Drive


Processing files:  40%|████      | 669/1668 [1:20:03<2:34:27,  9.28s/it]

Processed: 19510127_1.txt -> Saved to Drive


Processing files:  40%|████      | 670/1668 [1:20:10<2:24:05,  8.66s/it]

Processed: 19470920_1.txt -> Saved to Drive


Processing files:  40%|████      | 671/1668 [1:20:15<2:03:33,  7.44s/it]

Processed: 19380827_1.txt -> Saved to Drive


Processing files:  40%|████      | 672/1668 [1:20:25<2:19:36,  8.41s/it]

Processed: 19240510_1.txt -> Saved to Drive


Processing files:  40%|████      | 673/1668 [1:20:44<3:10:28, 11.49s/it]

Processed: 19380611_1.txt -> Saved to Drive


Processing files:  40%|████      | 674/1668 [1:20:50<2:43:07,  9.85s/it]

Processed: 19270305_1.txt -> Saved to Drive


Processing files:  40%|████      | 675/1668 [1:20:59<2:39:32,  9.64s/it]

Processed: 19470125_1.txt -> Saved to Drive


Processing files:  41%|████      | 676/1668 [1:21:07<2:29:54,  9.07s/it]

Processed: 19460727_1.txt -> Saved to Drive


Processing files:  41%|████      | 677/1668 [1:21:11<2:03:21,  7.47s/it]

Processed: 19340512_1.txt -> Saved to Drive


Processing files:  41%|████      | 678/1668 [1:21:16<1:50:47,  6.71s/it]

Processed: 19220225_1.txt -> Saved to Drive


Processing files:  41%|████      | 679/1668 [1:21:20<1:37:19,  5.90s/it]

Processed: 19221118_1.txt -> Saved to Drive


Processing files:  41%|████      | 680/1668 [1:21:24<1:30:27,  5.49s/it]

Processed: 19440408_1.txt -> Saved to Drive


Processing files:  41%|████      | 681/1668 [1:21:29<1:25:21,  5.19s/it]

Processed: 19410830_1.txt -> Saved to Drive


Processing files:  41%|████      | 682/1668 [1:21:33<1:20:12,  4.88s/it]

Processed: 19290119_1.txt -> Saved to Drive


Processing files:  41%|████      | 683/1668 [1:21:40<1:31:49,  5.59s/it]

Processed: 19411227_1.txt -> Saved to Drive


Processing files:  41%|████      | 684/1668 [1:21:45<1:28:41,  5.41s/it]

Processed: 19450331_1.txt -> Saved to Drive


Processing files:  41%|████      | 685/1668 [1:21:51<1:33:05,  5.68s/it]

Processed: 19330107_1.txt -> Saved to Drive


Processing files:  41%|████      | 686/1668 [1:21:57<1:33:10,  5.69s/it]

Processed: 19270521_1.txt -> Saved to Drive


Processing files:  41%|████      | 687/1668 [1:22:02<1:31:13,  5.58s/it]

Processed: 19300215_1.txt -> Saved to Drive


Processing files:  41%|████      | 688/1668 [1:22:09<1:33:52,  5.75s/it]

Processed: 19210122_1.txt -> Saved to Drive


Processing files:  41%|████▏     | 689/1668 [1:22:25<2:25:12,  8.90s/it]

Processed: 19320109_1.txt -> Saved to Drive


Processing files:  41%|████▏     | 690/1668 [1:22:30<2:08:47,  7.90s/it]

Processed: 19221209_1.txt -> Saved to Drive


Processing files:  41%|████▏     | 691/1668 [1:22:37<2:01:34,  7.47s/it]

Processed: 19320514_1.txt -> Saved to Drive


Processing files:  41%|████▏     | 692/1668 [1:22:43<1:54:12,  7.02s/it]

Processed: 19450428_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 693/1668 [1:22:49<1:47:56,  6.64s/it]

Processed: 19240823_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 694/1668 [1:22:54<1:42:49,  6.33s/it]

Processed: 19501014_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 695/1668 [1:23:00<1:40:12,  6.18s/it]

Processed: 19450217_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 696/1668 [1:23:05<1:35:07,  5.87s/it]

Processed: 19340120_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 697/1668 [1:23:12<1:38:24,  6.08s/it]

Processed: 19240802_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 698/1668 [1:23:16<1:30:24,  5.59s/it]

Processed: 19350223_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 699/1668 [1:23:23<1:36:59,  6.01s/it]

Processed: 19500422_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 700/1668 [1:23:32<1:49:06,  6.76s/it]

Processed: 19240329_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 701/1668 [1:23:36<1:38:47,  6.13s/it]

Processed: 19250321_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 702/1668 [1:23:42<1:36:51,  6.02s/it]

Processed: 19481030_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 703/1668 [1:23:49<1:39:06,  6.16s/it]

Processed: 19240726_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 704/1668 [1:24:16<3:21:57, 12.57s/it]

Processed: 19461130_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 705/1668 [1:24:23<2:52:36, 10.75s/it]

Processed: 19400914_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 706/1668 [1:24:28<2:27:19,  9.19s/it]

Processed: 19270820_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 707/1668 [1:24:33<2:06:07,  7.87s/it]

Processed: 19480703_1.txt -> Saved to Drive


Processing files:  42%|████▏     | 708/1668 [1:24:37<1:47:45,  6.73s/it]

Processed: 19290525_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 709/1668 [1:24:42<1:38:38,  6.17s/it]

Processed: 19301004_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 710/1668 [1:24:57<2:19:34,  8.74s/it]

Processed: 19210625_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 711/1668 [1:25:22<3:39:37, 13.77s/it]

Processed: 19490702_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 712/1668 [1:25:34<3:31:29, 13.27s/it]

Processed: 19500610_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 713/1668 [1:25:41<3:00:25, 11.34s/it]

Processed: 19410517_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 714/1668 [1:25:45<2:25:10,  9.13s/it]

Processed: 19360104_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 715/1668 [1:25:51<2:11:56,  8.31s/it]

Processed: 19430417_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 716/1668 [1:25:56<1:54:52,  7.24s/it]

Processed: 19340317_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 717/1668 [1:26:02<1:49:17,  6.90s/it]

Processed: 19320312_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 718/1668 [1:26:15<2:17:14,  8.67s/it]

Processed: 19390826_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 719/1668 [1:26:22<2:07:04,  8.03s/it]

Processed: 19310530_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 720/1668 [1:26:31<2:14:40,  8.52s/it]

Processed: 19501230_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 721/1668 [1:26:49<2:56:51, 11.21s/it]

Processed: 19460504_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 722/1668 [1:26:57<2:41:37, 10.25s/it]

Processed: 19500826_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 723/1668 [1:27:01<2:12:12,  8.39s/it]

Processed: 19200313_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 724/1668 [1:27:29<3:46:22, 14.39s/it]

Processed: 19421212_1.txt -> Saved to Drive


Processing files:  43%|████▎     | 725/1668 [1:27:35<3:03:46, 11.69s/it]

Processed: 19420328_1.txt -> Saved to Drive


Processing files:  44%|████▎     | 726/1668 [1:27:39<2:29:23,  9.52s/it]

Processed: 19250711_1.txt -> Saved to Drive


Processing files:  44%|████▎     | 727/1668 [1:27:46<2:17:53,  8.79s/it]

Processed: 19420613_1.txt -> Saved to Drive


Processing files:  44%|████▎     | 728/1668 [1:27:50<1:53:31,  7.25s/it]

Processed: 19340217_1.txt -> Saved to Drive


Processing files:  44%|████▎     | 729/1668 [1:27:54<1:39:31,  6.36s/it]

Processed: 19211203_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 730/1668 [1:27:59<1:31:50,  5.87s/it]

Processed: 19210903_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 731/1668 [1:28:04<1:27:04,  5.58s/it]

Processed: 19261002_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 732/1668 [1:28:07<1:18:06,  5.01s/it]

Processed: 19241115_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 733/1668 [1:28:13<1:20:49,  5.19s/it]

Processed: 19430123_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 734/1668 [1:28:29<2:12:45,  8.53s/it]

Processed: 19370724_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 735/1668 [1:28:32<1:45:00,  6.75s/it]

Processed: 19290914_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 736/1668 [1:28:37<1:39:00,  6.37s/it]

Processed: 19320528_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 737/1668 [1:28:39<1:18:08,  5.04s/it]

Processed: 19430130_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 738/1668 [1:28:47<1:29:23,  5.77s/it]

Processed: 19490212_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 739/1668 [1:28:51<1:22:54,  5.35s/it]

Processed: 19360627_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 740/1668 [1:29:00<1:38:16,  6.35s/it]

Processed: 19371106_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 741/1668 [1:29:05<1:32:27,  5.98s/it]

Processed: 19451110_1.txt -> Saved to Drive


Processing files:  44%|████▍     | 742/1668 [1:29:11<1:31:24,  5.92s/it]

Processed: 19441202_1.txt -> Saved to Drive


Processing files:  45%|████▍     | 743/1668 [1:29:16<1:29:37,  5.81s/it]

Processed: 19300802_1.txt -> Saved to Drive


Processing files:  45%|████▍     | 744/1668 [1:29:22<1:29:33,  5.82s/it]

Processed: 19490416_1.txt -> Saved to Drive


Processing files:  45%|████▍     | 745/1668 [1:29:27<1:26:24,  5.62s/it]

Processed: 19410802_1.txt -> Saved to Drive


Processing files:  45%|████▍     | 746/1668 [1:29:32<1:22:01,  5.34s/it]

Processed: 19261009_1.txt -> Saved to Drive


Processing files:  45%|████▍     | 747/1668 [1:29:39<1:30:00,  5.86s/it]

Processed: 19200717_1.txt -> Saved to Drive


Processing files:  45%|████▍     | 748/1668 [1:29:45<1:29:03,  5.81s/it]

Processed: 19370529_1.txt -> Saved to Drive


Processing files:  45%|████▍     | 749/1668 [1:29:50<1:25:43,  5.60s/it]

Processed: 19460817_1.txt -> Saved to Drive


Processing files:  45%|████▍     | 750/1668 [1:29:54<1:20:16,  5.25s/it]

Processed: 19241213_1.txt -> Saved to Drive


Processing files:  45%|████▌     | 751/1668 [1:30:00<1:20:32,  5.27s/it]

Processed: 19470621_1.txt -> Saved to Drive


Processing files:  45%|████▌     | 752/1668 [1:30:07<1:29:41,  5.87s/it]

Processed: 19371218_1.txt -> Saved to Drive


Processing files:  45%|████▌     | 753/1668 [1:30:14<1:34:34,  6.20s/it]

Processed: 19441014_1.txt -> Saved to Drive


Processing files:  45%|████▌     | 754/1668 [1:30:22<1:42:47,  6.75s/it]

Processed: 19340623_1.txt -> Saved to Drive
Progress saved at 2025-02-11 19:11:28 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:  45%|████▌     | 755/1668 [1:30:30<1:47:39,  7.08s/it]

Processed: 19520405_1.txt -> Saved to Drive


Processing files:  45%|████▌     | 756/1668 [1:30:36<1:42:10,  6.72s/it]

Processed: 19210115_1.txt -> Saved to Drive


Processing files:  45%|████▌     | 757/1668 [1:30:52<2:27:27,  9.71s/it]

Processed: 19371211_1.txt -> Saved to Drive


Processing files:  45%|████▌     | 758/1668 [1:30:56<2:01:43,  8.03s/it]

Processed: 19331028_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 759/1668 [1:31:04<1:59:45,  7.91s/it]

Processed: 19500819_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 760/1668 [1:31:09<1:46:57,  7.07s/it]

Processed: 19330325_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 761/1668 [1:31:17<1:52:15,  7.43s/it]

Processed: 19350629_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 762/1668 [1:31:19<1:23:30,  5.53s/it]

Processed: 19410104_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 763/1668 [1:31:25<1:26:37,  5.74s/it]

Processed: 19480515_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 764/1668 [1:31:30<1:22:17,  5.46s/it]

Processed: 19520209_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 765/1668 [1:31:34<1:18:41,  5.23s/it]

Processed: 19460608_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 766/1668 [1:31:41<1:23:49,  5.58s/it]

Processed: 19500401_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 767/1668 [1:31:46<1:21:49,  5.45s/it]

Processed: 19310411_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 768/1668 [1:31:52<1:23:24,  5.56s/it]

Processed: 19310117_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 769/1668 [1:31:56<1:19:57,  5.34s/it]

Processed: 19410809_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 770/1668 [1:32:01<1:14:52,  5.00s/it]

Processed: 19240308_1.txt -> Saved to Drive


Processing files:  46%|████▌     | 771/1668 [1:32:04<1:09:31,  4.65s/it]

Processed: 19330211_1.txt -> Saved to Drive


Processing files:  46%|████▋     | 772/1668 [1:32:09<1:07:22,  4.51s/it]

Processed: 19320206_1.txt -> Saved to Drive


Processing files:  46%|████▋     | 773/1668 [1:32:14<1:08:59,  4.63s/it]

Processed: 19210611_1.txt -> Saved to Drive


Processing files:  46%|████▋     | 774/1668 [1:32:18<1:09:04,  4.64s/it]

Processed: 19230623_1.txt -> Saved to Drive


Processing files:  46%|████▋     | 775/1668 [1:32:22<1:07:12,  4.52s/it]

Processed: 19430731_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 776/1668 [1:32:27<1:07:56,  4.57s/it]

Processed: 19201204_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 777/1668 [1:32:33<1:13:31,  4.95s/it]

Processed: 19250613_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 778/1668 [1:32:41<1:25:58,  5.80s/it]

Processed: 19200911_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 779/1668 [1:32:45<1:19:31,  5.37s/it]

Processed: 19420131_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 780/1668 [1:32:49<1:13:41,  4.98s/it]

Processed: 19391118_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 781/1668 [1:32:57<1:27:27,  5.92s/it]

Processed: 19221125_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 782/1668 [1:33:01<1:19:22,  5.38s/it]

Processed: 19271126_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 783/1668 [1:33:07<1:19:47,  5.41s/it]

Processed: 19400203_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 784/1668 [1:33:27<2:23:47,  9.76s/it]

Processed: 19510804_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 785/1668 [1:33:33<2:07:32,  8.67s/it]

Processed: 19340630_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 786/1668 [1:33:38<1:52:31,  7.65s/it]

Processed: 19320618_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 787/1668 [1:33:46<1:54:42,  7.81s/it]

Processed: 19310425_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 788/1668 [1:34:03<2:31:15, 10.31s/it]

Processed: 19370424_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 789/1668 [1:34:18<2:52:25, 11.77s/it]

Processed: 19420207_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 790/1668 [1:34:21<2:15:15,  9.24s/it]

Processed: 19300913_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 791/1668 [1:34:25<1:51:06,  7.60s/it]

Processed: 19270702_1.txt -> Saved to Drive


Processing files:  47%|████▋     | 792/1668 [1:34:32<1:50:24,  7.56s/it]

Processed: 19360418_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 793/1668 [1:34:37<1:35:47,  6.57s/it]

Processed: 19211224_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 794/1668 [1:34:43<1:35:21,  6.55s/it]

Processed: 19461214_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 795/1668 [1:34:48<1:28:25,  6.08s/it]

Processed: 19520216_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 796/1668 [1:34:56<1:35:45,  6.59s/it]

Processed: 19370417_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 797/1668 [1:35:03<1:37:09,  6.69s/it]

Processed: 19201009_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 798/1668 [1:35:07<1:27:19,  6.02s/it]

Processed: 19520308_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 799/1668 [1:35:12<1:23:10,  5.74s/it]

Processed: 19370710_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 800/1668 [1:35:18<1:23:44,  5.79s/it]

Processed: 19520105_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 801/1668 [1:35:21<1:12:48,  5.04s/it]

Processed: 19340421_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 802/1668 [1:35:27<1:16:02,  5.27s/it]

Processed: 19210312_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 803/1668 [1:35:31<1:09:56,  4.85s/it]

Processed: 19211126_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 804/1668 [1:35:38<1:16:21,  5.30s/it]

Processed: 19500708_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 805/1668 [1:35:49<1:42:58,  7.16s/it]

Processed: 19500916_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 806/1668 [1:35:54<1:31:32,  6.37s/it]

Processed: 19260102_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 807/1668 [1:35:59<1:27:20,  6.09s/it]

Processed: 19340721_1.txt -> Saved to Drive


Processing files:  48%|████▊     | 808/1668 [1:36:05<1:26:35,  6.04s/it]

Processed: 19370626_1.txt -> Saved to Drive


Processing files:  49%|████▊     | 809/1668 [1:36:10<1:20:33,  5.63s/it]

Processed: 19510623_1.txt -> Saved to Drive


Processing files:  49%|████▊     | 810/1668 [1:36:16<1:23:48,  5.86s/it]

Processed: 19281020_1.txt -> Saved to Drive


Processing files:  49%|████▊     | 811/1668 [1:36:24<1:31:38,  6.42s/it]

Processed: 19470201_1.txt -> Saved to Drive


Processing files:  49%|████▊     | 812/1668 [1:36:28<1:21:38,  5.72s/it]

Processed: 19480626_1.txt -> Saved to Drive


Processing files:  49%|████▊     | 813/1668 [1:36:34<1:24:12,  5.91s/it]

Processed: 19240614_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 814/1668 [1:36:39<1:19:50,  5.61s/it]

Processed: 19250404_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 815/1668 [1:36:44<1:18:02,  5.49s/it]

Processed: 19410524_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 816/1668 [1:36:51<1:23:52,  5.91s/it]

Processed: 19421107_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 817/1668 [1:36:56<1:20:02,  5.64s/it]

Processed: 19510317_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 818/1668 [1:37:01<1:14:30,  5.26s/it]

Processed: 19430327_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 819/1668 [1:37:06<1:15:27,  5.33s/it]

Processed: 19200508_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 820/1668 [1:37:10<1:08:47,  4.87s/it]

Processed: 19360307_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 821/1668 [1:37:16<1:12:50,  5.16s/it]

Processed: 19261023_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 822/1668 [1:37:21<1:14:19,  5.27s/it]

Processed: 19420110_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 823/1668 [1:37:47<2:39:15, 11.31s/it]

Processed: 19281208_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 824/1668 [1:37:52<2:12:30,  9.42s/it]

Processed: 19400525_1.txt -> Saved to Drive


Processing files:  49%|████▉     | 825/1668 [1:37:57<1:54:42,  8.16s/it]

Processed: 19320730_1.txt -> Saved to Drive


Processing files:  50%|████▉     | 826/1668 [1:38:04<1:50:34,  7.88s/it]

Processed: 19461116_1.txt -> Saved to Drive


Processing files:  50%|████▉     | 827/1668 [1:38:08<1:33:36,  6.68s/it]

Processed: 19291207_1.txt -> Saved to Drive


Processing files:  50%|████▉     | 828/1668 [1:38:12<1:23:26,  5.96s/it]

Processed: 19290921_1.txt -> Saved to Drive


Processing files:  50%|████▉     | 829/1668 [1:38:22<1:39:04,  7.09s/it]

Processed: 19291123_1.txt -> Saved to Drive


Processing files:  50%|████▉     | 830/1668 [1:38:29<1:39:07,  7.10s/it]

Processed: 19351207_1.txt -> Saved to Drive


Processing files:  50%|████▉     | 831/1668 [1:38:34<1:30:01,  6.45s/it]

Processed: 19330715_1.txt -> Saved to Drive


Processing files:  50%|████▉     | 832/1668 [1:38:40<1:28:38,  6.36s/it]

Processed: 19261211_1.txt -> Saved to Drive


Processing files:  50%|████▉     | 833/1668 [1:38:46<1:26:40,  6.23s/it]

Processed: 19400928_1.txt -> Saved to Drive


Processing files:  50%|█████     | 834/1668 [1:38:51<1:21:29,  5.86s/it]

Processed: 19431016_1.txt -> Saved to Drive


Processing files:  50%|█████     | 835/1668 [1:38:57<1:20:45,  5.82s/it]

Processed: 19210108_1.txt -> Saved to Drive


Processing files:  50%|█████     | 836/1668 [1:39:01<1:14:07,  5.35s/it]

Processed: 19310124_1.txt -> Saved to Drive


Processing files:  50%|█████     | 837/1668 [1:39:05<1:06:41,  4.82s/it]

Processed: 19341215_1.txt -> Saved to Drive


Processing files:  50%|█████     | 838/1668 [1:39:10<1:07:08,  4.85s/it]

Processed: 19511027_1.txt -> Saved to Drive


Processing files:  50%|█████     | 839/1668 [1:39:13<1:00:29,  4.38s/it]

Processed: 19390902_1.txt -> Saved to Drive


Processing files:  50%|█████     | 840/1668 [1:39:18<1:01:44,  4.47s/it]

Processed: 19450811_1.txt -> Saved to Drive


Processing files:  50%|█████     | 841/1668 [1:39:22<1:03:11,  4.58s/it]

Processed: 19260424_1.txt -> Saved to Drive


Processing files:  50%|█████     | 842/1668 [1:39:30<1:16:58,  5.59s/it]

Processed: 19300118_1.txt -> Saved to Drive


Processing files:  51%|█████     | 843/1668 [1:39:36<1:16:29,  5.56s/it]

Processed: 19421003_1.txt -> Saved to Drive


Processing files:  51%|█████     | 844/1668 [1:39:43<1:24:50,  6.18s/it]

Processed: 19260703_1.txt -> Saved to Drive


Processing files:  51%|█████     | 845/1668 [1:39:46<1:11:35,  5.22s/it]

Processed: 19331111_1.txt -> Saved to Drive


Processing files:  51%|█████     | 846/1668 [1:39:50<1:05:52,  4.81s/it]

Processed: 19331216_1.txt -> Saved to Drive


Processing files:  51%|█████     | 847/1668 [1:39:55<1:04:42,  4.73s/it]

Processed: 19511020_1.txt -> Saved to Drive


Processing files:  51%|█████     | 848/1668 [1:39:59<1:02:05,  4.54s/it]

Processed: 19261113_1.txt -> Saved to Drive


Processing files:  51%|█████     | 849/1668 [1:40:04<1:02:32,  4.58s/it]

Processed: 19510414_1.txt -> Saved to Drive


Processing files:  51%|█████     | 850/1668 [1:40:09<1:07:52,  4.98s/it]

Processed: 19221202_1.txt -> Saved to Drive


Processing files:  51%|█████     | 851/1668 [1:40:20<1:31:45,  6.74s/it]

Processed: 19390819_1.txt -> Saved to Drive


Processing files:  51%|█████     | 852/1668 [1:40:26<1:25:25,  6.28s/it]

Processed: 19500204_1.txt -> Saved to Drive


Processing files:  51%|█████     | 853/1668 [1:40:30<1:16:20,  5.62s/it]

Processed: 19351228_1.txt -> Saved to Drive


Processing files:  51%|█████     | 854/1668 [1:40:35<1:17:13,  5.69s/it]

Processed: 19300719_1.txt -> Saved to Drive


Processing files:  51%|█████▏    | 855/1668 [1:40:41<1:16:48,  5.67s/it]

Processed: 19280721_1.txt -> Saved to Drive


Processing files:  51%|█████▏    | 856/1668 [1:40:48<1:23:13,  6.15s/it]

Processed: 19260522_1.txt -> Saved to Drive


Processing files:  51%|█████▏    | 857/1668 [1:40:58<1:38:35,  7.29s/it]

Processed: 19500325_1.txt -> Saved to Drive


Processing files:  51%|█████▏    | 858/1668 [1:41:02<1:23:13,  6.17s/it]

Processed: 19410208_1.txt -> Saved to Drive


Processing files:  51%|█████▏    | 859/1668 [1:41:07<1:17:29,  5.75s/it]

Processed: 19441125_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 860/1668 [1:41:12<1:15:10,  5.58s/it]

Processed: 19350427_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 861/1668 [1:41:17<1:12:29,  5.39s/it]

Processed: 19320917_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 862/1668 [1:41:23<1:17:08,  5.74s/it]

Processed: 19520112_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 863/1668 [1:41:32<1:27:52,  6.55s/it]

Processed: 19520621_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 864/1668 [1:41:38<1:26:18,  6.44s/it]

Processed: 19270108_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 865/1668 [1:41:43<1:20:47,  6.04s/it]

Processed: 19271008_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 866/1668 [1:41:49<1:20:39,  6.03s/it]

Processed: 19280728_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 867/1668 [1:41:56<1:22:46,  6.20s/it]

Processed: 19220114_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 868/1668 [1:42:01<1:18:13,  5.87s/it]

Processed: 19490129_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 869/1668 [1:42:07<1:17:51,  5.85s/it]

Processed: 19510217_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 870/1668 [1:42:14<1:22:46,  6.22s/it]

Processed: 19281124_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 871/1668 [1:42:18<1:16:14,  5.74s/it]

Processed: 19341229_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 872/1668 [1:42:24<1:17:52,  5.87s/it]

Processed: 19510707_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 873/1668 [1:42:28<1:08:31,  5.17s/it]

Processed: 19430501_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 874/1668 [1:42:33<1:07:30,  5.10s/it]

Processed: 19500715_1.txt -> Saved to Drive


Processing files:  52%|█████▏    | 875/1668 [1:42:45<1:34:46,  7.17s/it]

Processed: 19320123_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 876/1668 [1:42:51<1:32:02,  6.97s/it]

Processed: 19300524_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 877/1668 [1:42:56<1:22:43,  6.27s/it]

Processed: 19480911_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 878/1668 [1:43:02<1:22:32,  6.27s/it]

Processed: 19451013_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 879/1668 [1:43:09<1:22:09,  6.25s/it]

Processed: 19231110_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 880/1668 [1:43:13<1:13:16,  5.58s/it]

Processed: 19280825_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 881/1668 [1:43:19<1:15:11,  5.73s/it]

Processed: 19361017_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 882/1668 [1:43:35<1:55:28,  8.81s/it]

Processed: 19380702_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 883/1668 [1:43:42<1:51:11,  8.50s/it]

Processed: 19220729_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 884/1668 [1:43:51<1:53:14,  8.67s/it]

Processed: 19411108_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 885/1668 [1:43:57<1:40:14,  7.68s/it]

Processed: 19370703_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 886/1668 [1:44:02<1:29:57,  6.90s/it]

Processed: 19270115_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 887/1668 [1:44:13<1:45:51,  8.13s/it]

Processed: 19400817_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 888/1668 [1:44:21<1:45:52,  8.14s/it]

Processed: 19270604_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 889/1668 [1:44:28<1:40:43,  7.76s/it]

Processed: 19420725_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 890/1668 [1:44:33<1:30:22,  6.97s/it]

Processed: 19350309_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 891/1668 [1:44:39<1:24:42,  6.54s/it]

Processed: 19420404_1.txt -> Saved to Drive


Processing files:  53%|█████▎    | 892/1668 [1:44:44<1:20:45,  6.24s/it]

Processed: 19310829_1.txt -> Saved to Drive


Processing files:  54%|█████▎    | 893/1668 [1:44:50<1:20:31,  6.23s/it]

Processed: 19370313_1.txt -> Saved to Drive


Processing files:  54%|█████▎    | 894/1668 [1:44:54<1:10:45,  5.49s/it]

Processed: 19250523_1.txt -> Saved to Drive


Processing files:  54%|█████▎    | 895/1668 [1:44:59<1:10:00,  5.43s/it]

Processed: 19490709_1.txt -> Saved to Drive


Processing files:  54%|█████▎    | 896/1668 [1:45:06<1:15:51,  5.90s/it]

Processed: 19230331_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 897/1668 [1:45:13<1:16:31,  5.95s/it]

Processed: 19290803_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 898/1668 [1:45:19<1:19:44,  6.21s/it]

Processed: 19400217_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 899/1668 [1:45:31<1:38:52,  7.71s/it]

Processed: 19450825_1.txt -> Saved to Drive
Progress saved at 2025-02-11 19:26:37 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:  54%|█████▍    | 900/1668 [1:45:34<1:23:09,  6.50s/it]

Processed: 19280310_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 901/1668 [1:45:40<1:20:34,  6.30s/it]

Processed: 19460202_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 902/1668 [1:45:46<1:19:56,  6.26s/it]

Processed: 19460420_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 903/1668 [1:45:51<1:15:10,  5.90s/it]

Processed: 19220819_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 904/1668 [1:45:58<1:18:56,  6.20s/it]

Processed: 19201211_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 905/1668 [1:46:03<1:12:20,  5.69s/it]

Processed: 19280128_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 906/1668 [1:46:11<1:23:14,  6.55s/it]

Processed: 19330506_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 907/1668 [1:46:18<1:23:51,  6.61s/it]

Processed: 19241011_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 908/1668 [1:46:23<1:17:14,  6.10s/it]

Processed: 19380122_1.txt -> Saved to Drive


Processing files:  54%|█████▍    | 909/1668 [1:46:27<1:09:11,  5.47s/it]

Processed: 19250822_1.txt -> Saved to Drive


Processing files:  55%|█████▍    | 910/1668 [1:46:32<1:06:08,  5.23s/it]

Processed: 19221230_1.txt -> Saved to Drive


Processing files:  55%|█████▍    | 911/1668 [1:46:38<1:12:11,  5.72s/it]

Processed: 19230811_1.txt -> Saved to Drive


Processing files:  55%|█████▍    | 912/1668 [1:46:55<1:53:31,  9.01s/it]

Processed: 19351026_1.txt -> Saved to Drive


Processing files:  55%|█████▍    | 913/1668 [1:47:00<1:38:38,  7.84s/it]

Processed: 19391014_1.txt -> Saved to Drive


Processing files:  55%|█████▍    | 914/1668 [1:47:06<1:29:39,  7.13s/it]

Processed: 19280714_1.txt -> Saved to Drive


Processing files:  55%|█████▍    | 915/1668 [1:47:11<1:24:25,  6.73s/it]

Processed: 19350817_1.txt -> Saved to Drive


Processing files:  55%|█████▍    | 916/1668 [1:47:24<1:44:33,  8.34s/it]

Processed: 19511103_1.txt -> Saved to Drive


Processing files:  55%|█████▍    | 917/1668 [1:47:31<1:39:22,  7.94s/it]

Processed: 19471122_1.txt -> Saved to Drive


Processing files:  55%|█████▌    | 918/1668 [1:47:41<1:50:20,  8.83s/it]

Processed: 19430424_1.txt -> Saved to Drive


Processing files:  55%|█████▌    | 919/1668 [1:47:50<1:49:36,  8.78s/it]

Processed: 19490409_1.txt -> Saved to Drive


Processing files:  55%|█████▌    | 920/1668 [1:47:55<1:33:08,  7.47s/it]

Processed: 19341201_1.txt -> Saved to Drive


Processing files:  55%|█████▌    | 921/1668 [1:48:02<1:33:10,  7.48s/it]

Processed: 19260515_1.txt -> Saved to Drive


Processing files:  55%|█████▌    | 922/1668 [1:48:08<1:27:29,  7.04s/it]

Processed: 19300208_1.txt -> Saved to Drive


Processing files:  55%|█████▌    | 923/1668 [1:48:14<1:22:36,  6.65s/it]

Processed: 19300308_1.txt -> Saved to Drive


Processing files:  55%|█████▌    | 924/1668 [1:48:18<1:13:07,  5.90s/it]

Processed: 19431211_1.txt -> Saved to Drive


Processing files:  55%|█████▌    | 925/1668 [1:48:22<1:06:53,  5.40s/it]

Processed: 19320521_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 926/1668 [1:48:28<1:09:47,  5.64s/it]

Processed: 19290810_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 927/1668 [1:48:34<1:09:09,  5.60s/it]

Processed: 19220415_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 928/1668 [1:48:40<1:10:45,  5.74s/it]

Processed: 19441209_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 929/1668 [1:48:47<1:13:57,  6.01s/it]

Processed: 19490219_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 930/1668 [1:48:54<1:17:39,  6.31s/it]

Processed: 19300621_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 931/1668 [1:48:58<1:09:21,  5.65s/it]

Processed: 19350216_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 932/1668 [1:49:06<1:17:14,  6.30s/it]

Processed: 19380312_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 933/1668 [1:49:16<1:30:56,  7.42s/it]

Processed: 19350126_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 934/1668 [1:49:23<1:29:30,  7.32s/it]

Processed: 19240301_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 935/1668 [1:49:26<1:15:09,  6.15s/it]

Processed: 19260306_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 936/1668 [1:49:32<1:14:21,  6.09s/it]

Processed: 19470315_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 937/1668 [1:49:39<1:15:53,  6.23s/it]

Processed: 19230210_1.txt -> Saved to Drive


Processing files:  56%|█████▌    | 938/1668 [1:49:43<1:10:35,  5.80s/it]

Processed: 19401012_1.txt -> Saved to Drive


Processing files:  56%|█████▋    | 939/1668 [1:49:52<1:20:19,  6.61s/it]

Processed: 19320723_1.txt -> Saved to Drive


Processing files:  56%|█████▋    | 940/1668 [1:49:57<1:13:55,  6.09s/it]

Processed: 19360801_1.txt -> Saved to Drive


Processing files:  56%|█████▋    | 941/1668 [1:50:00<1:03:44,  5.26s/it]

Processed: 19390107_1.txt -> Saved to Drive


Processing files:  56%|█████▋    | 942/1668 [1:50:06<1:06:37,  5.51s/it]

Processed: 19510616_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 943/1668 [1:50:11<1:03:46,  5.28s/it]

Processed: 19280630_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 944/1668 [1:50:16<1:01:03,  5.06s/it]

Processed: 19200124_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 945/1668 [1:50:20<57:59,  4.81s/it]  

Processed: 19380416_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 946/1668 [1:50:27<1:07:42,  5.63s/it]

Processed: 19420711_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 947/1668 [1:50:32<1:03:00,  5.24s/it]

Processed: 19350914_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 948/1668 [1:50:38<1:07:53,  5.66s/it]

Processed: 19231020_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 949/1668 [1:50:44<1:09:11,  5.77s/it]

Processed: 19380326_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 950/1668 [1:50:50<1:10:27,  5.89s/it]

Processed: 19270716_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 951/1668 [1:50:54<1:01:06,  5.11s/it]

Processed: 19331230_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 952/1668 [1:50:59<1:00:54,  5.10s/it]

Processed: 19330729_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 953/1668 [1:51:02<55:30,  4.66s/it]  

Processed: 19391202_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 954/1668 [1:51:08<57:09,  4.80s/it]

Processed: 19251128_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 955/1668 [1:51:11<53:42,  4.52s/it]

Processed: 19280225_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 956/1668 [1:51:16<54:40,  4.61s/it]

Processed: 19290427_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 957/1668 [1:51:19<49:10,  4.15s/it]

Processed: 19380723_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 958/1668 [1:51:23<48:59,  4.14s/it]

Processed: 19310926_1.txt -> Saved to Drive


Processing files:  57%|█████▋    | 959/1668 [1:51:33<1:08:29,  5.80s/it]

Processed: 19221021_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 960/1668 [1:51:41<1:14:59,  6.35s/it]

Processed: 19260619_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 961/1668 [1:51:46<1:10:11,  5.96s/it]

Processed: 19290615_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 962/1668 [1:51:53<1:15:37,  6.43s/it]

Processed: 19491119_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 963/1668 [1:51:59<1:14:11,  6.31s/it]

Processed: 19500527_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 964/1668 [1:52:06<1:16:51,  6.55s/it]

Processed: 19210702_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 965/1668 [1:52:13<1:15:18,  6.43s/it]

Processed: 19210416_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 966/1668 [1:52:17<1:08:07,  5.82s/it]

Processed: 19301122_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 967/1668 [1:52:23<1:07:32,  5.78s/it]

Processed: 19360620_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 968/1668 [1:52:25<55:21,  4.74s/it]  

Processed: 19280929_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 969/1668 [1:52:31<1:00:34,  5.20s/it]

Processed: 19400106_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 970/1668 [1:52:36<57:47,  4.97s/it]  

Processed: 19401207_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 971/1668 [1:52:40<56:24,  4.86s/it]

Processed: 19240209_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 972/1668 [1:52:45<54:54,  4.73s/it]

Processed: 19431113_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 973/1668 [1:52:50<55:05,  4.76s/it]

Processed: 19221216_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 974/1668 [1:52:55<58:16,  5.04s/it]

Processed: 19380625_1.txt -> Saved to Drive


Processing files:  58%|█████▊    | 975/1668 [1:53:00<56:35,  4.90s/it]

Processed: 19390729_1.txt -> Saved to Drive


Processing files:  59%|█████▊    | 976/1668 [1:53:08<1:08:13,  5.92s/it]

Processed: 19420926_1.txt -> Saved to Drive


Processing files:  59%|█████▊    | 977/1668 [1:53:15<1:12:47,  6.32s/it]

Processed: 19251010_1.txt -> Saved to Drive


Processing files:  59%|█████▊    | 978/1668 [1:53:20<1:06:09,  5.75s/it]

Processed: 19440415_1.txt -> Saved to Drive


Processing files:  59%|█████▊    | 979/1668 [1:53:25<1:02:33,  5.45s/it]

Processed: 19400824_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 980/1668 [1:53:30<1:03:32,  5.54s/it]

Processed: 19240405_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 981/1668 [1:53:33<55:01,  4.81s/it]  

Processed: 19420418_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 982/1668 [1:53:39<56:28,  4.94s/it]

Processed: 19220902_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 983/1668 [1:53:48<1:11:00,  6.22s/it]

Processed: 19410125_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 984/1668 [1:53:54<1:11:51,  6.30s/it]

Processed: 19220513_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 985/1668 [1:54:00<1:09:17,  6.09s/it]

Processed: 19330408_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 986/1668 [1:54:05<1:05:31,  5.76s/it]

Processed: 19220916_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 987/1668 [1:54:11<1:05:29,  5.77s/it]

Processed: 19400427_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 988/1668 [1:54:16<1:02:00,  5.47s/it]

Processed: 19231208_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 989/1668 [1:54:22<1:05:52,  5.82s/it]

Processed: 19210604_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 990/1668 [1:54:28<1:05:44,  5.82s/it]

Processed: 19240830_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 991/1668 [1:54:33<1:04:06,  5.68s/it]

Processed: 19491231_1.txt -> Saved to Drive


Processing files:  59%|█████▉    | 992/1668 [1:54:45<1:23:00,  7.37s/it]

Processed: 19240712_1.txt -> Saved to Drive


Processing files:  60%|█████▉    | 993/1668 [1:54:49<1:13:36,  6.54s/it]

Processed: 19250815_1.txt -> Saved to Drive


Processing files:  60%|█████▉    | 994/1668 [1:54:57<1:17:18,  6.88s/it]

Processed: 19511006_1.txt -> Saved to Drive


Processing files:  60%|█████▉    | 995/1668 [1:55:01<1:08:43,  6.13s/it]

Processed: 19280324_1.txt -> Saved to Drive


Processing files:  60%|█████▉    | 996/1668 [1:55:08<1:11:37,  6.39s/it]

Processed: 19420912_1.txt -> Saved to Drive


Processing files:  60%|█████▉    | 997/1668 [1:55:14<1:08:25,  6.12s/it]

Processed: 19380820_1.txt -> Saved to Drive


Processing files:  60%|█████▉    | 998/1668 [1:55:29<1:38:23,  8.81s/it]

Processed: 19441223_1.txt -> Saved to Drive


Processing files:  60%|█████▉    | 999/1668 [1:55:37<1:36:30,  8.66s/it]

Processed: 19290302_1.txt -> Saved to Drive


Processing files:  60%|█████▉    | 1000/1668 [1:55:43<1:25:15,  7.66s/it]

Processed: 19480110_1.txt -> Saved to Drive


Processing files:  60%|██████    | 1001/1668 [1:55:49<1:20:43,  7.26s/it]

Processed: 19520503_1.txt -> Saved to Drive


Processing files:  60%|██████    | 1002/1668 [1:55:54<1:15:06,  6.77s/it]

Processed: 19371023_1.txt -> Saved to Drive


Processing files:  60%|██████    | 1003/1668 [1:55:59<1:08:38,  6.19s/it]

Processed: 19350112_1.txt -> Saved to Drive


Processing files:  60%|██████    | 1004/1668 [1:56:04<1:02:55,  5.69s/it]

Processed: 19290105_1.txt -> Saved to Drive


Processing files:  60%|██████    | 1005/1668 [1:56:08<58:53,  5.33s/it]  

Processed: 19410322_1.txt -> Saved to Drive


Processing files:  60%|██████    | 1006/1668 [1:56:13<56:14,  5.10s/it]

Processed: 19250124_1.txt -> Saved to Drive


Processing files:  60%|██████    | 1007/1668 [1:56:18<55:34,  5.04s/it]

Processed: 19470215_1.txt -> Saved to Drive


Processing files:  60%|██████    | 1008/1668 [1:56:24<57:56,  5.27s/it]

Processed: 19240621_1.txt -> Saved to Drive


Processing files:  60%|██████    | 1009/1668 [1:56:29<58:58,  5.37s/it]

Processed: 19471213_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1010/1668 [1:56:35<1:00:15,  5.49s/it]

Processed: 19221014_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1011/1668 [1:56:42<1:04:29,  5.89s/it]

Processed: 19350518_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1012/1668 [1:56:47<1:02:11,  5.69s/it]

Processed: 19411213_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1013/1668 [1:56:52<58:37,  5.37s/it]  

Processed: 19431002_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1014/1668 [1:56:58<1:01:53,  5.68s/it]

Processed: 19421017_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1015/1668 [1:57:03<58:29,  5.38s/it]  

Processed: 19490423_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1016/1668 [1:57:08<56:55,  5.24s/it]

Processed: 19500930_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1017/1668 [1:57:15<1:05:05,  6.00s/it]

Processed: 19210205_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1018/1668 [1:57:20<1:00:05,  5.55s/it]

Processed: 19310620_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1019/1668 [1:57:25<57:41,  5.33s/it]  

Processed: 19261127_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1020/1668 [1:57:31<1:00:27,  5.60s/it]

Processed: 19340707_1.txt -> Saved to Drive


Processing files:  61%|██████    | 1021/1668 [1:57:37<1:00:18,  5.59s/it]

Processed: 19250926_1.txt -> Saved to Drive


Processing files:  61%|██████▏   | 1022/1668 [1:57:44<1:07:53,  6.31s/it]

Processed: 19510721_1.txt -> Saved to Drive


Processing files:  61%|██████▏   | 1023/1668 [1:57:50<1:03:48,  5.94s/it]

Processed: 19370130_1.txt -> Saved to Drive


Processing files:  61%|██████▏   | 1024/1668 [1:57:54<59:03,  5.50s/it]  

Processed: 19260327_1.txt -> Saved to Drive


Processing files:  61%|██████▏   | 1025/1668 [1:58:00<59:15,  5.53s/it]

Processed: 19311121_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1026/1668 [1:58:04<54:15,  5.07s/it]

Processed: 19390225_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1027/1668 [1:58:09<56:02,  5.25s/it]

Processed: 19470329_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1028/1668 [1:58:17<1:03:12,  5.93s/it]

Processed: 19251024_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1029/1668 [1:58:25<1:10:29,  6.62s/it]

Processed: 19230224_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1030/1668 [1:58:29<1:03:15,  5.95s/it]

Processed: 19350921_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1031/1668 [1:58:37<1:08:55,  6.49s/it]

Processed: 19480124_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1032/1668 [1:58:44<1:09:24,  6.55s/it]

Processed: 19261120_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1033/1668 [1:58:57<1:30:35,  8.56s/it]

Processed: 19211015_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1034/1668 [1:59:01<1:14:33,  7.06s/it]

Processed: 19321029_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1035/1668 [1:59:05<1:04:29,  6.11s/it]

Processed: 19340414_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1036/1668 [1:59:09<59:15,  5.63s/it]  

Processed: 19290907_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1037/1668 [1:59:15<59:22,  5.65s/it]

Processed: 19450721_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1038/1668 [1:59:20<59:13,  5.64s/it]

Processed: 19500805_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1039/1668 [1:59:24<54:04,  5.16s/it]

Processed: 19260904_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1040/1668 [1:59:34<1:06:27,  6.35s/it]

Processed: 19480710_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1041/1668 [1:59:39<1:02:31,  5.98s/it]

Processed: 19391125_1.txt -> Saved to Drive


Processing files:  62%|██████▏   | 1042/1668 [1:59:45<1:01:59,  5.94s/it]

Processed: 19201120_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1043/1668 [1:59:51<1:03:13,  6.07s/it]

Processed: 19330617_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1044/1668 [1:59:56<59:47,  5.75s/it]  

Processed: 19390325_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1045/1668 [2:00:01<56:51,  5.48s/it]

Processed: 19301025_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1046/1668 [2:00:03<46:47,  4.51s/it]

Processed: 19280602_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1047/1668 [2:00:09<51:15,  4.95s/it]

Processed: 19390211_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1048/1668 [2:00:17<1:02:00,  6.00s/it]

Processed: 19390429_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1049/1668 [2:00:24<1:02:43,  6.08s/it]

Processed: 19360711_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1050/1668 [2:00:40<1:33:54,  9.12s/it]

Processed: 19450127_1.txt -> Saved to Drive
Progress saved at 2025-02-11 19:41:46 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:  63%|██████▎   | 1051/1668 [2:00:46<1:25:27,  8.31s/it]

Processed: 19430227_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1052/1668 [2:00:52<1:17:21,  7.53s/it]

Processed: 19250411_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1053/1668 [2:01:05<1:32:59,  9.07s/it]

Processed: 19410222_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1054/1668 [2:01:14<1:33:29,  9.14s/it]

Processed: 19230526_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1055/1668 [2:01:18<1:18:56,  7.73s/it]

Processed: 19510120_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1056/1668 [2:01:23<1:07:49,  6.65s/it]

Processed: 19220121_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1057/1668 [2:01:30<1:08:47,  6.76s/it]

Processed: 19510407_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1058/1668 [2:01:37<1:10:44,  6.96s/it]

Processed: 19240705_1.txt -> Saved to Drive


Processing files:  63%|██████▎   | 1059/1668 [2:01:44<1:09:58,  6.89s/it]

Processed: 19220304_1.txt -> Saved to Drive


Processing files:  64%|██████▎   | 1060/1668 [2:01:49<1:05:25,  6.46s/it]

Processed: 19510908_1.txt -> Saved to Drive


Processing files:  64%|██████▎   | 1061/1668 [2:03:27<5:42:05, 33.81s/it]

Processed: 19370116_1.txt -> Saved to Drive


Processing files:  64%|██████▎   | 1062/1668 [2:03:32<4:13:15, 25.08s/it]

Processed: 19480424_1.txt -> Saved to Drive


Processing files:  64%|██████▎   | 1063/1668 [2:03:46<3:41:12, 21.94s/it]

Processed: 19490319_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1064/1668 [2:03:51<2:50:20, 16.92s/it]

Processed: 19360516_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1065/1668 [2:03:56<2:12:08, 13.15s/it]

Processed: 19310919_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1066/1668 [2:04:00<1:44:54, 10.46s/it]

Processed: 19300517_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1067/1668 [2:04:04<1:27:02,  8.69s/it]

Processed: 19420411_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1068/1668 [2:04:08<1:12:29,  7.25s/it]

Processed: 19200410_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1069/1668 [2:04:12<1:02:39,  6.28s/it]

Processed: 19230616_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1070/1668 [2:04:18<1:00:25,  6.06s/it]

Processed: 19370306_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1071/1668 [2:04:25<1:03:43,  6.41s/it]

Processed: 19311205_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1072/1668 [2:04:31<1:03:29,  6.39s/it]

Processed: 19460928_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1073/1668 [2:04:36<58:59,  5.95s/it]  

Processed: 19360725_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1074/1668 [2:04:42<58:44,  5.93s/it]

Processed: 19490122_1.txt -> Saved to Drive


Processing files:  64%|██████▍   | 1075/1668 [2:04:49<1:00:50,  6.16s/it]

Processed: 19481204_1.txt -> Saved to Drive


Processing files:  65%|██████▍   | 1076/1668 [2:05:00<1:15:41,  7.67s/it]

Processed: 19200724_1.txt -> Saved to Drive


Processing files:  65%|██████▍   | 1077/1668 [2:05:08<1:16:21,  7.75s/it]

Processed: 19470927_1.txt -> Saved to Drive


Processing files:  65%|██████▍   | 1078/1668 [2:05:16<1:15:51,  7.71s/it]

Processed: 19470111_1.txt -> Saved to Drive


Processing files:  65%|██████▍   | 1079/1668 [2:05:21<1:08:59,  7.03s/it]

Processed: 19220603_1.txt -> Saved to Drive


Processing files:  65%|██████▍   | 1080/1668 [2:05:26<1:03:11,  6.45s/it]

Processed: 19250314_1.txt -> Saved to Drive


Processing files:  65%|██████▍   | 1081/1668 [2:05:31<58:41,  6.00s/it]  

Processed: 19290420_1.txt -> Saved to Drive


Processing files:  65%|██████▍   | 1082/1668 [2:05:37<56:35,  5.79s/it]

Processed: 19370206_1.txt -> Saved to Drive


Processing files:  65%|██████▍   | 1083/1668 [2:05:41<53:06,  5.45s/it]

Processed: 19480904_1.txt -> Saved to Drive


Processing files:  65%|██████▍   | 1084/1668 [2:05:47<53:08,  5.46s/it]

Processed: 19440506_1.txt -> Saved to Drive


Processing files:  65%|██████▌   | 1085/1668 [2:05:53<54:58,  5.66s/it]

Processed: 19320227_1.txt -> Saved to Drive


Processing files:  65%|██████▌   | 1086/1668 [2:06:04<1:10:37,  7.28s/it]

Processed: 19440325_1.txt -> Saved to Drive


Processing files:  65%|██████▌   | 1087/1668 [2:06:08<1:01:20,  6.34s/it]

Processed: 19220401_1.txt -> Saved to Drive


Processing files:  65%|██████▌   | 1088/1668 [2:06:15<1:03:26,  6.56s/it]

Processed: 19460112_1.txt -> Saved to Drive


Processing files:  65%|██████▌   | 1089/1668 [2:06:21<1:01:24,  6.36s/it]

Processed: 19380423_1.txt -> Saved to Drive


Processing files:  65%|██████▌   | 1090/1668 [2:06:27<59:27,  6.17s/it]  

Processed: 19310221_1.txt -> Saved to Drive


Processing files:  65%|██████▌   | 1091/1668 [2:06:32<57:03,  5.93s/it]

Processed: 19350727_1.txt -> Saved to Drive


Processing files:  65%|██████▌   | 1092/1668 [2:06:36<52:37,  5.48s/it]

Processed: 19230901_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1093/1668 [2:06:44<57:48,  6.03s/it]

Processed: 19230929_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1094/1668 [2:06:49<55:02,  5.75s/it]

Processed: 19310725_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1095/1668 [2:07:03<1:17:30,  8.12s/it]

Processed: 19510526_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1096/1668 [2:07:08<1:10:14,  7.37s/it]

Processed: 19350713_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1097/1668 [2:07:14<1:05:48,  6.92s/it]

Processed: 19470419_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1098/1668 [2:07:19<59:37,  6.28s/it]  

Processed: 19500722_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1099/1668 [2:07:29<1:09:55,  7.37s/it]

Processed: 19460126_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1100/1668 [2:07:37<1:12:21,  7.64s/it]

Processed: 19260220_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1101/1668 [2:07:42<1:04:00,  6.77s/it]

Processed: 19381112_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1102/1668 [2:07:47<58:16,  6.18s/it]  

Processed: 19320709_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1103/1668 [2:07:51<54:07,  5.75s/it]

Processed: 19291130_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1104/1668 [2:07:55<47:54,  5.10s/it]

Processed: 19290504_1.txt -> Saved to Drive


Processing files:  66%|██████▌   | 1105/1668 [2:07:59<45:59,  4.90s/it]

Processed: 19391111_1.txt -> Saved to Drive


Processing files:  66%|██████▋   | 1106/1668 [2:08:06<50:44,  5.42s/it]

Processed: 19230707_1.txt -> Saved to Drive


Processing files:  66%|██████▋   | 1107/1668 [2:08:09<44:04,  4.71s/it]

Processed: 19390121_1.txt -> Saved to Drive


Processing files:  66%|██████▋   | 1108/1668 [2:08:14<44:42,  4.79s/it]

Processed: 19200117_1.txt -> Saved to Drive


Processing files:  66%|██████▋   | 1109/1668 [2:08:18<41:32,  4.46s/it]

Processed: 19240426_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1110/1668 [2:08:21<38:37,  4.15s/it]

Processed: 19390128_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1111/1668 [2:08:26<41:54,  4.51s/it]

Processed: 19420502_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1112/1668 [2:08:31<43:20,  4.68s/it]

Processed: 19350504_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1113/1668 [2:08:38<47:13,  5.11s/it]

Processed: 19520329_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1114/1668 [2:08:43<48:36,  5.26s/it]

Processed: 19300412_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1115/1668 [2:08:50<52:37,  5.71s/it]

Processed: 19300823_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1116/1668 [2:08:56<52:54,  5.75s/it]

Processed: 19491224_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1117/1668 [2:09:19<1:41:05, 11.01s/it]

Processed: 19241122_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1118/1668 [2:09:26<1:30:10,  9.84s/it]

Processed: 19510825_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1119/1668 [2:09:31<1:17:09,  8.43s/it]

Processed: 19460622_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1120/1668 [2:09:35<1:05:13,  7.14s/it]

Processed: 19271015_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1121/1668 [2:09:42<1:03:40,  6.98s/it]

Processed: 19470322_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1122/1668 [2:10:00<1:33:54, 10.32s/it]

Processed: 19481023_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1123/1668 [2:10:05<1:19:08,  8.71s/it]

Processed: 19510512_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1124/1668 [2:10:12<1:14:45,  8.25s/it]

Processed: 19270903_1.txt -> Saved to Drive


Processing files:  67%|██████▋   | 1125/1668 [2:10:18<1:06:28,  7.35s/it]

Processed: 19240112_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1126/1668 [2:10:22<57:40,  6.38s/it]  

Processed: 19271001_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1127/1668 [2:10:27<53:22,  5.92s/it]

Processed: 19331202_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1128/1668 [2:10:33<53:22,  5.93s/it]

Processed: 19431106_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1129/1668 [2:10:39<53:57,  6.01s/it]

Processed: 19310808_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1130/1668 [2:10:45<53:56,  6.02s/it]

Processed: 19380813_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1131/1668 [2:10:51<54:54,  6.14s/it]

Processed: 19221223_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1132/1668 [2:10:55<48:25,  5.42s/it]

Processed: 19280707_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1133/1668 [2:11:05<1:00:23,  6.77s/it]

Processed: 19290713_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1134/1668 [2:11:12<1:00:50,  6.84s/it]

Processed: 19220520_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1135/1668 [2:11:18<59:01,  6.64s/it]  

Processed: 19260417_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1136/1668 [2:11:25<59:33,  6.72s/it]

Processed: 19481009_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1137/1668 [2:11:30<54:54,  6.20s/it]

Processed: 19320409_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1138/1668 [2:11:35<52:11,  5.91s/it]

Processed: 19270806_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1139/1668 [2:11:40<48:54,  5.55s/it]

Processed: 19350105_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1140/1668 [2:11:53<1:08:00,  7.73s/it]

Processed: 19450714_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1141/1668 [2:11:57<58:21,  6.64s/it]  

Processed: 19270129_1.txt -> Saved to Drive


Processing files:  68%|██████▊   | 1142/1668 [2:12:03<57:28,  6.56s/it]

Processed: 19490521_1.txt -> Saved to Drive


Processing files:  69%|██████▊   | 1143/1668 [2:12:07<49:13,  5.63s/it]

Processed: 19360425_1.txt -> Saved to Drive


Processing files:  69%|██████▊   | 1144/1668 [2:12:11<46:37,  5.34s/it]

Processed: 19450421_1.txt -> Saved to Drive


Processing files:  69%|██████▊   | 1145/1668 [2:12:15<43:24,  4.98s/it]

Processed: 19331021_1.txt -> Saved to Drive


Processing files:  69%|██████▊   | 1146/1668 [2:12:19<38:49,  4.46s/it]

Processed: 19310307_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1147/1668 [2:12:23<38:06,  4.39s/it]

Processed: 19251017_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1148/1668 [2:12:28<40:56,  4.72s/it]

Processed: 19420808_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1149/1668 [2:12:33<41:26,  4.79s/it]

Processed: 19500128_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1150/1668 [2:12:37<39:42,  4.60s/it]

Processed: 19250214_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1151/1668 [2:12:43<42:55,  4.98s/it]

Processed: 19360829_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1152/1668 [2:12:56<1:01:29,  7.15s/it]

Processed: 19470719_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1153/1668 [2:13:00<55:02,  6.41s/it]  

Processed: 19360530_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1154/1668 [2:13:05<49:44,  5.81s/it]

Processed: 19501202_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1155/1668 [2:13:16<1:04:10,  7.51s/it]

Processed: 19390401_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1156/1668 [2:13:22<1:00:55,  7.14s/it]

Processed: 19411101_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1157/1668 [2:13:31<1:04:18,  7.55s/it]

Processed: 19260123_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1158/1668 [2:13:38<1:03:19,  7.45s/it]

Processed: 19500121_1.txt -> Saved to Drive


Processing files:  69%|██████▉   | 1159/1668 [2:13:43<57:45,  6.81s/it]  

Processed: 19511222_1.txt -> Saved to Drive


Processing files:  70%|██████▉   | 1160/1668 [2:13:48<52:56,  6.25s/it]

Processed: 19461102_1.txt -> Saved to Drive


Processing files:  70%|██████▉   | 1161/1668 [2:13:53<48:11,  5.70s/it]

Processed: 19370109_1.txt -> Saved to Drive


Processing files:  70%|██████▉   | 1162/1668 [2:14:00<50:40,  6.01s/it]

Processed: 19430918_1.txt -> Saved to Drive


Processing files:  70%|██████▉   | 1163/1668 [2:14:07<54:15,  6.45s/it]

Processed: 19440729_1.txt -> Saved to Drive


Processing files:  70%|██████▉   | 1164/1668 [2:14:12<50:44,  6.04s/it]

Processed: 19480501_1.txt -> Saved to Drive


Processing files:  70%|██████▉   | 1165/1668 [2:14:16<46:25,  5.54s/it]

Processed: 19320220_1.txt -> Saved to Drive


Processing files:  70%|██████▉   | 1166/1668 [2:14:20<41:07,  4.92s/it]

Processed: 19310103_1.txt -> Saved to Drive


Processing files:  70%|██████▉   | 1167/1668 [2:14:26<44:38,  5.35s/it]

Processed: 19481211_1.txt -> Saved to Drive


Processing files:  70%|███████   | 1168/1668 [2:14:31<44:07,  5.30s/it]

Processed: 19510929_1.txt -> Saved to Drive


Processing files:  70%|███████   | 1169/1668 [2:14:38<48:02,  5.78s/it]

Processed: 19241227_1.txt -> Saved to Drive


Processing files:  70%|███████   | 1170/1668 [2:14:43<46:04,  5.55s/it]

Processed: 19260717_1.txt -> Saved to Drive


Processing files:  70%|███████   | 1171/1668 [2:14:51<51:32,  6.22s/it]

Processed: 19260925_1.txt -> Saved to Drive


Processing files:  70%|███████   | 1172/1668 [2:14:56<47:51,  5.79s/it]

Processed: 19281013_1.txt -> Saved to Drive


Processing files:  70%|███████   | 1173/1668 [2:15:03<50:50,  6.16s/it]

Processed: 19520126_1.txt -> Saved to Drive


Processing files:  70%|███████   | 1174/1668 [2:15:15<1:04:45,  7.86s/it]

Processed: 19430116_1.txt -> Saved to Drive


Processing files:  70%|███████   | 1175/1668 [2:15:19<56:43,  6.90s/it]  

Processed: 19331014_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1176/1668 [2:15:24<50:59,  6.22s/it]

Processed: 19500211_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1177/1668 [2:15:29<47:18,  5.78s/it]

Processed: 19250502_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1178/1668 [2:15:36<49:35,  6.07s/it]

Processed: 19460511_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1179/1668 [2:15:40<46:14,  5.67s/it]

Processed: 19480207_1.txt -> Saved to Drive
Progress saved at 2025-02-11 19:56:46 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:  71%|███████   | 1180/1668 [2:15:45<43:41,  5.37s/it]

Processed: 19340526_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1181/1668 [2:16:04<1:17:36,  9.56s/it]

Processed: 19410301_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1182/1668 [2:16:10<1:07:14,  8.30s/it]

Processed: 19430710_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1183/1668 [2:16:14<56:39,  7.01s/it]  

Processed: 19370821_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1184/1668 [2:16:19<52:25,  6.50s/it]

Processed: 19481113_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1185/1668 [2:16:57<2:07:45, 15.87s/it]

Processed: 19470913_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1186/1668 [2:17:02<1:41:24, 12.62s/it]

Processed: 19500107_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1187/1668 [2:17:06<1:21:34, 10.18s/it]

Processed: 19480814_1.txt -> Saved to Drive


Processing files:  71%|███████   | 1188/1668 [2:17:10<1:06:50,  8.36s/it]

Processed: 19280526_1.txt -> Saved to Drive


Processing files:  71%|███████▏  | 1189/1668 [2:17:16<59:33,  7.46s/it]  

Processed: 19440205_1.txt -> Saved to Drive


Processing files:  71%|███████▏  | 1190/1668 [2:17:20<50:50,  6.38s/it]

Processed: 19310418_1.txt -> Saved to Drive


Processing files:  71%|███████▏  | 1191/1668 [2:17:24<45:45,  5.75s/it]

Processed: 19311114_1.txt -> Saved to Drive


Processing files:  71%|███████▏  | 1192/1668 [2:17:30<45:29,  5.73s/it]

Processed: 19211112_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1193/1668 [2:17:35<44:02,  5.56s/it]

Processed: 19471115_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1194/1668 [2:17:39<40:49,  5.17s/it]

Processed: 19200103_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1195/1668 [2:17:44<40:30,  5.14s/it]

Processed: 19230922_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1196/1668 [2:17:47<34:10,  4.34s/it]

Processed: 19390930_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1197/1668 [2:17:54<40:21,  5.14s/it]

Processed: 19250919_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1198/1668 [2:18:04<52:26,  6.70s/it]

Processed: 19291116_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1199/1668 [2:18:08<46:08,  5.90s/it]

Processed: 19280901_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1200/1668 [2:18:12<41:27,  5.31s/it]

Processed: 19480529_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1201/1668 [2:18:17<40:46,  5.24s/it]

Processed: 19441021_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1202/1668 [2:18:21<37:43,  4.86s/it]

Processed: 19450602_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1203/1668 [2:18:26<38:44,  5.00s/it]

Processed: 19341110_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1204/1668 [2:18:31<38:51,  5.03s/it]

Processed: 19451027_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1205/1668 [2:18:40<46:53,  6.08s/it]

Processed: 19251003_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1206/1668 [2:18:50<55:39,  7.23s/it]

Processed: 19390610_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1207/1668 [2:18:55<50:36,  6.59s/it]

Processed: 19510203_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1208/1668 [2:19:00<47:23,  6.18s/it]

Processed: 19441104_1.txt -> Saved to Drive


Processing files:  72%|███████▏  | 1209/1668 [2:19:05<43:46,  5.72s/it]

Processed: 19421010_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1210/1668 [2:19:10<42:11,  5.53s/it]

Processed: 19310523_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1211/1668 [2:19:14<39:18,  5.16s/it]

Processed: 19230317_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1212/1668 [2:19:20<39:49,  5.24s/it]

Processed: 19470405_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1213/1668 [2:19:27<45:00,  5.93s/it]

Processed: 19260130_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1214/1668 [2:19:33<44:40,  5.90s/it]

Processed: 19400420_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1215/1668 [2:19:39<46:03,  6.10s/it]

Processed: 19260320_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1216/1668 [2:19:45<45:31,  6.04s/it]

Processed: 19250516_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1217/1668 [2:19:53<49:19,  6.56s/it]

Processed: 19340331_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1218/1668 [2:19:58<44:31,  5.94s/it]

Processed: 19271224_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1219/1668 [2:20:02<40:43,  5.44s/it]

Processed: 19260213_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1220/1668 [2:20:06<37:17,  4.99s/it]

Processed: 19361107_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1221/1668 [2:20:11<37:21,  5.01s/it]

Processed: 19240503_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1222/1668 [2:20:17<40:18,  5.42s/it]

Processed: 19241206_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1223/1668 [2:20:23<40:54,  5.52s/it]

Processed: 19480508_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1224/1668 [2:20:29<41:32,  5.61s/it]

Processed: 19370410_1.txt -> Saved to Drive


Processing files:  73%|███████▎  | 1225/1668 [2:20:39<50:47,  6.88s/it]

Processed: 19290629_1.txt -> Saved to Drive


Processing files:  74%|███████▎  | 1226/1668 [2:20:45<49:56,  6.78s/it]

Processed: 19220429_1.txt -> Saved to Drive


Processing files:  74%|███████▎  | 1227/1668 [2:20:50<45:17,  6.16s/it]

Processed: 19410201_1.txt -> Saved to Drive


Processing files:  74%|███████▎  | 1228/1668 [2:20:55<43:20,  5.91s/it]

Processed: 19260821_1.txt -> Saved to Drive


Processing files:  74%|███████▎  | 1229/1668 [2:21:00<40:25,  5.52s/it]

Processed: 19351102_1.txt -> Saved to Drive


Processing files:  74%|███████▎  | 1230/1668 [2:22:37<4:00:07, 32.89s/it]

Processed: 19230127_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1231/1668 [2:22:41<2:58:11, 24.47s/it]

Processed: 19420117_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1232/1668 [2:22:45<2:12:10, 18.19s/it]

Processed: 19510901_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1233/1668 [2:22:50<1:42:52, 14.19s/it]

Processed: 19271217_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1234/1668 [2:22:55<1:21:53, 11.32s/it]

Processed: 19360919_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1235/1668 [2:23:00<1:09:19,  9.61s/it]

Processed: 19450120_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1236/1668 [2:23:03<55:09,  7.66s/it]  

Processed: 19311107_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1237/1668 [2:23:09<50:29,  7.03s/it]

Processed: 19341117_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1238/1668 [2:23:25<1:10:34,  9.85s/it]

Processed: 19261016_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1239/1668 [2:23:33<1:05:26,  9.15s/it]

Processed: 19441028_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1240/1668 [2:23:47<1:15:27, 10.58s/it]

Processed: 19360606_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1241/1668 [2:23:55<1:10:37,  9.92s/it]

Processed: 19460706_1.txt -> Saved to Drive


Processing files:  74%|███████▍  | 1242/1668 [2:24:01<1:02:25,  8.79s/it]

Processed: 19370213_1.txt -> Saved to Drive


Processing files:  75%|███████▍  | 1243/1668 [2:24:07<54:53,  7.75s/it]  

Processed: 19250530_1.txt -> Saved to Drive


Processing files:  75%|███████▍  | 1244/1668 [2:24:15<55:41,  7.88s/it]

Processed: 19470614_1.txt -> Saved to Drive


Processing files:  75%|███████▍  | 1245/1668 [2:24:26<1:02:43,  8.90s/it]

Processed: 19440115_1.txt -> Saved to Drive


Processing files:  75%|███████▍  | 1246/1668 [2:24:30<52:13,  7.43s/it]  

Processed: 19201023_1.txt -> Saved to Drive


Processing files:  75%|███████▍  | 1247/1668 [2:24:39<55:06,  7.85s/it]

Processed: 19370508_1.txt -> Saved to Drive


Processing files:  75%|███████▍  | 1248/1668 [2:24:43<47:54,  6.85s/it]

Processed: 19290518_1.txt -> Saved to Drive


Processing files:  75%|███████▍  | 1249/1668 [2:26:22<4:01:05, 34.52s/it]

Processed: 19270122_1.txt -> Saved to Drive


Processing files:  75%|███████▍  | 1250/1668 [2:26:28<2:59:53, 25.82s/it]

Processed: 19400224_1.txt -> Saved to Drive


Processing files:  75%|███████▌  | 1251/1668 [2:26:36<2:21:48, 20.40s/it]

Processed: 19490903_1.txt -> Saved to Drive


Processing files:  75%|███████▌  | 1252/1668 [2:26:40<1:48:59, 15.72s/it]

Processed: 19340811_1.txt -> Saved to Drive


Processing files:  75%|███████▌  | 1253/1668 [2:26:49<1:33:06, 13.46s/it]

Processed: 19341103_1.txt -> Saved to Drive


Processing files:  75%|███████▌  | 1254/1668 [2:26:53<1:14:53, 10.85s/it]

Processed: 19241101_1.txt -> Saved to Drive


Processing files:  75%|███████▌  | 1255/1668 [2:26:58<1:02:43,  9.11s/it]

Processed: 19430220_1.txt -> Saved to Drive


Processing files:  75%|███████▌  | 1256/1668 [2:27:15<1:17:26, 11.28s/it]

Processed: 19470607_1.txt -> Saved to Drive


Processing files:  75%|███████▌  | 1257/1668 [2:27:21<1:06:00,  9.64s/it]

Processed: 19291102_1.txt -> Saved to Drive


Processing files:  75%|███████▌  | 1258/1668 [2:27:26<56:59,  8.34s/it]  

Processed: 19250307_1.txt -> Saved to Drive


Processing files:  75%|███████▌  | 1259/1668 [2:27:30<48:43,  7.15s/it]

Processed: 19350316_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1260/1668 [2:27:40<54:37,  8.03s/it]

Processed: 19320116_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1261/1668 [2:27:48<53:02,  7.82s/it]

Processed: 19440715_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1262/1668 [2:27:54<48:59,  7.24s/it]

Processed: 19491022_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1263/1668 [2:28:00<47:19,  7.01s/it]

Processed: 19450623_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1264/1668 [2:28:04<41:29,  6.16s/it]

Processed: 19500415_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1265/1668 [2:28:08<37:13,  5.54s/it]

Processed: 19400120_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1266/1668 [2:28:19<46:38,  6.96s/it]

Processed: 19461005_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1267/1668 [2:28:25<46:17,  6.93s/it]

Processed: 19360509_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1268/1668 [2:28:34<49:09,  7.37s/it]

Processed: 19260828_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1269/1668 [2:28:38<42:23,  6.37s/it]

Processed: 19441230_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1270/1668 [2:28:43<38:51,  5.86s/it]

Processed: 19290216_1.txt -> Saved to Drive


Processing files:  76%|███████▌  | 1271/1668 [2:28:48<37:26,  5.66s/it]

Processed: 19450310_1.txt -> Saved to Drive


Processing files:  76%|███████▋  | 1272/1668 [2:28:53<35:43,  5.41s/it]

Processed: 19270409_1.txt -> Saved to Drive


Processing files:  76%|███████▋  | 1273/1668 [2:29:08<55:55,  8.49s/it]

Processed: 19451215_1.txt -> Saved to Drive


Processing files:  76%|███████▋  | 1274/1668 [2:29:13<49:01,  7.47s/it]

Processed: 19200710_1.txt -> Saved to Drive


Processing files:  76%|███████▋  | 1275/1668 [2:29:20<46:16,  7.06s/it]

Processed: 19270924_1.txt -> Saved to Drive


Processing files:  76%|███████▋  | 1276/1668 [2:29:24<41:25,  6.34s/it]

Processed: 19410906_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1277/1668 [2:29:33<45:48,  7.03s/it]

Processed: 19260911_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1278/1668 [2:29:40<45:17,  6.97s/it]

Processed: 19410913_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1279/1668 [2:31:35<4:15:44, 39.44s/it]

Processed: 19381217_1.txt -> Saved to Drive
Progress saved at 2025-02-11 20:12:41 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:  77%|███████▋  | 1280/1668 [2:31:39<3:05:58, 28.76s/it]

Processed: 19330128_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1281/1668 [2:31:45<2:21:51, 21.99s/it]

Processed: 19210409_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1282/1668 [2:31:51<1:51:13, 17.29s/it]

Processed: 19210716_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1283/1668 [2:31:56<1:27:24, 13.62s/it]

Processed: 19290706_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1284/1668 [2:32:01<1:09:29, 10.86s/it]

Processed: 19390722_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1285/1668 [2:32:04<55:29,  8.69s/it]  

Processed: 19290831_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1286/1668 [2:32:09<47:30,  7.46s/it]

Processed: 19471108_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1287/1668 [2:32:16<46:27,  7.32s/it]

Processed: 19470809_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1288/1668 [2:32:19<38:54,  6.14s/it]

Processed: 19200214_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1289/1668 [2:32:26<39:50,  6.31s/it]

Processed: 19270205_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1290/1668 [2:32:31<37:17,  5.92s/it]

Processed: 19250131_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1291/1668 [2:32:35<34:23,  5.47s/it]

Processed: 19490514_1.txt -> Saved to Drive


Processing files:  77%|███████▋  | 1292/1668 [2:32:42<36:26,  5.81s/it]

Processed: 19470510_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1293/1668 [2:32:50<39:27,  6.31s/it]

Processed: 19270212_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1294/1668 [2:32:54<35:31,  5.70s/it]

Processed: 19390805_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1295/1668 [2:33:05<46:02,  7.41s/it]

Processed: 19500520_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1296/1668 [2:33:11<42:14,  6.81s/it]

Processed: 19430605_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1297/1668 [2:33:15<36:51,  5.96s/it]

Processed: 19501028_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1298/1668 [2:33:19<33:03,  5.36s/it]

Processed: 19350119_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1299/1668 [2:33:42<1:06:48, 10.86s/it]

Processed: 19460105_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1300/1668 [2:33:48<56:22,  9.19s/it]  

Processed: 19200403_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1301/1668 [2:33:54<50:56,  8.33s/it]

Processed: 19310328_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1302/1668 [2:33:58<42:50,  7.02s/it]

Processed: 19270312_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1303/1668 [2:34:05<43:00,  7.07s/it]

Processed: 19421226_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1304/1668 [2:34:11<40:46,  6.72s/it]

Processed: 19230818_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1305/1668 [2:34:17<38:39,  6.39s/it]

Processed: 19430703_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1306/1668 [2:34:23<38:38,  6.41s/it]

Processed: 19500318_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1307/1668 [2:34:28<36:14,  6.02s/it]

Processed: 19200828_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1308/1668 [2:34:33<33:30,  5.59s/it]

Processed: 19311031_1.txt -> Saved to Drive


Processing files:  78%|███████▊  | 1309/1668 [2:34:37<31:05,  5.20s/it]

Processed: 19280505_1.txt -> Saved to Drive


Processing files:  79%|███████▊  | 1310/1668 [2:34:40<27:32,  4.62s/it]

Processed: 19281222_1.txt -> Saved to Drive


Processing files:  79%|███████▊  | 1311/1668 [2:34:49<35:10,  5.91s/it]

Processed: 19231229_1.txt -> Saved to Drive


Processing files:  79%|███████▊  | 1312/1668 [2:34:57<39:09,  6.60s/it]

Processed: 19440610_1.txt -> Saved to Drive


Processing files:  79%|███████▊  | 1313/1668 [2:35:03<37:12,  6.29s/it]

Processed: 19211022_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1314/1668 [2:35:07<33:37,  5.70s/it]

Processed: 19350720_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1315/1668 [2:35:16<38:55,  6.62s/it]

Processed: 19400316_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1316/1668 [2:35:22<38:03,  6.49s/it]

Processed: 19471227_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1317/1668 [2:35:27<35:42,  6.10s/it]

Processed: 19210709_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1318/1668 [2:35:42<51:20,  8.80s/it]

Processed: 19430925_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1319/1668 [2:35:47<42:59,  7.39s/it]

Processed: 19360215_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1320/1668 [2:35:52<40:06,  6.92s/it]

Processed: 19231201_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1321/1668 [2:35:58<37:16,  6.45s/it]

Processed: 19230120_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1322/1668 [2:36:02<33:48,  5.86s/it]

Processed: 19250221_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1323/1668 [2:36:07<31:33,  5.49s/it]

Processed: 19200605_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1324/1668 [2:36:14<34:00,  5.93s/it]

Processed: 19210326_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1325/1668 [2:36:21<35:33,  6.22s/it]

Processed: 19230324_1.txt -> Saved to Drive


Processing files:  79%|███████▉  | 1326/1668 [2:36:26<34:08,  5.99s/it]

Processed: 19310606_1.txt -> Saved to Drive


Processing files:  80%|███████▉  | 1327/1668 [2:36:31<32:23,  5.70s/it]

Processed: 19231117_1.txt -> Saved to Drive


Processing files:  80%|███████▉  | 1328/1668 [2:36:36<30:45,  5.43s/it]

Processed: 19500513_1.txt -> Saved to Drive


Processing files:  80%|███████▉  | 1329/1668 [2:36:46<37:52,  6.70s/it]

Processed: 19300927_1.txt -> Saved to Drive


Processing files:  80%|███████▉  | 1330/1668 [2:36:50<33:04,  5.87s/it]

Processed: 19360815_1.txt -> Saved to Drive


Processing files:  80%|███████▉  | 1331/1668 [2:36:54<29:45,  5.30s/it]

Processed: 19390909_1.txt -> Saved to Drive


Processing files:  80%|███████▉  | 1332/1668 [2:37:00<31:31,  5.63s/it]

Processed: 19470517_1.txt -> Saved to Drive


Processing files:  80%|███████▉  | 1333/1668 [2:37:05<30:55,  5.54s/it]

Processed: 19340127_1.txt -> Saved to Drive


Processing files:  80%|███████▉  | 1334/1668 [2:37:12<32:35,  5.86s/it]

Processed: 19270226_1.txt -> Saved to Drive


Processing files:  80%|████████  | 1335/1668 [2:37:17<31:04,  5.60s/it]

Processed: 19250103_1.txt -> Saved to Drive


Processing files:  80%|████████  | 1336/1668 [2:37:25<34:54,  6.31s/it]

Processed: 19501209_1.txt -> Saved to Drive


Processing files:  80%|████████  | 1337/1668 [2:37:57<1:18:04, 14.15s/it]

Processed: 19461221_1.txt -> Saved to Drive


Processing files:  80%|████████  | 1338/1668 [2:38:04<1:06:17, 12.05s/it]

Processed: 19310516_1.txt -> Saved to Drive


Processing files:  80%|████████  | 1339/1668 [2:38:09<53:49,  9.82s/it]  

Processed: 19330923_1.txt -> Saved to Drive


Processing files:  80%|████████  | 1340/1668 [2:38:13<44:36,  8.16s/it]

Processed: 19240315_1.txt -> Saved to Drive


Processing files:  80%|████████  | 1341/1668 [2:38:18<39:17,  7.21s/it]

Processed: 19490813_1.txt -> Saved to Drive


Processing files:  80%|████████  | 1342/1668 [2:38:23<35:18,  6.50s/it]

Processed: 19260612_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1343/1668 [2:38:30<35:20,  6.52s/it]

Processed: 19470208_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1344/1668 [2:38:35<32:30,  6.02s/it]

Processed: 19381008_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1345/1668 [2:38:39<29:11,  5.42s/it]

Processed: 19431009_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1346/1668 [2:38:44<28:55,  5.39s/it]

Processed: 19410614_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1347/1668 [2:38:49<28:08,  5.26s/it]

Processed: 19431127_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1348/1668 [2:38:54<27:26,  5.15s/it]

Processed: 19500701_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1349/1668 [2:39:02<32:15,  6.07s/it]

Processed: 19230217_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1350/1668 [2:39:05<27:59,  5.28s/it]

Processed: 19361121_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1351/1668 [2:39:11<28:30,  5.40s/it]

Processed: 19300222_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1352/1668 [2:39:14<24:05,  4.58s/it]

Processed: 19430911_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1353/1668 [2:39:18<23:11,  4.42s/it]

Processed: 19510609_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1354/1668 [2:39:22<23:05,  4.41s/it]

Processed: 19500311_1.txt -> Saved to Drive


Processing files:  81%|████████  | 1355/1668 [2:39:27<24:02,  4.61s/it]

Processed: 19440819_1.txt -> Saved to Drive


Processing files:  81%|████████▏ | 1356/1668 [2:39:32<23:22,  4.49s/it]

Processed: 19510728_1.txt -> Saved to Drive


Processing files:  81%|████████▏ | 1357/1668 [2:39:37<24:24,  4.71s/it]

Processed: 19481106_1.txt -> Saved to Drive


Processing files:  81%|████████▏ | 1358/1668 [2:39:41<23:57,  4.64s/it]

Processed: 19471101_1.txt -> Saved to Drive


Processing files:  81%|████████▏ | 1359/1668 [2:39:45<22:31,  4.38s/it]

Processed: 19280331_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1360/1668 [2:39:49<21:37,  4.21s/it]

Processed: 19330812_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1361/1668 [2:39:53<21:22,  4.18s/it]

Processed: 19320903_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1362/1668 [2:40:05<32:52,  6.45s/it]

Processed: 19260605_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1363/1668 [2:40:08<28:14,  5.56s/it]

Processed: 19500923_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1364/1668 [2:40:16<31:25,  6.20s/it]

Processed: 19420214_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1365/1668 [2:40:20<28:39,  5.68s/it]

Processed: 19450224_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1366/1668 [2:40:30<34:01,  6.76s/it]

Processed: 19470823_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1367/1668 [2:40:35<31:14,  6.23s/it]

Processed: 19490507_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1368/1668 [2:40:38<27:44,  5.55s/it]

Processed: 19300111_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1369/1668 [2:40:43<26:45,  5.37s/it]

Processed: 19300315_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1370/1668 [2:40:51<29:54,  6.02s/it]

Processed: 19220218_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1371/1668 [2:40:56<28:24,  5.74s/it]

Processed: 19501118_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1372/1668 [2:41:02<28:28,  5.77s/it]

Processed: 19211008_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1373/1668 [2:41:06<25:53,  5.27s/it]

Processed: 19500909_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1374/1668 [2:41:11<25:54,  5.29s/it]

Processed: 19430904_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1375/1668 [2:41:17<26:52,  5.50s/it]

Processed: 19310801_1.txt -> Saved to Drive


Processing files:  82%|████████▏ | 1376/1668 [2:41:22<25:28,  5.23s/it]

Processed: 19201218_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1377/1668 [2:41:25<22:46,  4.70s/it]

Processed: 19400601_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1378/1668 [2:41:30<23:05,  4.78s/it]

Processed: 19420221_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1379/1668 [2:41:34<22:03,  4.58s/it]

Processed: 19240126_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1380/1668 [2:41:38<20:32,  4.28s/it]

Processed: 19360125_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1381/1668 [2:41:43<21:51,  4.57s/it]

Processed: 19270402_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1382/1668 [2:41:49<22:40,  4.76s/it]

Processed: 19450407_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1383/1668 [2:41:53<22:54,  4.82s/it]

Processed: 19510421_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1384/1668 [2:41:58<22:24,  4.74s/it]

Processed: 19460629_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1385/1668 [2:42:05<25:44,  5.46s/it]

Processed: 19400302_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1386/1668 [2:42:09<23:38,  5.03s/it]

Processed: 19421024_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1387/1668 [2:42:13<22:08,  4.73s/it]

Processed: 19321217_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1388/1668 [2:42:19<23:48,  5.10s/it]

Processed: 19260724_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1389/1668 [2:42:25<24:59,  5.37s/it]

Processed: 19200703_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1390/1668 [2:42:31<24:59,  5.39s/it]

Processed: 19471018_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1391/1668 [2:42:35<23:17,  5.05s/it]

Processed: 19231006_1.txt -> Saved to Drive


Processing files:  83%|████████▎ | 1392/1668 [2:42:38<20:27,  4.45s/it]

Processed: 19380917_1.txt -> Saved to Drive


Processing files:  84%|████████▎ | 1393/1668 [2:42:46<25:29,  5.56s/it]

Processed: 19430403_1.txt -> Saved to Drive


Processing files:  84%|████████▎ | 1394/1668 [2:42:51<24:25,  5.35s/it]

Processed: 19420606_1.txt -> Saved to Drive


Processing files:  84%|████████▎ | 1395/1668 [2:42:55<23:12,  5.10s/it]

Processed: 19341027_1.txt -> Saved to Drive


Processing files:  84%|████████▎ | 1396/1668 [2:43:00<22:26,  4.95s/it]

Processed: 19470906_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1397/1668 [2:43:05<22:42,  5.03s/it]

Processed: 19280428_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1398/1668 [2:43:13<26:00,  5.78s/it]

Processed: 19230519_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1399/1668 [2:43:18<25:48,  5.76s/it]

Processed: 19481120_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1400/1668 [2:43:22<23:20,  5.22s/it]

Processed: 19300906_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1401/1668 [2:43:33<30:09,  6.78s/it]

Processed: 19250620_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1402/1668 [2:43:38<27:24,  6.18s/it]

Processed: 19430724_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1403/1668 [2:43:44<27:09,  6.15s/it]

Processed: 19471220_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1404/1668 [2:43:50<26:41,  6.07s/it]

Processed: 19231124_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1405/1668 [2:43:54<23:52,  5.45s/it]

Processed: 19430102_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1406/1668 [2:43:57<21:35,  4.95s/it]

Processed: 19290622_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1407/1668 [2:44:01<20:02,  4.61s/it]

Processed: 19360808_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1408/1668 [2:44:05<19:07,  4.41s/it]

Processed: 19340505_1.txt -> Saved to Drive


Processing files:  84%|████████▍ | 1409/1668 [2:44:11<21:05,  4.89s/it]

Processed: 19230630_1.txt -> Saved to Drive


Processing files:  85%|████████▍ | 1410/1668 [2:44:19<24:40,  5.74s/it]

Processed: 19250117_1.txt -> Saved to Drive


Processing files:  85%|████████▍ | 1411/1668 [2:44:23<22:53,  5.35s/it]

Processed: 19481002_1.txt -> Saved to Drive


Processing files:  85%|████████▍ | 1412/1668 [2:44:27<21:11,  4.97s/it]

Processed: 19470412_1.txt -> Saved to Drive


Processing files:  85%|████████▍ | 1413/1668 [2:44:31<19:02,  4.48s/it]

Processed: 19520412_1.txt -> Saved to Drive


Processing files:  85%|████████▍ | 1414/1668 [2:44:34<17:48,  4.21s/it]

Processed: 19200110_1.txt -> Saved to Drive


Processing files:  85%|████████▍ | 1415/1668 [2:44:38<17:40,  4.19s/it]

Processed: 19320319_1.txt -> Saved to Drive


Processing files:  85%|████████▍ | 1416/1668 [2:44:43<18:26,  4.39s/it]

Processed: 19480117_1.txt -> Saved to Drive


Processing files:  85%|████████▍ | 1417/1668 [2:44:48<18:53,  4.52s/it]

Processed: 19300712_1.txt -> Saved to Drive


Processing files:  85%|████████▌ | 1418/1668 [2:44:54<19:57,  4.79s/it]

Processed: 19270430_1.txt -> Saved to Drive


Processing files:  85%|████████▌ | 1419/1668 [2:44:59<20:18,  4.89s/it]

Processed: 19330930_1.txt -> Saved to Drive


Processing files:  85%|████████▌ | 1420/1668 [2:45:10<27:34,  6.67s/it]

Processed: 19520614_1.txt -> Saved to Drive


Processing files:  85%|████████▌ | 1421/1668 [2:45:17<28:43,  6.98s/it]

Processed: 19321001_1.txt -> Saved to Drive


Processing files:  85%|████████▌ | 1422/1668 [2:45:25<30:09,  7.36s/it]

Processed: 19411011_1.txt -> Saved to Drive


Processing files:  85%|████████▌ | 1423/1668 [2:45:31<28:09,  6.89s/it]

Processed: 19510428_1.txt -> Saved to Drive


Processing files:  85%|████████▌ | 1424/1668 [2:45:36<25:47,  6.34s/it]

Processed: 19310110_1.txt -> Saved to Drive


Processing files:  85%|████████▌ | 1425/1668 [2:45:41<23:53,  5.90s/it]

Processed: 19321231_1.txt -> Saved to Drive


Processing files:  85%|████████▌ | 1426/1668 [2:45:48<25:02,  6.21s/it]

Processed: 19250207_1.txt -> Saved to Drive


Processing files:  86%|████████▌ | 1427/1668 [2:45:52<22:13,  5.53s/it]

Processed: 19371127_1.txt -> Saved to Drive


Processing files:  86%|████████▌ | 1428/1668 [2:45:59<24:04,  6.02s/it]

Processed: 19330916_1.txt -> Saved to Drive


Processing files:  86%|████████▌ | 1429/1668 [2:46:00<17:54,  4.50s/it]

Processed: 19380101_1.txt -> Saved to Drive


Processing files:  86%|████████▌ | 1430/1668 [2:46:06<18:55,  4.77s/it]

Processed: 19490820_1.txt -> Saved to Drive


Processing files:  86%|████████▌ | 1431/1668 [2:46:10<18:57,  4.80s/it]

Processed: 19500304_1.txt -> Saved to Drive


Processing files:  86%|████████▌ | 1432/1668 [2:46:17<20:24,  5.19s/it]

Processed: 19421114_1.txt -> Saved to Drive


Processing files:  86%|████████▌ | 1433/1668 [2:46:22<20:05,  5.13s/it]

Processed: 19440219_1.txt -> Saved to Drive


Processing files:  86%|████████▌ | 1434/1668 [2:46:26<19:04,  4.89s/it]

Processed: 19430410_1.txt -> Saved to Drive


Processing files:  86%|████████▌ | 1435/1668 [2:46:29<17:09,  4.42s/it]

Processed: 19350803_1.txt -> Saved to Drive


Processing files:  86%|████████▌ | 1436/1668 [2:46:33<16:44,  4.33s/it]

Processed: 19321112_1.txt -> Saved to Drive


Processing files:  86%|████████▌ | 1437/1668 [2:46:38<17:17,  4.49s/it]

Processed: 19451124_1.txt -> Saved to Drive
Progress saved at 2025-02-11 20:27:44 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:  86%|████████▌ | 1438/1668 [2:46:44<18:39,  4.87s/it]

Processed: 19460601_1.txt -> Saved to Drive


Processing files:  86%|████████▋ | 1439/1668 [2:46:49<19:09,  5.02s/it]

Processed: 19380402_1.txt -> Saved to Drive


Processing files:  86%|████████▋ | 1440/1668 [2:46:57<21:58,  5.78s/it]

Processed: 19381001_1.txt -> Saved to Drive


Processing files:  86%|████████▋ | 1441/1668 [2:47:07<26:39,  7.05s/it]

Processed: 19400803_1.txt -> Saved to Drive


Processing files:  86%|████████▋ | 1442/1668 [2:47:11<23:06,  6.13s/it]

Processed: 19220211_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1443/1668 [2:47:20<26:00,  6.94s/it]

Processed: 19300628_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1444/1668 [2:47:27<26:24,  7.07s/it]

Processed: 19450908_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1445/1668 [2:47:35<27:08,  7.30s/it]

Processed: 19491210_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1446/1668 [2:47:41<25:52,  6.99s/it]

Processed: 19440805_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1447/1668 [2:47:46<23:27,  6.37s/it]

Processed: 19410412_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1448/1668 [2:47:51<21:39,  5.91s/it]

Processed: 19390311_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1449/1668 [2:47:55<19:11,  5.26s/it]

Processed: 19360201_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1450/1668 [2:48:03<22:43,  6.26s/it]

Processed: 19410705_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1451/1668 [2:48:09<21:39,  5.99s/it]

Processed: 19480320_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1452/1668 [2:48:14<21:15,  5.90s/it]

Processed: 19290126_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1453/1668 [2:48:21<22:18,  6.23s/it]

Processed: 19381203_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1454/1668 [2:48:25<19:42,  5.53s/it]

Processed: 19310404_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1455/1668 [2:48:29<18:00,  5.07s/it]

Processed: 19320820_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1456/1668 [2:48:34<17:52,  5.06s/it]

Processed: 19480619_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1457/1668 [2:48:39<17:28,  4.97s/it]

Processed: 19490910_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1458/1668 [2:48:43<16:32,  4.73s/it]

Processed: 19520322_1.txt -> Saved to Drive


Processing files:  87%|████████▋ | 1459/1668 [2:48:51<19:41,  5.65s/it]

Processed: 19370828_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1460/1668 [2:48:55<17:49,  5.14s/it]

Processed: 19380205_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1461/1668 [2:49:01<18:16,  5.30s/it]

Processed: 19390701_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1462/1668 [2:49:06<18:46,  5.47s/it]

Processed: 19250606_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1463/1668 [2:49:12<18:30,  5.42s/it]

Processed: 19450512_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1464/1668 [2:49:17<18:40,  5.49s/it]

Processed: 19310321_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1465/1668 [2:49:23<18:31,  5.48s/it]

Processed: 19210917_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1466/1668 [2:49:28<17:59,  5.34s/it]

Processed: 19360613_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1467/1668 [2:49:34<18:29,  5.52s/it]

Processed: 19221028_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1468/1668 [2:49:39<18:13,  5.47s/it]

Processed: 19350525_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1469/1668 [2:49:45<18:30,  5.58s/it]

Processed: 19220506_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1470/1668 [2:49:52<19:51,  6.02s/it]

Processed: 19520510_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1471/1668 [2:50:01<22:38,  6.90s/it]

Processed: 19250829_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1472/1668 [2:50:06<20:35,  6.30s/it]

Processed: 19391230_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1473/1668 [2:50:35<43:07, 13.27s/it]

Processed: 19321105_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1474/1668 [2:50:40<34:49, 10.77s/it]

Processed: 19390603_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1475/1668 [2:50:46<29:49,  9.27s/it]

Processed: 19260626_1.txt -> Saved to Drive


Processing files:  88%|████████▊ | 1476/1668 [2:50:53<27:43,  8.67s/it]

Processed: 19440930_1.txt -> Saved to Drive


Processing files:  89%|████████▊ | 1477/1668 [2:51:01<26:15,  8.25s/it]

Processed: 19290323_1.txt -> Saved to Drive


Processing files:  89%|████████▊ | 1478/1668 [2:51:06<23:21,  7.38s/it]

Processed: 19250627_1.txt -> Saved to Drive


Processing files:  89%|████████▊ | 1479/1668 [2:51:12<21:55,  6.96s/it]

Processed: 19410111_1.txt -> Saved to Drive


Processing files:  89%|████████▊ | 1480/1668 [2:51:17<19:43,  6.30s/it]

Processed: 19370904_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1481/1668 [2:51:21<17:23,  5.58s/it]

Processed: 19351116_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1482/1668 [2:51:32<23:06,  7.45s/it]

Processed: 19391216_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1483/1668 [2:51:36<19:44,  6.40s/it]

Processed: 19360411_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1484/1668 [2:51:42<18:28,  6.02s/it]

Processed: 19370814_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1485/1668 [2:51:47<17:24,  5.71s/it]

Processed: 19230721_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1486/1668 [2:51:54<19:00,  6.27s/it]

Processed: 19460427_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1487/1668 [2:51:59<17:51,  5.92s/it]

Processed: 19380319_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1488/1668 [2:52:04<16:49,  5.61s/it]

Processed: 19290309_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1489/1668 [2:52:09<16:17,  5.46s/it]

Processed: 19410308_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1490/1668 [2:52:17<18:17,  6.17s/it]

Processed: 19470531_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1491/1668 [2:52:29<23:20,  7.91s/it]

Processed: 19400810_1.txt -> Saved to Drive


Processing files:  89%|████████▉ | 1492/1668 [2:52:41<26:30,  9.04s/it]

Processed: 19460720_1.txt -> Saved to Drive


Processing files:  90%|████████▉ | 1493/1668 [2:52:49<25:20,  8.69s/it]

Processed: 19211105_1.txt -> Saved to Drive


Processing files:  90%|████████▉ | 1494/1668 [2:52:55<23:07,  7.97s/it]

Processed: 19360404_1.txt -> Saved to Drive


Processing files:  90%|████████▉ | 1495/1668 [2:53:01<21:37,  7.50s/it]

Processed: 19370717_1.txt -> Saved to Drive


Processing files:  90%|████████▉ | 1496/1668 [2:53:09<21:54,  7.64s/it]

Processed: 19260731_1.txt -> Saved to Drive


Processing files:  90%|████████▉ | 1497/1668 [2:53:14<19:25,  6.81s/it]

Processed: 19300920_1.txt -> Saved to Drive


Processing files:  90%|████████▉ | 1498/1668 [2:53:21<19:46,  6.98s/it]

Processed: 19270813_1.txt -> Saved to Drive


Processing files:  90%|████████▉ | 1499/1668 [2:53:26<17:34,  6.24s/it]

Processed: 19300301_1.txt -> Saved to Drive


Processing files:  90%|████████▉ | 1500/1668 [2:53:31<16:17,  5.82s/it]

Processed: 19340106_1.txt -> Saved to Drive


Processing files:  90%|████████▉ | 1501/1668 [2:53:36<15:41,  5.64s/it]

Processed: 19310502_1.txt -> Saved to Drive


Processing files:  90%|█████████ | 1502/1668 [2:53:43<17:00,  6.15s/it]

Processed: 19301227_1.txt -> Saved to Drive


Processing files:  90%|█████████ | 1503/1668 [2:53:48<15:38,  5.69s/it]

Processed: 19271105_1.txt -> Saved to Drive


Processing files:  90%|█████████ | 1504/1668 [2:53:53<15:08,  5.54s/it]

Processed: 19371113_1.txt -> Saved to Drive


Processing files:  90%|█████████ | 1505/1668 [2:53:59<15:29,  5.70s/it]

Processed: 19251205_1.txt -> Saved to Drive


Processing files:  90%|█████████ | 1506/1668 [2:54:08<18:13,  6.75s/it]

Processed: 19210507_1.txt -> Saved to Drive


Processing files:  90%|█████████ | 1507/1668 [2:54:14<16:46,  6.25s/it]

Processed: 19381105_1.txt -> Saved to Drive


Processing files:  90%|█████████ | 1508/1668 [2:54:17<14:46,  5.54s/it]

Processed: 19490806_1.txt -> Saved to Drive


Processing files:  90%|█████████ | 1509/1668 [2:54:21<13:30,  5.10s/it]

Processed: 19280922_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1510/1668 [2:54:32<17:43,  6.73s/it]

Processed: 19490924_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1511/1668 [2:54:39<17:41,  6.76s/it]

Processed: 19280421_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1512/1668 [2:54:44<16:00,  6.16s/it]

Processed: 19400511_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1513/1668 [2:54:48<14:16,  5.53s/it]

Processed: 19210430_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1514/1668 [2:54:55<15:48,  6.16s/it]

Processed: 19520531_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1515/1668 [2:55:01<15:30,  6.08s/it]

Processed: 19220107_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1516/1668 [2:55:14<20:49,  8.22s/it]

Processed: 19460302_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1517/1668 [2:55:18<17:12,  6.84s/it]

Processed: 19230915_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1518/1668 [2:55:24<16:37,  6.65s/it]

Processed: 19270618_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1519/1668 [2:55:29<15:07,  6.09s/it]

Processed: 19350824_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1520/1668 [2:55:43<21:13,  8.60s/it]

Processed: 19430515_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1521/1668 [2:55:49<18:45,  7.66s/it]

Processed: 19490528_1.txt -> Saved to Drive


Processing files:  91%|█████████ | 1522/1668 [2:55:54<16:22,  6.73s/it]

Processed: 19450203_1.txt -> Saved to Drive


Processing files:  91%|█████████▏| 1523/1668 [2:55:59<15:04,  6.24s/it]

Processed: 19291019_1.txt -> Saved to Drive


Processing files:  91%|█████████▏| 1524/1668 [2:56:04<14:36,  6.09s/it]

Processed: 19370731_1.txt -> Saved to Drive


Processing files:  91%|█████████▏| 1525/1668 [2:56:09<13:29,  5.66s/it]

Processed: 19330429_1.txt -> Saved to Drive


Processing files:  91%|█████████▏| 1526/1668 [2:56:13<12:26,  5.25s/it]

Processed: 19210226_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1527/1668 [2:56:16<10:48,  4.60s/it]

Processed: 19360111_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1528/1668 [2:56:21<10:41,  4.58s/it]

Processed: 19311010_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1529/1668 [2:56:25<10:26,  4.50s/it]

Processed: 19410816_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1530/1668 [2:56:30<10:46,  4.68s/it]

Processed: 19310509_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1531/1668 [2:56:35<10:43,  4.70s/it]

Processed: 19230505_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1532/1668 [2:56:42<12:22,  5.46s/it]

Processed: 19341020_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1533/1668 [2:56:46<11:15,  5.01s/it]

Processed: 19200424_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1534/1668 [2:56:51<11:12,  5.02s/it]

Processed: 19420905_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1535/1668 [2:56:55<10:16,  4.63s/it]

Processed: 19511117_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1536/1668 [2:57:01<11:17,  5.14s/it]

Processed: 19370320_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1537/1668 [2:57:06<10:39,  4.88s/it]

Processed: 19311226_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1538/1668 [2:57:12<11:35,  5.35s/it]

Processed: 19490108_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1539/1668 [2:57:18<11:54,  5.54s/it]

Processed: 19520301_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1540/1668 [2:57:25<12:47,  6.00s/it]

Processed: 19241004_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1541/1668 [2:57:30<12:15,  5.79s/it]

Processed: 19440122_1.txt -> Saved to Drive


Processing files:  92%|█████████▏| 1542/1668 [2:57:36<12:01,  5.73s/it]

Processed: 19390304_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1543/1668 [2:57:41<11:25,  5.49s/it]

Processed: 19440624_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1544/1668 [2:57:45<10:30,  5.09s/it]

Processed: 19210521_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1545/1668 [2:57:49<09:45,  4.76s/it]

Processed: 19310627_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1546/1668 [2:57:55<10:18,  5.07s/it]

Processed: 19291221_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1547/1668 [2:58:03<12:11,  6.05s/it]

Processed: 19500429_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1548/1668 [2:58:08<11:13,  5.62s/it]

Processed: 19220805_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1549/1668 [2:58:17<13:27,  6.79s/it]

Processed: 19220311_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1550/1668 [2:58:22<11:56,  6.07s/it]

Processed: 19460824_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1551/1668 [2:58:27<11:11,  5.74s/it]

Processed: 19280317_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1552/1668 [2:58:35<12:37,  6.53s/it]

Processed: 19461019_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1553/1668 [2:58:42<12:47,  6.67s/it]

Processed: 19441111_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1554/1668 [2:58:47<11:34,  6.09s/it]

Processed: 19401109_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1555/1668 [2:58:53<11:37,  6.18s/it]

Processed: 19240419_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1556/1668 [2:59:00<11:55,  6.39s/it]

Processed: 19240517_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1557/1668 [2:59:05<11:02,  5.97s/it]

Processed: 19500812_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1558/1668 [2:59:09<09:38,  5.26s/it]

Processed: 19390617_1.txt -> Saved to Drive


Processing files:  93%|█████████▎| 1559/1668 [2:59:12<08:30,  4.68s/it]

Processed: 19200131_1.txt -> Saved to Drive


Processing files:  94%|█████████▎| 1560/1668 [2:59:20<10:06,  5.62s/it]

Processed: 19340901_1.txt -> Saved to Drive


Processing files:  94%|█████████▎| 1561/1668 [2:59:25<10:01,  5.62s/it]

Processed: 19330624_1.txt -> Saved to Drive


Processing files:  94%|█████████▎| 1562/1668 [2:59:30<09:19,  5.28s/it]

Processed: 19330610_1.txt -> Saved to Drive


Processing files:  94%|█████████▎| 1563/1668 [2:59:37<10:02,  5.73s/it]

Processed: 19511110_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1564/1668 [2:59:44<10:49,  6.25s/it]

Processed: 19480131_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1565/1668 [2:59:56<13:23,  7.80s/it]

Processed: 19440108_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1566/1668 [3:00:16<19:27, 11.44s/it]

Processed: 19520119_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1567/1668 [3:00:22<16:51, 10.02s/it]

Processed: 19491001_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1568/1668 [3:00:27<13:51,  8.32s/it]

Processed: 19311219_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1569/1668 [3:00:31<12:00,  7.28s/it]

Processed: 19220624_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1570/1668 [3:00:36<10:42,  6.55s/it]

Processed: 19390624_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1571/1668 [3:00:42<10:06,  6.25s/it]

Processed: 19431023_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1572/1668 [3:00:46<09:01,  5.64s/it]

Processed: 19340714_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1573/1668 [3:00:50<08:05,  5.12s/it]

Processed: 19251219_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1574/1668 [3:00:58<09:29,  6.06s/it]

Processed: 19411129_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1575/1668 [3:01:01<08:01,  5.18s/it]

Processed: 19281201_1.txt -> Saved to Drive


Processing files:  94%|█████████▍| 1576/1668 [3:01:14<11:22,  7.42s/it]

Processed: 19471025_1.txt -> Saved to Drive


Processing files:  95%|█████████▍| 1577/1668 [3:01:18<09:49,  6.48s/it]

Processed: 19420718_1.txt -> Saved to Drive


Processing files:  95%|█████████▍| 1578/1668 [3:01:27<10:35,  7.06s/it]

Processed: 19261030_1.txt -> Saved to Drive


Processing files:  95%|█████████▍| 1579/1668 [3:01:32<09:38,  6.50s/it]

Processed: 19390708_1.txt -> Saved to Drive


Processing files:  95%|█████████▍| 1580/1668 [3:01:37<08:46,  5.98s/it]

Processed: 19440527_1.txt -> Saved to Drive


Processing files:  95%|█████████▍| 1581/1668 [3:01:41<07:51,  5.41s/it]

Processed: 19321210_1.txt -> Saved to Drive
Progress saved at 2025-02-11 20:42:47 to /content/drive/MyDrive/cleaned_articles1/all_docs.pkl


Processing files:  95%|█████████▍| 1582/1668 [3:01:47<08:19,  5.81s/it]

Processed: 19290928_1.txt -> Saved to Drive


Processing files:  95%|█████████▍| 1583/1668 [3:01:55<08:55,  6.30s/it]

Processed: 19450915_1.txt -> Saved to Drive


Processing files:  95%|█████████▍| 1584/1668 [3:01:59<07:52,  5.62s/it]

Processed: 19281103_1.txt -> Saved to Drive


Processing files:  95%|█████████▌| 1585/1668 [3:02:03<07:06,  5.14s/it]

Processed: 19251114_1.txt -> Saved to Drive


Processing files:  95%|█████████▌| 1586/1668 [3:02:07<06:42,  4.90s/it]

Processed: 19490312_1.txt -> Saved to Drive


Processing files:  95%|█████████▌| 1587/1668 [3:02:12<06:24,  4.74s/it]

Processed: 19420425_1.txt -> Saved to Drive


Processing files:  95%|█████████▌| 1588/1668 [3:02:18<06:49,  5.12s/it]

Processed: 19451222_1.txt -> Saved to Drive


Processing files:  95%|█████████▌| 1589/1668 [3:02:23<06:38,  5.04s/it]

Processed: 19401026_1.txt -> Saved to Drive


Processing files:  95%|█████████▌| 1590/1668 [3:02:28<06:38,  5.11s/it]

Processed: 19500624_1.txt -> Saved to Drive


Processing files:  95%|█████████▌| 1591/1668 [3:02:35<07:33,  5.89s/it]

Processed: 19400608_1.txt -> Saved to Drive


Processing files:  95%|█████████▌| 1592/1668 [3:02:43<08:15,  6.52s/it]

Processed: 19480612_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1593/1668 [3:02:50<08:03,  6.45s/it]

Processed: 19290727_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1594/1668 [3:02:53<06:56,  5.62s/it]

Processed: 19440401_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1595/1668 [3:02:59<06:41,  5.50s/it]

Processed: 19430814_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1596/1668 [3:03:04<06:30,  5.42s/it]

Processed: 19330520_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1597/1668 [3:03:11<06:56,  5.87s/it]

Processed: 19260918_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1598/1668 [3:03:16<06:31,  5.59s/it]

Processed: 19281110_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1599/1668 [3:03:21<06:11,  5.38s/it]

Processed: 19240809_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1600/1668 [3:03:25<05:42,  5.03s/it]

Processed: 19260501_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1601/1668 [3:03:30<05:29,  4.92s/it]

Processed: 19520426_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1602/1668 [3:03:35<05:39,  5.15s/it]

Processed: 19410628_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1603/1668 [3:03:39<05:08,  4.75s/it]

Processed: 19520223_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1604/1668 [3:03:43<04:54,  4.60s/it]

Processed: 19210813_1.txt -> Saved to Drive


Processing files:  96%|█████████▌| 1605/1668 [3:03:50<05:22,  5.12s/it]

Processed: 19480821_1.txt -> Saved to Drive


Processing files:  96%|█████████▋| 1606/1668 [3:03:59<06:45,  6.54s/it]

Processed: 19431204_1.txt -> Saved to Drive


Processing files:  96%|█████████▋| 1607/1668 [3:04:07<06:50,  6.73s/it]

Processed: 19370619_1.txt -> Saved to Drive


Processing files:  96%|█████████▋| 1608/1668 [3:04:13<06:35,  6.59s/it]

Processed: 19490827_1.txt -> Saved to Drive


Processing files:  96%|█████████▋| 1609/1668 [3:04:18<05:57,  6.06s/it]

Processed: 19331209_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1610/1668 [3:04:23<05:38,  5.84s/it]

Processed: 19480807_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1611/1668 [3:04:27<05:06,  5.38s/it]

Processed: 19211217_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1612/1668 [3:04:32<04:50,  5.19s/it]

Processed: 19381231_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1613/1668 [3:04:37<04:38,  5.07s/it]

Processed: 19210219_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1614/1668 [3:04:45<05:18,  5.90s/it]

Processed: 19361003_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1615/1668 [3:04:51<05:22,  6.08s/it]

Processed: 19450804_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1616/1668 [3:04:56<04:49,  5.56s/it]

Processed: 19240628_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1617/1668 [3:05:04<05:27,  6.43s/it]

Processed: 19220923_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1618/1668 [3:05:08<04:51,  5.83s/it]

Processed: 19300125_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1619/1668 [3:05:16<05:07,  6.29s/it]

Processed: 19241108_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1620/1668 [3:05:22<04:57,  6.20s/it]

Processed: 19351123_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1621/1668 [3:05:31<05:32,  7.08s/it]

Processed: 19300104_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1622/1668 [3:05:37<05:13,  6.82s/it]

Processed: 19241018_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1623/1668 [3:05:42<04:36,  6.14s/it]

Processed: 19401005_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1624/1668 [3:05:49<04:44,  6.46s/it]

Processed: 19441007_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1625/1668 [3:05:52<03:59,  5.58s/it]

Processed: 19290316_1.txt -> Saved to Drive


Processing files:  97%|█████████▋| 1626/1668 [3:05:59<04:05,  5.85s/it]

Processed: 19480306_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1627/1668 [3:06:03<03:40,  5.37s/it]

Processed: 19510602_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1628/1668 [3:06:10<03:50,  5.76s/it]

Processed: 19400921_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1629/1668 [3:06:15<03:35,  5.52s/it]

Processed: 19440311_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1630/1668 [3:06:21<03:35,  5.68s/it]

Processed: 19210827_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1631/1668 [3:06:29<03:56,  6.38s/it]

Processed: 19240202_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1632/1668 [3:06:33<03:25,  5.70s/it]

Processed: 19200501_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1633/1668 [3:06:39<03:17,  5.65s/it]

Processed: 19340804_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1634/1668 [3:06:43<03:02,  5.36s/it]

Processed: 19211210_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1635/1668 [3:06:48<02:52,  5.23s/it]

Processed: 19260227_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1636/1668 [3:06:53<02:40,  5.01s/it]

Processed: 19260313_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1637/1668 [3:06:57<02:33,  4.96s/it]

Processed: 19290112_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1638/1668 [3:07:02<02:27,  4.93s/it]

Processed: 19280623_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1639/1668 [3:07:08<02:27,  5.08s/it]

Processed: 19270507_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1640/1668 [3:07:12<02:17,  4.89s/it]

Processed: 19330318_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1641/1668 [3:07:19<02:25,  5.40s/it]

Processed: 19390408_1.txt -> Saved to Drive


Processing files:  98%|█████████▊| 1642/1668 [3:07:27<02:45,  6.37s/it]

Processed: 19480221_1.txt -> Saved to Drive


Processing files:  99%|█████████▊| 1643/1668 [3:07:32<02:27,  5.90s/it]

Processed: 19210730_1.txt -> Saved to Drive


Processing files:  99%|█████████▊| 1644/1668 [3:07:38<02:17,  5.74s/it]

Processed: 19481127_1.txt -> Saved to Drive


Processing files:  99%|█████████▊| 1645/1668 [3:07:43<02:07,  5.54s/it]

Processed: 19370807_1.txt -> Saved to Drive


Processing files:  99%|█████████▊| 1646/1668 [3:07:47<01:52,  5.12s/it]

Processed: 19351019_1.txt -> Saved to Drive


Processing files:  99%|█████████▊| 1647/1668 [3:07:51<01:40,  4.77s/it]

Processed: 19260410_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1648/1668 [3:07:57<01:45,  5.29s/it]

Processed: 19400720_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1649/1668 [3:08:01<01:29,  4.74s/it]

Processed: 19340908_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1650/1668 [3:08:11<01:56,  6.46s/it]

Processed: 19230728_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1651/1668 [3:08:16<01:41,  5.96s/it]

Processed: 19281027_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1652/1668 [3:08:20<01:26,  5.39s/it]

Processed: 19360523_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1653/1668 [3:08:25<01:19,  5.27s/it]

Processed: 19320827_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1654/1668 [3:08:30<01:12,  5.20s/it]

Processed: 19300607_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1655/1668 [3:08:39<01:22,  6.33s/it]

Processed: 19400629_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1656/1668 [3:08:44<01:11,  5.92s/it]

Processed: 19300830_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1657/1668 [3:08:50<01:06,  6.07s/it]

Processed: 19370911_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1658/1668 [3:08:56<00:58,  5.82s/it]

Processed: 19460907_1.txt -> Saved to Drive


Processing files:  99%|█████████▉| 1659/1668 [3:09:01<00:49,  5.55s/it]

Processed: 19300322_1.txt -> Saved to Drive


Processing files: 100%|█████████▉| 1660/1668 [3:09:06<00:43,  5.38s/it]

Processed: 19440826_1.txt -> Saved to Drive


Processing files: 100%|█████████▉| 1661/1668 [3:09:12<00:38,  5.54s/it]

Processed: 19310131_1.txt -> Saved to Drive


Processing files: 100%|█████████▉| 1662/1668 [3:09:17<00:32,  5.42s/it]

Processed: 19430717_1.txt -> Saved to Drive


Processing files: 100%|█████████▉| 1663/1668 [3:09:25<00:31,  6.27s/it]

Processed: 19430213_1.txt -> Saved to Drive


Processing files: 100%|█████████▉| 1664/1668 [3:09:31<00:24,  6.13s/it]

Processed: 19490115_1.txt -> Saved to Drive


Processing files: 100%|█████████▉| 1665/1668 [3:09:37<00:18,  6.13s/it]

Processed: 19221104_1.txt -> Saved to Drive


Processing files: 100%|█████████▉| 1666/1668 [3:09:41<00:11,  5.54s/it]

Processed: 19280303_1.txt -> Saved to Drive


Processing files: 100%|█████████▉| 1667/1668 [3:09:45<00:05,  5.19s/it]

Processed: 19360208_1.txt -> Saved to Drive


Processing files: 100%|██████████| 1668/1668 [3:10:02<00:00,  6.84s/it]

Processed: 19460209_1.txt -> Saved to Drive





In [None]:
## ========== Chroma ==========
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

sentence_transformer_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
print("Initialized SentenceTransformer embeddings.")

# Load all documents into Chroma
db = Chroma.from_documents(all_docs, sentence_transformer_embeddings, persist_directory="./chroma_db_clean_huggingface")
print('All documents loaded and embedded.(huggingface)')

In [None]:
## trying to import the thing in my head

In [35]:
all_docs

[Document(metadata={'filename': '19220527_1.txt', 'date': 'May 27, 1922', 'locations': ['San Juan', 'Caguas', 'Arecibo']}, page_content="In the office of the Free Federation, we found Senator and socialist leader Santiago Iglesias, with whom we discussed various economic and political issues. He praised the Worker Indemnity Commission, referring to it as one of the laws with the most humanitarian spirit. Iglesias mentioned that critics claimed 86% of the Commission's income goes to salaries, while only 14% is for worker indemnities; he deemed this assertion exaggerated and called for a clear report on the Commission's finances. He also expressed concern that shipping companies are not contributing to the Commission, arguing that since Puerto Rico is not incorporated into the United States, local laws should apply. Iglesias urged lawyers to address this matter in court so that dock workers receive the benefits of the law. We hope to interview the Commission's president for his perspecti