In [None]:
import tqdm

file_path = "ol_dump_editions_2024-12-19.txt"  # Update with actual file path

# Estimate total rows for progress tracking
print("Estimating total rows...")
with open(file_path, "r", encoding="utf-8") as f:
    total_rows = sum(1 for _ in f)

print(f"Total estimated rows: {total_rows:,}")


In [28]:
import duckdb
import json
import pandas as pd

# Path to your large Open Library dump file
file_path = "ol_dump_2025-01-08.txt"  # Update with actual path
# select rows with type /type/edition

# Connect to DuckDB in memory
conn = duckdb.connect()

# Extract a 10-row sample
sample_query = f"""
SELECT * FROM read_csv_auto('{file_path}', delim='\t', header=False, columns={{
    'type': 'TEXT', 'book_id': 'TEXT', 'revision': 'INTEGER', 'timestamp': 'TEXT', 'json_data': 'TEXT'
}})
WHERE type = '/type/edition'
LIMIT 10
"""

# Fetch the sample data
sample_df = conn.execute(sample_query).fetchdf()


In [29]:
sample_df

Unnamed: 0,type,book_id,revision,timestamp,json_data
0,/type/edition,/books/OL10000299M,3,2011-04-27T14:29:38.147197,"{""publishers"": [""Stationery Office Books""], ""p..."
1,/type/edition,/books/OL10000402M,3,2011-04-27T11:28:52.494785,"{""publishers"": [""Stationery Office Books""], ""p..."
2,/type/edition,/books/OL10000661M,3,2011-04-25T22:07:29.549200,"{""publishers"": [""Stationery Office Books""], ""p..."
3,/type/edition,/books/OL10000852M,3,2011-04-27T04:54:31.855299,"{""publishers"": [""Stationery Office Books""], ""p..."
4,/type/edition,/books/OL10001003M,3,2011-06-08T04:28:03.025393,"{""publishers"": [""Stationery Office Books""], ""p..."
5,/type/edition,/books/OL10001092M,3,2011-04-28T09:07:33.035640,"{""publishers"": [""Stationery Office Books""], ""p..."
6,/type/edition,/books/OL10001125M,3,2011-04-26T15:08:38.906519,"{""publishers"": [""Stationery Office Books""], ""p..."
7,/type/edition,/books/OL10001268M,3,2011-04-26T18:02:19.907101,"{""publishers"": [""Stationery Office Books""], ""p..."
8,/type/edition,/books/OL10001455M,3,2022-07-17T23:09:07.154959,"{""publishers"": [""Stationery Office""], ""languag..."
9,/type/edition,/books/OL10001465M,3,2022-07-17T22:01:18.764450,"{""publishers"": [""Stationery Office""], ""languag..."


In [None]:
sample_df["json_data"] = sample_df["json_data"].apply(lambda x: json.loads(x) if isinstance(x, str) else {})

# Normalize the JSON into a structured format
normalized_df = pd.json_normalize(sample_df["json_data"])

# Merge the normalized JSON with the original DataFrame (excluding raw JSON column)
final_df = pd.concat([sample_df.drop(columns=["json_data"]), normalized_df], axis=1)

# Display the structured DataFrame
final_df

In [1]:
import duckdb
import json
import pandas as pd
from tqdm import tqdm
import os

In [30]:
import duckdb
import pandas as pd
from tqdm import tqdm

file_path = "ol_dump_2025-01-08.txt"  # Update with actual path
output_file = "processed_open_library_data_dump.csv"

conn = duckdb.connect()

chunk_size = 100000
processed_rows = 0
first_write = True


In [10]:
print("Estimating total rows...")
try:
    with open(file_path, "r", encoding="utf-8") as f:
        total_rows = sum(1 for _ in f)
    print(f"Total estimated rows: {total_rows:,}")

Estimating total rows...
Total estimated rows: 53,073,982


In [33]:
import time  # Import the time module for sleep functionality

processed_rows = 0
sleep_time = 1  # Set the sleep time in seconds (adjust as needed)

while True:
    query = f"""
    SELECT
        book_id, revision, timestamp, json_data
    FROM read_csv_auto('{file_path}',
        delim='\t',
        header=False,
        max_line_size=10000000,  -- Increase the limit to just above the largest line size
        columns={{
            'type': 'TEXT',
            'book_id': 'TEXT',
            'revision': 'BIGINT',  -- Use BIGINT to match sniffed type
            'timestamp': 'TIMESTAMP',  -- Use TIMESTAMP to match sniffed type
            'json_data': 'TEXT'
        }}
    )
    WHERE type = '/type/edition'
    LIMIT {chunk_size} OFFSET {processed_rows}
    """

    try:
        chunk_df = conn.execute(query).fetchdf()
    except duckdb.BinderException as e:
        print(f"Query execution failed: {e}")
        break

    if chunk_df.empty:
        break

    if first_write:
        chunk_df.to_csv(output_file, index=False, mode="w")  # Write with headers
        first_write = False
    else:
        chunk_df.to_csv(output_file, index=False, mode="a", header=False)  # Append without headers

    rows_processed = len(chunk_df)
    del chunk_df

    processed_rows += rows_processed

    print(f"Processed {processed_rows} rows", end="\r")  # 'end="\r"' ensures it overwrites the previous line

    time.sleep(sleep_time)

print(f"Processing complete! Data saved to: {output_file}")

Processed 400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 1000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 1100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 1200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 1300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 1400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 1500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 1600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 1700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 1800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 1900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 2000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 2100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 2200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 2300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 2400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 2500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 2600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 2700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 2800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 2900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 3000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 3100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 3200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 3300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 3400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 3500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 3600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 3700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 3800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 3900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 4000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 4100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 4200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 4300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 4400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 4500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 4600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 4700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 4800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 4900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 5000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 5100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 5200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 5300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 5400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 5500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 5600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 5700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 5800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 5900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 6000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 6100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 6200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 6300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 6400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 6500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 6600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 6700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 6800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 6900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 7000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 7100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 7200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 7300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 7400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 7500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 7600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 7700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 7800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 7900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 8000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 8100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 8200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 8300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 8400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 8500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 8600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 8700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 8800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 8900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 9000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 9100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 9200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 9300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 9400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 9500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 9600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 9700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 9800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 9900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 10000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 10100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 10200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 10300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 10400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 10500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 10600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 10700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 10800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 10900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 11000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 11100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 11200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 11300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 11400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 11500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 11600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 11700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 11800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 11900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 12000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 12100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 12200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 12300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 12400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 12500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 12600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 12700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 12800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 12900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 13000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 13100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 13200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 13300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 13400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 13500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 13600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 13700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 13800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 13900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 14000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 14100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 14200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 14300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 14400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 14500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 14600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 14700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 14800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 14900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 15000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 15100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 15200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 15300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 15400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 15500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 15600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 15700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 15800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 15900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 16000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 16100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 16200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 16300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 16400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 16500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 16600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 16700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 16800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 16900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 17000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 17100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 17200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 17300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 17400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 17500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 17600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 17700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 17800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 17900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 18000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 18100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 18200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 18300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 18400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 18500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 18600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 18700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 18800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 18900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 19000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 19100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 19200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 19300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 19400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 19500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 19600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 19700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 19800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 19900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 20000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 20100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 20200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 20300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 20400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 20500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 20600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 20700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 20800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 20900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 21000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 21100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 21200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 21300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 21400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 21500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 21600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 21700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 21800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 21900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 22000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 22100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 22200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 22300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 22400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 22500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 22600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 22700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 22800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 22900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 23000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 23100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 23200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 23300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 23400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 23500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 23600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 23700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 23800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 23900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 24000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 24100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 24200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 24300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 24400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 24500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 24600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 24700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 24800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 24900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 25000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 25100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 25200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 25300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 25400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 25500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 25600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 25700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 25800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 25900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 26000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 26100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 26200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 26300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 26400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 26500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 26600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 26700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 26800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 26900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 27000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 27100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 27200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 27300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 27400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 27500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 27600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 27700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 27800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 27900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 28000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 28100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 28200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 28300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 28400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 28500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 28600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 28700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 28800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 28900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 29000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 29100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 29200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 29300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 29400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 29500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 29600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 29700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 29800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 29900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 30000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 30100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 30200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 30300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 30400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 30500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 30600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 30700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 30800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 30900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 31000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 31100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 31200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 31300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 31400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 31500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 31600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 31700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 31800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 31900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 32000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 32100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 32200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 32300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 32400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 32500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 32600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 32700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 32800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 32900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 33000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 33100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 33200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 33300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 33400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 33500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 33600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 33700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 33800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 33900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 34000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 34100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 34200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 34300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 34400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 34500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 34600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 34700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 34800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 34900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 35000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 35100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 35200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 35300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 35400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 35500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 35600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 35700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 35800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 35900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 36000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 36100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 36200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 36300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 36400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 36500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 36600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 36700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 36800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 36900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 37000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 37100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 37200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 37300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 37400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 37500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 37600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 37700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 37800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 37900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 38000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 38100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 38200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 38300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 38400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 38500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 38600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 38700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 38800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 38900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 39000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 39100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 39200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 39300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 39400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 39500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 39600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 39700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 39800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 39900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 40000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 40100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 40200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 40300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 40400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 40500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 40600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 40700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 40800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 40900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 41000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 41100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 41200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 41300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 41400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 41500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 41600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 41700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 41800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 41900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 42000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 42100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 42200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 42300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 42400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 42500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 42600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 42700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 42800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 42900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 43000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 43100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 43200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 43300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 43400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 43500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 43600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 43700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 43800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 43900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 44000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 44100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 44200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 44300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 44400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 44500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 44600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 44700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 44800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 44900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 45000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 45100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 45200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 45300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 45400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 45500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 45600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 45700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 45800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 45900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 46000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 46100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 46200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 46300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 46400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 46500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 46600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 46700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 46800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 46900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 47000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 47100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 47200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 47300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 47400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 47500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 47600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 47700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 47800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 47900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 48000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 48100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 48200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 48300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 48400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 48500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 48600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 48700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 48800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 48900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 49000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 49100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 49200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 49300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 49400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 49500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 49600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 49700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 49800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 49900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 50000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 50100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 50200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 50300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 50400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 50500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 50600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 50700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 50800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 50900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 51000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 51100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 51200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 51300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 51400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 51500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 51600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 51700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 51800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 51900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 52000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 52100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 52200000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 52300000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 52400000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 52500000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 52600000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 52700000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 52800000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 52900000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 53000000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 53100000 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processed 53148103 rows

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Processing complete! Data saved to: processed_open_library_data_dump.csv


Try to process the 10000 rows of the data to check the conponents of the data

In [90]:
import pandas as pd
import json

output_file = "processed_open_library_data_dump.csv"
column_names = ["book_id", "revision", "timestamp", "json_data"]

sample_df = pd.read_csv(output_file, nrows=100000, names=column_names, header=None)


In [91]:
sample_df

Unnamed: 0,book_id,revision,timestamp,json_data
0,/books/OL10000299M,3,2011-04-27 14:29:38.147197,"{""publishers"": [""Stationery Office Books""], ""p..."
1,/books/OL10000402M,3,2011-04-27 11:28:52.494785,"{""publishers"": [""Stationery Office Books""], ""p..."
2,/books/OL10000661M,3,2011-04-25 22:07:29.549200,"{""publishers"": [""Stationery Office Books""], ""p..."
3,/books/OL10000852M,3,2011-04-27 04:54:31.855299,"{""publishers"": [""Stationery Office Books""], ""p..."
4,/books/OL10001003M,3,2011-06-08 04:28:03.025393,"{""publishers"": [""Stationery Office Books""], ""p..."
...,...,...,...,...
99995,/books/OL35738257M,2,2023-02-01 15:25:47.290397,"{""type"": {""key"": ""/type/edition""}, ""publish_da..."
99996,/books/OL357382M,7,2024-07-15 01:39:27.737325,"{""publishers"": [""St. Martin's Press""], ""identi..."
99997,/books/OL35738372M,6,2023-02-28 18:40:28.640066,"{""notes"": {""type"": ""/type/text"", ""value"": ""\""P..."
99998,/books/OL35738535M,1,2021-12-24 06:04:15.305092,"{""type"": {""key"": ""/type/edition""}, ""publish_da..."


In [92]:

sample_df["json_data"] = sample_df["json_data"].apply(lambda x: json.loads(x) if isinstance(x, str) else {})

normalized_df = pd.json_normalize(sample_df["json_data"])

final_df = pd.concat([sample_df.drop(columns=["json_data"]), normalized_df], axis=1)


In [93]:
 # read the processed data
final_df

Unnamed: 0,book_id,revision,timestamp,publishers,physical_format,title,number_of_pages,isbn_13,isbn_10,publish_date,...,identifiers.freebase,identifiers.bayerische_staatsbibliothek,identifiers.british_national_bibliography,identifiers.amazon.ca_asin,identifiers.zdb-id,identifiers.librivox,identifiers.bibliothèque_nationale_de_france,identifiers.project_gutenberg,identifiers.scribd,identifiers.isfdb
0,/books/OL10000299M,3,2011-04-27 14:29:38.147197,[Stationery Office Books],Paperback,Human Rights Bill [H.L.],22.0,[9780108360688],[0108360687],"December 3, 1997",...,,,,,,,,,,
1,/books/OL10000402M,3,2011-04-27 11:28:52.494785,[Stationery Office Books],Paperback,Northern Ireland (Sentences) Bill,4.0,[9780108361821],[0108361829],"July 17, 1998",...,,,,,,,,,,
2,/books/OL10000661M,3,2011-04-25 22:07:29.549200,[Stationery Office Books],Paperback,Bank of England Bill,8.0,[9780108365706],[0108365700],"March 6, 1998",...,,,,,,,,,,
3,/books/OL10000852M,3,2011-04-27 04:54:31.855299,[Stationery Office Books],Paperback,Regional Development Agencies Bill,2.0,[9780108367625],[0108367622],"June 17, 1998",...,,,,,,,,,,
4,/books/OL10001003M,3,2011-06-08 04:28:03.025393,[Stationery Office Books],Paperback,Northern Ireland Bill,12.0,[9780108369155],[0108369153],"October 21, 1998",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,/books/OL35738257M,2,2023-02-01 15:25:47.290397,[Chang jiang shao nian er tong chu ban she],,Kuang shen,,[9787556006380],[7556006387],2014,...,,,,,,,,,,
99996,/books/OL357382M,7,2024-07-15 01:39:27.737325,[St. Martin's Press],,World orders in the making,276.0,,[0312215487],1998,...,,,,,,,,,,
99997,/books/OL35738372M,6,2023-02-28 18:40:28.640066,"[DK Publishing, DK Eyewitness Travel]",,Top 10 Orlando,160.0,[9781465402776],[1465402772],2014,...,,,,,,,,,,
99998,/books/OL35738535M,1,2021-12-24 06:04:15.305092,[Bei fang fu nü er tong chu ban she],,Yong you jiu shi xing fu,,[9787538588248],[7538588248],2015,...,,,,,,,,,,


There are still many dictionaries in the data, so we need to normalize the data again

convert the authors, works, isbn_13, isbn_10, publishers, and similar columns into plain text (string) format, the following changes were made:

In [94]:
if "isbn_13" in final_df.columns:
    final_df["isbn_13"] = final_df["isbn_13"].apply(lambda x: ", ".join(map(str, x)) if isinstance(x, list) else str(x) if pd.notna(x) else "")

if "isbn_10" in final_df.columns:
    final_df["isbn_10"] = final_df["isbn_10"].apply(lambda x: ", ".join(map(str, x)) if isinstance(x, list) else str(x) if pd.notna(x) else "")

if "publishers" in final_df.columns:
    final_df["publishers"] = final_df["publishers"].apply(lambda x: ", ".join(map(str, x)) if isinstance(x, list) else str(x) if pd.notna(x) else "")

if "oclc_numbers" in final_df.columns:
    final_df["oclc_numbers"] = final_df["oclc_numbers"].apply(lambda x: ", ".join(map(str, x)) if isinstance(x, list) else str(x) if pd.notna(x) else "")

# Extract 'key' values from lists of dictionaries
if "authors" in final_df.columns:
    final_df["authors"] = final_df["authors"].apply(lambda x: ", ".join(d["key"] for d in x if isinstance(d, dict) and "key" in d)
                                                    if isinstance(x, list) else str(x) if pd.notna(x) else "")

if "works" in final_df.columns:
    final_df["works"] = final_df["works"].apply(lambda x: ", ".join(d["key"] for d in x if isinstance(d, dict) and "key" in d)
                                                if isinstance(x, list) else str(x) if pd.notna(x) else "")

In [95]:
final_df

Unnamed: 0,book_id,revision,timestamp,publishers,physical_format,title,number_of_pages,isbn_13,isbn_10,publish_date,...,identifiers.freebase,identifiers.bayerische_staatsbibliothek,identifiers.british_national_bibliography,identifiers.amazon.ca_asin,identifiers.zdb-id,identifiers.librivox,identifiers.bibliothèque_nationale_de_france,identifiers.project_gutenberg,identifiers.scribd,identifiers.isfdb
0,/books/OL10000299M,3,2011-04-27 14:29:38.147197,Stationery Office Books,Paperback,Human Rights Bill [H.L.],22.0,9780108360688,0108360687,"December 3, 1997",...,,,,,,,,,,
1,/books/OL10000402M,3,2011-04-27 11:28:52.494785,Stationery Office Books,Paperback,Northern Ireland (Sentences) Bill,4.0,9780108361821,0108361829,"July 17, 1998",...,,,,,,,,,,
2,/books/OL10000661M,3,2011-04-25 22:07:29.549200,Stationery Office Books,Paperback,Bank of England Bill,8.0,9780108365706,0108365700,"March 6, 1998",...,,,,,,,,,,
3,/books/OL10000852M,3,2011-04-27 04:54:31.855299,Stationery Office Books,Paperback,Regional Development Agencies Bill,2.0,9780108367625,0108367622,"June 17, 1998",...,,,,,,,,,,
4,/books/OL10001003M,3,2011-06-08 04:28:03.025393,Stationery Office Books,Paperback,Northern Ireland Bill,12.0,9780108369155,0108369153,"October 21, 1998",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,/books/OL35738257M,2,2023-02-01 15:25:47.290397,Chang jiang shao nian er tong chu ban she,,Kuang shen,,9787556006380,7556006387,2014,...,,,,,,,,,,
99996,/books/OL357382M,7,2024-07-15 01:39:27.737325,St. Martin's Press,,World orders in the making,276.0,,0312215487,1998,...,,,,,,,,,,
99997,/books/OL35738372M,6,2023-02-28 18:40:28.640066,"DK Publishing, DK Eyewitness Travel",,Top 10 Orlando,160.0,9781465402776,1465402772,2014,...,,,,,,,,,,
99998,/books/OL35738535M,1,2021-12-24 06:04:15.305092,Bei fang fu nü er tong chu ban she,,Yong you jiu shi xing fu,,9787538588248,7538588248,2015,...,,,,,,,,,,


In [96]:
# check the columns
final_df.columns

Index(['book_id', 'revision', 'timestamp', 'publishers', 'physical_format',
       'title', 'number_of_pages', 'isbn_13', 'isbn_10', 'publish_date',
       ...
       'identifiers.freebase', 'identifiers.bayerische_staatsbibliothek',
       'identifiers.british_national_bibliography',
       'identifiers.amazon.ca_asin', 'identifiers.zdb-id',
       'identifiers.librivox', 'identifiers.bibliothèque_nationale_de_france',
       'identifiers.project_gutenberg', 'identifiers.scribd',
       'identifiers.isfdb'],
      dtype='object', length=120)

In [97]:
final_df.dtypes

book_id                                         object
revision                                         int64
timestamp                                       object
publishers                                      object
physical_format                                 object
                                                 ...  
identifiers.librivox                            object
identifiers.bibliothèque_nationale_de_france    object
identifiers.project_gutenberg                   object
identifiers.scribd                              object
identifiers.isfdb                               object
Length: 120, dtype: object

In [98]:
main_columns = [
    "book_id", "title", "full_title", "isbn_13", "isbn_10", "key", "works",
    "publishers", "publish_date", "publish_country", "edition_name",
    "number_of_pages", "physical_format", "physical_dimensions", "weight",
    "authors", "by_statement",
    "subjects", "genres", "identifiers.goodreads",
    "identifiers.google", "identifiers.amazon", "identifiers.doi",
    "identifiers.wikidata", "identifiers.librarything", "identifiers.better_world_books",
    "url", "first_sentence", "description"
]

filtered_df = final_df[main_columns]

In [99]:
filtered_df

Unnamed: 0,book_id,title,full_title,isbn_13,isbn_10,key,works,publishers,publish_date,publish_country,...,identifiers.goodreads,identifiers.google,identifiers.amazon,identifiers.doi,identifiers.wikidata,identifiers.librarything,identifiers.better_world_books,url,first_sentence,description
0,/books/OL10000299M,Human Rights Bill [H.L.],,9780108360688,0108360687,/books/OL10000299M,/works/OL14903285W,Stationery Office Books,"December 3, 1997",,...,,,,,,,,,,
1,/books/OL10000402M,Northern Ireland (Sentences) Bill,,9780108361821,0108361829,/books/OL10000402M,/works/OL14902858W,Stationery Office Books,"July 17, 1998",,...,,,,,,,,,,
2,/books/OL10000661M,Bank of England Bill,,9780108365706,0108365700,/books/OL10000661M,/works/OL14903105W,Stationery Office Books,"March 6, 1998",,...,,,,,,,,,,
3,/books/OL10000852M,Regional Development Agencies Bill,,9780108367625,0108367622,/books/OL10000852M,/works/OL14903245W,Stationery Office Books,"June 17, 1998",,...,,,,,,,,,,
4,/books/OL10001003M,Northern Ireland Bill,,9780108369155,0108369153,/books/OL10001003M,/works/OL14903191W,Stationery Office Books,"October 21, 1998",,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,/books/OL35738257M,Kuang shen,Kuang shen Shen fen yi yun,9787556006380,7556006387,/books/OL35738257M,/works/OL19649579W,Chang jiang shao nian er tong chu ban she,2014,cc,...,,,,,,,,,,
99996,/books/OL357382M,World orders in the making,,,0312215487,/books/OL357382M,/works/OL18948578W,St. Martin's Press,1998,nyu,...,[1357099],,,,,,,,,
99997,/books/OL35738372M,Top 10 Orlando,Top 10 Orlando,9781465402776,1465402772,/books/OL35738372M,/works/OL21709257W,"DK Publishing, DK Eyewitness Travel",2014,nyu,...,,,[],,,,,,,
99998,/books/OL35738535M,Yong you jiu shi xing fu,,9787538588248,7538588248,/books/OL35738535M,/works/OL26451835W,Bei fang fu nü er tong chu ban she,2015,cc,...,,,,,,,,,,


Check the missing values in the data

In [114]:
print(filtered_df["book_id"].count())
print(filtered_df["identifiers.goodreads"].isna().sum())
print(filtered_df["isbn_13"].isna().sum())
print(filtered_df["isbn_10"].isna().sum())
print(filtered_df["works"].isna().sum())
print(filtered_df["publish_country"].isna().sum())
print(filtered_df["url"].isna().sum())

100000
86378
0
0
0
39930
98292


In [115]:
key_column=["book_id", "title", "full_title", "isbn_13", "isbn_10", "key", "works","publishers","publish_country","subjects", "genres", "identifiers.goodreads","identifiers.google", "identifiers.amazon", "first_sentence", "description"]
key_df=filtered_df[key_column].sample(100)
key_df

Unnamed: 0,book_id,title,full_title,isbn_13,isbn_10,key,works,publishers,publish_country,subjects,genres,identifiers.goodreads,identifiers.google,identifiers.amazon,first_sentence,description
27551,/books/OL16783561M,gramophone record library,,,,/books/OL16783561M,/works/OL11924670W,Grafton,xxk,,,,,,,
67161,/books/OL261935M,Maternity care at public expense in six counti...,,,,/books/OL261935M,/works/OL1588687W,U.S Govt. Print. Off.,dcu,"[Obstetrics, Mothers, Public health -- New Yor...",,,,,,
1983,/books/OL10458120M,"Bonnard, Peintre du Merveilleux",,9780320058165,0320058166,/books/OL10458120M,/works/OL1136797W,French & European Pubns,,"[Individual Artist, Art]",,,,,,
89545,/books/OL32799906M,Passage to Avalon,,9780999511022,0999511025,/books/OL32799906M,/works/OL24710064W,Michael Thayer,,,,,,,,
33069,/books/OL1807333M,Raum und Volkstum,,,340206930X,/books/OL1807333M,/works/OL4393102W,Aschendorff,gw,[Westphalia (Germany) -- Cultural policy.],,[4629743],,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65706,/books/OL25655137M,la mudez de las piedras,,9788416234431,,/books/OL25655137M,/works/OL17084686W,Uno Editorial,,,,,,,,
60565,/books/OL24429509M,Zakon o svojinsko-pravnim odnosima,,9788674420607,,/books/OL24429509M,/works/OL15463336W,JU Službeni list Crne Gore,mo,,,,,,,
50999,/books/OL22264749M,Rally once again,,,1852232226,/books/OL22264749M,/works/OL1809143W,"Crowood in association with Anthony Bird, The ...",xxk,"[United States -- History -- Civil War, 1861-1...",,,,,,
60163,/books/OL2433699M,The sin and the sinners,,,,/books/OL2433699M,/works/OL3149476W,Jarrolds,enk,,,,,,,


Skimpy does not return anything

In [116]:
from skimpy import skim
key_df = pd.DataFrame(key_df).reset_index(drop=True)
skim(key_df)

In [117]:
total_book_ids = filtered_df["book_id"].count()

unique_book_ids = filtered_df["book_id"].nunique()

# Print the results
print(f"Total book_id values: {total_book_ids}")
print(f"Unique book_id values: {unique_book_ids}")


Total book_id values: 100000
Unique book_id values: 100000


In [118]:
total_works = filtered_df["works"].count()

unique_works = filtered_df["works"].nunique()

print(f"Total works values: {total_works}")
print(f"Unique works values: {unique_works}")


Total works values: 100000
Unique works values: 92309


In [69]:
# skim the data
from skimpy import skim
filtered_df = pd.DataFrame(filtered_df)


# Sample 100 rows and reset index correctly
test_df = filtered_df.sample(100, random_state=42).reset_index(drop=True)
test_df

Unnamed: 0,book_id,title,full_title,isbn_13,isbn_10,key,works,publishers,publish_date,publish_country,...,identifiers.google,identifiers.amazon,identifiers.doi,identifiers.wikidata,identifiers.librarything,identifiers.better_world_books,url,first_sentence,description,table_of_contents
0,/books/OL11458883M,Hotel & Restaurant Dictionary Fachworterbuch ...,,9780828814782,0828814783,/books/OL11458883M,/works/OL9250608W,French & European Pubns,"October 1, 1981",,...,,,,,,,,,,
1,/books/OL11088269M,Mary Higgins Clark Presents Malice Domestic 2,,9780787105693,0787105694,/books/OL11088269M,/works/OL31189742W,Audio Literature,July 1996,,...,,,,,[359866],,,,,
2,/books/OL10401622M,The 2007-2012 Outlook for Indian Breads in Japan,,9780497467661,0497467666,/books/OL10401622M,/works/OL3309893W,"ICON Group International, Inc.","September 28, 2006",,...,,,,,,,,,,
3,/books/OL11102775M,Pregnancy-Care & Physiology,,9780788301896,0788301896,/books/OL11102775M,/works/OL9542721W,Abbe Pub Assn of Washington Dc,June 1994,,...,,,,,,,,,,
4,/books/OL11050043M,Harvard Planner,,9780865020580,0865020582,/books/OL11050043M,/works/OL9155050W,Executive.org,"December 21, 1994",,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,/books/OL10887803M,2000 Import and Export Market for Nickel in Vi...,,9780597709159,0597709157,/books/OL10887803M,/works/OL9461531W,Icon Group International,January 2001,,...,,,,,,,,,,
96,/books/OL12136146M,Win HMO/PPO Combo Pack with CDROM,,9781563633201,1563633205,/books/OL12136146M,/works/OL8539593W,Thomson Healthcare,October 1998,,...,,,,,,,,,,
97,/books/OL11824378M,The Rover Boys at School,,9781421841328,1421841320,/books/OL11824378M,/works/OL234952W,1st World Library - Literary Society,"June 15, 2007",,...,,,,,,,,,,
98,/books/OL11754548M,The Inside Of The Cup,,9781414284859,1414284853,/books/OL11754548M,/works/OL18521604W,IndyPublish.com,"May 31, 2004",,...,,,,,,,,,,


In [72]:
skim(filtered_df.sample(100).reset_index(drop=True))

In [71]:
print(filtered_df.columns.duplicated().any())  # Check for duplicate column names


False


In [62]:
sampled_df = filtered_df.sample(100, random_state=42)

In [60]:
filtered_df = filtered_df.reset_index(drop=True)
skimpy.skim(filtered_df.sample(10))  # Use only 100 rows

KeyError: np.int64(0)

In [None]:

# Run skimpy.skim() safely
skimpy.skim(final_df)

In [12]:
import skimpy
skimpy.skim(final_df)

AttributeError: 'DataFrame' object has no attribute 'dtype'