In [41]:
!pip install pandas xlsxwriter

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [42]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("records").getOrCreate()

In [43]:
spark.sql("DROP TABLE IF EXISTS demo.nyc.taxis_5_100M")

DataFrame[]

In [44]:
import os
iceberg_table_dir = "../warehouse/nyc/taxis_5_100M"
metadata_dir = f"{iceberg_table_dir}/metadata"
data_dir = f"{iceberg_table_dir}/data"
input_data_dir = f"../input_data"
analysis_info = []
records_before_op = 0

def append_to_file(file_path, msg):
    open_mode = "a"
    if not os.path.exists(file_path):
        open_mode = "w"

    # Open the CSV file in write mode
    with open(file_path, open_mode) as file:
        writer = csv.writer(file)
        
        if open_mode=="w":
            #writing header of the columns
            writer.writerows([list(msg.keys())])    

        row_values = [list(msg.values())]
        # Write the data to the CSV file
        writer.writerows(row_values)

def get_size():
    # List the metadata files
    manifest_pattern = re.compile(r".*-m\d+\.avro$")
    metadata_files = os.listdir(metadata_dir)
    
    # Initialize variables to store the sizes of different types of metadata files
    snap_avro_size = 0
    metadata_json_size = 0
    m_avro_size = 0

    data_dir_size = 0
    # get data dir size
    data_dir_files = os.listdir(data_dir)
    # print(data_dir_files)
    for filename in data_dir_files:
        file_path = os.path.join(data_dir, filename)
        data_dir_size += os.path.getsize(file_path) / 1024  # Convert size to KB
    
    # Iterate through the metadata files and calculate their sizes
    for file in metadata_files:
        file_path = os.path.join(metadata_dir, file)
        file_size_kb = os.path.getsize(file_path) / 1024  # Convert size to KB
        
        if file.startswith("snap-") and file.endswith(".avro"):
            snap_avro_size += file_size_kb
        elif file.endswith(".metadata.json"):
            metadata_json_size += file_size_kb
        elif manifest_pattern.match(file):
            m_avro_size += file_size_kb
    
    # Print the time taken and the sizes of the metadata files
    # print(f"Time taken to read Parquet files: {time_taken:.2f} seconds")
    # print(f"Size of snap-*.avro files: {snap_avro_size:.2f} KB")
    # print(f"Size of *.metadata.json files: {metadata_json_size:.2f} KB")
    # print(f"Size of *m{0-9}{1,}.avro files: {m_avro_size:.2f} KB")

    return {"data_dir_size": data_dir_size,"metadata_size": metadata_json_size,"snapshot_size": snap_avro_size,"manifest_size": m_avro_size}


In [45]:
from pyspark.sql.types import DoubleType, FloatType, LongType, StructType,StructField, StringType
schema = StructType([
  StructField("vendor_id", LongType(), True),
  StructField("trip_id", LongType(), True),
  StructField("trip_distance", FloatType(), True),
  StructField("fare_amount", DoubleType(), True),
  StructField("store_and_fwd_flag", StringType(), True)
])

df = spark.createDataFrame([], schema)
df.writeTo("demo.nyc.taxis_5_100M").create()

In [46]:
df = spark.table("demo.nyc.taxis_5_100M")
df.show()

+---------+-------+-------------+-----------+------------------+
|vendor_id|trip_id|trip_distance|fare_amount|store_and_fwd_flag|
+---------+-------+-------------+-----------+------------------+
+---------+-------+-------------+-----------+------------------+



## Perform operations

In [47]:
import time, csv
from pyspark.sql.functions import col, when
from pyspark.sql import functions as F
import os

input_data_dir = f"../input_data"
output_dir = f"../output"
analysis_info = []
records_before_op = 0
    
file_type = input("Enter input file type csv or parquet? : ")
file_type = file_type.lower().strip()
input_data_dir = os.path.join(input_data_dir, file_type)
input_files = os.listdir(input_data_dir)

analysis_file = os.path.join(output_dir, f"analysis_info_{file_type}.csv")
if os.path.exists(analysis_file):
    os.remove(analysis_file)

df = spark.table("demo.nyc.taxis_5_100M")
records_before_op = df.count()

digits = len(str(records_before_op))

# Track overall start time
for file in input_files:
    print(f"Started with file={file}")
    file_path = os.path.join(input_data_dir, file)

    # Record the start time for insertion
    insertion_start_time = time.time()
    
    if file_type == "parquet":
        # Read the Parquet files into a DataFrame
        df = spark.read.parquet(file_path)
    else:
        df = spark.read.csv(file_path, header=True)
        df = df.select(
            F.col("vendor_id").cast("long").alias("vendor_id"),
            F.col("trip_id").cast("long").alias("trip_id"),
            F.col("trip_distance").cast("float").alias("trip_distance"),
            F.col("fare_amount").cast("double").alias("fare_amount"),
            F.col("store_and_fwd_flag").cast("string").alias("store_and_fwd_flag")
        )
    
    rows = df.count()
    
    # Write the DataFrame to Parquet format (insert operation)
    df.writeTo("demo.nyc.taxis_5_100M").append()
    
    # Record the end time for insertion
    insertion_end_time = time.time()
    
    # Calculate total insertion time
    insertion_time_taken = insertion_end_time - insertion_start_time

    # Get details
    details = get_size()
    details["time_taken"] = f"{insertion_time_taken:.2f} sec"
    details["Operation"] = f"Inserted {rows} records"
    details["records_after_op"] = records_before_op + rows

    # Add insertion times to the details
    details["insertion_start_time"] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(insertion_start_time))
    details["insertion_end_time"] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(insertion_end_time))
    details["total_insertion_time"] = f"{insertion_time_taken:.2f} sec"

    # Print details before storing to CSV
    print(f"Details before appending to CSV: {details}")
    
    records_before_op += rows
    del insertion_start_time, insertion_end_time, df

    # Store this info in the CSV file
    append_to_file(analysis_file, details)
    analysis_info.append(details)
    
    print(f"Inserted {rows} records..")
    
    ##### Updated records #####
    current_digit = len(str(records_before_op))

    #### Don't perform update operation if the digit has not increased
    if current_digit <= digits:
        continue
    else:
        digits = current_digit
        df = spark.table("demo.nyc.taxis_5_100M")
    
    # Perform the update operation for the first 10 distinct vendor_ids
    for vendor_id in df.select('vendor_id').distinct().collect()[:10]:
        vendor_id = vendor_id[0]
        df = spark.table("demo.nyc.taxis_5_100M")

        st = time.time()
        # Perform an update operation: Set fare_amount to fare_amount+40.0 for vendor_id
        updated_df = df.withColumn("fare_amount", 
                                  when(col("vendor_id") == vendor_id, col("fare_amount")+40)
                                  .otherwise(col("fare_amount")))
        
        # Overwrite the updated DataFrame back to the table
        updated_df.writeTo("demo.nyc.taxis_5_100M").overwritePartitions()
        
        end = time.time() - st
        rows = updated_df.filter(updated_df['vendor_id']==vendor_id).count()

        # Get details
        details = get_size()
        details["time_taken"] = f"{end:.2f} sec"
        details["Operation"] = f"Updated {rows} records"
        details["records_after_op"] = records_before_op

        append_to_file(analysis_file, details)

        del df, st, end
        analysis_info.append(details)

# Function to get the size of the current table (e.g., row count or any other relevant metric)
def get_size():
    # Example: Return the size of the table and other useful metrics.
    # Update this function based on your needs. This is just an example.
    table_size = spark.table("demo.nyc.taxis_5_100M").count()  # Placeholder for table size retrieval
    return {"table_size": table_size}

# Function to append details to the CSV file
def append_to_file(file, details):
    with open(file, mode='a', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=details.keys())
        if f.tell() == 0:  # If file is empty, write headers
            writer.writeheader()
        writer.writerow(details)
        print(f"Appended details to {file}: {details}")  # Log the details being appended


Enter input file type csv or parquet? :  parquet


Started with file=records_1000000_part_100_1740479562.9411988.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 0.5, 'metadata_size': 1.0, 'snapshot_size': 1.0, 'manifest_size': 0.5, 'time_taken': '2.49 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 1000000, 'insertion_start_time': '2025-02-25 10:34:46', 'insertion_end_time': '2025-02-25 10:34:49', 'total_insertion_time': '2.49 sec'}
Inserted 1000000 records..


                                                                                

Started with file=records_1000000_part_10_1740479017.2732255.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 6.0, 'metadata_size': 6.5, 'snapshot_size': 6.5, 'manifest_size': 11.0, 'time_taken': '1.77 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 2000000, 'insertion_start_time': '2025-02-25 10:35:11', 'insertion_end_time': '2025-02-25 10:35:12', 'total_insertion_time': '1.77 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_11_1740479024.0898237.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 6.5, 'metadata_size': 7.0, 'snapshot_size': 7.0, 'manifest_size': 11.5, 'time_taken': '1.71 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 3000000, 'insertion_start_time': '2025-02-25 10:35:12', 'insertion_end_time': '2025-02-25 10:35:14', 'total_insertion_time': '1.71 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_12_1740479030.8139675.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 7.0, 'metadata_size': 7.5, 'snapshot_size': 7.5, 'manifest_size': 12.0, 'time_taken': '1.98 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 4000000, 'insertion_start_time': '2025-02-25 10:35:14', 'insertion_end_time': '2025-02-25 10:35:16', 'total_insertion_time': '1.98 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_13_1740479037.5945997.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 7.5, 'metadata_size': 8.0, 'snapshot_size': 8.0, 'manifest_size': 12.5, 'time_taken': '1.86 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 5000000, 'insertion_start_time': '2025-02-25 10:35:16', 'insertion_end_time': '2025-02-25 10:35:18', 'total_insertion_time': '1.86 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_14_1740479044.2930903.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 8.0, 'metadata_size': 8.5, 'snapshot_size': 8.5, 'manifest_size': 13.0, 'time_taken': '1.70 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 6000000, 'insertion_start_time': '2025-02-25 10:35:18', 'insertion_end_time': '2025-02-25 10:35:20', 'total_insertion_time': '1.70 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_15_1740479050.5188167.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 8.5, 'metadata_size': 9.0, 'snapshot_size': 9.0, 'manifest_size': 13.5, 'time_taken': '1.95 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 7000000, 'insertion_start_time': '2025-02-25 10:35:20', 'insertion_end_time': '2025-02-25 10:35:22', 'total_insertion_time': '1.95 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_16_1740479056.7795022.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 9.0, 'metadata_size': 9.5, 'snapshot_size': 9.5, 'manifest_size': 14.0, 'time_taken': '1.74 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 8000000, 'insertion_start_time': '2025-02-25 10:35:22', 'insertion_end_time': '2025-02-25 10:35:24', 'total_insertion_time': '1.74 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_17_1740479063.0324295.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 9.5, 'metadata_size': 10.0, 'snapshot_size': 10.0, 'manifest_size': 14.5, 'time_taken': '1.81 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 9000000, 'insertion_start_time': '2025-02-25 10:35:24', 'insertion_end_time': '2025-02-25 10:35:26', 'total_insertion_time': '1.81 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_18_1740479069.140782.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 10.0, 'metadata_size': 10.5, 'snapshot_size': 10.5, 'manifest_size': 15.0, 'time_taken': '1.82 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 10000000, 'insertion_start_time': '2025-02-25 10:35:26', 'insertion_end_time': '2025-02-25 10:35:28', 'total_insertion_time': '1.82 sec'}
Inserted 1000000 records..


                                                                                

Started with file=records_1000000_part_19_1740479075.2074947.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 60.5, 'metadata_size': 16.0, 'snapshot_size': 16.0, 'manifest_size': 30.0, 'time_taken': '1.98 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 11000000, 'insertion_start_time': '2025-02-25 10:37:02', 'insertion_end_time': '2025-02-25 10:37:04', 'total_insertion_time': '1.98 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_1_1740478958.676483.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 61.0, 'metadata_size': 16.5, 'snapshot_size': 16.5, 'manifest_size': 30.5, 'time_taken': '1.77 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 12000000, 'insertion_start_time': '2025-02-25 10:37:04', 'insertion_end_time': '2025-02-25 10:37:06', 'total_insertion_time': '1.77 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_20_1740479081.4518008.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 61.5, 'metadata_size': 17.0, 'snapshot_size': 17.0, 'manifest_size': 31.0, 'time_taken': '1.91 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 13000000, 'insertion_start_time': '2025-02-25 10:37:06', 'insertion_end_time': '2025-02-25 10:37:08', 'total_insertion_time': '1.91 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_21_1740479087.641574.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 62.0, 'metadata_size': 17.5, 'snapshot_size': 17.5, 'manifest_size': 31.5, 'time_taken': '1.74 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 14000000, 'insertion_start_time': '2025-02-25 10:37:09', 'insertion_end_time': '2025-02-25 10:37:10', 'total_insertion_time': '1.74 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_22_1740479093.7348895.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 62.5, 'metadata_size': 18.0, 'snapshot_size': 18.0, 'manifest_size': 32.0, 'time_taken': '1.73 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 15000000, 'insertion_start_time': '2025-02-25 10:37:11', 'insertion_end_time': '2025-02-25 10:37:12', 'total_insertion_time': '1.73 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_23_1740479099.6942494.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 63.0, 'metadata_size': 18.5, 'snapshot_size': 18.5, 'manifest_size': 32.5, 'time_taken': '1.67 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 16000000, 'insertion_start_time': '2025-02-25 10:37:13', 'insertion_end_time': '2025-02-25 10:37:15', 'total_insertion_time': '1.67 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_24_1740479105.6955447.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 63.5, 'metadata_size': 19.0, 'snapshot_size': 19.0, 'manifest_size': 33.0, 'time_taken': '1.75 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 17000000, 'insertion_start_time': '2025-02-25 10:37:15', 'insertion_end_time': '2025-02-25 10:37:17', 'total_insertion_time': '1.75 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_25_1740479111.7442427.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 64.0, 'metadata_size': 19.5, 'snapshot_size': 19.5, 'manifest_size': 33.5, 'time_taken': '1.81 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 18000000, 'insertion_start_time': '2025-02-25 10:37:17', 'insertion_end_time': '2025-02-25 10:37:19', 'total_insertion_time': '1.81 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_26_1740479117.7690225.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 64.5, 'metadata_size': 20.0, 'snapshot_size': 20.0, 'manifest_size': 34.0, 'time_taken': '1.77 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 19000000, 'insertion_start_time': '2025-02-25 10:37:19', 'insertion_end_time': '2025-02-25 10:37:21', 'total_insertion_time': '1.77 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_27_1740479123.752309.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 65.0, 'metadata_size': 20.5, 'snapshot_size': 20.5, 'manifest_size': 34.5, 'time_taken': '1.75 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 20000000, 'insertion_start_time': '2025-02-25 10:37:21', 'insertion_end_time': '2025-02-25 10:37:23', 'total_insertion_time': '1.75 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_28_1740479129.7383063.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 65.5, 'metadata_size': 21.0, 'snapshot_size': 21.0, 'manifest_size': 35.0, 'time_taken': '1.69 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 21000000, 'insertion_start_time': '2025-02-25 10:37:24', 'insertion_end_time': '2025-02-25 10:37:25', 'total_insertion_time': '1.69 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_29_1740479135.6430395.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 66.0, 'metadata_size': 21.5, 'snapshot_size': 21.5, 'manifest_size': 35.5, 'time_taken': '1.90 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 22000000, 'insertion_start_time': '2025-02-25 10:37:26', 'insertion_end_time': '2025-02-25 10:37:28', 'total_insertion_time': '1.90 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_2_1740478965.0064826.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 66.5, 'metadata_size': 22.0, 'snapshot_size': 22.0, 'manifest_size': 36.0, 'time_taken': '2.72 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 23000000, 'insertion_start_time': '2025-02-25 10:37:28', 'insertion_end_time': '2025-02-25 10:37:31', 'total_insertion_time': '2.72 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_30_1740479141.7463803.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 67.0, 'metadata_size': 22.5, 'snapshot_size': 22.5, 'manifest_size': 36.5, 'time_taken': '1.77 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 24000000, 'insertion_start_time': '2025-02-25 10:37:31', 'insertion_end_time': '2025-02-25 10:37:33', 'total_insertion_time': '1.77 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_31_1740479147.6707687.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 67.5, 'metadata_size': 23.0, 'snapshot_size': 23.0, 'manifest_size': 37.0, 'time_taken': '1.75 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 25000000, 'insertion_start_time': '2025-02-25 10:37:33', 'insertion_end_time': '2025-02-25 10:37:35', 'total_insertion_time': '1.75 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_32_1740479153.630442.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 68.0, 'metadata_size': 23.5, 'snapshot_size': 23.5, 'manifest_size': 37.5, 'time_taken': '1.75 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 26000000, 'insertion_start_time': '2025-02-25 10:37:36', 'insertion_end_time': '2025-02-25 10:37:37', 'total_insertion_time': '1.75 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_33_1740479159.7014382.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 68.5, 'metadata_size': 24.0, 'snapshot_size': 24.0, 'manifest_size': 38.0, 'time_taken': '1.91 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 27000000, 'insertion_start_time': '2025-02-25 10:37:38', 'insertion_end_time': '2025-02-25 10:37:40', 'total_insertion_time': '1.91 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_34_1740479165.6889257.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 69.0, 'metadata_size': 24.5, 'snapshot_size': 24.5, 'manifest_size': 38.5, 'time_taken': '1.81 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 28000000, 'insertion_start_time': '2025-02-25 10:37:40', 'insertion_end_time': '2025-02-25 10:37:42', 'total_insertion_time': '1.81 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_35_1740479171.6570647.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 69.5, 'metadata_size': 25.0, 'snapshot_size': 25.0, 'manifest_size': 39.0, 'time_taken': '1.77 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 29000000, 'insertion_start_time': '2025-02-25 10:37:42', 'insertion_end_time': '2025-02-25 10:37:44', 'total_insertion_time': '1.77 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_36_1740479177.6566036.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 70.0, 'metadata_size': 25.5, 'snapshot_size': 25.5, 'manifest_size': 39.5, 'time_taken': '3.21 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 30000000, 'insertion_start_time': '2025-02-25 10:37:45', 'insertion_end_time': '2025-02-25 10:37:48', 'total_insertion_time': '3.21 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_37_1740479183.6467981.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 70.5, 'metadata_size': 26.0, 'snapshot_size': 26.0, 'manifest_size': 40.0, 'time_taken': '1.83 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 31000000, 'insertion_start_time': '2025-02-25 10:37:48', 'insertion_end_time': '2025-02-25 10:37:50', 'total_insertion_time': '1.83 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_38_1740479189.635899.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 71.0, 'metadata_size': 26.5, 'snapshot_size': 26.5, 'manifest_size': 40.5, 'time_taken': '1.75 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 32000000, 'insertion_start_time': '2025-02-25 10:37:51', 'insertion_end_time': '2025-02-25 10:37:52', 'total_insertion_time': '1.75 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_39_1740479195.6901538.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 71.5, 'metadata_size': 27.0, 'snapshot_size': 27.0, 'manifest_size': 41.0, 'time_taken': '1.67 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 33000000, 'insertion_start_time': '2025-02-25 10:37:53', 'insertion_end_time': '2025-02-25 10:37:55', 'total_insertion_time': '1.67 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_3_1740478971.4056692.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 72.0, 'metadata_size': 27.5, 'snapshot_size': 27.5, 'manifest_size': 41.5, 'time_taken': '2.12 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 34000000, 'insertion_start_time': '2025-02-25 10:37:55', 'insertion_end_time': '2025-02-25 10:37:57', 'total_insertion_time': '2.12 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_40_1740479201.7212226.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 72.5, 'metadata_size': 28.0, 'snapshot_size': 28.0, 'manifest_size': 42.0, 'time_taken': '1.55 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 35000000, 'insertion_start_time': '2025-02-25 10:37:58', 'insertion_end_time': '2025-02-25 10:37:59', 'total_insertion_time': '1.55 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_41_1740479207.6827612.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 73.0, 'metadata_size': 28.5, 'snapshot_size': 28.5, 'manifest_size': 42.5, 'time_taken': '1.71 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 36000000, 'insertion_start_time': '2025-02-25 10:38:00', 'insertion_end_time': '2025-02-25 10:38:02', 'total_insertion_time': '1.71 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_42_1740479213.6259525.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 73.5, 'metadata_size': 29.0, 'snapshot_size': 29.0, 'manifest_size': 43.0, 'time_taken': '1.65 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 37000000, 'insertion_start_time': '2025-02-25 10:38:02', 'insertion_end_time': '2025-02-25 10:38:04', 'total_insertion_time': '1.65 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_43_1740479219.5911868.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 74.0, 'metadata_size': 29.5, 'snapshot_size': 29.5, 'manifest_size': 43.5, 'time_taken': '1.59 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 38000000, 'insertion_start_time': '2025-02-25 10:38:04', 'insertion_end_time': '2025-02-25 10:38:06', 'total_insertion_time': '1.59 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_44_1740479225.5836053.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 74.5, 'metadata_size': 30.0, 'snapshot_size': 30.0, 'manifest_size': 44.0, 'time_taken': '1.83 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 39000000, 'insertion_start_time': '2025-02-25 10:38:07', 'insertion_end_time': '2025-02-25 10:38:08', 'total_insertion_time': '1.83 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_45_1740479231.5547447.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 75.0, 'metadata_size': 30.5, 'snapshot_size': 30.5, 'manifest_size': 44.5, 'time_taken': '1.97 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 40000000, 'insertion_start_time': '2025-02-25 10:38:09', 'insertion_end_time': '2025-02-25 10:38:11', 'total_insertion_time': '1.97 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_46_1740479237.8844259.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 75.5, 'metadata_size': 31.0, 'snapshot_size': 31.0, 'manifest_size': 45.0, 'time_taken': '1.86 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 41000000, 'insertion_start_time': '2025-02-25 10:38:12', 'insertion_end_time': '2025-02-25 10:38:13', 'total_insertion_time': '1.86 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_47_1740479244.9865994.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 76.0, 'metadata_size': 31.5, 'snapshot_size': 31.5, 'manifest_size': 45.5, 'time_taken': '1.76 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 42000000, 'insertion_start_time': '2025-02-25 10:38:14', 'insertion_end_time': '2025-02-25 10:38:16', 'total_insertion_time': '1.76 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_48_1740479251.771145.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 76.5, 'metadata_size': 32.0, 'snapshot_size': 32.0, 'manifest_size': 46.0, 'time_taken': '2.05 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 43000000, 'insertion_start_time': '2025-02-25 10:38:16', 'insertion_end_time': '2025-02-25 10:38:18', 'total_insertion_time': '2.05 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_49_1740479257.7559133.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 77.0, 'metadata_size': 32.5, 'snapshot_size': 32.5, 'manifest_size': 46.5, 'time_taken': '2.14 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 44000000, 'insertion_start_time': '2025-02-25 10:38:19', 'insertion_end_time': '2025-02-25 10:38:21', 'total_insertion_time': '2.14 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_4_1740478977.8232985.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 77.5, 'metadata_size': 33.0, 'snapshot_size': 33.0, 'manifest_size': 47.0, 'time_taken': '1.89 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 45000000, 'insertion_start_time': '2025-02-25 10:38:22', 'insertion_end_time': '2025-02-25 10:38:24', 'total_insertion_time': '1.89 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_50_1740479263.7538955.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 78.0, 'metadata_size': 33.5, 'snapshot_size': 33.5, 'manifest_size': 47.5, 'time_taken': '1.77 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 46000000, 'insertion_start_time': '2025-02-25 10:38:24', 'insertion_end_time': '2025-02-25 10:38:26', 'total_insertion_time': '1.77 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_51_1740479269.7061412.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 78.5, 'metadata_size': 34.0, 'snapshot_size': 34.0, 'manifest_size': 48.0, 'time_taken': '2.00 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 47000000, 'insertion_start_time': '2025-02-25 10:38:27', 'insertion_end_time': '2025-02-25 10:38:29', 'total_insertion_time': '2.00 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_52_1740479275.6108172.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 79.0, 'metadata_size': 34.5, 'snapshot_size': 34.5, 'manifest_size': 48.5, 'time_taken': '1.90 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 48000000, 'insertion_start_time': '2025-02-25 10:38:29', 'insertion_end_time': '2025-02-25 10:38:31', 'total_insertion_time': '1.90 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_53_1740479281.611671.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 79.5, 'metadata_size': 35.0, 'snapshot_size': 35.0, 'manifest_size': 49.0, 'time_taken': '1.86 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 49000000, 'insertion_start_time': '2025-02-25 10:38:32', 'insertion_end_time': '2025-02-25 10:38:34', 'total_insertion_time': '1.86 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_54_1740479287.6296926.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 80.0, 'metadata_size': 35.5, 'snapshot_size': 35.5, 'manifest_size': 49.5, 'time_taken': '1.78 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 50000000, 'insertion_start_time': '2025-02-25 10:38:34', 'insertion_end_time': '2025-02-25 10:38:36', 'total_insertion_time': '1.78 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_55_1740479293.5684905.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 80.5, 'metadata_size': 36.0, 'snapshot_size': 36.0, 'manifest_size': 50.0, 'time_taken': '1.78 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 51000000, 'insertion_start_time': '2025-02-25 10:38:37', 'insertion_end_time': '2025-02-25 10:38:39', 'total_insertion_time': '1.78 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_56_1740479299.7135344.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 81.0, 'metadata_size': 36.5, 'snapshot_size': 36.5, 'manifest_size': 50.5, 'time_taken': '1.78 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 52000000, 'insertion_start_time': '2025-02-25 10:38:39', 'insertion_end_time': '2025-02-25 10:38:41', 'total_insertion_time': '1.78 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_57_1740479305.6371973.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 81.5, 'metadata_size': 37.0, 'snapshot_size': 37.0, 'manifest_size': 51.0, 'time_taken': '1.73 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 53000000, 'insertion_start_time': '2025-02-25 10:38:42', 'insertion_end_time': '2025-02-25 10:38:43', 'total_insertion_time': '1.73 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_58_1740479311.6046054.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 82.0, 'metadata_size': 37.5, 'snapshot_size': 37.5, 'manifest_size': 51.5, 'time_taken': '1.78 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 54000000, 'insertion_start_time': '2025-02-25 10:38:44', 'insertion_end_time': '2025-02-25 10:38:46', 'total_insertion_time': '1.78 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_59_1740479317.5237834.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 82.5, 'metadata_size': 38.0, 'snapshot_size': 38.0, 'manifest_size': 52.0, 'time_taken': '1.87 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 55000000, 'insertion_start_time': '2025-02-25 10:38:47', 'insertion_end_time': '2025-02-25 10:38:48', 'total_insertion_time': '1.87 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_5_1740478983.9570854.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 83.0, 'metadata_size': 38.5, 'snapshot_size': 38.5, 'manifest_size': 52.5, 'time_taken': '2.02 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 56000000, 'insertion_start_time': '2025-02-25 10:38:49', 'insertion_end_time': '2025-02-25 10:38:51', 'total_insertion_time': '2.02 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_60_1740479323.544201.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 83.5, 'metadata_size': 39.0, 'snapshot_size': 39.0, 'manifest_size': 53.0, 'time_taken': '2.14 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 57000000, 'insertion_start_time': '2025-02-25 10:38:52', 'insertion_end_time': '2025-02-25 10:38:54', 'total_insertion_time': '2.14 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_61_1740479329.472506.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 84.0, 'metadata_size': 39.5, 'snapshot_size': 39.5, 'manifest_size': 53.5, 'time_taken': '1.77 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 58000000, 'insertion_start_time': '2025-02-25 10:38:55', 'insertion_end_time': '2025-02-25 10:38:56', 'total_insertion_time': '1.77 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_62_1740479335.399298.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 84.5, 'metadata_size': 40.0, 'snapshot_size': 40.0, 'manifest_size': 54.0, 'time_taken': '1.80 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 59000000, 'insertion_start_time': '2025-02-25 10:38:57', 'insertion_end_time': '2025-02-25 10:38:59', 'total_insertion_time': '1.80 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_63_1740479341.4143083.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 85.0, 'metadata_size': 40.5, 'snapshot_size': 40.5, 'manifest_size': 54.5, 'time_taken': '1.78 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 60000000, 'insertion_start_time': '2025-02-25 10:39:00', 'insertion_end_time': '2025-02-25 10:39:01', 'total_insertion_time': '1.78 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_64_1740479347.381167.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 85.5, 'metadata_size': 41.0, 'snapshot_size': 41.0, 'manifest_size': 55.0, 'time_taken': '1.77 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 61000000, 'insertion_start_time': '2025-02-25 10:39:02', 'insertion_end_time': '2025-02-25 10:39:04', 'total_insertion_time': '1.77 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_65_1740479353.325199.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 86.0, 'metadata_size': 41.5, 'snapshot_size': 41.5, 'manifest_size': 55.5, 'time_taken': '1.89 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 62000000, 'insertion_start_time': '2025-02-25 10:39:05', 'insertion_end_time': '2025-02-25 10:39:06', 'total_insertion_time': '1.89 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_66_1740479359.3133848.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 86.5, 'metadata_size': 42.0, 'snapshot_size': 42.0, 'manifest_size': 56.0, 'time_taken': '1.78 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 63000000, 'insertion_start_time': '2025-02-25 10:39:07', 'insertion_end_time': '2025-02-25 10:39:09', 'total_insertion_time': '1.78 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_67_1740479365.3033757.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 87.0, 'metadata_size': 42.5, 'snapshot_size': 42.5, 'manifest_size': 56.5, 'time_taken': '1.69 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 64000000, 'insertion_start_time': '2025-02-25 10:39:10', 'insertion_end_time': '2025-02-25 10:39:11', 'total_insertion_time': '1.69 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_68_1740479371.2812757.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 87.5, 'metadata_size': 43.0, 'snapshot_size': 43.0, 'manifest_size': 57.0, 'time_taken': '2.03 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 65000000, 'insertion_start_time': '2025-02-25 10:39:12', 'insertion_end_time': '2025-02-25 10:39:14', 'total_insertion_time': '2.03 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_69_1740479377.252577.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 88.0, 'metadata_size': 43.5, 'snapshot_size': 43.5, 'manifest_size': 57.5, 'time_taken': '2.14 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 66000000, 'insertion_start_time': '2025-02-25 10:39:15', 'insertion_end_time': '2025-02-25 10:39:17', 'total_insertion_time': '2.14 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_6_1740478990.108612.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 88.5, 'metadata_size': 44.0, 'snapshot_size': 44.0, 'manifest_size': 58.0, 'time_taken': '2.26 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 67000000, 'insertion_start_time': '2025-02-25 10:39:18', 'insertion_end_time': '2025-02-25 10:39:20', 'total_insertion_time': '2.26 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_70_1740479383.2026403.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 89.0, 'metadata_size': 44.5, 'snapshot_size': 44.5, 'manifest_size': 58.5, 'time_taken': '1.62 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 68000000, 'insertion_start_time': '2025-02-25 10:39:21', 'insertion_end_time': '2025-02-25 10:39:22', 'total_insertion_time': '1.62 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_71_1740479389.1730509.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 89.5, 'metadata_size': 45.0, 'snapshot_size': 45.0, 'manifest_size': 59.0, 'time_taken': '1.88 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 69000000, 'insertion_start_time': '2025-02-25 10:39:23', 'insertion_end_time': '2025-02-25 10:39:25', 'total_insertion_time': '1.88 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_72_1740479395.1066487.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 90.0, 'metadata_size': 45.5, 'snapshot_size': 45.5, 'manifest_size': 59.5, 'time_taken': '1.80 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 70000000, 'insertion_start_time': '2025-02-25 10:39:26', 'insertion_end_time': '2025-02-25 10:39:27', 'total_insertion_time': '1.80 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_73_1740479401.0903814.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 90.5, 'metadata_size': 46.0, 'snapshot_size': 46.0, 'manifest_size': 60.0, 'time_taken': '1.86 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 71000000, 'insertion_start_time': '2025-02-25 10:39:28', 'insertion_end_time': '2025-02-25 10:39:30', 'total_insertion_time': '1.86 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_74_1740479407.1024837.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 91.0, 'metadata_size': 46.5, 'snapshot_size': 46.5, 'manifest_size': 60.5, 'time_taken': '1.97 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 72000000, 'insertion_start_time': '2025-02-25 10:39:31', 'insertion_end_time': '2025-02-25 10:39:33', 'total_insertion_time': '1.97 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_75_1740479413.0137076.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 91.5, 'metadata_size': 47.0, 'snapshot_size': 47.0, 'manifest_size': 61.0, 'time_taken': '1.83 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 73000000, 'insertion_start_time': '2025-02-25 10:39:33', 'insertion_end_time': '2025-02-25 10:39:35', 'total_insertion_time': '1.83 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_76_1740479419.3097925.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 92.0, 'metadata_size': 47.5, 'snapshot_size': 47.5, 'manifest_size': 61.5, 'time_taken': '1.82 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 74000000, 'insertion_start_time': '2025-02-25 10:39:36', 'insertion_end_time': '2025-02-25 10:39:38', 'total_insertion_time': '1.82 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_77_1740479425.2705803.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 92.5, 'metadata_size': 48.0, 'snapshot_size': 48.0, 'manifest_size': 62.0, 'time_taken': '1.75 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 75000000, 'insertion_start_time': '2025-02-25 10:39:38', 'insertion_end_time': '2025-02-25 10:39:40', 'total_insertion_time': '1.75 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_78_1740479431.3048425.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 93.0, 'metadata_size': 48.5, 'snapshot_size': 48.5, 'manifest_size': 62.5, 'time_taken': '1.79 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 76000000, 'insertion_start_time': '2025-02-25 10:39:41', 'insertion_end_time': '2025-02-25 10:39:43', 'total_insertion_time': '1.79 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_79_1740479437.2539167.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 93.5, 'metadata_size': 49.0, 'snapshot_size': 49.0, 'manifest_size': 63.0, 'time_taken': '1.74 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 77000000, 'insertion_start_time': '2025-02-25 10:39:43', 'insertion_end_time': '2025-02-25 10:39:45', 'total_insertion_time': '1.74 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_7_1740478996.495656.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 94.0, 'metadata_size': 49.5, 'snapshot_size': 49.5, 'manifest_size': 63.5, 'time_taken': '4.01 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 78000000, 'insertion_start_time': '2025-02-25 10:39:46', 'insertion_end_time': '2025-02-25 10:39:50', 'total_insertion_time': '4.01 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_80_1740479443.1998882.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 94.5, 'metadata_size': 50.0, 'snapshot_size': 50.0, 'manifest_size': 64.0, 'time_taken': '1.73 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 79000000, 'insertion_start_time': '2025-02-25 10:39:51', 'insertion_end_time': '2025-02-25 10:39:52', 'total_insertion_time': '1.73 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_81_1740479449.116602.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 95.0, 'metadata_size': 50.5, 'snapshot_size': 50.5, 'manifest_size': 64.5, 'time_taken': '2.63 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 80000000, 'insertion_start_time': '2025-02-25 10:39:53', 'insertion_end_time': '2025-02-25 10:39:56', 'total_insertion_time': '2.63 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_82_1740479455.0647976.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 95.5, 'metadata_size': 51.0, 'snapshot_size': 51.0, 'manifest_size': 65.0, 'time_taken': '1.81 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 81000000, 'insertion_start_time': '2025-02-25 10:39:56', 'insertion_end_time': '2025-02-25 10:39:58', 'total_insertion_time': '1.81 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_83_1740479461.0464404.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 96.0, 'metadata_size': 51.5, 'snapshot_size': 51.5, 'manifest_size': 65.5, 'time_taken': '1.95 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 82000000, 'insertion_start_time': '2025-02-25 10:39:59', 'insertion_end_time': '2025-02-25 10:40:01', 'total_insertion_time': '1.95 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_84_1740479466.998438.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 96.5, 'metadata_size': 52.0, 'snapshot_size': 52.0, 'manifest_size': 66.0, 'time_taken': '2.14 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 83000000, 'insertion_start_time': '2025-02-25 10:40:02', 'insertion_end_time': '2025-02-25 10:40:04', 'total_insertion_time': '2.14 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_85_1740479472.9264255.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 97.0, 'metadata_size': 52.5, 'snapshot_size': 52.5, 'manifest_size': 66.5, 'time_taken': '1.76 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 84000000, 'insertion_start_time': '2025-02-25 10:40:05', 'insertion_end_time': '2025-02-25 10:40:07', 'total_insertion_time': '1.76 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_86_1740479478.9221923.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 97.5, 'metadata_size': 53.0, 'snapshot_size': 53.0, 'manifest_size': 67.0, 'time_taken': '1.67 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 85000000, 'insertion_start_time': '2025-02-25 10:40:08', 'insertion_end_time': '2025-02-25 10:40:09', 'total_insertion_time': '1.67 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_87_1740479484.8694992.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 98.0, 'metadata_size': 53.5, 'snapshot_size': 53.5, 'manifest_size': 67.5, 'time_taken': '1.71 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 86000000, 'insertion_start_time': '2025-02-25 10:40:10', 'insertion_end_time': '2025-02-25 10:40:12', 'total_insertion_time': '1.71 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_88_1740479491.0889428.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 98.5, 'metadata_size': 54.0, 'snapshot_size': 54.0, 'manifest_size': 68.0, 'time_taken': '1.70 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 87000000, 'insertion_start_time': '2025-02-25 10:40:13', 'insertion_end_time': '2025-02-25 10:40:14', 'total_insertion_time': '1.70 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_89_1740479497.0018194.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 99.0, 'metadata_size': 54.5, 'snapshot_size': 54.5, 'manifest_size': 68.5, 'time_taken': '1.97 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 88000000, 'insertion_start_time': '2025-02-25 10:40:15', 'insertion_end_time': '2025-02-25 10:40:17', 'total_insertion_time': '1.97 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_8_1740479003.5467188.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 99.5, 'metadata_size': 55.0, 'snapshot_size': 55.0, 'manifest_size': 69.0, 'time_taken': '1.78 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 89000000, 'insertion_start_time': '2025-02-25 10:40:18', 'insertion_end_time': '2025-02-25 10:40:20', 'total_insertion_time': '1.78 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_90_1740479503.047581.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 100.0, 'metadata_size': 55.5, 'snapshot_size': 55.5, 'manifest_size': 69.5, 'time_taken': '1.77 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 90000000, 'insertion_start_time': '2025-02-25 10:40:21', 'insertion_end_time': '2025-02-25 10:40:22', 'total_insertion_time': '1.77 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_91_1740479509.0142138.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 100.5, 'metadata_size': 56.0, 'snapshot_size': 56.0, 'manifest_size': 70.0, 'time_taken': '1.56 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 91000000, 'insertion_start_time': '2025-02-25 10:40:23', 'insertion_end_time': '2025-02-25 10:40:25', 'total_insertion_time': '1.56 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_92_1740479515.0333776.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 101.0, 'metadata_size': 56.5, 'snapshot_size': 56.5, 'manifest_size': 70.5, 'time_taken': '1.70 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 92000000, 'insertion_start_time': '2025-02-25 10:40:26', 'insertion_end_time': '2025-02-25 10:40:27', 'total_insertion_time': '1.70 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_93_1740479521.0087852.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 101.5, 'metadata_size': 57.0, 'snapshot_size': 57.0, 'manifest_size': 71.0, 'time_taken': '1.59 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 93000000, 'insertion_start_time': '2025-02-25 10:40:28', 'insertion_end_time': '2025-02-25 10:40:30', 'total_insertion_time': '1.59 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_94_1740479526.9835107.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 102.0, 'metadata_size': 57.5, 'snapshot_size': 57.5, 'manifest_size': 71.5, 'time_taken': '1.79 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 94000000, 'insertion_start_time': '2025-02-25 10:40:31', 'insertion_end_time': '2025-02-25 10:40:33', 'total_insertion_time': '1.79 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_95_1740479532.9442458.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 102.5, 'metadata_size': 58.0, 'snapshot_size': 58.0, 'manifest_size': 72.0, 'time_taken': '1.77 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 95000000, 'insertion_start_time': '2025-02-25 10:40:33', 'insertion_end_time': '2025-02-25 10:40:35', 'total_insertion_time': '1.77 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_96_1740479538.9636757.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 103.0, 'metadata_size': 58.5, 'snapshot_size': 58.5, 'manifest_size': 72.5, 'time_taken': '1.79 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 96000000, 'insertion_start_time': '2025-02-25 10:40:36', 'insertion_end_time': '2025-02-25 10:40:38', 'total_insertion_time': '1.79 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_97_1740479544.9426436.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 103.5, 'metadata_size': 59.0, 'snapshot_size': 59.0, 'manifest_size': 73.0, 'time_taken': '1.82 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 97000000, 'insertion_start_time': '2025-02-25 10:40:39', 'insertion_end_time': '2025-02-25 10:40:41', 'total_insertion_time': '1.82 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_98_1740479550.9369109.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 104.0, 'metadata_size': 59.5, 'snapshot_size': 59.5, 'manifest_size': 73.5, 'time_taken': '2.05 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 98000000, 'insertion_start_time': '2025-02-25 10:40:42', 'insertion_end_time': '2025-02-25 10:40:44', 'total_insertion_time': '2.05 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_99_1740479556.937529.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 104.5, 'metadata_size': 60.0, 'snapshot_size': 60.0, 'manifest_size': 74.0, 'time_taken': '1.63 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 99000000, 'insertion_start_time': '2025-02-25 10:40:45', 'insertion_end_time': '2025-02-25 10:40:46', 'total_insertion_time': '1.63 sec'}
Inserted 1000000 records..
Started with file=records_1000000_part_9_1740479010.487397.parquet


                                                                                

Details before appending to CSV: {'data_dir_size': 105.0, 'metadata_size': 60.5, 'snapshot_size': 60.5, 'manifest_size': 74.5, 'time_taken': '1.63 sec', 'Operation': 'Inserted 1000000 records', 'records_after_op': 100000000, 'insertion_start_time': '2025-02-25 10:40:47', 'insertion_end_time': '2025-02-25 10:40:49', 'total_insertion_time': '1.63 sec'}
Inserted 1000000 records..


                                                                                