### Install Required Libraries

In [3]:
#pyspark==3.4.1
!pip install python-dotenv
!pip install faker
#pandas==2.0.3
!pip install azure-storage-file-datalake

## Import Necessary Libraries
For this project we will be using the Faker Library to generate random data.

In [None]:
import os
import uuid
from datetime import datetime
from typing import List, Dict
from typing import Iterator

import pandas as pd
from faker import Faker

StatementMeta(, 88a22cf6-2786-4de4-bf71-3e9b5d3b2d5d, 28, Finished, Available, Finished)

## Function Generate Random Data
This generator function produces the random task-log records used in this project, yielding them one batch at a time.

In [None]:
def generate_task_logs(num_records: int, batch_size: int = 100000) -> Iterator[List[Dict]]:
    """Lazily generate fake task-log records in batches using Faker.

    This is a generator: no records are built until it is iterated, and only
    the batch currently being filled is held in memory.

    Args:
        num_records: Total number of records to generate. Zero or a negative
            value produces no batches.
        batch_size: Maximum number of records per yielded batch. Must be >= 1.

    Yields:
        Lists of record dicts. Every list holds ``batch_size`` records except
        possibly the last one, which holds the remainder. Each record has the
        keys ``task_id``, ``project_name``, ``employee_id``, ``task_type``,
        ``priority``, ``status``, ``hours_logged``, ``start_time`` and
        ``end_time``; both timestamps are floats expressed in milliseconds.

    Raises:
        ValueError: If ``batch_size`` is less than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A non-positive batch size would make every single record its own batch
    # (the ">= batch_size" check below would always be true), so reject it.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    fake = Faker()
    records = []

    # Choice lists are loop-invariant, so build them once up front.
    task_types = ['Development', 'Testing', 'Design', 'Documentation']
    priority_levels = ['High', 'Medium', 'Low']
    status_options = ['Completed', 'Failed', 'In Progress']

    for _ in range(num_records):
        # Generate random timestamps with end_time always after start_time
        start_time = fake.date_time_this_year().timestamp() * 1000  # Convert to milliseconds
        end_time = start_time + (fake.random_int(min=1800, max=28800) * 1000)  # Add 30 mins to 8 hours

        record = {
            'task_id': str(uuid.uuid4()),
            'project_name': fake.word(),
            'employee_id': fake.random_int(min=1000, max=9999),
            'task_type': fake.random_element(task_types),
            'priority': fake.random_element(priority_levels),
            'status': fake.random_element(status_options),
            # NOTE: drawn independently of the start/end interval above, so it
            # is not guaranteed to match the elapsed duration.
            'hours_logged': fake.random_int(min=1, max=8),
            'start_time': start_time,
            'end_time': end_time
        }
        records.append(record)

        # Hand over a full batch and start a fresh list so memory stays bounded.
        if len(records) >= batch_size:
            yield records
            records = []

    # Flush the final, partially filled batch (if any).
    if records:
        yield records

StatementMeta(, 88a22cf6-2786-4de4-bf71-3e9b5d3b2d5d, 29, Finished, Available, Finished)

## Save as Parquet in Lakehouse

In [None]:
def save_to_parquet(df: pd.DataFrame, output_dir: str, batch_num: int) -> str:
    """Write one batch DataFrame to a timestamped parquet file.

    The file is named ``task_logs_<YYYYmmdd_HHMMSS>_batch_<NNNN>.parquet``,
    where the timestamp is the current UTC time and ``NNNN`` is ``batch_num``
    zero-padded to four digits.

    Args:
        df: The batch to persist; written without its index.
        output_dir: Target directory. Created (including parents) if missing.
        batch_num: Sequence number of the batch, used in the file name.

    Returns:
        The full path of the parquet file that was written.

    Raises:
        Exception: Any error from directory creation or the parquet write is
            reported on stdout and then re-raised unchanged.
    """
    # Function-scope import: ``timezone`` is needed for an aware UTC "now";
    # ``datetime.utcnow()`` is deprecated as of Python 3.12.
    from datetime import timezone

    try:
        # Create directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # Generate filename with timestamp (UTC, same format as before)
        timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
        file_name = f"task_logs_{timestamp}_batch_{batch_num:04d}.parquet"
        file_path = os.path.join(output_dir, file_name)

        # Save as parquet
        df.to_parquet(file_path, index=False)

        print(f"Successfully saved batch {batch_num:04d} to {file_path}")
        return file_path

    except Exception as e:
        # Log for notebook visibility, then propagate so the caller can stop.
        print(f"Error saving parquet file: {str(e)}")
        raise

StatementMeta(, 88a22cf6-2786-4de4-bf71-3e9b5d3b2d5d, 30, Finished, Available, Finished)

## Run the Full Pipeline

In [None]:
def main(
    total_records: int = 20_000_000,
    batch_size: int = 100_000,
    output_directory: str = "/lakehouse/default/Files/data_log/",
) -> int:
    """Generate fake task logs and write them to parquet, one file per batch.

    Pulls batches from ``generate_task_logs``, converts each to a DataFrame,
    writes it with ``save_to_parquet`` and prints a progress line after every
    batch. Calling ``main()`` with no arguments reproduces the original
    configuration (20 million records in batches of 100k).

    Args:
        total_records: Total number of records to generate.
        batch_size: Number of records per batch / per parquet file.
        output_directory: Directory the parquet files are written to.

    Returns:
        The number of records that were generated and saved.

    Raises:
        Exception: Any error raised while generating or saving is reported on
            stdout and then re-raised unchanged.
    """
    try:
        processed_records = 0
        for batch_num, batch_records in enumerate(generate_task_logs(total_records, batch_size)):
            # Convert batch to DataFrame
            pandas_df = pd.DataFrame(batch_records)

            # Save to parquet
            save_to_parquet(pandas_df, output_directory, batch_num)

            processed_records += len(batch_records)
            # Safe division: the loop body only runs when total_records > 0.
            completion_percentage = (processed_records / total_records) * 100

            print(f"Progress: {completion_percentage:.2f}% ({processed_records:,} / {total_records:,} records)")

        return processed_records

    except Exception as e:
        print(f"Error processing data: {str(e)}")
        raise


StatementMeta(, 88a22cf6-2786-4de4-bf71-3e9b5d3b2d5d, 31, Finished, Available, Finished)

In [None]:
# Entry point: start the full generation run using the configuration defined in main().
if __name__ == "__main__":
    main()

StatementMeta(, 88a22cf6-2786-4de4-bf71-3e9b5d3b2d5d, 32, Submitted, Running, Running)

Successfully saved batch 0000 to /lakehouse/default/Files/data_log/task_logs_20250202_171518_batch_0000.parquet
Progress: 0.50% (100,000 / 20,000,000 records)
Successfully saved batch 0001 to /lakehouse/default/Files/data_log/task_logs_20250202_171526_batch_0001.parquet
Progress: 1.00% (200,000 / 20,000,000 records)
Successfully saved batch 0002 to /lakehouse/default/Files/data_log/task_logs_20250202_171532_batch_0002.parquet
Progress: 1.50% (300,000 / 20,000,000 records)
Successfully saved batch 0003 to /lakehouse/default/Files/data_log/task_logs_20250202_171538_batch_0003.parquet
Progress: 2.00% (400,000 / 20,000,000 records)
Successfully saved batch 0004 to /lakehouse/default/Files/data_log/task_logs_20250202_171545_batch_0004.parquet
Progress: 2.50% (500,000 / 20,000,000 records)
Successfully saved batch 0005 to /lakehouse/default/Files/data_log/task_logs_20250202_171551_batch_0005.parquet
Progress: 3.00% (600,000 / 20,000,000 records)
Successfully saved batch 0006 to /lakehouse/de

Successfully saved batch 0016 to /lakehouse/default/Files/data_log/task_logs_20250202_171659_batch_0016.parquet
Progress: 8.50% (1,700,000 / 20,000,000 records)


Successfully saved batch 0017 to /lakehouse/default/Files/data_log/task_logs_20250202_171706_batch_0017.parquet
Progress: 9.00% (1,800,000 / 20,000,000 records)


Successfully saved batch 0018 to /lakehouse/default/Files/data_log/task_logs_20250202_171712_batch_0018.parquet
Progress: 9.50% (1,900,000 / 20,000,000 records)


Successfully saved batch 0019 to /lakehouse/default/Files/data_log/task_logs_20250202_171718_batch_0019.parquet
Progress: 10.00% (2,000,000 / 20,000,000 records)


Successfully saved batch 0020 to /lakehouse/default/Files/data_log/task_logs_20250202_171725_batch_0020.parquet
Progress: 10.50% (2,100,000 / 20,000,000 records)


Successfully saved batch 0021 to /lakehouse/default/Files/data_log/task_logs_20250202_171731_batch_0021.parquet
Progress: 11.00% (2,200,000 / 20,000,000 records)


Successfully saved batch 0022 to /lakehouse/default/Files/data_log/task_logs_20250202_171737_batch_0022.parquet
Progress: 11.50% (2,300,000 / 20,000,000 records)


Successfully saved batch 0023 to /lakehouse/default/Files/data_log/task_logs_20250202_171744_batch_0023.parquet
Progress: 12.00% (2,400,000 / 20,000,000 records)


Successfully saved batch 0024 to /lakehouse/default/Files/data_log/task_logs_20250202_171750_batch_0024.parquet
Progress: 12.50% (2,500,000 / 20,000,000 records)


Successfully saved batch 0025 to /lakehouse/default/Files/data_log/task_logs_20250202_171756_batch_0025.parquet
Progress: 13.00% (2,600,000 / 20,000,000 records)


Successfully saved batch 0026 to /lakehouse/default/Files/data_log/task_logs_20250202_171802_batch_0026.parquet
Progress: 13.50% (2,700,000 / 20,000,000 records)


Successfully saved batch 0027 to /lakehouse/default/Files/data_log/task_logs_20250202_171808_batch_0027.parquet
Progress: 14.00% (2,800,000 / 20,000,000 records)


Successfully saved batch 0028 to /lakehouse/default/Files/data_log/task_logs_20250202_171815_batch_0028.parquet
Progress: 14.50% (2,900,000 / 20,000,000 records)


Successfully saved batch 0029 to /lakehouse/default/Files/data_log/task_logs_20250202_171821_batch_0029.parquet
Progress: 15.00% (3,000,000 / 20,000,000 records)


Successfully saved batch 0030 to /lakehouse/default/Files/data_log/task_logs_20250202_171827_batch_0030.parquet
Progress: 15.50% (3,100,000 / 20,000,000 records)


Successfully saved batch 0031 to /lakehouse/default/Files/data_log/task_logs_20250202_171833_batch_0031.parquet
Progress: 16.00% (3,200,000 / 20,000,000 records)


Successfully saved batch 0032 to /lakehouse/default/Files/data_log/task_logs_20250202_171839_batch_0032.parquet
Progress: 16.50% (3,300,000 / 20,000,000 records)


Successfully saved batch 0033 to /lakehouse/default/Files/data_log/task_logs_20250202_171846_batch_0033.parquet
Progress: 17.00% (3,400,000 / 20,000,000 records)


Successfully saved batch 0034 to /lakehouse/default/Files/data_log/task_logs_20250202_171852_batch_0034.parquet
Progress: 17.50% (3,500,000 / 20,000,000 records)


Successfully saved batch 0035 to /lakehouse/default/Files/data_log/task_logs_20250202_171858_batch_0035.parquet
Progress: 18.00% (3,600,000 / 20,000,000 records)


Successfully saved batch 0036 to /lakehouse/default/Files/data_log/task_logs_20250202_171905_batch_0036.parquet
Progress: 18.50% (3,700,000 / 20,000,000 records)


Successfully saved batch 0037 to /lakehouse/default/Files/data_log/task_logs_20250202_171911_batch_0037.parquet
Progress: 19.00% (3,800,000 / 20,000,000 records)


Successfully saved batch 0038 to /lakehouse/default/Files/data_log/task_logs_20250202_171917_batch_0038.parquet
Progress: 19.50% (3,900,000 / 20,000,000 records)


Successfully saved batch 0039 to /lakehouse/default/Files/data_log/task_logs_20250202_171923_batch_0039.parquet
Progress: 20.00% (4,000,000 / 20,000,000 records)


Successfully saved batch 0040 to /lakehouse/default/Files/data_log/task_logs_20250202_171930_batch_0040.parquet
Progress: 20.50% (4,100,000 / 20,000,000 records)


Successfully saved batch 0041 to /lakehouse/default/Files/data_log/task_logs_20250202_171936_batch_0041.parquet
Progress: 21.00% (4,200,000 / 20,000,000 records)


Successfully saved batch 0042 to /lakehouse/default/Files/data_log/task_logs_20250202_171942_batch_0042.parquet
Progress: 21.50% (4,300,000 / 20,000,000 records)


Successfully saved batch 0043 to /lakehouse/default/Files/data_log/task_logs_20250202_171948_batch_0043.parquet
Progress: 22.00% (4,400,000 / 20,000,000 records)


Successfully saved batch 0044 to /lakehouse/default/Files/data_log/task_logs_20250202_171954_batch_0044.parquet
Progress: 22.50% (4,500,000 / 20,000,000 records)


Successfully saved batch 0045 to /lakehouse/default/Files/data_log/task_logs_20250202_172001_batch_0045.parquet
Progress: 23.00% (4,600,000 / 20,000,000 records)
Successfully saved batch 0046 to /lakehouse/default/Files/data_log/task_logs_20250202_172007_batch_0046.parquet
Progress: 23.50% (4,700,000 / 20,000,000 records)
Successfully saved batch 0047 to /lakehouse/default/Files/data_log/task_logs_20250202_172013_batch_0047.parquet
Progress: 24.00% (4,800,000 / 20,000,000 records)
Successfully saved batch 0048 to /lakehouse/default/Files/data_log/task_logs_20250202_172019_batch_0048.parquet
Progress: 24.50% (4,900,000 / 20,000,000 records)


Successfully saved batch 0049 to /lakehouse/default/Files/data_log/task_logs_20250202_172026_batch_0049.parquet
Progress: 25.00% (5,000,000 / 20,000,000 records)


Successfully saved batch 0050 to /lakehouse/default/Files/data_log/task_logs_20250202_172032_batch_0050.parquet
Progress: 25.50% (5,100,000 / 20,000,000 records)


Successfully saved batch 0051 to /lakehouse/default/Files/data_log/task_logs_20250202_172038_batch_0051.parquet
Progress: 26.00% (5,200,000 / 20,000,000 records)


Successfully saved batch 0052 to /lakehouse/default/Files/data_log/task_logs_20250202_172044_batch_0052.parquet
Progress: 26.50% (5,300,000 / 20,000,000 records)


Successfully saved batch 0053 to /lakehouse/default/Files/data_log/task_logs_20250202_172050_batch_0053.parquet
Progress: 27.00% (5,400,000 / 20,000,000 records)


Successfully saved batch 0054 to /lakehouse/default/Files/data_log/task_logs_20250202_172056_batch_0054.parquet
Progress: 27.50% (5,500,000 / 20,000,000 records)


Successfully saved batch 0055 to /lakehouse/default/Files/data_log/task_logs_20250202_172103_batch_0055.parquet
Progress: 28.00% (5,600,000 / 20,000,000 records)


Successfully saved batch 0056 to /lakehouse/default/Files/data_log/task_logs_20250202_172109_batch_0056.parquet
Progress: 28.50% (5,700,000 / 20,000,000 records)


Successfully saved batch 0057 to /lakehouse/default/Files/data_log/task_logs_20250202_172116_batch_0057.parquet
Progress: 29.00% (5,800,000 / 20,000,000 records)


Successfully saved batch 0058 to /lakehouse/default/Files/data_log/task_logs_20250202_172122_batch_0058.parquet
Progress: 29.50% (5,900,000 / 20,000,000 records)


Successfully saved batch 0059 to /lakehouse/default/Files/data_log/task_logs_20250202_172128_batch_0059.parquet
Progress: 30.00% (6,000,000 / 20,000,000 records)


Successfully saved batch 0060 to /lakehouse/default/Files/data_log/task_logs_20250202_172135_batch_0060.parquet
Progress: 30.50% (6,100,000 / 20,000,000 records)


Successfully saved batch 0061 to /lakehouse/default/Files/data_log/task_logs_20250202_172141_batch_0061.parquet
Progress: 31.00% (6,200,000 / 20,000,000 records)


Successfully saved batch 0062 to /lakehouse/default/Files/data_log/task_logs_20250202_172147_batch_0062.parquet
Progress: 31.50% (6,300,000 / 20,000,000 records)
Successfully saved batch 0063 to /lakehouse/default/Files/data_log/task_logs_20250202_172154_batch_0063.parquet
Progress: 32.00% (6,400,000 / 20,000,000 records)
Successfully saved batch 0064 to /lakehouse/default/Files/data_log/task_logs_20250202_172200_batch_0064.parquet
Progress: 32.50% (6,500,000 / 20,000,000 records)


Successfully saved batch 0065 to /lakehouse/default/Files/data_log/task_logs_20250202_172206_batch_0065.parquet
Progress: 33.00% (6,600,000 / 20,000,000 records)


Successfully saved batch 0066 to /lakehouse/default/Files/data_log/task_logs_20250202_172212_batch_0066.parquet
Progress: 33.50% (6,700,000 / 20,000,000 records)


Successfully saved batch 0067 to /lakehouse/default/Files/data_log/task_logs_20250202_172219_batch_0067.parquet
Progress: 34.00% (6,800,000 / 20,000,000 records)


Successfully saved batch 0068 to /lakehouse/default/Files/data_log/task_logs_20250202_172225_batch_0068.parquet
Progress: 34.50% (6,900,000 / 20,000,000 records)


Successfully saved batch 0069 to /lakehouse/default/Files/data_log/task_logs_20250202_172231_batch_0069.parquet
Progress: 35.00% (7,000,000 / 20,000,000 records)


Successfully saved batch 0070 to /lakehouse/default/Files/data_log/task_logs_20250202_172237_batch_0070.parquet
Progress: 35.50% (7,100,000 / 20,000,000 records)


Successfully saved batch 0071 to /lakehouse/default/Files/data_log/task_logs_20250202_172243_batch_0071.parquet
Progress: 36.00% (7,200,000 / 20,000,000 records)


Successfully saved batch 0072 to /lakehouse/default/Files/data_log/task_logs_20250202_172249_batch_0072.parquet
Progress: 36.50% (7,300,000 / 20,000,000 records)


Successfully saved batch 0073 to /lakehouse/default/Files/data_log/task_logs_20250202_172256_batch_0073.parquet
Progress: 37.00% (7,400,000 / 20,000,000 records)


Successfully saved batch 0074 to /lakehouse/default/Files/data_log/task_logs_20250202_172302_batch_0074.parquet
Progress: 37.50% (7,500,000 / 20,000,000 records)
Successfully saved batch 0075 to /lakehouse/default/Files/data_log/task_logs_20250202_172308_batch_0075.parquet
Progress: 38.00% (7,600,000 / 20,000,000 records)


Successfully saved batch 0076 to /lakehouse/default/Files/data_log/task_logs_20250202_172314_batch_0076.parquet
Progress: 38.50% (7,700,000 / 20,000,000 records)


Successfully saved batch 0077 to /lakehouse/default/Files/data_log/task_logs_20250202_172320_batch_0077.parquet
Progress: 39.00% (7,800,000 / 20,000,000 records)


Successfully saved batch 0078 to /lakehouse/default/Files/data_log/task_logs_20250202_172327_batch_0078.parquet
Progress: 39.50% (7,900,000 / 20,000,000 records)


Successfully saved batch 0079 to /lakehouse/default/Files/data_log/task_logs_20250202_172333_batch_0079.parquet
Progress: 40.00% (8,000,000 / 20,000,000 records)


Successfully saved batch 0080 to /lakehouse/default/Files/data_log/task_logs_20250202_172339_batch_0080.parquet
Progress: 40.50% (8,100,000 / 20,000,000 records)


Successfully saved batch 0081 to /lakehouse/default/Files/data_log/task_logs_20250202_172345_batch_0081.parquet
Progress: 41.00% (8,200,000 / 20,000,000 records)


Successfully saved batch 0082 to /lakehouse/default/Files/data_log/task_logs_20250202_172351_batch_0082.parquet
Progress: 41.50% (8,300,000 / 20,000,000 records)


Successfully saved batch 0083 to /lakehouse/default/Files/data_log/task_logs_20250202_172358_batch_0083.parquet
Progress: 42.00% (8,400,000 / 20,000,000 records)


Successfully saved batch 0084 to /lakehouse/default/Files/data_log/task_logs_20250202_172404_batch_0084.parquet
Progress: 42.50% (8,500,000 / 20,000,000 records)


Successfully saved batch 0085 to /lakehouse/default/Files/data_log/task_logs_20250202_172410_batch_0085.parquet
Progress: 43.00% (8,600,000 / 20,000,000 records)


Successfully saved batch 0086 to /lakehouse/default/Files/data_log/task_logs_20250202_172417_batch_0086.parquet
Progress: 43.50% (8,700,000 / 20,000,000 records)


Successfully saved batch 0087 to /lakehouse/default/Files/data_log/task_logs_20250202_172423_batch_0087.parquet
Progress: 44.00% (8,800,000 / 20,000,000 records)


Successfully saved batch 0088 to /lakehouse/default/Files/data_log/task_logs_20250202_172429_batch_0088.parquet
Progress: 44.50% (8,900,000 / 20,000,000 records)


Successfully saved batch 0089 to /lakehouse/default/Files/data_log/task_logs_20250202_172435_batch_0089.parquet
Progress: 45.00% (9,000,000 / 20,000,000 records)
Successfully saved batch 0090 to /lakehouse/default/Files/data_log/task_logs_20250202_172442_batch_0090.parquet
Progress: 45.50% (9,100,000 / 20,000,000 records)


Successfully saved batch 0091 to /lakehouse/default/Files/data_log/task_logs_20250202_172448_batch_0091.parquet
Progress: 46.00% (9,200,000 / 20,000,000 records)
Successfully saved batch 0092 to /lakehouse/default/Files/data_log/task_logs_20250202_172454_batch_0092.parquet
Progress: 46.50% (9,300,000 / 20,000,000 records)


Successfully saved batch 0093 to /lakehouse/default/Files/data_log/task_logs_20250202_172501_batch_0093.parquet
Progress: 47.00% (9,400,000 / 20,000,000 records)


Successfully saved batch 0094 to /lakehouse/default/Files/data_log/task_logs_20250202_172507_batch_0094.parquet
Progress: 47.50% (9,500,000 / 20,000,000 records)


Successfully saved batch 0095 to /lakehouse/default/Files/data_log/task_logs_20250202_172513_batch_0095.parquet
Progress: 48.00% (9,600,000 / 20,000,000 records)


Successfully saved batch 0096 to /lakehouse/default/Files/data_log/task_logs_20250202_172519_batch_0096.parquet
Progress: 48.50% (9,700,000 / 20,000,000 records)


Successfully saved batch 0097 to /lakehouse/default/Files/data_log/task_logs_20250202_172526_batch_0097.parquet
Progress: 49.00% (9,800,000 / 20,000,000 records)


Successfully saved batch 0098 to /lakehouse/default/Files/data_log/task_logs_20250202_172532_batch_0098.parquet
Progress: 49.50% (9,900,000 / 20,000,000 records)


Successfully saved batch 0099 to /lakehouse/default/Files/data_log/task_logs_20250202_172538_batch_0099.parquet
Progress: 50.00% (10,000,000 / 20,000,000 records)


Successfully saved batch 0100 to /lakehouse/default/Files/data_log/task_logs_20250202_172545_batch_0100.parquet
Progress: 50.50% (10,100,000 / 20,000,000 records)


Successfully saved batch 0101 to /lakehouse/default/Files/data_log/task_logs_20250202_172551_batch_0101.parquet
Progress: 51.00% (10,200,000 / 20,000,000 records)


Successfully saved batch 0102 to /lakehouse/default/Files/data_log/task_logs_20250202_172558_batch_0102.parquet
Progress: 51.50% (10,300,000 / 20,000,000 records)


Successfully saved batch 0103 to /lakehouse/default/Files/data_log/task_logs_20250202_172604_batch_0103.parquet
Progress: 52.00% (10,400,000 / 20,000,000 records)
Successfully saved batch 0104 to /lakehouse/default/Files/data_log/task_logs_20250202_172610_batch_0104.parquet
Progress: 52.50% (10,500,000 / 20,000,000 records)
Successfully saved batch 0105 to /lakehouse/default/Files/data_log/task_logs_20250202_172616_batch_0105.parquet
Progress: 53.00% (10,600,000 / 20,000,000 records)
Successfully saved batch 0106 to /lakehouse/default/Files/data_log/task_logs_20250202_172623_batch_0106.parquet
Progress: 53.50% (10,700,000 / 20,000,000 records)
Successfully saved batch 0107 to /lakehouse/default/Files/data_log/task_logs_20250202_172629_batch_0107.parquet
Progress: 54.00% (10,800,000 / 20,000,000 records)


Successfully saved batch 0108 to /lakehouse/default/Files/data_log/task_logs_20250202_172635_batch_0108.parquet
Progress: 54.50% (10,900,000 / 20,000,000 records)


Successfully saved batch 0109 to /lakehouse/default/Files/data_log/task_logs_20250202_172641_batch_0109.parquet
Progress: 55.00% (11,000,000 / 20,000,000 records)


Successfully saved batch 0110 to /lakehouse/default/Files/data_log/task_logs_20250202_172648_batch_0110.parquet
Progress: 55.50% (11,100,000 / 20,000,000 records)


Successfully saved batch 0111 to /lakehouse/default/Files/data_log/task_logs_20250202_172654_batch_0111.parquet
Progress: 56.00% (11,200,000 / 20,000,000 records)


Successfully saved batch 0112 to /lakehouse/default/Files/data_log/task_logs_20250202_172700_batch_0112.parquet
Progress: 56.50% (11,300,000 / 20,000,000 records)


Successfully saved batch 0113 to /lakehouse/default/Files/data_log/task_logs_20250202_172706_batch_0113.parquet
Progress: 57.00% (11,400,000 / 20,000,000 records)


Successfully saved batch 0114 to /lakehouse/default/Files/data_log/task_logs_20250202_172713_batch_0114.parquet
Progress: 57.50% (11,500,000 / 20,000,000 records)


Successfully saved batch 0115 to /lakehouse/default/Files/data_log/task_logs_20250202_172719_batch_0115.parquet
Progress: 58.00% (11,600,000 / 20,000,000 records)


Successfully saved batch 0116 to /lakehouse/default/Files/data_log/task_logs_20250202_172725_batch_0116.parquet
Progress: 58.50% (11,700,000 / 20,000,000 records)


Successfully saved batch 0117 to /lakehouse/default/Files/data_log/task_logs_20250202_172731_batch_0117.parquet
Progress: 59.00% (11,800,000 / 20,000,000 records)


Successfully saved batch 0118 to /lakehouse/default/Files/data_log/task_logs_20250202_172737_batch_0118.parquet
Progress: 59.50% (11,900,000 / 20,000,000 records)


Successfully saved batch 0119 to /lakehouse/default/Files/data_log/task_logs_20250202_172743_batch_0119.parquet
Progress: 60.00% (12,000,000 / 20,000,000 records)


Successfully saved batch 0120 to /lakehouse/default/Files/data_log/task_logs_20250202_172750_batch_0120.parquet
Progress: 60.50% (12,100,000 / 20,000,000 records)


Successfully saved batch 0121 to /lakehouse/default/Files/data_log/task_logs_20250202_172756_batch_0121.parquet
Progress: 61.00% (12,200,000 / 20,000,000 records)


Successfully saved batch 0122 to /lakehouse/default/Files/data_log/task_logs_20250202_172802_batch_0122.parquet
Progress: 61.50% (12,300,000 / 20,000,000 records)


Successfully saved batch 0123 to /lakehouse/default/Files/data_log/task_logs_20250202_172809_batch_0123.parquet
Progress: 62.00% (12,400,000 / 20,000,000 records)


Successfully saved batch 0124 to /lakehouse/default/Files/data_log/task_logs_20250202_172815_batch_0124.parquet
Progress: 62.50% (12,500,000 / 20,000,000 records)


Successfully saved batch 0125 to /lakehouse/default/Files/data_log/task_logs_20250202_172821_batch_0125.parquet
Progress: 63.00% (12,600,000 / 20,000,000 records)


Successfully saved batch 0126 to /lakehouse/default/Files/data_log/task_logs_20250202_172828_batch_0126.parquet
Progress: 63.50% (12,700,000 / 20,000,000 records)


Successfully saved batch 0127 to /lakehouse/default/Files/data_log/task_logs_20250202_172834_batch_0127.parquet
Progress: 64.00% (12,800,000 / 20,000,000 records)


Successfully saved batch 0128 to /lakehouse/default/Files/data_log/task_logs_20250202_172840_batch_0128.parquet
Progress: 64.50% (12,900,000 / 20,000,000 records)


Successfully saved batch 0129 to /lakehouse/default/Files/data_log/task_logs_20250202_172846_batch_0129.parquet
Progress: 65.00% (13,000,000 / 20,000,000 records)


Successfully saved batch 0130 to /lakehouse/default/Files/data_log/task_logs_20250202_172853_batch_0130.parquet
Progress: 65.50% (13,100,000 / 20,000,000 records)


Successfully saved batch 0131 to /lakehouse/default/Files/data_log/task_logs_20250202_172859_batch_0131.parquet
Progress: 66.00% (13,200,000 / 20,000,000 records)


Successfully saved batch 0132 to /lakehouse/default/Files/data_log/task_logs_20250202_172905_batch_0132.parquet
Progress: 66.50% (13,300,000 / 20,000,000 records)


Successfully saved batch 0133 to /lakehouse/default/Files/data_log/task_logs_20250202_172911_batch_0133.parquet
Progress: 67.00% (13,400,000 / 20,000,000 records)


Successfully saved batch 0134 to /lakehouse/default/Files/data_log/task_logs_20250202_172917_batch_0134.parquet
Progress: 67.50% (13,500,000 / 20,000,000 records)


Successfully saved batch 0135 to /lakehouse/default/Files/data_log/task_logs_20250202_172924_batch_0135.parquet
Progress: 68.00% (13,600,000 / 20,000,000 records)


Successfully saved batch 0136 to /lakehouse/default/Files/data_log/task_logs_20250202_172930_batch_0136.parquet
Progress: 68.50% (13,700,000 / 20,000,000 records)
Successfully saved batch 0137 to /lakehouse/default/Files/data_log/task_logs_20250202_172936_batch_0137.parquet
Progress: 69.00% (13,800,000 / 20,000,000 records)
Successfully saved batch 0138 to /lakehouse/default/Files/data_log/task_logs_20250202_172942_batch_0138.parquet
Progress: 69.50% (13,900,000 / 20,000,000 records)
Successfully saved batch 0139 to /lakehouse/default/Files/data_log/task_logs_20250202_172949_batch_0139.parquet
Progress: 70.00% (14,000,000 / 20,000,000 records)
Successfully saved batch 0140 to /lakehouse/default/Files/data_log/task_logs_20250202_172955_batch_0140.parquet
Progress: 70.50% (14,100,000 / 20,000,000 records)
Successfully saved batch 0141 to /lakehouse/default/Files/data_log/task_logs_20250202_173001_batch_0141.parquet
Progress: 71.00% (14,200,000 / 20,000,000 records)


Successfully saved batch 0142 to /lakehouse/default/Files/data_log/task_logs_20250202_173007_batch_0142.parquet
Progress: 71.50% (14,300,000 / 20,000,000 records)
Successfully saved batch 0143 to /lakehouse/default/Files/data_log/task_logs_20250202_173014_batch_0143.parquet
Progress: 72.00% (14,400,000 / 20,000,000 records)
Successfully saved batch 0144 to /lakehouse/default/Files/data_log/task_logs_20250202_173020_batch_0144.parquet
Progress: 72.50% (14,500,000 / 20,000,000 records)


Successfully saved batch 0145 to /lakehouse/default/Files/data_log/task_logs_20250202_173026_batch_0145.parquet
Progress: 73.00% (14,600,000 / 20,000,000 records)


Successfully saved batch 0146 to /lakehouse/default/Files/data_log/task_logs_20250202_173032_batch_0146.parquet
Progress: 73.50% (14,700,000 / 20,000,000 records)


Successfully saved batch 0147 to /lakehouse/default/Files/data_log/task_logs_20250202_173038_batch_0147.parquet
Progress: 74.00% (14,800,000 / 20,000,000 records)


Successfully saved batch 0148 to /lakehouse/default/Files/data_log/task_logs_20250202_173045_batch_0148.parquet
Progress: 74.50% (14,900,000 / 20,000,000 records)


Successfully saved batch 0149 to /lakehouse/default/Files/data_log/task_logs_20250202_173051_batch_0149.parquet
Progress: 75.00% (15,000,000 / 20,000,000 records)


Successfully saved batch 0150 to /lakehouse/default/Files/data_log/task_logs_20250202_173057_batch_0150.parquet
Progress: 75.50% (15,100,000 / 20,000,000 records)


Successfully saved batch 0151 to /lakehouse/default/Files/data_log/task_logs_20250202_173103_batch_0151.parquet
Progress: 76.00% (15,200,000 / 20,000,000 records)


Successfully saved batch 0152 to /lakehouse/default/Files/data_log/task_logs_20250202_173110_batch_0152.parquet
Progress: 76.50% (15,300,000 / 20,000,000 records)


Successfully saved batch 0153 to /lakehouse/default/Files/data_log/task_logs_20250202_173116_batch_0153.parquet
Progress: 77.00% (15,400,000 / 20,000,000 records)


Successfully saved batch 0154 to /lakehouse/default/Files/data_log/task_logs_20250202_173122_batch_0154.parquet
Progress: 77.50% (15,500,000 / 20,000,000 records)


Successfully saved batch 0155 to /lakehouse/default/Files/data_log/task_logs_20250202_173128_batch_0155.parquet
Progress: 78.00% (15,600,000 / 20,000,000 records)


Successfully saved batch 0156 to /lakehouse/default/Files/data_log/task_logs_20250202_173134_batch_0156.parquet
Progress: 78.50% (15,700,000 / 20,000,000 records)


Successfully saved batch 0157 to /lakehouse/default/Files/data_log/task_logs_20250202_173140_batch_0157.parquet
Progress: 79.00% (15,800,000 / 20,000,000 records)


Successfully saved batch 0158 to /lakehouse/default/Files/data_log/task_logs_20250202_173147_batch_0158.parquet
Progress: 79.50% (15,900,000 / 20,000,000 records)


Successfully saved batch 0159 to /lakehouse/default/Files/data_log/task_logs_20250202_173153_batch_0159.parquet
Progress: 80.00% (16,000,000 / 20,000,000 records)


Successfully saved batch 0160 to /lakehouse/default/Files/data_log/task_logs_20250202_173159_batch_0160.parquet
Progress: 80.50% (16,100,000 / 20,000,000 records)


Successfully saved batch 0161 to /lakehouse/default/Files/data_log/task_logs_20250202_173206_batch_0161.parquet
Progress: 81.00% (16,200,000 / 20,000,000 records)


Successfully saved batch 0162 to /lakehouse/default/Files/data_log/task_logs_20250202_173212_batch_0162.parquet
Progress: 81.50% (16,300,000 / 20,000,000 records)


Successfully saved batch 0163 to /lakehouse/default/Files/data_log/task_logs_20250202_173218_batch_0163.parquet
Progress: 82.00% (16,400,000 / 20,000,000 records)


Successfully saved batch 0164 to /lakehouse/default/Files/data_log/task_logs_20250202_173224_batch_0164.parquet
Progress: 82.50% (16,500,000 / 20,000,000 records)


Successfully saved batch 0165 to /lakehouse/default/Files/data_log/task_logs_20250202_173231_batch_0165.parquet
Progress: 83.00% (16,600,000 / 20,000,000 records)


Successfully saved batch 0166 to /lakehouse/default/Files/data_log/task_logs_20250202_173237_batch_0166.parquet
Progress: 83.50% (16,700,000 / 20,000,000 records)


Successfully saved batch 0167 to /lakehouse/default/Files/data_log/task_logs_20250202_173243_batch_0167.parquet
Progress: 84.00% (16,800,000 / 20,000,000 records)


Successfully saved batch 0168 to /lakehouse/default/Files/data_log/task_logs_20250202_173249_batch_0168.parquet
Progress: 84.50% (16,900,000 / 20,000,000 records)


Successfully saved batch 0169 to /lakehouse/default/Files/data_log/task_logs_20250202_173255_batch_0169.parquet
Progress: 85.00% (17,000,000 / 20,000,000 records)


Successfully saved batch 0170 to /lakehouse/default/Files/data_log/task_logs_20250202_173302_batch_0170.parquet
Progress: 85.50% (17,100,000 / 20,000,000 records)


Successfully saved batch 0171 to /lakehouse/default/Files/data_log/task_logs_20250202_173308_batch_0171.parquet
Progress: 86.00% (17,200,000 / 20,000,000 records)


Successfully saved batch 0172 to /lakehouse/default/Files/data_log/task_logs_20250202_173314_batch_0172.parquet
Progress: 86.50% (17,300,000 / 20,000,000 records)


Successfully saved batch 0173 to /lakehouse/default/Files/data_log/task_logs_20250202_173321_batch_0173.parquet
Progress: 87.00% (17,400,000 / 20,000,000 records)


Successfully saved batch 0174 to /lakehouse/default/Files/data_log/task_logs_20250202_173327_batch_0174.parquet
Progress: 87.50% (17,500,000 / 20,000,000 records)


Successfully saved batch 0175 to /lakehouse/default/Files/data_log/task_logs_20250202_173333_batch_0175.parquet
Progress: 88.00% (17,600,000 / 20,000,000 records)


Successfully saved batch 0176 to /lakehouse/default/Files/data_log/task_logs_20250202_173339_batch_0176.parquet
Progress: 88.50% (17,700,000 / 20,000,000 records)


Successfully saved batch 0177 to /lakehouse/default/Files/data_log/task_logs_20250202_173346_batch_0177.parquet
Progress: 89.00% (17,800,000 / 20,000,000 records)


Successfully saved batch 0178 to /lakehouse/default/Files/data_log/task_logs_20250202_173352_batch_0178.parquet
Progress: 89.50% (17,900,000 / 20,000,000 records)


Successfully saved batch 0179 to /lakehouse/default/Files/data_log/task_logs_20250202_173358_batch_0179.parquet
Progress: 90.00% (18,000,000 / 20,000,000 records)


Successfully saved batch 0180 to /lakehouse/default/Files/data_log/task_logs_20250202_173405_batch_0180.parquet
Progress: 90.50% (18,100,000 / 20,000,000 records)


Successfully saved batch 0181 to /lakehouse/default/Files/data_log/task_logs_20250202_173411_batch_0181.parquet
Progress: 91.00% (18,200,000 / 20,000,000 records)


Successfully saved batch 0182 to /lakehouse/default/Files/data_log/task_logs_20250202_173417_batch_0182.parquet
Progress: 91.50% (18,300,000 / 20,000,000 records)


Successfully saved batch 0183 to /lakehouse/default/Files/data_log/task_logs_20250202_173424_batch_0183.parquet
Progress: 92.00% (18,400,000 / 20,000,000 records)


Successfully saved batch 0184 to /lakehouse/default/Files/data_log/task_logs_20250202_173430_batch_0184.parquet
Progress: 92.50% (18,500,000 / 20,000,000 records)


Successfully saved batch 0185 to /lakehouse/default/Files/data_log/task_logs_20250202_173436_batch_0185.parquet
Progress: 93.00% (18,600,000 / 20,000,000 records)


Successfully saved batch 0186 to /lakehouse/default/Files/data_log/task_logs_20250202_173442_batch_0186.parquet
Progress: 93.50% (18,700,000 / 20,000,000 records)


Successfully saved batch 0187 to /lakehouse/default/Files/data_log/task_logs_20250202_173449_batch_0187.parquet
Progress: 94.00% (18,800,000 / 20,000,000 records)


Successfully saved batch 0188 to /lakehouse/default/Files/data_log/task_logs_20250202_173455_batch_0188.parquet
Progress: 94.50% (18,900,000 / 20,000,000 records)


Successfully saved batch 0189 to /lakehouse/default/Files/data_log/task_logs_20250202_173501_batch_0189.parquet
Progress: 95.00% (19,000,000 / 20,000,000 records)


Successfully saved batch 0190 to /lakehouse/default/Files/data_log/task_logs_20250202_173507_batch_0190.parquet
Progress: 95.50% (19,100,000 / 20,000,000 records)


Successfully saved batch 0191 to /lakehouse/default/Files/data_log/task_logs_20250202_173514_batch_0191.parquet
Progress: 96.00% (19,200,000 / 20,000,000 records)


Successfully saved batch 0192 to /lakehouse/default/Files/data_log/task_logs_20250202_173520_batch_0192.parquet
Progress: 96.50% (19,300,000 / 20,000,000 records)


Successfully saved batch 0193 to /lakehouse/default/Files/data_log/task_logs_20250202_173526_batch_0193.parquet
Progress: 97.00% (19,400,000 / 20,000,000 records)


Successfully saved batch 0194 to /lakehouse/default/Files/data_log/task_logs_20250202_173533_batch_0194.parquet
Progress: 97.50% (19,500,000 / 20,000,000 records)


Successfully saved batch 0195 to /lakehouse/default/Files/data_log/task_logs_20250202_173539_batch_0195.parquet
Progress: 98.00% (19,600,000 / 20,000,000 records)


Successfully saved batch 0196 to /lakehouse/default/Files/data_log/task_logs_20250202_173545_batch_0196.parquet
Progress: 98.50% (19,700,000 / 20,000,000 records)


Successfully saved batch 0197 to /lakehouse/default/Files/data_log/task_logs_20250202_173552_batch_0197.parquet
Progress: 99.00% (19,800,000 / 20,000,000 records)


Successfully saved batch 0198 to /lakehouse/default/Files/data_log/task_logs_20250202_173558_batch_0198.parquet
Progress: 99.50% (19,900,000 / 20,000,000 records)


Successfully saved batch 0199 to /lakehouse/default/Files/data_log/task_logs_20250202_173604_batch_0199.parquet
Progress: 100.00% (20,000,000 / 20,000,000 records)
