# In [4]:
import pandas as pd
import json
import numpy as np
import psutil
import os
import time

def calculate_costs(df):
    """Sum the unblended cost overall and split by Tax / non-Tax line items.

    Args:
        df: DataFrame with 'line_item_unblended_cost' and
            'line_item_line_item_type' columns.

    Returns:
        A ``(total_cost, usage_cost, tax_cost)`` tuple, where ``tax_cost``
        covers rows whose line item type equals 'Tax' and ``usage_cost``
        covers every other row.
    """
    costs = df['line_item_unblended_cost']
    is_tax = df['line_item_line_item_type'] == 'Tax'

    tax_cost = costs.loc[is_tax].sum()
    usage_cost = costs.loc[~is_tax].sum()
    total_cost = costs.sum()

    return total_cost, usage_cost, tax_cost

def extract_region_wise_details(df):
    """Build a nested per-group cost breakdown of the given line items.

    Rows are grouped by 'line_item_usage_account_id' (this is the value the
    code calls "region"), then by 'line_item_usage_type'. Within each usage
    type, one entry is stored per 'identity_line_item_id'; if the same id
    appears more than once in a usage type, the last row wins.

    Args:
        df: DataFrame holding the cost, line item type, account id, usage
            type, line item id, resource id and product code columns.

    Returns:
        A dict mapping each account id to a single-element list containing
        a dict with 'total_cost', 'usage_cost' and 'details'. 'details' maps
        each usage type to its "<usage_type>_total_cost" and
        "<usage_type>_details" entries.
    """
    breakdown = {}

    for account_id, account_rows in df.groupby('line_item_usage_account_id'):
        account_total, account_usage, _ = calculate_costs(account_rows)
        per_usage_type = {}

        for usage_type, usage_rows in account_rows.groupby('line_item_usage_type'):
            usage_type_total = usage_rows['line_item_unblended_cost'].sum()

            line_items = {}
            for _, item in usage_rows.iterrows():
                item_id = item['identity_line_item_id']
                line_items[item_id] = {
                    'total_cost': item['line_item_unblended_cost'],
                    'identityLineItemId': item['identity_line_item_id'],
                    'line_item_resource_id': item['line_item_resource_id'],
                    'region': item['line_item_usage_account_id'],
                    'line_item_product_code': item['line_item_product_code'],
                    'product_usagetype': item['line_item_usage_type'],
                }

            per_usage_type[usage_type] = {
                f"{usage_type}_total_cost": usage_type_total,
                f"{usage_type}_details": line_items,
            }

        # Each account maps to a one-element list wrapping its summary dict.
        breakdown[account_id] = [{
            'total_cost': account_total,
            'usage_cost': account_usage,
            'details': per_usage_type,
        }]

    return breakdown

def main(parquet_file_path, output_file_path):
    """Build the AWSCostExplorer cost report from a parquet file.

    Reads only the columns the report needs, keeps the rows whose
    'line_item_product_code' is 'AWSCostExplorer', and assembles the
    total / usage / tax costs together with the nested per-group details.

    Args:
        parquet_file_path: Path of the parquet file to read.
        output_file_path: Path the report is written to as indented JSON.
            Pass None to skip writing the file.

    Returns:
        The report as a nested dict (the same structure that is written
        to ``output_file_path``).
    """
    # Read the parquet file, restricted to the columns used below
    df = pd.read_parquet(parquet_file_path, columns=[
        'line_item_product_code', 'line_item_unblended_cost',
        'line_item_line_item_type', 'line_item_usage_account_id',
        'line_item_usage_type', 'identity_line_item_id',
        'line_item_resource_id'
    ])

    # Calculate costs for AWSCostExplorer
    filtered_df = df[df['line_item_product_code'] == 'AWSCostExplorer']
    total_cost, usage_cost, tax_cost = calculate_costs(filtered_df)

    # Extract region-wise details
    cost_explorer_region_wise_details = extract_region_wise_details(filtered_df)

    # Prepare output
    output = {
        "details": {
            "AWSCostExplorer": {
                "total_cost": total_cost,
                "usage_cost": usage_cost,
                "tax_cost": tax_cost,
                "region_wise_details": cost_explorer_region_wise_details
            }
        }
    }

    # Store the report; previously the result was built and then discarded.
    if output_file_path is not None:
        with open(output_file_path, 'w', encoding='utf-8') as f:
            f.write(json.dumps(output, indent=4))

    return output


if __name__ == "__main__":
    # Start timer and monitor initial CPU and memory usage
    process = psutil.Process(os.getpid())
    start_time = time.time()
    cpu_percent_start = process.cpu_percent(interval=0.1)
    memory_usage_start = process.memory_info().rss / (1024 * 1024)

    # Example usage. The report must be built by main(): calculate_costs()
    # expects a DataFrame, so passing it the file path raises TypeError.
    parquet_file = 'CUR10MB.parquet'
    output = main(parquet_file, 'output.json')

    # Convert the output dictionary to a JSON string with double quotes
    output_json = json.dumps(output, indent=4)
    print(output_json)

    # Monitor final CPU and memory usage and end timer
    cpu_percent_end = process.cpu_percent(interval=0.1)
    memory_usage_end = process.memory_info().rss / (1024 * 1024)
    end_time = time.time()

    # Time taken, plus the mean of the start and end CPU / memory samples
    time_taken = end_time - start_time
    average_cpu_percent = (cpu_percent_start + cpu_percent_end) / 2
    average_memory_usage = (memory_usage_start + memory_usage_end) / 2

    # Print outputs
    print(f"Time taken: {time_taken:.2f} seconds")
    print(f"Average CPU percentage: {average_cpu_percent:.2f}%")
    print(f"Average memory usage: {average_memory_usage:.2f} MB")
    print(f"System configuration: {psutil.virtual_memory().total / (1024 * 1024):.2f} MB RAM")

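# Recorded notebook output:
# TypeError: string indices must be integers, not 'str'
# (this error occurs when calculate_costs receives a str, such as a file
# path, instead of a DataFrame)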