In [16]:
import json

In [17]:
json_file = json.load(open('sample-dataset-3.json'))
json_file

[{'transaction_id': 'T123456',
  'timestamp': '2024-01-15T14:30:00',
  'customer': {'id': 'CUS123',
   'region': 'North',
   'segment': 'Premium',
   'join_date': '2023-01-01'},
  'items': [{'product_id': 'P789',
    'category': 'Electronics',
    'subcategory': 'Smartphones',
    'price': 499.99,
    'quantity': 1,
    'discount': 0.0},
   {'product_id': 'P456',
    'category': 'Accessories',
    'subcategory': 'Phone Cases',
    'price': 29.99,
    'quantity': 2,
    'discount': 5.0}],
  'payment_method': 'credit_card',
  'status': 'completed',
  'shipping': {'method': 'express',
   'cost': 15.99,
   'address': {'city': 'Boston', 'state': 'MA', 'country': 'USA'}}},
 {'transaction_id': 'T123457',
  'timestamp': '2024-01-15T14:45:00',
  'customer': {'id': 'CUS456',
   'region': 'South',
   'segment': 'Standard',
   'join_date': '2023-06-15'},
  'items': [{'product_id': 'P234',
    'category': 'Books',
    'subcategory': 'Fiction',
    'price': 24.99,
    'quantity': 3,
    'discount': 

In [18]:
def transform_data(json_file):

    total_transaction=0
    for transaction in json_file:
        for item in transaction['items']:
            total_transaction += (item['price']*item['quantity'])-item['discount']
    print(f'Total Transaction is : {total_transaction}')

    def temp_func(x):
        for item in x['items']:
            return item['category']
    print("\nUnique Product Categories :")
    print(set(map(temp_func,json_file)))

transform_data(json_file)

Total Transaction is : 2049.86

Unique Product Categories :
{'Clothing', 'Home', 'Electronics', 'Books'}


In [19]:
def analysis_data(json_file):
    regional_dict = {}
    for transaction in json_file:
        region = transaction['customer']['region']
        if transaction['customer']['region'] in regional_dict.keys():
            for item in transaction['items']:
                regional_dict[region] += (item['price']*item['quantity'])-item['discount']
        else:
            for item in transaction['items']:
                regional_dict[region] = (item['price']*item['quantity'])-item['discount']
    print ("Grouped transactions by region and calculated regional sales :")
    print(regional_dict)


    dict_of_products = {}
    for transaction in json_file:
        for item in transaction['items']:
            product_name=item['product_id']
            if product_name in dict_of_products.keys():
                dict_of_products[product_name] += item['quantity']
            else:
                dict_of_products[product_name] = item['quantity']

    list_of_products= list(dict_of_products.keys())
    list_of_products= sorted(list_of_products,key = lambda x : dict_of_products[x], reverse=True)
    #print(dict_of_products)
    print("\nTop-selling products:")
    print(list_of_products)

    dict_of_transaction ={}
    for transaction in json_file:
        payment=transaction['payment_method']
        if payment in dict_of_transaction.keys():
            for item in transaction['items']:
                dict_of_transaction[payment] += ((item['price']*item['quantity'])-item['discount'])
    
        else:
            for item in transaction['items']:
                dict_of_transaction[payment] = (item['price']*item['quantity'])-item['discount']
            
    print("\nTransactions per Payment Type")
    print(dict_of_transaction)
            
analysis_data(json_file)

Grouped transactions by region and calculated regional sales :
{'North': 234.97, 'South': 64.97, 'West': 374.98, 'East': 59.99}

Top-selling products:
['P234', 'P789', 'P456', 'P777', 'P333', 'P555', 'P444', 'P888']

Transactions per Payment Type
{'credit_card': 1179.94, 'paypal': 64.97, 'debit_card': 59.99, 'gift_card': 179.99}


In [20]:
def report_generation(json_file,sort_key='timestamp'):

    filtered = list(filter(lambda x : x['status'] == 'completed', json_file))
    print("Filters completed transactions :\n")
    print(filtered)

    sorted_json=sorted(json_file,key=lambda x : x[sort_key])
    print("\nSorted\n")
    print(sorted_json)

report_generation(json_file)

Filters completed transactions :

[{'transaction_id': 'T123456', 'timestamp': '2024-01-15T14:30:00', 'customer': {'id': 'CUS123', 'region': 'North', 'segment': 'Premium', 'join_date': '2023-01-01'}, 'items': [{'product_id': 'P789', 'category': 'Electronics', 'subcategory': 'Smartphones', 'price': 499.99, 'quantity': 1, 'discount': 0.0}, {'product_id': 'P456', 'category': 'Accessories', 'subcategory': 'Phone Cases', 'price': 29.99, 'quantity': 2, 'discount': 5.0}], 'payment_method': 'credit_card', 'status': 'completed', 'shipping': {'method': 'express', 'cost': 15.99, 'address': {'city': 'Boston', 'state': 'MA', 'country': 'USA'}}}, {'transaction_id': 'T123457', 'timestamp': '2024-01-15T14:45:00', 'customer': {'id': 'CUS456', 'region': 'South', 'segment': 'Standard', 'join_date': '2023-06-15'}, 'items': [{'product_id': 'P234', 'category': 'Books', 'subcategory': 'Fiction', 'price': 24.99, 'quantity': 3, 'discount': 10.0}], 'payment_method': 'paypal', 'status': 'completed', 'shipping': {

In [29]:
import pandas as pd
def flatten_data(json_file):
    flattened_data = []
    for transaction in json_file:
        first_data = {
            "transaction_id": transaction["transaction_id"],
            "timestamp": transaction["timestamp"],
            "customer_id": transaction["customer"]["id"],
            "customer_region": transaction["customer"]["region"],
            "customer_segment": transaction["customer"]["segment"],
            "customer_join_date": transaction["customer"]["join_date"],
            "payment_method": transaction["payment_method"],
            "status": transaction["status"],
            "shipping_method": transaction["shipping"]["method"],
            "shipping_cost": transaction["shipping"]["cost"],
            "shipping_city": transaction["shipping"]["address"]["city"],
            "shipping_state": transaction["shipping"]["address"]["state"],
            "shipping_country": transaction["shipping"]["address"]["country"],
        }
        for item in transaction['items']:
            flattened_data.append({
                            **first_data, 
                            "product_id": item["product_id"],
                            "category": item["category"],
                            "subcategory": item["subcategory"],
                            "price": item["price"],
                            "quantity": item["quantity"],
                            "discount": item["discount"],})
    return pd.DataFrame(flattened_data)

flattened_df = flatten_data(json_file)
flattened_df

Unnamed: 0,transaction_id,timestamp,customer_id,customer_region,customer_segment,customer_join_date,payment_method,status,shipping_method,shipping_cost,shipping_city,shipping_state,shipping_country,product_id,category,subcategory,price,quantity,discount
0,T123456,2024-01-15T14:30:00,CUS123,North,Premium,2023-01-01,credit_card,completed,express,15.99,Boston,MA,USA,P789,Electronics,Smartphones,499.99,1,0.0
1,T123456,2024-01-15T14:30:00,CUS123,North,Premium,2023-01-01,credit_card,completed,express,15.99,Boston,MA,USA,P456,Accessories,Phone Cases,29.99,2,5.0
2,T123457,2024-01-15T14:45:00,CUS456,South,Standard,2023-06-15,paypal,completed,standard,5.99,Miami,FL,USA,P234,Books,Fiction,24.99,3,10.0
3,T123458,2024-01-15T15:00:00,CUS789,West,Premium,2022-03-20,credit_card,pending,express,25.99,Los Angeles,CA,USA,P789,Electronics,Smartphones,499.99,1,50.0
4,T123458,2024-01-15T15:00:00,CUS789,West,Premium,2022-03-20,credit_card,pending,express,25.99,Los Angeles,CA,USA,P555,Electronics,Tablets,299.99,1,0.0
5,T123458,2024-01-15T15:00:00,CUS789,West,Premium,2022-03-20,credit_card,pending,express,25.99,Los Angeles,CA,USA,P777,Electronics,Headphones,199.99,2,25.0
6,T123459,2024-01-15T15:15:00,CUS101,East,Standard,2024-01-01,debit_card,completed,standard,7.99,New York,NY,USA,P333,Clothing,Shirts,39.99,2,15.0
7,T123459,2024-01-15T15:15:00,CUS101,East,Standard,2024-01-01,debit_card,completed,standard,7.99,New York,NY,USA,P444,Clothing,Pants,59.99,1,0.0
8,T123460,2024-01-15T15:30:00,CUS202,North,Premium,2023-08-15,gift_card,cancelled,standard,9.99,Chicago,IL,USA,P888,Home,Kitchen,199.99,1,20.0
