In [1]:
import json
import ijson
import time

from tqdm import tqdm

def clean_fittings(original_path):
    """Split users into two groups based on the keys of their fitting content.

    Reads the ``RECORDS`` array of the JSON file at ``original_path``. Every
    record is expected to provide a ``user_id`` and a ``fitting_content``
    field, where ``fitting_content`` is itself a JSON-encoded string. When a
    user id occurs more than once, only the last record in the file is kept.

    Args:
        original_path: Path to the fittings JSON file.

    Returns:
        A tuple ``(user_type_1, user_type_2)`` of sets of user ids (as
        strings): users whose fitting content contains a
        ``"ParametersDouble0"`` key, and all remaining users.
    """
    user_type_1 = set()
    user_type_2 = set()

    # ijson expects a binary file object; passing a text-mode handle is
    # deprecated there and triggers a warning.
    with open(original_path, "rb") as file:
        print("Reading data...")
        # Feed the ijson stream straight into the dict; wrapping it in list()
        # first only built a throwaway copy of every record.
        fittings = {
            str(fitting["user_id"]): fitting
            for fitting in ijson.items(file, "RECORDS.item")
        }

    # The file is no longer needed once every record has been parsed.
    for user_id, fitting in tqdm(
        fittings.items(),
        desc="Processing",
        colour="green",
    ):
        fitting_content = json.loads(fitting["fitting_content"])
        if "ParametersDouble0" in fitting_content:
            user_type_1.add(user_id)
        else:
            user_type_2.add(user_id)
    return user_type_1, user_type_2

def load_json(file_path):
    """Read the JSON document stored at ``file_path`` and return the parsed object.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)

In [2]:
# Locations of the aids JSON and of the raw / cleaned fittings files.
cleaned_aids_path = "../cleaned_data/cleaned_aids.json"
original_fittings_path = "../data/fittings.json"
cleaned_fittings_path = "../cleaned_data/cleaned_fittings.json"

# Index the aids by user id. Keys are normalised to str because the fittings
# are keyed by str(user_id) and later lookups use str(user_id); a raw int key
# would never match those lookups.
aids = {str(aid["user_id"]): aid for aid in load_json(cleaned_aids_path)}

user_type_1, user_type_2 = clean_fittings(original_fittings_path)

Reading data...


Processing: 100%|[32m██████████████████████████████████████████████████████████████████████████████████████████████████████████[0m| 18621/18621 [00:04<00:00, 4546.19it/s][0m


In [3]:
# Sanity check: print every user id that landed in both groups.
# Both groups are sets, so one pass with a membership test replaces comparing
# every id of one group against every id of the other.
for user_id_1 in user_type_1:
    if user_id_1 in user_type_2:
        print(user_id_1)

In [4]:
# Size of each group: (users whose fitting has "ParametersDouble0", all other users).
len(user_type_1), len(user_type_2)

(10760, 7861)

In [10]:
import csv
import re
def _collect_product_names(user_ids, aids):
    """Return the set of product names of the users in ``user_ids`` found in ``aids``."""
    return {
        aids[str(user_id)]["product_name"]
        for user_id in user_ids
        if str(user_id) in aids
    }


def _write_product_csv(products, path):
    """Write ``products`` to ``path`` as ``id,product_name`` rows with ids starting at 1."""
    with open(path, "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["id", "product_name"])
        # Sets have no stable iteration order, so sort to make the assigned
        # ids reproducible between runs. key=str keeps the sort from failing
        # if a product name is missing (None).
        for i, product in enumerate(sorted(products, key=str), start=1):
            writer.writerow([i, product])


def count_type(
    user_type_1, user_type_2, aids, product_type_1_path, product_type_2_path
):
    """Collect the distinct product names used by each user group and save them.

    Args:
        user_type_1: Iterable of user ids belonging to the first group.
        user_type_2: Iterable of user ids belonging to the second group.
        aids: Mapping from user id (as str) to a record with a
            ``"product_name"`` entry. Users without an entry are skipped.
        product_type_1_path: CSV file to write for the first group.
        product_type_2_path: CSV file to write for the second group.

    Returns:
        A tuple ``(product_type_1, product_type_2)`` of sets of product names.
    """
    product_type_1 = _collect_product_names(user_type_1, aids)
    _write_product_csv(product_type_1, product_type_1_path)

    product_type_2 = _collect_product_names(user_type_2, aids)
    _write_product_csv(product_type_2, product_type_2_path)
    return product_type_1, product_type_2

In [11]:
# Destination CSV files for the product names of each user group.
product_type_1_path = "../cleaned_data/product_type_1.csv"
product_type_2_path = "../cleaned_data/product_type_2.csv"

product_type_1, product_type_2 = count_type(
    user_type_1=user_type_1,
    user_type_2=user_type_2,
    aids=aids,
    product_type_1_path=product_type_1_path,
    product_type_2_path=product_type_2_path,
)

In [12]:
# Print every product name that is used by both user groups.
# Both collections are sets, so a membership test per product replaces the
# pairwise comparison of every product against every other product.
for product_1 in product_type_1:
    if product_1 in product_type_2:
        print(product_1)

CIC Charm E330
ITC Linear 12
ITC Linear 9B
CIC Enjoyment 7
CIC Linear 8B
HS Charm E330
MINI CIC Sequel 15
CIC Charm B230
CIC Charm G630
HS Linear 12
CIC Enjoyment 8
ITC Linear 7B
CIC Linear 15
HS Linear 9B
ITC Linear 11
MINI CIC Sequel 12
CIC Fascinating 3300
MINI CIC Sequel 13
CIC Charm K830
CIC Linear 10B
HS Laverock 10
HS Linear 15
CIC Skylark 530


In [5]:
import ijson
import json
import csv
import random

def count_fitting_params(user_ids, file_path, write_path):
    """Return the flattened fitting parameters whose values differ between two users.

    Args:
        user_ids: Sequence whose first two items are the user ids (as
            strings) to compare.
        file_path: Path to the fittings JSON file with a top-level
            ``RECORDS`` array.
        write_path: Not used by this function; kept so existing callers keep
            working.

    Returns:
        The list produced by ``compare_dicts`` for the two users' flattened
        fitting content.

    Raises:
        KeyError: If one of the two user ids has no record in the file.
        IndexError: If ``user_ids`` holds fewer than two items.
    """
    wanted = set(user_ids[:2])
    fittings = {}
    # ijson expects a binary file object; passing a text-mode handle is
    # deprecated there and triggers a warning.
    with open(file_path, "rb") as file:
        # Keep only the records of the two requested users instead of holding
        # every record in memory. Later records overwrite earlier ones, so the
        # last record of a user wins.
        for fitting in ijson.items(file, "RECORDS.item"):
            user_id = str(fitting["user_id"])
            if user_id in wanted:
                fittings[user_id] = fitting

    user_a = extract_fitting_dict(json.loads(fittings[user_ids[0]]["fitting_content"]))
    user_b = extract_fitting_dict(json.loads(fittings[user_ids[1]]["fitting_content"]))
    return compare_dicts(user_a, user_b)
        
def compare_dicts(dict1, dict2):
    """Return the keys shared by both dicts whose values are not equal.

    Keys are reported in the iteration order of ``dict1``. Keys that exist in
    only one of the two dicts are ignored.
    """
    return [
        name
        for name, left_value in dict1.items()
        if name in dict2 and left_value != dict2[name]
    ]

def extract_fitting_dict(fitting):
    """Flatten a two-level mapping into one dict keyed by ``"<outer>_<inner>"``.

    Every value of ``fitting`` must itself provide ``.items()``.
    """
    return {
        f"{section}_{name}": setting
        for section, params in fitting.items()
        for name, setting in params.items()
    }


def load_random_users(file_path):
    """Pick two random rows from a CSV file and return their first column.

    Args:
        file_path: Path to a CSV file whose first column holds a user id.

    Returns:
        A list with the two sampled user ids as strings. The list is also
        printed.

    Raises:
        ValueError: If the file has fewer than two non-empty rows.
    """
    with open(file_path, "r", newline='') as csvfile:
        # csv.reader yields an empty list for a blank line; drop those rows so
        # a sampled row always has a first column.
        # NOTE(review): if the file starts with a header row, that row can be
        # sampled as well - confirm the file has no header.
        total_user_ids = [row for row in csv.reader(csvfile) if row]

    random_users = random.sample(total_user_ids, 2)
    user_ids = [str(user[0]) for user in random_users]
    print(user_ids)
    return user_ids


def load_json(file_path):
    """Read the JSON document stored at ``file_path`` and return the parsed object.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.

    NOTE(review): a function with this name is also defined in an earlier
    cell of this notebook; this definition shadows it once the cell runs.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)

In [30]:
# Inputs: the raw fittings file and a CSV of user ids (presumably the type-2
# group, going by its name). write_path is only handed to count_fitting_params.
fitting_path = "../data/fittings.json"
user_type_2_path = "../cleaned_data/user_type_2.csv"
write_path = "../cleaned_data/test.json"

# Draw two random users, then list the parameters on which their fittings differ.
# NOTE(review): no random seed is set in this cell, so the sampled pair
# presumably changes on every run.
user_ids = load_random_users(user_type_2_path)
important_params = count_fitting_params(
    user_ids=user_ids,
    file_path=fitting_path,
    write_path=write_path,
)

['1093561', '1088405']


In [31]:
# Number of parameters whose values differ between the two sampled users.
len(important_params)

74

In [32]:
# Show every differing parameter name on its own line; print nothing when
# there are none.
if important_params:
    print(*important_params, sep="\n")

Parameters0_BEQ10_gain
Parameters0_BEQ11_gain
Parameters0_BEQ12_gain
Parameters0_BEQ1_gain
Parameters0_BEQ2_gain
Parameters0_BEQ3_gain
Parameters0_BEQ4_gain
Parameters0_BEQ7_gain
Parameters0_BEQ8_gain
Parameters0_BEQ9_gain
Parameters0_C1_MPO
Parameters0_C1_Ratio
Parameters0_C1_TK
Parameters0_C2_MPO
Parameters0_C2_Ratio
Parameters0_C2_TK
Parameters0_C3_MPO
Parameters0_C3_Ratio
Parameters0_C3_TK
Parameters0_C4_MPO
Parameters0_C4_Ratio
Parameters0_C4_TK
Parameters0_C5_MPO
Parameters0_C5_Ratio
Parameters0_C5_TK
Parameters0_C6_MPO
Parameters0_C6_Ratio
Parameters0_C6_TK
Parameters0_FBC_Enable
Parameters0_matrix_gain
Parameters1_BEQ10_gain
Parameters1_BEQ11_gain
Parameters1_BEQ1_gain
Parameters1_BEQ2_gain
Parameters1_BEQ3_gain
Parameters1_BEQ4_gain
Parameters1_BEQ8_gain
Parameters1_BEQ9_gain
Parameters1_C1_MPO
Parameters1_C1_Ratio
Parameters1_C1_TK
Parameters1_C2_MPO
Parameters1_C2_Ratio
Parameters1_C2_TK
Parameters1_C3_MPO
Parameters1_C3_Ratio
Parameters1_C3_TK
Parameters1_C4_MPO
Parameters1