# Analyze JSON
Author: Edward Schmuhl

---

This notebook shows and visualizes the results of the analysis.

In [3]:
import os
import json

# Location of the analyzed dataset. Despite the "dir" in the name, this is the
# path of a single JSON file.
json_dir = './data/iherb_data/iherb_dataset_analyzed.json'
product_json = []

# Read the raw bytes and let the json module decode and parse them.
with open(json_dir, 'rb') as dataset_file:
    product_json = json.loads(dataset_file.read())

# Display the number of top-level entries that were loaded.
len(product_json)

1214

In [4]:
# Remove duplicate reviews (same "id") from every product, keeping the first
# occurrence of each id in its original position, and count how many reviews
# remain across all products.
review_counter = 0

for product_entry in product_json.values():
    first_review_by_id = {}
    for review_entry in product_entry["reviews"]:
        # setdefault stores the review only if its id has not been seen yet.
        first_review_by_id.setdefault(review_entry["id"], review_entry)

    deduplicated_reviews = list(first_review_by_id.values())
    product_entry["reviews"] = deduplicated_reviews
    review_counter += len(deduplicated_reviews)

print(review_counter)
        

184791


In [5]:
# Sum the condition counts of all reviews of a product into one table per
# product. Condition names are lower-cased first, so names that differ only in
# case share one entry. The table is stored on the product under "condition".
for product_entry in product_json.values():
    condition_info = {}

    for review_entry in product_entry["reviews"]:
        for raw_name, review_counts in review_entry["condition"].items():
            totals = condition_info.setdefault(
                raw_name.lower(),
                {"IMPROVED": 0, "WORSEN": 0, "NEUTRAL": 0},
            )
            for outcome in ("IMPROVED", "WORSEN", "NEUTRAL"):
                totals[outcome] += review_counts[outcome]

    product_entry["condition"] = condition_info
   

# Condition Analysis

In [4]:
# Combine the per-product condition tables into one table covering the whole
# dataset, and keep a running total of every count that was added.
condition_dict = {}
condition_occurence_total = 0

for product_entry in product_json.values():
    for condition_name, product_counts in product_entry["condition"].items():
        totals = condition_dict.setdefault(
            condition_name,
            {"IMPROVED": 0, "WORSEN": 0, "NEUTRAL": 0},
        )
        for outcome in ("IMPROVED", "WORSEN", "NEUTRAL"):
            amount = product_counts[outcome]
            totals[outcome] += amount
            condition_occurence_total += amount

print(f"Unique Conditions found: {len(condition_dict)} | Total Occurence: {condition_occurence_total}")
        

Unique Conditions found: 7200 | Total Occurence: 51369


In [6]:
# Rank every condition twice: by its "IMPROVED" count and by its "WORSEN"
# count, highest first. Each ranking is a list of (condition, count) tuples;
# ties keep the order in which the conditions appear in condition_dict.
top_improved_condition = sorted(
    ((name, counts["IMPROVED"]) for name, counts in condition_dict.items()),
    key=lambda pair: pair[1],
    reverse=True,
)
top_worsen_condition = sorted(
    ((name, counts["WORSEN"]) for name, counts in condition_dict.items()),
    key=lambda pair: pair[1],
    reverse=True,
)

# Number of entries printed from the head of each ranking.
show_top = 15

print(f"Top {show_top} improved Conditions: ")
print(top_improved_condition[:show_top])
print("---------------------------------------")
print(f"Top {show_top} worsen Conditions: ")
print(top_worsen_condition[:show_top])

Top 15 improved Conditions: 
[('skin', 2879), ('sleep', 2470), ('anxiety', 585), ('candida', 572), ('inflammation', 445), ('depression', 403), ('acne', 396), ('colds', 388), ('thyroid', 386), ('blood pressure', 379), ('constipation', 339), ('allergies', 328), ('arthritis', 296), ('ibs', 287), ('joint pain', 286)]
---------------------------------------
Top 15 worsen Conditions: 
[('skin', 191), ('sleep', 147), ('diarrhea', 99), ('nausea', 92), ('constipation', 91), ('headache', 86), ('headaches', 71), ('heartburn', 66), ('nauseous', 62), ('acne', 59), ('stomach upset', 50), ('bloating', 47), ('insomnia', 47), ('anxiety', 34), ('libido', 32)]


# Product Analysis

In [7]:
# For every product, add up the "IMPROVED" and the "WORSEN" counts over all of
# its conditions, then rank the products by each total (highest first).
best_products = []
worst_products = []

for product_entry in product_json.values():
    per_condition_counts = product_entry["condition"].values()
    improved_total = sum(counts["IMPROVED"] for counts in per_condition_counts)
    worsened_total = sum(counts["WORSEN"] for counts in per_condition_counts)
    best_products.append((product_entry["name"], improved_total))
    worst_products.append((product_entry["name"], worsened_total))

# Stable in-place sorts: products with equal totals keep their dataset order.
best_products.sort(key=lambda pair: pair[1], reverse=True)
worst_products.sort(key=lambda pair: pair[1], reverse=True)

print(f"Top {show_top} Products with improved conditions: ")
print(best_products[:show_top])
print("---------------------------------------")
print(f"Top {show_top} Products with worsen conditions: ")
print(worst_products[:show_top])

Top 15 Products with improved conditions: 
[('Collagen Hydrolysate, Collagen Joint Care, 16 oz (454 g)', 229), ('Candida Support, 90 Veg Capsules', 212), ('Hyaluronic Acid + Chondroitin Sulfate, 60 Veggie Caps', 198), ('Thyroid Energy, 90 Veg Capsules', 197), ('5-HTP, 100 mg, 120 Veg Capsules', 186), ('Super Collagen, Type 1 & 3, 6,000 mg, 7 oz (198 g)', 179), ('Super Primrose, Evening Primrose Oil, 1300 mg, 120 Softgels', 171), ('Melatonin, 3 mg, 180 Capsules', 169), ('Melatonin, Time Release, Extra Strength, 5 mg, 100 Tablets', 168), ('Quercetin with Bromelain, 120 Veg Capsules', 166), ('Super Collagen + C, Type 1 & 3, 6,000 mg, 250 Tablets', 164), ('Probiotics, Original Formula, 60 Capsules', 150), ('Serrapeptase, 40,000 SPU, 90 Veggie Caps', 145), ('Natural Calm, The Anti-Stress Drink, Original (Unflavored), 16 oz (453 g)', 137), ('5-HTP, 100 mg, 60 Veggie Caps', 129)]
---------------------------------------
Top 15 Products with worsen conditions: 
[('High Absorption Magnesium 100%

# Product & Condition Analysis

In [8]:
# Build, for every condition, a list of (product id, score) tuples sorted by
# score, best first.
supplement_for_condition = {}

for product_id, product_entry in product_json.items():
    for condition_name, counts in product_entry["condition"].items():
        # Score: improvements minus worsenings, plus 0.1 for each neutral mention.
        value = counts["IMPROVED"] - counts["WORSEN"] + counts["NEUTRAL"] * 0.1
        supplement_for_condition.setdefault(condition_name, []).append((product_id, value))

# Stable in-place sort: products with equal scores keep their dataset order.
for ranking in supplement_for_condition.values():
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    
    

In [26]:
# Print every known condition whose name contains the search term, together
# with its dataset-wide counts.
find_condition = "stomach"

for condition_name, totals in condition_dict.items():
    if find_condition not in condition_name:
        continue
    print(f"{condition_name} \n {totals} \n")

upset stomach 
 {'IMPROVED': 23, 'WORSEN': 26, 'NEUTRAL': 71} 

stomachache 
 {'IMPROVED': 4, 'WORSEN': 6, 'NEUTRAL': 17} 

stomach problem 
 {'IMPROVED': 5, 'WORSEN': 2, 'NEUTRAL': 7} 

stomach ache 
 {'IMPROVED': 6, 'WORSEN': 31, 'NEUTRAL': 38} 

stomach problems 
 {'IMPROVED': 47, 'WORSEN': 20, 'NEUTRAL': 57} 

stomach flu 
 {'IMPROVED': 2, 'WORSEN': 0, 'NEUTRAL': 13} 

stomach problems bladder issues 
 {'IMPROVED': 0, 'WORSEN': 0, 'NEUTRAL': 1} 

stomach discomfort 
 {'IMPROVED': 11, 'WORSEN': 7, 'NEUTRAL': 13} 

stomach pain pain 
 {'IMPROVED': 0, 'WORSEN': 1, 'NEUTRAL': 2} 

stomach pain 
 {'IMPROVED': 17, 'WORSEN': 13, 'NEUTRAL': 47} 

stomach issues 
 {'IMPROVED': 38, 'WORSEN': 14, 'NEUTRAL': 61} 

stomach - 
 {'IMPROVED': 1, 'WORSEN': 0, 'NEUTRAL': 3} 

stomach digestive issues 
 {'IMPROVED': 2, 'WORSEN': 0, 'NEUTRAL': 0} 

stomach upset 
 {'IMPROVED': 36, 'WORSEN': 50, 'NEUTRAL': 78} 

stomach pains 
 {'IMPROVED': 8, 'WORSEN': 9, 'NEUTRAL': 17} 

stomachaches 
 {'IMPROVED': 3

In [28]:
# Print the highest-scoring products for one exact condition name.
get_top = 10
get_condition = "stomach issues"

print(f"Best {get_top} Products for {get_condition} ")

# The ranking is already sorted best-first, so the head of the list is the top.
ranked_products = supplement_for_condition[get_condition][:get_top]
for rank, (product_id, score) in enumerate(ranked_products, start=1):
    print(f"({rank}) {product_json[product_id]['name']} | Product ID: {product_id} | Score: {score} ")

Best 10 Products for stomach issues 
(1) PB8 With Lactobacillus & Bifidobacterium, 120 Veggie Caps | Product ID: 5930 | Score: 3.3 
(2) Slippery Elm, 400 mg, 100 Capsules | Product ID: 796 | Score: 3.2 
(3) PepZin GI, Zinc-L-Carnosine Complex, 120 Veggie Caps | Product ID: 2467 | Score: 2.1 
(4) Primadophilus, Kids, Orange, 3 Billion CFU, 30 Chewable Tablets | Product ID: 18537 | Score: 1.2 
(5) Super Enzymes, 180 Capsules | Product ID: 857 | Score: 1.2 
(6) Digest Gold, with ATPro, 180 Capsules | Product ID: 7056 | Score: 1.1 
(7) Ginger Root, 550 mg, 100 Veg Capsules | Product ID: 592 | Score: 1.1 
(8) LactoBif Probiotics, 5 Billion CFU, 60 Veggie Capsules | Product ID: 64006 | Score: 1.1 
(9) Mastic Gum, 120 Vegetarian Capsules | Product ID: 3450 | Score: 1.1 
(10) 20+ Bio Active Manuka Honey, 1.1 lb (500 g) | Product ID: 59963 | Score: 1.0 


In [337]:
# Show the conditions recorded for one product, followed by each of its
# reviews (numbered from 1) and the conditions stored on that review.
get_reviews_from = "2162"

get_reviews_from_product = product_json[get_reviews_from]

print(get_reviews_from_product["condition"].keys())

for position, review_entry in enumerate(get_reviews_from_product["reviews"], start=1):
    print("\n")
    print(f"{position} : {review_entry['text']}")
    print(review_entry["condition"])

dict_keys(['reflux', 'indigestion', 'stomach pain', 'stomach upset', 'ulcer', 'gastritis', 'stmptoms', 'heartburn', 'blood pressure', 'stomach aches', 'digestive issues', 'gastric problem', 'nauseous', 'stomach disorders', 'diarrhea', 'gastritis stomach', 'upset stomach', 'stomach ulcer ulcer', 'colon ulcer', 'stomach ills pressure ills pressure', 'blood pressure ills pressure', 'stomach ills pressure', 'oesophagitis', 'reflux pressure', 'reflux problems', 'reflux symptoms', 'stomach stomach', 'inflammation', 'stomach problems digestive issues', 'ulcers digestive issues', 'stomach digestive issues', 'heartburn problem', 'stomach distress', 'stomach issues pressure', 'gerd symptoms hernia', 'gerd hernia', 'hiatal hernia', 'hernia', 'stomach issues issues', 'stomach issues', 'bloating', 'sleep', 'stomach demons', 'gerd', 'ulcers'])


1 : DGL. Really good that you stock this product as most OS online stores won’t ship these products to Aust. Certainly helps with  maintaining your gut heal