In [1]:
import json

In [2]:
#Load the data 
def load_data(filename):
    """Read a JSON file and return its parsed content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (a list, dict, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode as UTF-8 explicitly: without `encoding`, open() falls back to the
    # platform locale, which can garble or reject non-ASCII text in the file.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

In [6]:
# Load the raw review records, then show the payload followed by its container type.
data = load_data("store_data.json")
print(data, type(data), sep="\n")

[{'name': 'Alice', 'rating': '5', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': '4', 'feedback': 'Good quality, worth the price.', 'age': '30'}, {'name': 'Charlie', 'rating': 'three', 'feedback': 'Average experience, could be better.'}, {'name': 'Alice', 'rating': '3.5', 'feedback': 'Loved it! Will buy again.', 'age': '25'}, {'name': 'Diana', 'rating': 'two', 'feedback': 'Not satisfied with the product.', 'age': '28'}]
<class 'list'>


In [24]:
#Clean and structure the data
def clean_data(data):
    """Normalise ratings, fill in missing ages and drop repeat reviewers.

    The input records are not modified; every returned record is a new dict.

    Args:
        data: Iterable of review dicts. Each must have a string ``"name"``;
            ``"rating"`` and ``"age"`` are optional.

    Returns:
        A new list holding one record per distinct (whitespace-stripped) name,
        keeping the first occurrence. In each record:

        * ``"name"`` is stripped of surrounding whitespace,
        * ``"rating"`` is a ``float`` (number words "one".."five" and numeric
          strings are both converted), or ``None`` when it is missing or
          cannot be interpreted as a number,
        * ``"age"`` is present, set to ``None`` when the input had none.

    Raises:
        KeyError: If a record has no ``"name"`` key.
    """
    text_to_num = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}
    cleaned_data = []
    seen_names = set()
    for user in data:
        name = user["name"].strip()

        # Deduplication: only the first record for a given name is kept.
        if name in seen_names:
            continue
        seen_names.add(name)

        # Work on a shallow copy so the caller's records stay untouched.
        record = dict(user)
        record["name"] = name

        # Rating consistency: always store a float (or None), never a mix of
        # strings and ints.
        raw_rating = record.get("rating")
        token = "" if raw_rating is None else str(raw_rating).strip().lower()
        if token in text_to_num:
            rating = float(text_to_num[token])
        else:
            try:
                rating = float(token)
            except ValueError:
                rating = None  # missing or not a recognisable number
        record["rating"] = rating

        # Missing values: make sure every record exposes an "age" key.
        record.setdefault("age", None)

        cleaned_data.append(record)
    return cleaned_data

In [40]:
# Rebind `data` to the list returned by clean_data; the cells below read `data`.
data = clean_data(data)

In [34]:
#Get meaningful insights from data
def get_insights(data, poor_threshold=3):
    """Print the average rating and the share of users with a poor rating.

    A rating that is ``None`` or a blank string counts as missing: it is left
    out of the average and is never counted as poor, but the user still counts
    towards the percentage denominator (all users in ``data``).

    Args:
        data: Sized collection (e.g. list) of user dicts with an optional
            ``"rating"`` that ``float()`` can convert.
        poor_threshold: Ratings strictly below this value count as poor.

    Returns:
        None. The two summary lines are written to stdout.

    Raises:
        ValueError: If a non-blank rating cannot be converted by ``float()``.
    """
    # Collect the usable ratings once so both statistics share one definition
    # of "missing" (previously only the average skipped empty ratings).
    ratings = []
    for user in data:
        raw = user.get("rating")
        if raw is None or (isinstance(raw, str) and not raw.strip()):
            continue
        ratings.append(float(raw))

    # Average rating; guarded so an empty selection no longer divides by zero.
    if ratings:
        avrg_rating = sum(ratings) / len(ratings)
        print(f"average rating = {avrg_rating}")
    else:
        print("average rating = n/a (no ratings)")

    # % of users with poor ratings, relative to all users in `data`.
    poor_ratings = sum(1 for rating in ratings if rating < poor_threshold)
    if len(data):
        print(f" % of user with poor rating = {poor_ratings/len(data)*100}%")
    else:
        print(" % of user with poor rating = n/a (no users)")

In [35]:
# Print the rating summary for `data`.
# NOTE(review): the recorded output below (20.0%) looks like it came from a
# 5-record state of `data`; this cell's execution count (35) is earlier than the
# clean_data cell's (40). Restart & Run All to confirm the figures.
get_insights(data)

average rating = 3.5
 % of user with poor rating = 20.0%


In [38]:
# Recommendation feature
def get_recommendations(data, min_rating=4, preferred_brand="Apple", fallback_brand="Samsung"):
    """Build one brand recommendation per user from their rating.

    Args:
        data: Iterable of user dicts with a ``"name"`` and an optional
            ``"rating"`` that ``float()`` can convert.
        min_rating: Ratings greater than or equal to this get
            ``preferred_brand``.
        preferred_brand: Brand recommended to users at or above ``min_rating``.
        fallback_brand: Brand recommended to everyone else, including users
            whose rating is ``None`` or blank.

    Returns:
        A list of ``{"name": ..., "brand": ...}`` dicts, in input order.

    Raises:
        KeyError: If a user dict has no ``"name"``.
        ValueError: If a non-blank rating cannot be converted by ``float()``.
    """
    recommendations = []
    for user in data:
        rating = user.get("rating")
        # A missing rating cannot meet the threshold, so it takes the fallback
        # branch instead of crashing inside float().
        if rating is None or (isinstance(rating, str) and not rating.strip()):
            meets_threshold = False
        else:
            meets_threshold = float(rating) >= min_rating

        recommendations.append({
            "name": user["name"],
            "brand": preferred_brand if meets_threshold else fallback_brand,
        })
    return recommendations

In [41]:
# Bare expression: the returned recommendation list is shown as the cell output.
get_recommendations(data)

[{'name': 'Alice', 'brand': 'Apple'},
 {'name': 'Bob', 'brand': 'Apple'},
 {'name': 'Charlie', 'brand': 'Samsung'},
 {'name': 'Diana', 'brand': 'Samsung'}]