In [1]:
import json


In [4]:
# load the data 
def load_data(filename):
    """Read a JSON file and return its parsed contents.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (list, dict, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Decode explicitly as UTF-8 rather than the platform default so that
    # non-ASCII text loads identically on every operating system.
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)
    

In [5]:
# Load the raw feedback records from disk, then show them and their container type.
data = load_data("store_data.json")
print(data)
print(type(data))

[{'name': 'Alice', 'rating': '5', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': 'four', 'feedback': 'ok but late delivery', 'age': '30'}, {'name': 'Charlie', 'rating': 'two', 'feedback': 'BAD EXPERIENCE'}, {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}, {'name': 'Alice', 'rating': '5', 'feedback': 'Great product again!', 'age': '25'}]
<class 'list'>


In [23]:
# clean and structure the data
def clean_data(data):
    """Normalise ratings, fill in missing ages and drop repeated users.

    The input records are not modified; cleaned copies are returned.

    Args:
        data: Iterable of dicts. Each must contain a ``"name"`` string and a
            ``"rating"`` value; ``"age"`` is optional.

    Returns:
        A new list of new dicts, in input order, keeping only the first
        record seen for each name (names are compared with surrounding
        whitespace removed). In every kept record:

        * ``"rating"`` is an ``int`` when the original value spelled out a
          number from "one" to "five" (case-insensitive); otherwise it is
          the original value converted to ``str``, stripped and lower-cased.
        * ``"age"`` is always present, and is ``None`` when it was absent.

    Raises:
        KeyError: If a record lacks ``"name"`` or ``"rating"``.
    """
    text_to_num = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}
    cleaned_data = []
    seen_names = set()
    for user in data:
        # Work on a shallow copy so the caller's records stay untouched,
        # even for entries that end up being discarded as duplicates.
        record = dict(user)

        # Clean the rating: spelled-out numbers become ints, anything else
        # keeps its normalised string form.
        rating = str(record["rating"]).strip().lower()
        record["rating"] = text_to_num.get(rating, rating)

        # Handle missing values: make sure every record has an "age" key.
        record.setdefault("age", None)

        # Deduplication: test and store the SAME normalised key, otherwise
        # names with stray whitespace are never recognised as repeats.
        name_key = record["name"].strip()
        if name_key in seen_names:
            continue
        seen_names.add(name_key)
        cleaned_data.append(record)

    return cleaned_data
            

        


        
        


In [24]:
# Clean the raw records (normalised ratings, filled-in ages, duplicates dropped) and show the result.
cleaned_data = clean_data(data)
print(f"cleaned data = {cleaned_data}")

cleaned data = [{'name': 'Alice', 'rating': '5', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': '4', 'feedback': 'ok but late delivery', 'age': '30'}, {'name': 'Charlie', 'rating': '2', 'feedback': 'BAD EXPERIENCE', 'age': None}, {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}]


In [35]:
# to get meaningful insights
def get_insights(data):
    """Print the average rating and the percentage of poor ratings.

    A rating counts as "poor" when its numeric value is below 3.

    Args:
        data: Sequence of dicts whose ``"rating"`` value can be converted
            with ``float()``.

    Returns:
        None. The two summary lines are written to standard output. For an
        empty ``data`` a short notice is printed instead, because neither
        figure is defined without any records.

    Raises:
        KeyError: If a record has no ``"rating"`` key.
        ValueError, TypeError: If a rating cannot be converted by ``float()``.
    """
    # Guard against division by zero when there is nothing to summarise.
    if not data:
        print("no ratings to analyse")
        return

    total_rating = 0
    total_count_poor_rating = 0
    # Single pass: accumulate the sum and count the poor ratings together.
    for user in data:
        rating = float(user['rating'])
        total_rating += rating
        if rating < 3:
            total_count_poor_rating += 1

    avg_rating = total_rating / len(data)
    poor_rating_percntg = total_count_poor_rating / len(data) * 100

    print(f" average rating =  {avg_rating}")
    print(f"% of user with  poor rating =  {poor_rating_percntg}%")
    
        

In [36]:
# Print the average rating and the share of poor (< 3) ratings for the cleaned records.
get_insights(cleaned_data)

 average rating =  3.9
% of user with  poor rating =  20.0%


In [43]:
# Build Recommendation Feature

def get_recommendation(data):
    """Pick a brand for every user based on the rating they gave.

    Args:
        data: Iterable of dicts with a ``"name"`` key and a ``"rating"``
            value that ``float()`` can convert.

    Returns:
        A list, in input order, of ``{"name": ..., "brand": ...}`` dicts.
        The brand is "Apple" for ratings of 4 or higher and "Samsung"
        for anything lower.
    """
    return [
        {
            "name": entry["name"],
            "brand": "Apple" if float(entry["rating"]) >= 4 else "Samsung",
        }
        for entry in data
    ]
            

In [45]:
# Build one brand recommendation per cleaned user record and show the list.
recommendations = get_recommendation(cleaned_data)
print(recommendations)

[{'name': 'Alice', 'brand': 'Apple'}, {'name': 'Bob', 'brand': 'Apple'}, {'name': 'Charlie', 'brand': 'Samsung'}, {'name': 'Diana', 'brand': 'Apple'}, {'name': 'Eve', 'brand': 'Samsung'}]
