In [9]:
import json

In [27]:
# Load the data
def load_data(filename):
    with open(filename, "r") as f:
        data = json.load(f)
    return data

In [28]:
data = load_data("store_data.json")
print(data)
print(type(data))

[{'name': 'Alice', 'rating': '5 ', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': 'four', 'feedback': 'ok but late Delivery', 'age': '30'}, {'name': 'Charlie', 'rating': 'two', 'feedback': 'BAD EXPERIENCE'}, {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}, {'name': 'Alice', 'rating': '5', 'feedback': 'Great product again!', 'age': '25'}]
<class 'list'>


In [41]:
# ASSIGNMENT 1 - Clean & Structure Data

In [14]:
# Problems with the data:
# data is inconsistent - mixed type (rating)
# missing data (age)
# duplicate data (name) - assuming name is a unique value

In [39]:
# Clean & structure the data
def clean_data(data):
    text_to_num = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5} #map
    cleaned_data = []
    unique_users = set()

    for user in data:
        #Clean ratings - data consistency
        raw_rating = str(user["rating"]).strip().lower()
        if(raw_rating in text_to_num):
            raw_rating = text_to_num[raw_rating]
        user["rating"] = raw_rating

        #Handle missing vals
        raw_age = user.get("age")
        if(raw_age == None):
            user["age"] = None

        #Deduplication
        if(user["name"].strip() in unique_users):
            continue

        unique_users.add(user["name"])
        cleaned_data.append(user)

    return cleaned_data
            

In [47]:
data = clean_data(data)
print(data)

[{'name': 'Alice', 'rating': '5', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': '4', 'feedback': 'ok but late Delivery', 'age': '30'}, {'name': 'Charlie', 'rating': '2', 'feedback': 'BAD EXPERIENCE', 'age': None}, {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}]


In [42]:
# ASSIGNMENT 2 - Getting Meaningful Insights from data
# i) What's the avg rating?
# ii) What % of users gave poor rating (<3)?

In [None]:
# Getting Meaningful insights from data
def get_insights(data):
    
    # avg rating
    total_rating = 0
    for user in data:
        total_rating += float(user["rating"])
    print(f"Avg rating: {total_rating/len(data)}")

    #percentage of users with poor rating
    poor_ratings = 0
    for user in data:
        if(float(user["rating"]) < 3):
            poor_ratings += 1
    print(f"Percentage of users with poor rating: {(poor_ratings/len(data))*100}%")

In [58]:
get_insights(data)

Avg rating: 3.9
Percentage of users with poor rating: 20.0%


In [59]:
# ASSIGNMENT 3 - Build a product recommendation feature 
# i) if user rating >= 4, recommend same brand products
# ii) if user rating < 4, recomment diff brand products

In [60]:
def get_recommendations(data):
    recommendations = []

    for user in data:
        curr_recomm = {}
        curr_recomm["name"] = user["name"]

        if(float(user["rating"]) >= 4):
            curr_recomm["brand"] = "Apple"
        else:
            curr_recomm["brand"] = "Samsung"

        recommendations.append(curr_recomm)
    return recommendations


In [61]:
get_recommendations(data)

[{'name': 'Alice', 'brand': 'Apple'},
 {'name': 'Bob', 'brand': 'Apple'},
 {'name': 'Charlie', 'brand': 'Samsung'},
 {'name': 'Diana', 'brand': 'Apple'},
 {'name': 'Eve', 'brand': 'Samsung'}]