In [4]:
import json

In [5]:
# load the data
def load_data(filename):
    with open(filename,"r")as f:
        data=json.load(f)
    return data

In [7]:
data=load_data("store_data.json")
print(type(data),data)

<class 'list'> [{'name': 'Alice', 'rating': '5 ', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': 'four', 'feedback': 'ok but late Delivery', 'age': '30'}, {'name': ' Charlie', 'rating': 'two', 'feedback': 'BAD EXPERIENCE '}, {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}, {'name': 'Alice', 'rating': '5', 'feedback': 'Great product again!', 'age': '25'}]


In [11]:
#clean & structure the data
def clean_data(data):
    for user in data:
        print(user)

In [12]:
clean_data(data)

{'name': 'Alice', 'rating': '5 ', 'feedback': 'Great product!!', 'age': '25'}
{'name': 'Bob', 'rating': 'four', 'feedback': 'ok but late Delivery', 'age': '30'}
{'name': ' Charlie', 'rating': 'two', 'feedback': 'BAD EXPERIENCE '}
{'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}
{'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}
{'name': 'Alice', 'rating': '5', 'feedback': 'Great product again!', 'age': '25'}


In [26]:
#clean & structure the data
def clean_data(data):
    text_to_num={"one":1,"two":2,"three":3,"four":4,"five":5}

    cleaned_data=[]
    unique_users=set()

    for user in data:
        #clean ratings-data consistency
        raw_ratings=str(user["rating"]).strip().lower()
        if(raw_ratings in text_to_num):
            raw_ratings=text_to_num[raw_ratings]

        user["rating"]=raw_ratings

        #Handle missing values
        raw_age=user.get("age")
        if(raw_age==None):
            user["age"]=None

        #De-duplication
        if(user["name"].strip() in unique_users):
            continue
        unique_users.add(user["name"])
        cleaned_data.append(user)
    return cleaned_data

In [27]:
data=clean_data(data)

[{'name': 'Alice', 'rating': '5', 'feedback': 'Great product!!', 'age': '25'},
 {'name': 'Bob',
  'rating': '4',
  'feedback': 'ok but late Delivery',
  'age': '30'},
 {'name': ' Charlie',
  'rating': '2',
  'feedback': 'BAD EXPERIENCE ',
  'age': None},
 {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'},
 {'name': 'Eve',
  'rating': '3.5',
  'feedback': 'Average - could be better',
  'age': '20'}]

In [57]:
#get meaningsful insights from the data
def get_insights(data):

    #avg ratings
    total_rating=0
    for user in data:
        total_rating+=float(user["rating"])
    print("avg rating is: ",total_rating/len(data))

    #percentage of user with poor rating
    poor_rating=0

    for user in data:
        if(float(user["rating"])<3):
            poor_rating+=1
    print("poor rating is: ",poor_rating/len(data)*100)
        

In [59]:
get_insights(data)



avg rating is:  4.083333333333333
poor rating is:  16.666666666666664


In [70]:
#Build recommendation feature
def get_recommendations(data):
    recommendation=[]

    for user in data:
        curr_recomm={}
        curr_recomm["name"]=user["name"]

        if(float(user["rating"])>=4):
            curr_recomm["brand"]="Apple"
        else:
            curr_recomm["brand"]="Samsung"
        recommendation.append(curr_recomm)

    return recommendation

In [71]:
get_recommendations(data)

[{'name': 'Alice', 'brand': 'Apple'},
 {'name': 'Bob', 'brand': 'Apple'},
 {'name': ' Charlie', 'brand': 'Samsung'},
 {'name': 'Diana', 'brand': 'Apple'},
 {'name': 'Eve', 'brand': 'Samsung'},
 {'name': 'Alice', 'brand': 'Apple'}]