In [3]:
import json

In [4]:
def load_data(filename):
    with open(filename, "r") as f:
        data = json.load(f)
        return data

In [10]:
data = load_data("data_cleaning.json")

In [11]:
print(data)


[{'name': 'Alice', 'rating': '5 ', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': 'four', 'feedback': 'ok but late Delivery', 'age': '30'}, {'name': ' Charlie', 'rating': 'two', 'feedback': 'BAD EXPERIENCE '}, {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}, {'name': 'Alice', 'rating': '5', 'feedback': 'Great product again!', 'age': '25'}]


In [12]:
#clean and structure the data
def clean_data(data):
    text_to_num = {"one":1, "two":2, "three":3, "four":4, "five":5}
    for user in data:
        
        print(user)

In [13]:
clean_data(data)

{'name': 'Alice', 'rating': '5 ', 'feedback': 'Great product!!', 'age': '25'}
{'name': 'Bob', 'rating': 'four', 'feedback': 'ok but late Delivery', 'age': '30'}
{'name': ' Charlie', 'rating': 'two', 'feedback': 'BAD EXPERIENCE '}
{'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}
{'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}
{'name': 'Alice', 'rating': '5', 'feedback': 'Great product again!', 'age': '25'}


In [14]:
#function to clean the data
def clean_data(data):
    text_to_num = {"one":1, "two":2, "three":3, "four":4, "five":5}
    unique_users = set()
    cleaned_data = []


    for user in data:
        raw_rating = user["rating"].strip().lower()
        if (raw_rating in text_to_num):
            raw_rating = text_to_num[raw_rating]
            user["rating"] = raw_rating

        #handle missing value
        raw_age = user.get("age")
        if(raw_age == None):
            user["age"] = None

        #deduplication
        if(user['name'].strip() in unique_users):
            continue
        unique_users.add(user['name'])
        cleaned_data.append(user)

    return cleaned_data
                         


In [15]:
data = clean_data(data)
print(data, len(data))

[{'name': 'Alice', 'rating': '5 ', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': 4, 'feedback': 'ok but late Delivery', 'age': '30'}, {'name': ' Charlie', 'rating': 2, 'feedback': 'BAD EXPERIENCE ', 'age': None}, {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Eve', 'rating': '3.5', 'feedback': 'Average - could be better', 'age': '20'}] 5


In [18]:
def get_insights(data):
    total_rating = 0
    for user in data:
        total_rating += float(user['rating'])
    print(f"average product rating = {total_rating/len(data)}")
                              

In [19]:
get_insights(data)

average product rating = 3.9


In [23]:
poor_rating = 0
for user in data:
    if(float(user['rating']) < 3.0):
        poor_rating += 1 
print(f"% of users to give poor rating = {poor_rating/len(data)*100}%")

% of users to give poor rating = 20.0%


In [26]:
def get_recommendations(data):
    recommendations = []
    for user in data:
        current_recomm = {}
        current_recomm['name'] = user['name']
        if (float(user['rating'])>= 4):
            current_recomm['brand'] = "apple"
        else:
            current_recomm['brand'] = "samsung"
        recommendations.append(current_recomm)
    return recommendations

In [27]:
recommendations = get_recommendations(data)
print(recommendations)

[{'name': 'Alice', 'brand': 'apple'}, {'name': 'Bob', 'brand': 'apple'}, {'name': ' Charlie', 'brand': 'samsung'}, {'name': 'Diana', 'brand': 'apple'}, {'name': 'Eve', 'brand': 'samsung'}]
