# Amazon Store Feedback Analysis

We analyze customer feedback data to:
- clean inconsistent entries
- handle missing values
- calculate insights
- build simple recommendations

This demonstrates how to think in terms of data: inspect the raw records first, clean them, and only then draw conclusions.

In [20]:
import json

In [21]:
def load_data(filename):
    """Read a JSON file and return its parsed content.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's top-level value
        (a list, dict, string, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Pin the encoding: without it, open() falls back to the platform locale,
    # which can mis-decode non-ASCII feedback text (e.g. an en dash).
    with open(filename, "r", encoding="utf-8") as f:
        return json.load(f)

# Load the raw feedback records, then show them followed by their container type.
data = load_data("dataset/store_data.json")
print(data, type(data), sep="\n")


[{'name': 'Alice', 'rating': '5', 'feedback': 'Great product!!', 'age': '25'}, {'name': 'Bob', 'rating': 'four', 'feedback': 'ok but late Delivery', 'age': '30'}, {'name': 'Charlie', 'rating': 'two', 'feedback': 'BAD EXPERIENCE '}, {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'}, {'name': 'Eve', 'rating': '3.5', 'feedback': 'Average – could be better', 'age': '20'}, {'name': 'Alice', 'rating': '5', 'feedback': 'Great product again!', 'age': '25'}]
<class 'list'>


## Data Cleaning

Problems:
- ratings stored as words
- missing age values
- duplicate users


In [22]:
def clean_data(data):
    """Normalize ratings, fill in missing ages and drop repeat users.

    Every kept record is a shallow copy, so the dicts passed in ``data`` are
    never modified.

    Cleaning rules:
      * The rating is stringified, stripped and lower-cased. Spelled-out
        values ("one" .. "five") become the matching int; anything else is
        kept as the normalized string (e.g. "5", "3.5").
      * A record without an "age" key gets ``"age": None``.
      * Only the first record for each name is kept. Names are compared
        after stripping surrounding whitespace; the stored name itself is
        left as given.

    Args:
        data: Iterable of dict records, each with "name" and "rating" keys.

    Returns:
        A new list with the cleaned copies of the first record per name,
        in their original order.

    Raises:
        KeyError: If a record lacks "name", or a kept record lacks "rating".
    """
    text_to_num = {"one": 1, "two": 2, "three": 3, "four": 4, "five": 5}
    cleaned_data = []
    seen_names = set()

    for user in data:
        # Use the SAME stripped key for both the membership test and the
        # insert; otherwise "Alice " followed by "Alice" slips through.
        name_key = user["name"].strip()
        if name_key in seen_names:
            continue
        seen_names.add(name_key)

        # Work on a copy so the caller's raw records stay intact.
        record = dict(user)

        rating = str(record["rating"]).strip().lower()
        record["rating"] = text_to_num.get(rating, rating)

        # Make the "age" key always present so downstream code can rely on it.
        record.setdefault("age", None)

        cleaned_data.append(record)

    return cleaned_data

In [23]:
# Rebind `data` to the cleaned records; the bare name on the last line
# displays them as the cell output.
data = clean_data(data)
data

[{'name': 'Alice', 'rating': '5', 'feedback': 'Great product!!', 'age': '25'},
 {'name': 'Bob', 'rating': 4, 'feedback': 'ok but late Delivery', 'age': '30'},
 {'name': 'Charlie', 'rating': 2, 'feedback': 'BAD EXPERIENCE ', 'age': None},
 {'name': 'Diana', 'feedback': 'Loved it!', 'rating': '5', 'age': '28'},
 {'name': 'Eve',
  'rating': '3.5',
  'feedback': 'Average – could be better',
  'age': '20'}]

## Extract Insights
We calculate the average rating and the percentage of users who gave a poor rating (a rating below 3).


In [24]:
def get_insights(data, poor_threshold=3):
    """Print the average rating and the share of poor ratings.

    Args:
        data: Iterable of records whose "rating" value can be passed to
            ``float()`` (numbers or numeric strings).
        poor_threshold: A rating strictly below this value counts as poor.
            Defaults to 3.

    Returns:
        None. Results are written to stdout.

    Raises:
        KeyError: If a record has no "rating" key.
        ValueError: If a rating cannot be converted to float.
    """
    # Convert once up front so both statistics share the same numbers.
    ratings = [float(user["rating"]) for user in data]

    # Without this guard an empty dataset would divide by zero below.
    if not ratings:
        print("No ratings to analyze.")
        return

    print("Avg rating =", sum(ratings) / len(ratings))

    poor_rating = sum(1 for rating in ratings if rating < poor_threshold)
    print("% of users with poor rating =", (poor_rating / len(ratings)) * 100)

In [25]:
# Print the average rating and the percentage of ratings below 3 for `data`.
get_insights(data)

Avg rating = 3.9
% of users with poor rating = 20.0


## Recommendation Logic
rating ≥ 4 → Apple  
rating < 4 → Samsung


In [26]:
def get_recommendations(data):
    """Map every user to a recommended brand based on their rating.

    A rating of 4 or higher (after ``float()`` conversion) yields "Apple";
    any lower rating yields "Samsung".

    Args:
        data: Iterable of records with "name" and "rating" keys.

    Returns:
        A list of ``{"name": ..., "brand": ...}`` dicts, one per input
        record, in input order.
    """
    return [
        {
            "name": entry["name"],
            "brand": "Apple" if float(entry["rating"]) >= 4 else "Samsung",
        }
        for entry in data
    ]


In [27]:
# Display the name -> brand recommendation list built from `data`.
get_recommendations(data)

[{'name': 'Alice', 'brand': 'Apple'},
 {'name': 'Bob', 'brand': 'Apple'},
 {'name': 'Charlie', 'brand': 'Samsung'},
 {'name': 'Diana', 'brand': 'Apple'},
 {'name': 'Eve', 'brand': 'Samsung'}]

## Conclusion
Raw JSON → Clean Data → Insights → Recommendation

This is the basic workflow before machine learning.
