In [None]:

import pandas as pd

# Location of the catalog JSON file. Adjust the path (or swap in another
# pandas reader) if your data lives somewhere else or in another format.
json_file_path = 'catalog.json'
df = pd.read_json(json_file_path)

# Bucket the catalog by 'Type' and gather every item Name in a bucket into a
# single list. Replace `list` with another aggregation (a count, an average
# over a cost column, ...) to summarise the groups differently.
grouped_items = df.groupby('Type')['Name'].agg(list).to_frame()

print(grouped_items)



## Randomly rate 5-20 elements for each of 10000 Managers


In [13]:

# Imports are local to this cell so it also runs on a fresh kernel
# (Restart Kernel -> Run All); it previously relied on names imported elsewhere.
import json
import random

# Fixed seed so that re-running the cell regenerates the same ratings file.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Load the element catalog to get element IDs
with open("./data/catalog/catalog_w_text3_emb.json", "r") as file:
    element_catalog = json.load(file)

# Extract element IDs
element_ids = [element["Id"] for element in element_catalog]

# Inclusive bounds on how many elements a single Manager rates
min_elements_per_manager = 5
max_elements_per_manager = 20

# Ratings are integers from 1 to max_rating (inclusive)
max_rating = 10

# Exclusive upper bound for the ID range below: IDs run 1..10000,
# i.e. 10000 Managers in total.
num_Manager_ids = 10001

# Generate Manager IDs from 1 to 10000
current_Manager_ids = list(range(1, num_Manager_ids))

current_ratings = []
for Manager_id in current_Manager_ids:
    # random.randint includes both endpoints, so a Manager can rate anywhere
    # from 5 up to and including 20 elements. (np.random.randint excludes its
    # upper bound, which silently capped the count at 19.)
    num_elements_to_sample = random.randint(min_elements_per_manager, max_elements_per_manager)

    # Sample without replacement so a Manager never rates the same element
    # twice; cap at the catalog size in case the catalog is very small.
    elements_to_rate = random.sample(element_ids, min(len(element_ids), num_elements_to_sample))

    # Every sampled ID comes straight from the catalog, so no per-rating
    # catalog lookup is needed to confirm that the element exists.
    for element_id in elements_to_rate:
        new_rating = {
            "ManagerId": Manager_id,
            "ElementId": element_id,
            "Rating": random.randint(1, max_rating)
        }
        current_ratings.append(new_rating)

# Convert the generated ratings to JSON format
json_data = json.dumps(current_ratings, indent=4)

# Write the JSON data to the random-ratings file
with open("./data/ratings/randomRatings.json", "w") as file:
    file.write(json_data)

## Verify random ratings created

In [None]:
import json
from collections import Counter

# This section verifies the randomly generated ratings, so read the file the
# generator cell writes ("./data/ratings/randomRatings.json").
# NOTE(review): this cell previously loaded "actualRatings.json"; point
# RATINGS_PATH back at that file if inspecting it was intentional.
RATINGS_PATH = "./data/ratings/randomRatings.json"

# Load the ratings data
with open(RATINGS_PATH, "r") as file:
    ratings = json.load(file)

# Count the number of items rated by each Manager. Counter keeps first-seen
# order, so the per-Manager report below follows the order in the file.
ratings_count_by_Manager = dict(Counter(rating["ManagerId"] for rating in ratings))

total_Manager_ids = len(ratings_count_by_Manager)

# Print the total number of Manager IDs
print(f"Total number of Manager IDs: {total_Manager_ids}")

for Manager_id, count in ratings_count_by_Manager.items():
    print(f"Manager ID: {Manager_id} has rated {count} items.")

## Advanced Rating system
This section takes the random ratings and gives them a pattern based on brand, type, and cost. The result is stored in a new file, `AugmentedRating.json`.


In [15]:
import json
import random

# Load the current Manager ratings
with open("./data/ratings/randomRatings.json", "r") as file:
    current_ratings = json.load(file)

# Load the element catalog
with open("./data/catalog/catalog_w_text3_emb.json", "r") as file:
    element_catalog = json.load(file)

# NOTE(review): not read anywhere in this cell - confirm whether it is still needed.
total_desired_ratings = 200000

# Brand scores used for the brand term of the rating; brands that are not
# listed fall back to a default inside calculate_rating.
brand_preferences = {"Daybird": 8, "Gravitator": 7, "WildRunner": 9, "Quester": 6, "B&R": 5, "Raptor Elite": 7, "Solstix": 8, "Grolltex": 6, "AirStrider": 7, "Green Equipment": 9, "Legend": 5, "Zephyr": 6, "XE": 4}
type_preferences = ["Footwear", "Climbing", "Ski/boarding", "Bags", "Jackets", "Navigation", "Cycling", "Trekking"]

# Take the Manager IDs from the ratings themselves instead of hard-coding
# 1..10000, so every Manager that appears in the ratings file is guaranteed
# an entry in Manager_type_preferences.
curr = sorted({rating["ManagerId"] for rating in current_ratings})

# Simulate Manager preferences for element types: each Manager likes a random,
# non-empty subset of the known types.
Manager_type_preferences = {Manager_id: random.sample(type_preferences, k=random.randint(1, len(type_preferences))) for Manager_id in curr}

# Cost bounds used to normalize cost to a 1-10 scale (single pass over the catalog)
element_costs = [element["Cost"] for element in element_catalog]
max_cost = max(element_costs)
min_cost = min(element_costs)
cost_range = max_cost - min_cost

def normalize_cost(cost):
    """Map a cost linearly onto the 1-10 scale.

    Reads the module-level ``min_cost`` and ``cost_range``: a cost equal to
    ``min_cost`` maps to 1 and a cost of ``min_cost + cost_range`` maps to 10.

    When ``cost_range`` is 0 (every element has the same cost) there is no
    spread to scale by, so the midpoint of the scale (5.5) is returned instead
    of raising ``ZeroDivisionError``.
    """
    if cost_range == 0:
        return 5.5
    return 1 + (((cost - min_cost) / cost_range) * 9)

# Score one element for one Manager from its cost, brand and type
def calculate_rating(Manager_id, element):
    """Return the weighted rating of ``element`` for ``Manager_id``.

    Three component scores are blended with weights 0.4 / 0.3 / 0.3:

    * cost  - ``normalize_cost`` applied to ``element["Cost"]``
    * brand - the entry in ``brand_preferences`` for ``element["Brand"]``,
      or 5 when the brand is not listed
    * type  - 10 when ``element["Type"]`` is among the Manager's entries in
      ``Manager_type_preferences``, otherwise 5
    """
    cost_component = normalize_cost(element["Cost"])
    brand_component = brand_preferences.get(element["Brand"], 5)

    if element["Type"] in Manager_type_preferences[Manager_id]:
        type_component = 10
    else:
        type_component = 5

    weighted_cost = cost_component * 0.4
    weighted_brand = brand_component * 0.3
    weighted_type = type_component * 0.3
    return weighted_cost + weighted_brand + weighted_type

# Index the catalog by element Id. Looking elements up by Id (rather than by
# list position ElementId - 1) stays correct when catalog Ids are not
# contiguous, not 1-based, or not stored in Id order; an unknown Id raises
# KeyError instead of silently selecting a different element.
elements_by_id = {element["Id"]: element for element in element_catalog}

current = []
# Re-score every existing (Manager, element) pair using the preference-based rating
for existing_rating in current_ratings:
    Manager_id = existing_rating["ManagerId"]
    element = elements_by_id[existing_rating["ElementId"]]
    # Replace the random rating with the brand/type/cost-based one
    rating = calculate_rating(Manager_id, element)
    new_rating = {
        "ManagerId": Manager_id,
        "ElementId": element["Id"],
        "Rating": round(rating, 2)  # Rounded for readability
    }
    current.append(new_rating)


# Convert the updated data to JSON format
json_data = json.dumps(current, indent=4)

with open("./data/ratings/AugmentedRating.json", "w") as file:
    file.write(json_data)