### Imports

In [1]:
import json
import csv
import pandas as pd
import numpy as np

In [2]:
# Product catalogue, built from a compact table of (name, description, category)
# rows. Product ids are assigned sequentially starting at 1, in row order.
_PRODUCT_FIELDS = ('name', 'description', 'category')
_PRODUCT_ROWS = [
    ('Apple iPhone 12', 'Smartphone with A14 Bionic chip', 'Electronics'),
    ('Nike Air Max', 'Athletic shoes with cushion support', 'Fashion'),
    ('LEGO Star Wars', 'Building toy set from Star Wars series', 'Toys'),
    ('Logitech Mouse', 'Wireless optical mouse', 'Electronics'),
    ("Levi's Jeans", 'Denim straight fit jeans', 'Fashion'),
    ('Harry Potter', 'Fantasy book series by J.K. Rowling', 'Books'),
    ('Fender Strat', 'Electric guitar with classic tones', 'Music'),
    ('Canon DSLR', 'High-resolution digital camera', 'Electronics'),
    ('Green Pan Set', 'Non-stick, eco-friendly pan set', 'Home'),
    ('Sony Headphones', 'Noise-cancelling over the ear headphones', 'Electronics'),
]

# {product_id: {'name': ..., 'description': ..., 'category': ...}}
products = {
    product_id: dict(zip(_PRODUCT_FIELDS, row))
    for product_id, row in enumerate(_PRODUCT_ROWS, start=1)
}

# Persist the catalogue as pretty-printed JSON.
# The integer ids are serialised as string keys, because JSON object keys
# are always strings.
with open('products.json', 'w') as json_file:
    json_file.write(json.dumps(products, indent=4))



# Synthetic user profiles, keyed by user id.
# NOTE(review): 'Kitchen' and 'Home Decor' do not match any category in the
# product catalogue defined in this notebook (the closest is 'Home'), so users
# 4 and 5 can never match a product on preferred category -- confirm this is
# intended.
users_data = {
    1: dict(gender='M', location='New York', age=29,
            occupation='Software Eng', preferred_category='Electronics'),
    2: dict(gender='F', location='Los Angeles', age=35,
            occupation='Musician', preferred_category='Music'),
    3: dict(gender='F', location='Miami', age=22,
            occupation='Student', preferred_category='Books'),
    4: dict(gender='M', location='Chicago', age=45,
            occupation='Chef', preferred_category='Kitchen'),
    5: dict(gender='F', location='Seattle', age=31,
            occupation='Designer', preferred_category='Home Decor'),
}

# One row per user; the dict key becomes a leading 'id' column.
users_df = (
    pd.DataFrame.from_dict(users_data, orient='index')
    .rename_axis('id')
    .reset_index()
)

# Write the table without the positional index (the 'id' column is kept).
users_df.to_csv('users.csv', index=False)



# Generate synthetic user x product ratings and save the observed ones.
#
# Every (user, product) pair is visited once. A pair is either left unrated or
# given a 1-5 rating whose distribution depends on whether the product's
# category equals the user's preferred category.
np.random.seed(0)  # for reproducibility (seeds NumPy's global RNG)

ratings = [1, 2, 3, 4, 5]
missing = ['-']  # placeholder value(s) recorded for an unrated pair

# Tunable sampling parameters. Weights are aligned with `ratings`.
missing_probability = 0.5                        # chance that a pair has no rating
preferred_weights = [0.1, 0.1, 0.1, 0.35, 0.35]  # matching category: mostly 4s and 5s
other_weights = [0.35, 0.35, 0.1, 0.1, 0.1]      # other category: mostly 1s and 2s

user_ids = []
product_ids = []
interaction_ratings = []

# The order of RNG calls (one rand() per pair, then one choice() for rated
# pairs) determines the generated data for a given seed, so keep it stable.
for user, user_info in users_data.items():
    for product, product_info in products.items():

        user_ids.append(user)
        product_ids.append(product)

        if np.random.rand() < missing_probability:
            interaction_ratings.append(missing[0])
            continue

        is_preferred = user_info['preferred_category'] == product_info['category']
        weights = preferred_weights if is_preferred else other_weights
        generated_rating = np.random.choice(ratings, p=weights)

        interaction_ratings.append(generated_rating)

interactions = {
    'user_id': user_ids,
    'product_id': product_ids,
    'rating': interaction_ratings
}

interactions_df = pd.DataFrame(interactions)

# Filter out missing interactions. While the placeholder strings are present
# the 'rating' column holds mixed str/int objects; once they are removed, cast
# it to a real integer dtype so numeric operations behave as expected.
has_rating = ~interactions_df['rating'].isin(missing)
interactions_df = interactions_df[has_rating].astype({'rating': 'int64'})

# Sanity check: only valid rating values may remain.
assert interactions_df['rating'].isin(ratings).all(), "unexpected rating value"

# Save to .csv (positional index omitted):
interactions_df.to_csv('interactions.csv', index=False)

In [3]:
# Items data: read the product catalogue back from products.json.
# JSON object keys are strings, so the reloaded dict is keyed by '1', '2', ...
with open('products.json', mode='r') as json_file:
    products = json.loads(json_file.read())
products  # last expression of the cell -> shown as its output

{'1': {'name': 'Apple iPhone 12',
  'description': 'Smartphone with A14 Bionic chip',
  'category': 'Electronics'},
 '2': {'name': 'Nike Air Max',
  'description': 'Athletic shoes with cushion support',
  'category': 'Fashion'},
 '3': {'name': 'LEGO Star Wars',
  'description': 'Building toy set from Star Wars series',
  'category': 'Toys'},
 '4': {'name': 'Logitech Mouse',
  'description': 'Wireless optical mouse',
  'category': 'Electronics'},
 '5': {'name': "Levi's Jeans",
  'description': 'Denim straight fit jeans',
  'category': 'Fashion'},
 '6': {'name': 'Harry Potter',
  'description': 'Fantasy book series by J.K. Rowling',
  'category': 'Books'},
 '7': {'name': 'Fender Strat',
  'description': 'Electric guitar with classic tones',
  'category': 'Music'},
 '8': {'name': 'Canon DSLR',
  'description': 'High-resolution digital camera',
  'category': 'Electronics'},
 '9': {'name': 'Green Pan Set',
  'description': 'Non-stick, eco-friendly pan set',
  'category': 'Home'},
 '10': {'n

In [4]:
# Users data: load users.csv into a DataFrame and preview the first rows.
users = pd.read_csv(filepath_or_buffer='users.csv')
users.head(n=5)

Unnamed: 0,id,gender,location,age,occupation,preferred_category
0,1,M,New York,29,Software Eng,Electronics
1,2,F,Los Angeles,35,Musician,Music
2,3,F,Miami,22,Student,Books
3,4,M,Chicago,45,Chef,Kitchen
4,5,F,Seattle,31,Designer,Home Decor


In [5]:
# Interactions data: load interactions.csv into a DataFrame and preview the
# first rows.
interactions = pd.read_csv(filepath_or_buffer='interactions.csv')
interactions.head(n=5)

Unnamed: 0,user_id,product_id,rating
0,1,1,5
1,1,2,2
2,1,4,4
3,1,5,5
4,1,7,2
