In [10]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

class UserInteractionsGenerator:
    """Generate synthetic user-interaction records and write them to a CSV file.

    One interaction is generated for every user at every hourly timestamp,
    starting at ``start_time``. Each interaction has a randomly chosen type
    (search, view, add-to-cart or purchase) and type-specific attributes.

    Every row shares one fixed schema (``OUTPUT_COLUMNS``). Attributes that do
    not apply to a given interaction type are left empty, except ``Quantity``
    (filled with 0) and ``Payment_Method`` (filled with 'Not Applicable').

    Args:
        user_profiles: DataFrame with a 'User_ID' column.
        product_data: DataFrame with 'Product_ID' and 'Product_Name' columns.
        num_hours: Number of consecutive hourly timestamps to generate.
    """

    # Export location used when ``generate_data`` is called without a path.
    DEFAULT_OUTPUT_PATH = r'C:\Users\Olivia\Documents\Fall-2023\Big-Data-Technology\Big-Daa_Project\UserInteractions_data.csv'

    INTERACTION_TYPES = ['product_search', 'product_view', 'add_to_cart', 'purchase']

    # Columns present on every interaction regardless of its type.
    BASE_COLUMNS = ['Interaction_ID', 'Timestamp', 'User_ID', 'Interaction_Type', 'Product_ID']

    # Full, fixed output schema: base columns plus the union of all
    # type-specific columns returned by ``get_additional_columns``.
    OUTPUT_COLUMNS = BASE_COLUMNS + ['Search_Query', 'Product_Name', 'Quantity', 'Payment_Method']

    def __init__(self, user_profiles, product_data, num_hours=24 * 30):
        self.user_profiles = user_profiles
        self.product_data = product_data
        self.num_hours = num_hours
        self.start_time = datetime(2023, 11, 1, 0, 0, 0)
        self.interaction_id_counter = 1
        # Lazily built (id_array, id -> name) pair; see _product_lookup().
        self._product_cache = None

    def generate_data(self, output_path=None):
        """Generate all interactions and write them to ``output_path`` as CSV.

        Args:
            output_path: Destination CSV path. Defaults to
                ``DEFAULT_OUTPUT_PATH`` when omitted.

        Returns:
            None. A success or failure message for the write is printed.
        """
        if output_path is None:
            output_path = self.DEFAULT_OUTPUT_PATH

        df = self._build_dataframe()

        # Best effort: a failed write is reported on stdout, not raised.
        try:
            df.to_csv(output_path, index=False)
            print("User interactions data is written successfully.")
        except Exception as e:
            print(f"Error writing user interactions data: {e}")

    def _build_dataframe(self):
        """Return a DataFrame with one random interaction per (hour, user) pair."""
        user_ids = self.user_profiles['User_ID'].tolist()

        rows = []
        for hour in range(self.num_hours):
            timestamp = self.start_time + timedelta(hours=hour)
            for user_id in user_ids:
                interaction_type = np.random.choice(self.INTERACTION_TYPES)
                product_id, extras = self.generate_interaction_details(interaction_type)
                row = {
                    'Interaction_ID': self.interaction_id_counter,
                    'Timestamp': timestamp,
                    'User_ID': user_id,
                    'Interaction_Type': interaction_type,
                    'Product_ID': product_id,
                }
                # Key each extra value by its own column name. Rows of
                # different interaction types have different numbers of
                # extras, so positional lists would produce ragged rows.
                row.update(zip(self.get_additional_columns(interaction_type), extras))
                rows.append(row)
                self.interaction_id_counter += 1

        # Passing the fixed schema keeps the column set and order stable even
        # when no rows were generated or some interaction type never occurred.
        df = pd.DataFrame(rows, columns=self.OUTPUT_COLUMNS)

        # Fill NaN values with meaningful defaults
        return df.fillna({'Quantity': 0, 'Payment_Method': 'Not Applicable'})

    def generate_interaction_details(self, interaction_type):
        """Return ``(product_id, attributes)`` for the given interaction type.

        Raises:
            ValueError: If ``interaction_type`` is not one of
                ``INTERACTION_TYPES``.
        """
        handlers = {
            'product_search': self.generate_product_search,
            'product_view': self.generate_product_view,
            'add_to_cart': self.generate_add_to_cart,
            'purchase': self.generate_purchase,
        }
        handler = handlers.get(str(interaction_type))
        if handler is None:
            raise ValueError(f"Unknown interaction type: {interaction_type!r}")
        return handler()

    def _product_lookup(self):
        """Return ``(product_id_array, id_to_name)``, building it on first use."""
        if self._product_cache is None:
            product_ids = self.product_data['Product_ID'].tolist()
            product_names = self.product_data['Product_Name'].tolist()
            id_to_name = {}
            for product_id, product_name in zip(product_ids, product_names):
                # setdefault keeps the first name seen for a duplicated ID.
                id_to_name.setdefault(product_id, product_name)
            self._product_cache = (np.array(product_ids), id_to_name)
        return self._product_cache

    def _pick_product(self):
        """Return a randomly chosen ``(product_id, product_name)`` pair."""
        product_ids, id_to_name = self._product_lookup()
        product_id = np.random.choice(product_ids)
        return product_id, id_to_name[product_id]

    def generate_product_search(self):
        """Return ``('-', [search_query])``; a search has no product ID."""
        search_query = np.random.choice(['laptop', 'phone', 'clothing', 'shoes', 'electronics'])
        return ('-', [search_query])

    def generate_product_view(self):
        """Return ``(product_id, [product_name])`` for a random product."""
        product_id, product_name = self._pick_product()
        return (product_id, [product_name])

    def generate_add_to_cart(self):
        """Return ``(product_id, [product_name, quantity])`` with quantity in 1-4."""
        product_id, product_name = self._pick_product()
        quantity = np.random.randint(1, 5)  # upper bound is exclusive
        return (product_id, [product_name, quantity])

    def generate_purchase(self):
        """Return ``(product_id, [product_name, quantity, payment_method])``."""
        product_id, product_name = self._pick_product()
        quantity = np.random.randint(1, 5)  # upper bound is exclusive
        payment_method = np.random.choice(['Credit Card', 'Debit Card', 'PayPal'])
        return (product_id, [product_name, quantity, payment_method])

    def get_additional_columns(self, interaction_type):
        """Return the type-specific column names, or None for an unknown type.

        The names are in the same order as the attribute list returned by
        ``generate_interaction_details`` for that type.
        """
        columns_by_type = {
            'product_search': ['Search_Query'],
            'product_view': ['Product_Name'],
            'add_to_cart': ['Product_Name', 'Quantity'],
            'purchase': ['Product_Name', 'Quantity', 'Payment_Method'],
        }
        return columns_by_type.get(str(interaction_type))

# Example usage: load the user-profile and product CSVs, then generate
# 30 days (24 * 30 hours) of interactions and write them to CSV.
user_profiles = pd.read_csv(
    r'C:\Users\Olivia\Documents\Fall-2023\Big-Data-Technology\Big-Daa_Project\UserProfiles_data.csv'
)
product_data = pd.read_csv(
    r'C:\Users\Olivia\Documents\Fall-2023\Big-Data-Technology\Big-Daa_Project\ProductData_data.csv'
)

generator = UserInteractionsGenerator(user_profiles, product_data, num_hours=24 * 30)
generator.generate_data()


User interactions data is written successfully.
