In [1]:
import getpass
import os
import pickle

import pandas as pd
from py2neo import Graph, Node, Relationship
from sklearn.preprocessing import LabelEncoder

In [2]:
# Dynamic path settings
# Paths are anchored at the kernel's current working directory, so the
# notebook must be started from the folder that contains "dataset/".
# os.getcwd() states that dependency directly; wrapping the quoted string
# "__file__" in abspath()/dirname() produced the same directory while
# looking as if it were relative to this file.
BASE_DIR = os.getcwd()
DATA_DIR = os.path.join(BASE_DIR, "dataset")

In [3]:
# Locations of the three input CSV files inside DATA_DIR
product_csv_path, customer_csv_path, sales_csv_path = (
    os.path.join(DATA_DIR, csv_name)
    for csv_name in ("product_details.csv", "customer_details.csv", "sales_data.csv")
)

In [4]:
# Load the product, customer and sales tables (read in that order)
products_df, customers_df, sales_df = (
    pd.read_csv(csv_path)
    for csv_path in (product_csv_path, customer_csv_path, sales_csv_path)
)

In [5]:
# Fit one LabelEncoder per ID column and store the resulting integer codes
# in a new column of sales_df next to the raw IDs
user_encoder = LabelEncoder()
product_encoder = LabelEncoder()
for id_encoder, raw_column, encoded_column in (
    (user_encoder, 'user id', 'encoded_user_id'),
    (product_encoder, 'product id', 'encoded_product_id'),
):
    sales_df[encoded_column] = id_encoder.fit_transform(sales_df[raw_column])

In [6]:
# Lookup tables from encoded integer ID back to the original ID
user_id_map = {
    encoded_id: original_id
    for encoded_id, original_id in zip(sales_df['encoded_user_id'], sales_df['user id'])
}
product_id_map = {
    encoded_id: original_id
    for encoded_id, original_id in zip(sales_df['encoded_product_id'], sales_df['product id'])
}

In [7]:
# Persist both lookup tables as pickle files inside DATA_DIR
for pickle_name, id_map in (
    ('user_id_map.pkl', user_id_map),
    ('product_id_map.pkl', product_id_map),
):
    with open(os.path.join(DATA_DIR, pickle_name), 'wb') as pickle_file:
        pickle.dump(id_map, pickle_file)

In [8]:
# Connecting to the Neo4j database
# Connection settings come from the environment so that no password is stored
# in the notebook. NEO4J_URI and NEO4J_USER fall back to the local defaults;
# if NEO4J_PASSWORD is unset (or empty) the password is prompted for instead.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass.getpass("Neo4j password: ")

graph = Graph(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

In [9]:
# Clear existing data
# WARNING: this calls py2neo's Graph.delete_all() on the connected database
# with no filter, so whatever the graph currently holds is removed before the
# cells below recreate the Customer/Product nodes and their relationships.
# Make sure `graph` points at a database that is safe to wipe.
graph.delete_all()

In [10]:
# Create Customers Nodes
# Node property name -> column of customer_details.csv that supplies it.
CUSTOMER_PROPERTY_COLUMNS = {
    "id": 'Customer ID',
    "age": 'Age',
    "gender": 'Gender',
    "item_purchased": 'Item Purchased',
    "category": 'Category',
    "purchase_amount": 'Purchase Amount (USD)',
    "location": 'Location',
    "size": 'Size',
    "color": 'Color',
    "season": 'Season',
    "review_rating": 'Review Rating',
    "subscription_status": 'Subscription Status',
    "shipping_type": 'Shipping Type',
    "discount_applied": 'Discount Applied',
    "promo_code_used": 'Promo Code Used',
    "previous_purchases": 'Previous Purchases',
    "payment_method": 'Payment Method',
    "frequency": 'Frequency of Purchases',
}

for _, row in customers_df.iterrows():
    # pandas represents blank CSV cells as NaN. Leaving those properties off
    # the node keeps float NaN values out of the graph, so a missing value is
    # simply an absent property.
    customer_properties = {
        property_name: row[column_name]
        for property_name, column_name in CUSTOMER_PROPERTY_COLUMNS.items()
        if pd.notna(row[column_name])
    }
    customer_node = Node("Customer", **customer_properties)
    graph.create(customer_node)

# Create Product Nodes
# Node property name -> column of product_details.csv that supplies it.
PRODUCT_PROPERTY_COLUMNS = {
    "id": 'Uniqe Id',
    "name": 'Product Name',
    "brand": 'Brand Name',
    "asin": 'Asin',
    "category": 'Category',
    "upc_ean_code": 'Upc Ean Code',
    "list_price": 'List Price',
    "selling_price": 'Selling Price',
    "quantity": 'Quantity',
    "model_number": 'Model Number',
    "about_product": 'About Product',
    "product_specification": 'Product Specification',
    "technical_details": 'Technical Details',
    "shipping_weight": 'Shipping Weight',
    "product_dimensions": 'Product Dimensions',
    "image": 'Image',
    "variants": 'Variants',
    "sku": 'Sku',
    "product_url": 'Product Url',
    "stock": 'Stock',
    "product_details": 'Product Details',
    "dimensions": 'Dimensions',
    "color": 'Color',
    "ingredients": 'Ingredients',
    "direction_to_use": 'Direction To Use',
    "is_amazon_seller": 'Is Amazon Seller',
    "size_quantity_variant": 'Size Quantity Variant',
    "product_description": 'Product Description',
}

for _, row in products_df.iterrows():
    # pandas represents blank CSV cells as NaN. Leaving those properties off
    # the node keeps float NaN values out of the graph, so a missing value is
    # simply an absent property.
    product_properties = {
        property_name: row[column_name]
        for property_name, column_name in PRODUCT_PROPERTY_COLUMNS.items()
        if pd.notna(row[column_name])
    }
    product_node = Node("Product", **product_properties)
    graph.create(product_node)

In [11]:
# Creating interaction relationships
# Each distinct ID is looked up in Neo4j once and remembered here (including
# a None result), instead of issuing two queries for every sales row.
customer_nodes_by_id = {}
product_nodes_by_id = {}
skipped_sales_rows = 0

for _, row in sales_df.iterrows():
    user_id = row['user id']
    product_id = row['product id']
    raw_interaction = row['Interaction type']

    # A row with a blank ID cannot match any node, and a blank interaction
    # type would otherwise be stringified into the relationship type "NAN".
    if pd.isna(user_id) or pd.isna(product_id) or pd.isna(raw_interaction):
        skipped_sales_rows += 1
        continue

    if user_id not in customer_nodes_by_id:
        customer_nodes_by_id[user_id] = graph.nodes.match("Customer", id=user_id).first()
    if product_id not in product_nodes_by_id:
        product_nodes_by_id[product_id] = graph.nodes.match("Product", id=product_id).first()
    user_node = customer_nodes_by_id[user_id]
    product_node = product_nodes_by_id[product_id]

    # .first() yields None when nothing matched; compare against None rather
    # than relying on the truthiness of a node object.
    if user_node is None or product_node is None:
        skipped_sales_rows += 1
        continue

    interaction_type = str(raw_interaction).upper()  # Convert to string and then upper case
    interaction = Relationship(user_node, interaction_type, product_node, timestamp=row['Time stamp'])
    graph.create(interaction)

# Report rows that produced no relationship instead of dropping them silently.
if skipped_sales_rows:
    print(
        "Skipped {} of {} sales rows (missing value or no matching Customer/Product node)".format(
            skipped_sales_rows, len(sales_df)
        )
    )