In [35]:
import sys
if 'google.colab' in sys.modules:
  %pip install -q stellargraph[demos]==1.2.1# install StellarGraph if running on Google Colab

In [36]:
# verify that we're using the correct version of StellarGraph for this notebook
import stellargraph as sg

try:
    sg.utils.validate_notebook_version("1.2.1")
except AttributeError:
    raise ValueError(
        f"This notebook requires StellarGraph version 1.2.1, but a different version {sg.__version__} is installed.  Please see <https://github.com/stellargraph/stellargraph/issues/1172>."
    ) from None


In [37]:
import json
import pandas as pd
import numpy as np
from sklearn import preprocessing, feature_extraction, model_selection
from sklearn.metrics import mean_absolute_error, mean_squared_error

import stellargraph as sg
from stellargraph.mapper import HinSAGELinkGenerator
from stellargraph.layer import HinSAGE, link_regression
from stellargraph.core.graph import StellarGraph
from tensorflow.keras import Model, optimizers, losses, metrics

import multiprocessing
from stellargraph import datasets
from IPython.display import display, HTML
import matplotlib.pyplot as plt
%matplotlib inline

In [38]:
batch_size = 200
epochs = 20
# Use 70% of edges for training, the rest for testing:
train_size = 0.7
test_size = 0.3

In [39]:
unique_customer_file_url = "E:\cons\pony\data\market\customers_unique.xlsx"
unique_item_file_url = "E:\cons\pony\data\market\items_unique.xlsx"
edges_file_url = "E:\cons\pony\data\market\edges.xlsx"

customers = pd.read_excel(unique_customer_file_url)

customers = customers[['CLIENTCODE', 'GENDER', 'CITY', 'AGE']]
customers.head()




Unnamed: 0,CLIENTCODE,GENDER,CITY,AGE
0,6476,K,Batman,17
1,456620,K,İstanbul,70
2,275882,K,Adıyaman,77
3,901974,E,Kırklareli,64
4,758893,E,Konya,80


In [40]:
items = pd.read_excel(unique_item_file_url)
items = items[['ITEMCODE']]
items.head()

Unnamed: 0,ITEMCODE
0,6.0
1,7.0
2,8.0
3,11.0
4,12.0


In [41]:
edges = pd.read_excel(edges_file_url)
edges.head()

Unnamed: 0.1,Unnamed: 0,ID,CLIENTCODE,ITEMCODE,AMOUNT
0,0,188111,1,1615,1.0
1,1,381019,1,2733,1.0
2,2,189209,1,2776,1.0
3,3,163899,1,3780,1.0
4,4,188115,1,3888,1.086


In [42]:
def c(customers):
    return "c_" + customers.astype(str)

def p(purchases):
    return "p_" + purchases.astype(str)

In [43]:
customer_ids = c(customers['CLIENTCODE'])
customer_ids.head()

0      c_6476
1    c_456620
2    c_275882
3    c_901974
4    c_758893
Name: CLIENTCODE, dtype: object

In [44]:

items['ITEMCODE'] = p(items['ITEMCODE'])
items.set_index("ITEMCODE", inplace=True)
items.head()

p_6.0
p_7.0
p_8.0
p_11.0
p_12.0


In [45]:

edges["CLIENTCODE"] = c(edges["CLIENTCODE"])
edges["ITEMCODE"] = p(edges["ITEMCODE"])
edges.head()

Unnamed: 0.1,Unnamed: 0,ID,CLIENTCODE,ITEMCODE,AMOUNT
0,0,188111,c_1,p_1615,1.0
1,1,381019,c_1,p_2733,1.0
2,2,189209,c_1,p_2776,1.0
3,3,163899,c_1,p_3780,1.0
4,4,188115,c_1,p_3888,1.086


In [48]:
# convert categorical user features to numeric, and normalize age
feature_encoding = preprocessing.OneHotEncoder(sparse=False)
onehot = feature_encoding.fit_transform(customers[["GENDER", "CITY"]])
scaled_age = preprocessing.scale(customers["AGE"])
encoded_customers = pd.DataFrame(onehot, index=customer_ids).assign(
    scaled_age=scaled_age
)
encoded_customers.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,74,75,76,77,78,79,80,81,82,scaled_age
CLIENTCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
c_6476,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.597683
c_456620,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.180339
c_275882,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.547248
c_901974,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.865846
c_758893,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.704494


In [47]:
g = StellarGraph(
        {"customer": encoded_customers, "item": items},
        {"purchase": edges[["CLIENTCODE", "ITEMCODE"]]},
        source_column="CLIENTCODE",
        target_column="ITEMCODE",
    )

ValueError: edges: expected all source and target node IDs to be contained in `nodes`, found some missing: 'p_1615', 'p_2733', 'p_2776', 'p_3780', 'p_3888', 'p_3893', 'p_3933', 'p_5238', 'p_5518', 'p_5695', 'p_5699', 'p_5702', 'p_5711', 'p_5721', 'p_5724', 'p_5729', 'p_7770', 'p_11361', 'p_13423', 'p_20871', ... (9313 more)