## Imports

In [43]:
import numpy as np
import pandas as pd

import sklearn.preprocessing as skp

In [44]:
import scipy.sparse as sp

In [80]:
from implicit.als import AlternatingLeastSquares
from implicit.evaluation import train_test_split
from implicit.evaluation import mean_average_precision_at_k
from implicit.evaluation import ndcg_at_k

In [77]:
import optuna

## Load Dataset

In [18]:
data_path = "Business-Supplier_Dataset1.csv"
data = pd.read_csv(data_path, engine='python')

In [19]:
data.head()

Unnamed: 0,BusinessID,SupplierID,Quality,Serviceability,Communicativeness,Reputation,Financial Condition,Condition of Supplier's Assets,Price,Delivery Time
0,1,14,10,3.74,1.88,7.16,5.46,1.31,78,17
1,1,40,2,4.83,2.87,6.11,5.05,4.56,14,16
2,1,31,7,5.69,9.65,8.6,6.28,9.69,60,28
3,1,46,1,1.14,4.81,4.55,2.79,7.4,35,1
4,1,18,7,6.86,9.23,8.65,4.34,7.02,37,3


In [20]:
data.tail()

Unnamed: 0,BusinessID,SupplierID,Quality,Serviceability,Communicativeness,Reputation,Financial Condition,Condition of Supplier's Assets,Price,Delivery Time
995,100,16,8,7.25,7.47,6.06,9.06,7.29,2,4
996,100,4,6,1.14,3.6,6.77,5.37,4.8,24,22
997,100,47,3,7.75,1.64,5.21,2.13,5.08,47,15
998,100,34,7,7.21,2.03,6.91,4.73,4.9,72,2
999,100,25,6,3.84,1.54,7.18,7.35,5.84,91,9


In [21]:
def printFeatureStats(feature):
    
    min_value = data[feature].min()
    max_value = data[feature].max()
    quantiles = data[feature].quantile([0.25, 0.5, 0.75])

    print(f"Min: {min_value}, Max: {max_value}")
    print(f"Quantiles: \n{quantiles}")

In [41]:
num_columns = data.shape[1]

for i in range(num_columns):
    feature = data.columns[i]
    print("\n")
    printFeatureStats(feature)




Min: 1, Max: 100
Quantiles: 
0.25    25.75
0.50    50.50
0.75    75.25
Name: BusinessID, dtype: float64


Min: 1, Max: 50
Quantiles: 
0.25    12.75
0.50    26.00
0.75    39.00
Name: SupplierID, dtype: float64


Min: 1, Max: 10
Quantiles: 
0.25    3.0
0.50    6.0
0.75    8.0
Name: Quality, dtype: float64


Min: 1.01, Max: 9.99
Quantiles: 
0.25    3.44
0.50    5.62
0.75    7.90
Name: Serviceability, dtype: float64


Min: 1.0, Max: 9.99
Quantiles: 
0.25    3.2375
0.50    5.5550
0.75    7.8700
Name: Communicativeness, dtype: float64


Min: 1.01, Max: 9.99
Quantiles: 
0.25    3.4175
0.50    5.5500
0.75    7.6925
Name: Reputation, dtype: float64


Min: 1.0, Max: 9.97
Quantiles: 
0.25    3.3000
0.50    5.5200
0.75    7.7725
Name: Financial Condition, dtype: float64


Min: 1.0, Max: 10.0
Quantiles: 
0.25    3.205
0.50    5.530
0.75    7.700
Name: Condition of Assets, dtype: float64


Min: 0.0, Max: 10.0
Quantiles: 
0.25    2.244898
0.50    4.795918
0.75    7.244898
Name: Price, dtype: float6

## Pre-processing

In [28]:
#Normalize all data between 1 to 10

scaler = skp.MinMaxScaler(feature_range=(0, 10))

data['Price'] = scaler.fit_transform(data[['Price']])
data['Delivery Time'] = scaler.fit_transform(data[['Delivery Time']])

printFeatureStats('Price')
printFeatureStats('Delivery Time')

Min: 0.0, Max: 10.0
Quantiles: 
0.25    2.244898
0.50    4.795918
0.75    7.244898
Name: Price, dtype: float64
Min: 0.0, Max: 10.0
Quantiles: 
0.25    2.5
0.50    5.0
0.75    7.5
Name: Delivery Time, dtype: float64


In [31]:
data.rename(columns={'Condition of Supplier\'s Assets': 'Condition of Assets'}, inplace=True)
print(data.columns)

Index(['BusinessID', 'SupplierID', 'Quality', 'Serviceability',
       'Communicativeness', 'Reputation', 'Financial Condition',
       'Condition of Assets', 'Price', 'Delivery Time'],
      dtype='object')


In [122]:
weights = {
    'Quality': 0.3,
    'Serviceability': 0.2,
    'Communicativeness': 0.08,
    'Reputation': 0.02,
    'Financial Condition': 0.05,
    'Condition of Assets': 0.05,
    'Price': 0.15,
    'Delivery Time': 0.15
}

In [123]:
business_id = data['BusinessID'].values
supplier_id = data['SupplierID'].values

def create_matrix(feature):
    value = data[feature].values
    utility_matrix = sp.coo_matrix((value, (business_id, supplier_id)))
    return utility_matrix

print(len(business_id))
print(len(supplier_id))

# quality_matrix = create_matrix('Quality')
# serviceability_matrix = create_matrix('Serviceability')
# communicativeness_matrix = create_matrix('Commicativeness')
# reputation_matrix = create_matrix('Reputation')
# financial_matrix = create_matrix('Financial Condition')
# asset_matrix = create_matrix('Condition of Assets')
# price_matrix = create_matrix('Price')
# delivery_matrix = create_matrix('Delivery Time')



1000
1000


In [124]:
utility_matrix = np.zeros((101, 51))
for i in range (2, num_columns):
    feature = data.columns[i]
    feature_matrix = create_matrix(feature)

    utility_matrix += feature_matrix * weights[feature]

utility_matrix = sp.coo_matrix(combined_matrix)
print("Utility matrix")
print(utility_matrix.toarray)

Utility matrix
<bound method _coo_base.toarray of <COOrdinate sparse matrix of dtype 'float64'
	with 1000 stored elements and shape (101, 51)>>


In [156]:
#split data into train and test sets

train_full, test = train_test_split(utility_matrix, train_percentage=0.75)

In [157]:
train, val = train_test_split(train_full, train_percentage=0.8)

In [158]:
print(f"train: {train.nnz}")
print(f"validation: {val.nnz}")
print(f"test: {test.nnz}")

train: 611
validation: 129
test: 260


## Run and Evaluate ALS Model

In [159]:
def als_tuning(trial):
    factors = trial.suggest_int(name='factors',
                                low=10,high=60,step=10)
    regularization = trial.suggest_float(name='regularization',
                                       low=0.01, high=1.0, step=0.001)
    iterations = trial.suggest_int(name='iterations',
                                   low=10, high=30, step=10)
    alpha = trial.suggest_int(name='alpha',
                                   low=5, high=50, step=5)
    #instance of model
    als_model = AlternatingLeastSquares(factors=factors, 
                                        regularization=regularization,
                                        iterations=iterations,
                                        alpha=alpha
                                       )
    #train the model and evaluate
    als_model.fit(train)
    val_metrics = ndcg_at_k(model=als_model, 
                                   train_user_items=train_full, 
                                   test_user_items=test, 
                                   K=5
                                  )
    return val_metrics

In [162]:
als_study = optuna.create_study(direction="minimize")
als_study.optimize(als_tuning, n_trials=10)  

[I 2024-09-18 14:46:49,345] A new study created in memory with name: no-name-3e29058b-eb49-4447-bcc1-da4703927cc4


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/97 [00:00<?, ?it/s]

[I 2024-09-18 14:46:49,382] Trial 0 finished with value: 0.11986840562868668 and parameters: {'factors': 10, 'regularization': 0.452, 'iterations': 10, 'alpha': 45}. Best is trial 0 with value: 0.11986840562868668.


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/97 [00:00<?, ?it/s]

[I 2024-09-18 14:46:49,432] Trial 1 finished with value: 0.10664040869890452 and parameters: {'factors': 60, 'regularization': 0.108, 'iterations': 20, 'alpha': 20}. Best is trial 1 with value: 0.10664040869890452.


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/97 [00:00<?, ?it/s]

[I 2024-09-18 14:46:49,455] Trial 2 finished with value: 0.11575621036011584 and parameters: {'factors': 10, 'regularization': 0.23800000000000002, 'iterations': 20, 'alpha': 50}. Best is trial 1 with value: 0.10664040869890452.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/97 [00:00<?, ?it/s]

[I 2024-09-18 14:46:49,498] Trial 3 finished with value: 0.08053710444417504 and parameters: {'factors': 30, 'regularization': 0.8250000000000001, 'iterations': 30, 'alpha': 10}. Best is trial 3 with value: 0.08053710444417504.


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/97 [00:00<?, ?it/s]

[I 2024-09-18 14:46:49,541] Trial 4 finished with value: 0.08830044842241987 and parameters: {'factors': 50, 'regularization': 0.119, 'iterations': 20, 'alpha': 50}. Best is trial 3 with value: 0.08053710444417504.


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/97 [00:00<?, ?it/s]

[I 2024-09-18 14:46:49,571] Trial 5 finished with value: 0.09280859720737719 and parameters: {'factors': 50, 'regularization': 0.6930000000000001, 'iterations': 10, 'alpha': 30}. Best is trial 3 with value: 0.08053710444417504.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/97 [00:00<?, ?it/s]

[I 2024-09-18 14:46:49,640] Trial 6 finished with value: 0.09905986503730287 and parameters: {'factors': 60, 'regularization': 0.395, 'iterations': 30, 'alpha': 30}. Best is trial 3 with value: 0.08053710444417504.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/97 [00:00<?, ?it/s]

[I 2024-09-18 14:46:49,696] Trial 7 finished with value: 0.10128051976209057 and parameters: {'factors': 50, 'regularization': 0.652, 'iterations': 30, 'alpha': 30}. Best is trial 3 with value: 0.08053710444417504.


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/97 [00:00<?, ?it/s]

[I 2024-09-18 14:46:49,720] Trial 8 finished with value: 0.10991100892924958 and parameters: {'factors': 20, 'regularization': 0.502, 'iterations': 20, 'alpha': 35}. Best is trial 3 with value: 0.08053710444417504.


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/97 [00:00<?, ?it/s]

[I 2024-09-18 14:46:49,747] Trial 9 finished with value: 0.10111670748736687 and parameters: {'factors': 40, 'regularization': 0.076, 'iterations': 10, 'alpha': 50}. Best is trial 3 with value: 0.08053710444417504.


In [163]:
print(als_study.best_value)

0.08053710444417504


## Generate Predictions