## Imports

In [3]:
import numpy as np
import pandas as pd

import sklearn.preprocessing as skp

In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline

In [33]:
import sklearn.metrics as skm

## Load Dataset

In [5]:
data_path = "Business-Supplier_Dataset1.csv"
data = pd.read_csv(data_path, engine='python')

In [6]:
data.head()

Unnamed: 0,BusinessID,SupplierID,Quality,Serviceability,Communicativeness,Reputation,Financial Condition,Condition of Supplier's Assets,Price,Delivery Time
0,1,14,10,3.74,1.88,7.16,5.46,1.31,78,17
1,1,40,2,4.83,2.87,6.11,5.05,4.56,14,16
2,1,31,7,5.69,9.65,8.6,6.28,9.69,60,28
3,1,46,1,1.14,4.81,4.55,2.79,7.4,35,1
4,1,18,7,6.86,9.23,8.65,4.34,7.02,37,3


In [7]:
def printFeatureStats(feature):
    
    min_value = data[feature].min()
    max_value = data[feature].max()
    quantiles = data[feature].quantile([0.25, 0.5, 0.75])

    print(f"Min: {min_value}, Max: {max_value}")
    print(f"Quantiles: \n{quantiles}")

## Pre-processing

In [8]:
## Normalize data
scaler = skp.MinMaxScaler(feature_range=(0, 10))

data['Price'] = scaler.fit_transform(data[['Price']])
data['Delivery Time'] = scaler.fit_transform(data[['Delivery Time']])

printFeatureStats('Price')
printFeatureStats('Delivery Time')

Min: 0.0, Max: 10.0
Quantiles: 
0.25    2.244898
0.50    4.795918
0.75    7.244898
Name: Price, dtype: float64
Min: 0.0, Max: 10.0
Quantiles: 
0.25    2.5
0.50    5.0
0.75    7.5
Name: Delivery Time, dtype: float64


In [9]:
data.rename(columns={'Condition of Supplier\'s Assets': 'Condition of Assets'}, inplace=True)
print(data.columns)

Index(['BusinessID', 'SupplierID', 'Quality', 'Serviceability',
       'Communicativeness', 'Reputation', 'Financial Condition',
       'Condition of Assets', 'Price', 'Delivery Time'],
      dtype='object')


In [10]:
## weights for interaction score
weights = {
    'Quality': 0.15,
    'Serviceability': 0.15,
    'Communicativeness': 0.12,
    'Reputation': 0.08,
    'Financial Condition': 0.10,
    'Condition of Assets': 0.10,
    'Price': 0.15,
    'Delivery Time': 0.15
}

In [11]:
num_columns = data.shape[1]
data['Interaction Score'] = 0

## Add values to column
for i in range(2, num_columns-1):
    feature = data.columns[i]
    data['Interaction Score'] += data[feature] * weights[feature]
    
data.head()       

Unnamed: 0,BusinessID,SupplierID,Quality,Serviceability,Communicativeness,Reputation,Financial Condition,Condition of Assets,Price,Delivery Time,Interaction Score
0,1,14,10,3.74,1.88,7.16,5.46,1.31,7.857143,5.714286,4.714971
1,1,40,2,4.83,2.87,6.11,5.05,4.56,1.326531,5.357143,3.01768
2,1,31,7,5.69,9.65,8.6,6.28,9.69,6.020408,9.642857,6.249561
3,1,46,1,1.14,4.81,4.55,2.79,7.4,3.469388,0.0,2.801608
4,1,18,7,6.86,9.23,8.65,4.34,7.02,3.673469,0.714286,5.56562


In [12]:
target = data['Interaction Score'].values

In [20]:
num_columns = data.shape[1]
#first 10 columns saved for training set
x = data.iloc[:, :num_columns-1].to_numpy()
print(x.shape)
#interaction score column saved for testing set
y = data.iloc[:, -1:].to_numpy()
print(y.shape)

(1000, 10)
(1000, 1)


In [21]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

## Run and Evaluate FM Regressor

In [22]:
fm_model = make_pipeline(skp.PolynomialFeatures(degree=2, interaction_only=True),
                         Ridge(alpha=1.0)
                        )

In [23]:
fm_model.fit(x_train, y_train)

## Generate Predictions

In [26]:
test_predictions = fm_model.predict(x_test)
print(test_predictions)

[[4.64944389]
 [5.5232855 ]
 [4.64089654]
 [5.84275246]
 [4.90132642]
 [4.73721527]
 [4.55228385]
 [3.20499059]
 [3.5800395 ]
 [3.43235178]
 [5.20004093]
 [5.79524119]
 [4.29158011]
 [6.16287011]
 [5.5366417 ]
 [4.45309051]
 [3.62952856]
 [6.22756245]
 [5.12642605]
 [2.68560662]
 [4.69315085]
 [5.38809568]
 [3.5155717 ]
 [5.09435565]
 [5.12178785]
 [4.68663828]
 [4.12204961]
 [5.99431891]
 [3.81526641]
 [2.93371001]
 [4.74115694]
 [4.13889987]
 [5.27708868]
 [2.94421204]
 [3.71361015]
 [5.61792381]
 [5.07816909]
 [3.46881534]
 [2.73815175]
 [4.54016156]
 [6.12887725]
 [3.96191486]
 [5.05547978]
 [4.33162247]
 [5.89720002]
 [5.16408792]
 [5.53563422]
 [5.1263867 ]
 [4.33138912]
 [5.71332962]
 [5.82579027]
 [4.11767838]
 [5.42324439]
 [3.61132823]
 [4.31390233]
 [3.97111665]
 [3.60704676]
 [6.28315521]
 [5.14212556]
 [5.92963956]
 [4.66822191]
 [4.42401724]
 [3.42046398]
 [5.91567351]
 [4.0873134 ]
 [5.15314695]
 [5.19682658]
 [4.61853662]
 [3.17286141]
 [5.51810405]
 [5.11109733]
 [4.73

In [27]:
#reshape predictions and y_test (ground truth) into 2D array format
y_test_reshaped = y_test.reshape(1, -1)
predictions_reshaped = test_predictions.reshape(1, -1)

In [36]:
ndcg = skm.ndcg_score(y_test_reshaped, predictions_reshaped, k=3)
print(f"ndcg score @ 5: {ndcg}" )

ndcg score @ 5: 1.0


In [47]:
new_data = data.copy()
business_id = 1

new_data['BusinessID'] = business_id

x_new = new_data.iloc[:, :num_columns-1].to_numpy()
print(x_new.shape)


(1000, 10)


In [48]:
business1_predictions = fm_model.predict(x_new)

In [60]:
print(business1_predictions)

[[4.71391213]
 [3.01938818]
 [6.25158217]
 [2.80495311]
 [5.56514774]
 [4.33791907]
 [3.1933393 ]
 [4.13815903]
 [4.4783497 ]
 [3.73859431]
 [6.28314174]
 [5.53939559]
 [6.04242411]
 [3.80999664]
 [5.03097511]
 [5.0973743 ]
 [4.59546513]
 [2.77390432]
 [4.19181773]
 [6.79165193]
 [3.91661361]
 [3.09568045]
 [2.92202785]
 [4.31390371]
 [2.82684647]
 [5.96718377]
 [5.18606118]
 [5.15475693]
 [3.95558837]
 [5.18947628]
 [3.9711261 ]
 [3.94493748]
 [4.33778253]
 [5.33729244]
 [5.77032185]
 [4.63837958]
 [5.28765698]
 [3.65090578]
 [4.34225467]
 [4.1154894 ]
 [5.20070028]
 [4.02634945]
 [4.37428703]
 [4.47858003]
 [4.75831593]
 [5.27532438]
 [3.98006944]
 [4.14446608]
 [3.75792552]
 [5.8423747 ]
 [5.85837771]
 [4.07957442]
 [4.50648777]
 [5.63285762]
 [4.42402241]
 [4.84636907]
 [4.60091408]
 [4.43727169]
 [3.79291337]
 [4.54018353]
 [4.47692047]
 [4.66825208]
 [5.95562777]
 [6.3448439 ]
 [5.20089331]
 [5.21771391]
 [4.43779025]
 [6.03118162]
 [4.70226654]
 [4.06933643]
 [3.17296778]
 [4.44

In [123]:
import heapq
k = 8 #adjust to get top 5 recommendations

top_k = heapq.nlargest(8, business1_predictions)
top_k = np.array(top_5)
print(top_k)

[[7.4175656 ]
 [7.06203858]
 [7.01900243]
 [6.79165193]
 [6.7880298 ]
 [6.75653428]
 [6.72545339]
 [6.71071626]]


In [124]:
indices = np.where(np.isin(business1_predictions, top_k))[0]
print(indices)
delete_arr = []

for i in range (len(indices)):
    index = indices[i]
    for j in range(0, 10):
        if data.loc[j, 'SupplierID'] == data.loc[index, 'SupplierID']:
            delete_arr.append(i)
            print(i)
print(delete_arr)
indices = np.delete(indices, delete_arr)
print(indices)

[ 19 103 144 733 873 891 910 923]
0
3
7
[0, 3, 7]
[103 144 873 891 910]


In [125]:
print(f"Supplier:\n{data.loc[indices, "SupplierID"]}")

Supplier:
103    50
144     3
873    21
891    44
910    42
Name: SupplierID, dtype: int64
