In [20]:
import json
from tabulate import tabulate

# Short codes for the courier companies; these are the keys used by the
# sentiment, distance and score dictionaries in this notebook.
company_names = ['CLE', 'DHL', 'GDEX', 'J&T', 'PL']

# Problem 1 and problem 2 do not spell the company names the same way,
# so translate every name found in the problem 1 output to its short code.
company_names_dict = {
    'DHL': 'DHL',
    'Pos Laju': 'PL',
    'City-link Express': 'CLE',
    'GDEX': 'GDEX',
    'J&T': 'J&T',
}

## Import sentiment analysis output
Here, larger values are better because they represent a greater positive sentiment

In [35]:
# Load the {company: sentiment value} mapping from the sentiment analysis output.
# The context manager closes the file even if json.load raises.
with open('sentiment_output.json') as f:
    sentiment = json.load(f)

# Sort the dictionary by value, largest (most positive) sentiment first.
sentiment = dict(sorted(sentiment.items(), key=lambda item: item[1], reverse=True))
print(sentiment)

{'DHL': 122, 'PL': 64, 'GDEX': 61, 'J&T': 40, 'CLE': 38}


## Import distance data for 3 customers
Here, smaller values are better

In [12]:
path = 'Problem 1 output/'
# One JSON result file per customer (Customer 1 .. Customer 3).
filenames = [f'Customer {customer_num}.json' for customer_num in range(1, 4)]
distanceList = []  # distance for each customer with all the companies

for filename in filenames:
    # The context manager closes the file even if the JSON cannot be parsed.
    with open(path + filename) as f:
        output = json.load(f)

    # Each dictionary is of the form {company: distance}; its index in
    # distanceList identifies the customer (index 0 is Customer 1).
    # Company names are translated to the short codes used in this notebook.
    temp_dict = {
        company_names_dict[entry['name']]: entry['total_distance']
        for entry in output['courierRanking']
    }

    distanceList.append(temp_dict)

print(distanceList)

[{'DHL': 53944, 'GDEX': 61078, 'PL': 63230, 'CLE': 99022, 'J&T': 124942}, {'DHL': 47523, 'PL': 56006, 'CLE': 70249, 'GDEX': 77009, 'J&T': 105959}, {'J&T': 52392, 'PL': 54168, 'GDEX': 72422, 'DHL': 83505, 'CLE': 111776}]


## Combining distance and sentiment analysis info to choose the best company
Shorter distance is better; greater sentiment is better.<br>
We want to give 50-50 importance to distance and sentiment. This weighting can be changed.<br><br>

**Formula**<br>
score = -(0.5 x distance / 1000) + (0.5 x sentiment)<br>
Larger score is better

In [48]:
# Weight given to distance:
#   the closer to 1, the more distance matters
#   the closer to 0, the more sentiment matters
probability = 0.5

score = []  # same layout as distanceList: one {company: score} dict per customer

for customer in distanceList:

    unsorted_scores = {}
    for company in company_names:
        # distance values scaled by 1000
        # (presumably to bring them closer to the range of the sentiment values)
        scaled_distance = customer[company] / 1000
        unsorted_scores[company] = -probability*scaled_distance + (1-probability)*sentiment[company]

    # Rank the companies for this customer, highest score first.
    ranked_pairs = sorted(unsorted_scores.items(), key=lambda pair: pair[1], reverse=True)
    temp_dict = dict(ranked_pairs)
    print(temp_dict)
    print()
    score.append(temp_dict)

{'DHL': 34.028, 'PL': 0.38500000000000156, 'GDEX': -0.03900000000000148, 'CLE': -30.511000000000003, 'J&T': -42.471}

{'DHL': 37.2385, 'PL': 3.997, 'GDEX': -8.0045, 'CLE': -16.124499999999998, 'J&T': -32.9795}

{'DHL': 19.247500000000002, 'PL': 4.916, 'GDEX': -5.7109999999999985, 'J&T': -6.1960000000000015, 'CLE': -36.888}



## Best company for each customer
Companies are sorted in the score dictionary from best to worst. Rank 1 is the best company for the customer; rank 5 is the worst.

In [49]:
# Header row: one column per rank. The number of ranks follows the number of
# companies, so the header stays correct if the company list changes.
table = [['Rank'] + [str(rank) for rank in range(1, len(company_names) + 1)]]

# Every dict in score is already sorted from best to worst, so its keys,
# in order, are that customer's ranking.
for customer_number, ranking in enumerate(score, start=1):
    table.append([f'Customer {customer_number}'] + list(ranking.keys()))

print(tabulate(table))

----------  ---  --  ----  ---  ---
Rank        1    2   3     4    5
Customer 1  DHL  PL  GDEX  CLE  J&T
Customer 2  DHL  PL  GDEX  CLE  J&T
Customer 3  DHL  PL  GDEX  J&T  CLE
----------  ---  --  ----  ---  ---
