# In [None]:
# Import necessary libraries:

import tkinter as tk
from tkinter import filedialog
import pandas as pd
from rake_nltk import Rake
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Build the main window that hosts the Excel-file picker.
root = tk.Tk()

# A raised, light-blue 300x300 canvas acts as the surface widgets are placed on.
canvas1 = tk.Canvas(
    master=root,
    width=300,
    height=300,
    bg='lightsteelblue2',
    relief='raised',
)
canvas1.pack()

# Define function to read the Excel file chosen by the user:

def getExcel():
    """Ask the user for an Excel file and load it into the global ``df``.

    Opens the file-selection dialog and reads the chosen workbook with
    ``pd.read_excel``, storing the result in the module-level ``df``.
    If the dialog is dismissed without a selection, nothing is read and
    any previously loaded ``df`` is left untouched.
    """
    global df

    import_file_path = filedialog.askopenfilename()
    # A cancelled dialog yields an empty value; there is nothing to read then.
    if not import_file_path:
        return
    df = pd.read_excel(import_file_path)
    
# Import button, placed at the centre of the 300x300 canvas; clicking it runs getExcel.
browseButton_Excel = tk.Button(text="      Import Excel File     ", command=getExcel, bg='green', fg='white', font=('helvetica', 12, 'bold'))
canvas1.create_window(150, 150, window=browseButton_Excel)

# Hand control to Tk; this call returns once the window has been closed.
root.mainloop()

# Everything below needs the DataFrame loaded by getExcel. If the window was
# closed without importing a file, stop here with a clear message instead of
# failing later with a NameError on `df`.
if 'df' not in globals():
    raise RuntimeError('No Excel file was imported before the window was closed.')

# Prepare data

## Drop rows that contain null values
df.dropna(inplace = True)

## Transform data: turn every product name into a list of lower-cased words.
## The transformation is applied to the column itself: rows yielded by
## ``iterrows`` are copies, so assigning to them does not update ``df``.
## 'Name' is kept as a list because the 'Bag of words' step joins it with spaces.
df['Name'] = df['Name'].map(lambda name: name.lower().split(' '))

## Create column consisting of all the key words of that product
def _extract_key_words(description):
    """Return the words RAKE assigns a degree to in *description*."""
    rake = Rake()
    rake.extract_keywords_from_text(description)
    # The degree mapping is keyed by word; only the words themselves are kept.
    return list(rake.get_word_degrees())


# Build the whole column in one pass rather than writing a list into each
# cell with ``df.at``, which depends on the placeholder column's dtype.
df['Key words'] = df['Product long name specifications'].map(_extract_key_words)

## Drop unnecessary attributes (the description is now represented by 'Key words')

df.drop(columns = ['Product long name specifications'], inplace = True)

## Create the 'Bag of words' attribute holding all the words used for the comparison
df['Bag of words'] = ''
columns = df.columns

# Columns whose cells hold a list of words and must be joined with spaces.
list_columns = ('Name', 'Key words')
# Columns that carry no descriptive text; the 'Bag of words' placeholder is
# excluded as well so it is not folded into itself.
skipped_columns = ('Status', 'SKU', 'Quantity', 'Bag of words')

for index, row in df.iterrows():
    parts = []
    for col in columns:
        if col in list_columns:
            parts.append(' '.join(row[col]))
        elif col not in skipped_columns:
            # str() keeps non-text attributes (e.g. numbers) from raising
            # a TypeError during concatenation.
            parts.append(str(row[col]))
    df.at[index, 'Bag of words'] = ' '.join(parts)

# From here on, products are addressed by their SKU.
df.set_index('SKU', inplace = True)


## Calculate the similarity scores: every product's bag of words is turned into
## a vector of word counts, and the pairwise cosine similarity between those
## vectors is what the final recommendation is based on.

# Positional lookup table: row number -> SKU (same order as the rows of df).
indices = pd.Series(data=df.index)

count = CountVectorizer()
count_matrix = count.fit_transform(df['Bag of words'])

# Square matrix: entry [i, j] is the similarity between product i and product j.
cosine_sim = cosine_similarity(X=count_matrix, Y=count_matrix)


## Define function to make recommendation

def recommendations(sku, cosine_sim = cosine_sim, top_n = 3):
    """Return up to ``top_n`` in-stock SKUs most similar to ``sku``.

    Args:
        sku: SKU of the product to find substitutes for. It is matched
            against the known SKUs both directly and by string form, so
            a typed ``"123"`` also matches an integer SKU ``123``.
        cosine_sim: Square similarity matrix whose rows and columns follow
            the row order of ``df`` (and therefore of ``indices``).
        top_n: Maximum number of SKUs to return.

    Returns:
        A list of SKUs ordered from most to least similar. The queried
        product itself and products whose ``Quantity`` is 0 are excluded.

    Raises:
        IndexError: If ``sku`` is not present in the data.
    """
    matches = indices[(indices == sku) | (indices.astype(str) == str(sku))]
    if matches.empty:
        raise IndexError(f'Unknown SKU: {sku!r}')
    idx = matches.index[0]

    # Rank every *other* product. The queried row is removed explicitly rather
    # than assumed to sort first: a product with an identical bag of words ties
    # with it at the top score. A stable sort keeps ties in row order.
    score_series = (
        pd.Series(cosine_sim[idx])
        .drop(idx)
        .sort_values(ascending = False, kind = 'mergesort')
    )

    # Positional lookups, built once; they also stay unambiguous if the same
    # SKU label appears on more than one row.
    skus = df.index.tolist()
    quantities = df['Quantity'].tolist()

    recommended_products = [] # In-stock candidates, best match first
    for i in score_series.index:
        if len(recommended_products) >= top_n:
            break
        if quantities[i] != 0:
            recommended_products.append(skus[i])

    return recommended_products


# Run test case

# input() already returns a string; surrounding whitespace is removed so a
# stray space does not prevent the SKU from being found.
sku = input('Please enter the SKU: ').strip() #Take SKU input from user

# Print the result so it is visible when the file is run as a plain script,
# not only when it is the last expression of a notebook cell.
print(recommendations(sku))