# Importing Libraries

In [121]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import datetime
from sklearn.metrics.pairwise import cosine_similarity

# Importing the datasets

In [122]:
# Load the Flipkart product sample from a CSV path relative to the working directory.
df1_flipkart = pd.read_csv(filepath_or_buffer="flipkart_com-ecommerce_sample.csv")

In [123]:
# Load the Amazon product sample; the file is decoded with the 'latin' codec.
df2_amazon = pd.read_csv(
    filepath_or_buffer="amz_com-ecommerce_sample.csv",
    encoding="latin",
)

##### Feature Engineering 

In [124]:
# Drop the identifier, URL, image, rating and specification columns from the Amazon frame in place.
df2_amazon.drop(
    columns=[
        'uniq_id',
        'product_url',
        'image',
        'is_FK_Advantage_product',
        'product_rating',
        'overall_rating',
        'product_specifications',
    ],
    inplace=True,
)

In [125]:
# Drop the identifier, URL, image, rating and specification columns from the Flipkart frame in place.
df1_flipkart.drop(
    columns=[
        'uniq_id',
        'product_url',
        'image',
        'is_FK_Advantage_product',
        'product_rating',
        'overall_rating',
        'product_specifications',
    ],
    inplace=True,
)

In [126]:
# Preview the first Flipkart row to check which columns are present.
df1_flipkart.head(n=1)

Unnamed: 0,crawl_timestamp,product_name,product_category_tree,pid,retail_price,discounted_price,description,brand
0,2016-03-25 22:59:23 +0000,Alisha Solid Women's Cycling Shorts,"[""Clothing >> Women's Clothing >> Lingerie, Sl...",SRTEH2FF9KEDEFGF,999.0,379.0,Key Features of Alisha Solid Women's Cycling S...,Alisha


In [127]:
# Preview the first Amazon row to check which columns are present.
df2_amazon.head(n=1)

Unnamed: 0,crawl_timestamp,product_name,product_category_tree,pid,retail_price,discounted_price,description,brand
0,2016-03-25 22:59:23 +0000,Alisha Solid Women's Cycling Shorts,"[""Clothing >> Women's Clothing >> Lingerie, Sl...",SRTEH2FF9KEDEFGF,982,438,Key Features of Alisha Solid Women's Cycling S...,Alisha


#### Extracting the date and time 

In [128]:
# Copy the raw Amazon crawl timestamps into a plain Python list, in row order.
df2_time = list(df2_amazon['crawl_timestamp'])

In [129]:
# Copy the raw Flipkart crawl timestamps into a plain Python list, in row order.
df1_time = list(df1_flipkart['crawl_timestamp'])

In [130]:
# One independent, empty accumulator per timestamp component of the Amazon rows.
df2_year, df2_month, df2_day, df2_hour, df2_minute, df2_second = ([] for _ in range(6))

In [131]:
# One independent, empty accumulator per timestamp component of the Flipkart rows.
df1_year, df1_month, df1_day, df1_hour, df1_minute, df1_second = ([] for _ in range(6))

In [132]:
# Split every Flipkart crawl timestamp into its calendar and clock components.
for raw_stamp in df1_time:
    # The last six characters are cut off before parsing (the " +0000" suffix in the sample rows).
    parsed = datetime.datetime.strptime(raw_stamp[:-6], "%Y-%m-%d %H:%M:%S")
    for bucket, component in (
        (df1_year, parsed.year),
        (df1_month, parsed.month),
        (df1_day, parsed.day),
        (df1_hour, parsed.hour),
        (df1_minute, parsed.minute),
        (df1_second, parsed.second),
    ):
        bucket.append(component)

In [133]:
# Split every Amazon crawl timestamp into its calendar and clock components.
for raw_stamp in df2_time:
    # The last six characters are cut off before parsing (the " +0000" suffix in the sample rows).
    parsed = datetime.datetime.strptime(raw_stamp[:-6], "%Y-%m-%d %H:%M:%S")
    for bucket, component in (
        (df2_year, parsed.year),
        (df2_month, parsed.month),
        (df2_day, parsed.day),
        (df2_hour, parsed.hour),
        (df2_minute, parsed.minute),
        (df2_second, parsed.second),
    ):
        bucket.append(component)

In [134]:
# Preview the first Flipkart row before the parsed timestamp parts are attached.
df1_flipkart.head(n=1)

Unnamed: 0,crawl_timestamp,product_name,product_category_tree,pid,retail_price,discounted_price,description,brand
0,2016-03-25 22:59:23 +0000,Alisha Solid Women's Cycling Shorts,"[""Clothing >> Women's Clothing >> Lingerie, Sl...",SRTEH2FF9KEDEFGF,999.0,379.0,Key Features of Alisha Solid Women's Cycling S...,Alisha


###### Adding the date and time components as new columns

In [135]:
# Attach the parsed timestamp components to the Flipkart frame as six new columns.
for part_name, part_values in (
    ('year', df1_year),
    ('month', df1_month),
    ('day', df1_day),
    ('hour', df1_hour),
    ('minute', df1_minute),
    ('second', df1_second),
):
    df1_flipkart[part_name] = pd.DataFrame(part_values)

In [136]:
# Attach the parsed timestamp components to the Amazon frame as six new columns.
for part_name, part_values in (
    ('year', df2_year),
    ('month', df2_month),
    ('day', df2_day),
    ('hour', df2_hour),
    ('minute', df2_minute),
    ('second', df2_second),
):
    df2_amazon[part_name] = pd.DataFrame(part_values)

In [137]:
# Preview the first Flipkart row, now including the year/month/day/hour/minute/second columns.
df1_flipkart.head(n=1)

Unnamed: 0,crawl_timestamp,product_name,product_category_tree,pid,retail_price,discounted_price,description,brand,year,month,day,hour,minute,second
0,2016-03-25 22:59:23 +0000,Alisha Solid Women's Cycling Shorts,"[""Clothing >> Women's Clothing >> Lingerie, Sl...",SRTEH2FF9KEDEFGF,999.0,379.0,Key Features of Alisha Solid Women's Cycling S...,Alisha,2016,3,25,22,59,23


In [138]:
# Preview the first Amazon row, now including the year/month/day/hour/minute/second columns.
df2_amazon.head(n=1)

Unnamed: 0,crawl_timestamp,product_name,product_category_tree,pid,retail_price,discounted_price,description,brand,year,month,day,hour,minute,second
0,2016-03-25 22:59:23 +0000,Alisha Solid Women's Cycling Shorts,"[""Clothing >> Women's Clothing >> Lingerie, Sl...",SRTEH2FF9KEDEFGF,982,438,Key Features of Alisha Solid Women's Cycling S...,Alisha,2016,3,25,22,59,23


###### Feature Engineering

In [139]:
# The raw timestamp string has been split into separate columns, so remove it in place.
df2_amazon.drop(columns=['crawl_timestamp'], inplace=True)

In [140]:
# The raw timestamp string has been split into separate columns, so remove it in place.
df1_flipkart.drop(columns=['crawl_timestamp'], inplace=True)

###### Importing the pretrained Sentence Transformer model

In [141]:
# Identifier of the pretrained sentence-embedding checkpoint used for product-name similarity.
PRETRAINED_MODEL_NAME = "nli-distilroberta-base-v2"
model = SentenceTransformer(PRETRAINED_MODEL_NAME)

###### Creating some useful functions so that we can reuse them

In [142]:
def choosing_similar_products(prd_name, prefix_len=2):
    """Collect Amazon product names that share a leading prefix with ``prd_name``.

    Scans the ``product_name`` column of the module-level ``df2_amazon`` frame
    and keeps every row whose first ``prefix_len`` characters equal the first
    ``prefix_len`` characters of ``prd_name`` (exact, case-sensitive match).

    Parameters
    ----------
    prd_name : str
        Product name typed by the user. A non-string value matches nothing.
    prefix_len : int, default 2
        Number of leading characters that must agree. The default keeps the
        original two-character pre-filter.

    Returns
    -------
    list
        ``[sentences, index, to_compare]`` where ``sentences`` holds the
        matching product names, ``index`` the row labels of those names in
        ``df2_amazon`` and ``to_compare`` is ``[prd_name]``.
    """
    to_compare = [prd_name]
    sentences = []
    index = []

    # A non-string query can never equal a string prefix, so there is nothing to scan.
    if not isinstance(prd_name, str):
        return [sentences, index, to_compare]

    prefix = prd_name[:prefix_len]
    for row_label, name in df2_amazon['product_name'].items():
        # Non-string cells (e.g. NaN for a missing name) are skipped explicitly
        # instead of hiding every possible error behind a bare ``except``.
        if isinstance(name, str) and name[:prefix_len] == prefix:
            index.append(row_label)
            sentences.append(name)
    return [sentences, index, to_compare]

In [143]:
def encoding(to_compare,sentences,prd_name):
    """Score candidate names against the query and keep the exact-name matches.

    The query in ``to_compare`` and the candidates in ``sentences`` are
    embedded together with the module-level ``model``; every entry after the
    first is scored against the first with ``cosine_similarity``.

    Parameters
    ----------
    to_compare : list
        List holding the query text; its first element is the reference.
    sentences : list
        Candidate product names.
    prd_name : str
        Name used to filter the scored table (exact equality on "Sentence").

    Returns
    -------
    pandas.DataFrame
        Columns "Sentence" and "Similarity_Score", restricted to rows whose
        sentence equals ``prd_name``. Row labels are the positions of those
        rows in the scored candidate list. The frame is empty when there is
        nothing to score.
    """
    combined = to_compare + sentences

    # With fewer than two texts there is nothing to compare the reference with;
    # scoring against an empty slice would raise, so return an empty table instead.
    if len(combined) < 2:
        return pd.DataFrame({"Sentence": [], "Similarity_Score": []})

    sentence_embeddings = model.encode(combined)
    scores = cosine_similarity([sentence_embeddings[0]], sentence_embeddings[1:]).flatten().tolist()
    df_new = pd.DataFrame({"Sentence": combined[1:], "Similarity_Score": scores})
    return df_new[df_new['Sentence'] == prd_name]

In [144]:
def filtering_amazon(r,index):
    """Look up name and prices in ``df2_amazon`` for the rows kept in ``r``.

    Parameters
    ----------
    r : pandas.DataFrame
        Filtered candidate table. Its row labels are read as positions into
        ``index`` (this is what ``encoding`` produces, because the boolean
        filter keeps the labels of the scored candidate list).
    index : list
        Row labels of the candidate products in ``df2_amazon``.

    Returns
    -------
    dict
        Keys 'Product name in Amazon', 'Retail Price in Amazon' and
        'Discounted Price in Amazon', each mapping to a list with one entry
        per selected row.
    """
    # Map every surviving row of `r` back to its own dataframe row. Taking the
    # first len(r) entries of `index` would return unrelated candidates whenever
    # the match is not at the head of the candidate list.
    try:
        rows = [index[position] for position in r.index]
    except (AttributeError, TypeError, IndexError):
        # `r` carries no usable positional labels (e.g. a plain list):
        # fall back to the first len(r) candidates.
        rows = list(index[0:len(r)])

    names = df2_amazon['product_name']
    retail_prices = df2_amazon['retail_price']
    discounted_prices = df2_amazon['discounted_price']
    dic_amazon = {
        'Product name in Amazon': [names[i] for i in rows],
        'Retail Price in Amazon': [retail_prices[i] for i in rows],
        'Discounted Price in Amazon': [discounted_prices[i] for i in rows],
    }
    return dic_amazon

In [145]:
def filtering_flipkart(r,index):
    """Look up name and prices in ``df1_flipkart`` for the rows kept in ``r``.

    Parameters
    ----------
    r : pandas.DataFrame
        Filtered candidate table. Its row labels are read as positions into
        ``index`` (this is what ``encoding`` produces, because the boolean
        filter keeps the labels of the scored candidate list).
    index : list
        Row labels of the candidate products.
        NOTE(review): ``predict`` passes labels gathered from the Amazon frame;
        reusing them here presumes both frames list products in the same row
        order — confirm.

    Returns
    -------
    dict
        Keys 'Product name in Flipkart', 'Retail Price in Flipkart' and
        'Discounted Price in Flipkart', each mapping to a list with one entry
        per selected row.
    """
    # Map every surviving row of `r` back to its own dataframe row. Taking the
    # first len(r) entries of `index` would return unrelated candidates whenever
    # the match is not at the head of the candidate list.
    try:
        rows = [index[position] for position in r.index]
    except (AttributeError, TypeError, IndexError):
        # `r` carries no usable positional labels (e.g. a plain list):
        # fall back to the first len(r) candidates.
        rows = list(index[0:len(r)])

    names = df1_flipkart['product_name']
    retail_prices = df1_flipkart['retail_price']
    discounted_prices = df1_flipkart['discounted_price']
    dic_flipkart = {
        'Product name in Flipkart': [names[i] for i in rows],
        'Retail Price in Flipkart': [retail_prices[i] for i in rows],
        'Discounted Price in Flipkart': [discounted_prices[i] for i in rows],
    }
    return dic_flipkart

#  Building a predictive system

In [152]:
def predict(prd_name=None):
    """Compare one product's prices across the Flipkart and Amazon samples.

    Parameters
    ----------
    prd_name : str, optional
        Product name as it appears in the Amazon dataset. When omitted, the
        name is read interactively with ``input()``, as before. Passing it
        makes the cell reproducible without keyboard interaction.

    Returns
    -------
    pandas.DataFrame
        Flipkart name/retail/discounted columns followed by the matching
        Amazon columns, joined side by side on the row index.
    """
    if prd_name is None:
        # take input
        print("Type the product name you want to compare:")
        print("Note: Type the product name from amazon dataset")
        prd_name = input()

    # Pre-filter the Amazon products and remember the row labels of the candidates.
    sentences, index, to_compare = choosing_similar_products(prd_name)

    # Score the candidates against the input with cosine similarity; "r" keeps
    # the candidate rows whose name equals the input.
    r = encoding(to_compare, sentences, prd_name)

    # Dictionaries containing the product name, retail price and discounted
    # price of the selected rows in each dataset.
    dic_amazon = filtering_amazon(r, index)
    dic_flipkart = filtering_flipkart(r, index)

    # One dataframe per marketplace, built from the dictionaries above.
    final_df_flipkart = pd.DataFrame(dic_flipkart)
    final_df_amazon = pd.DataFrame(dic_amazon)

    # Place the two frames side by side, keeping only row labels present in both.
    result = pd.concat([final_df_flipkart, final_df_amazon], axis=1, join='inner')

    # the final output as required in the problem statement
    return result

In [153]:
# Run the interactive comparison; the prompt asks for a product name from the Amazon dataset.
predict()

Type the product name you want to compare:
Note: Type the product name from amazon dataset
FDT WOMEN'S Leggings Pants


Unnamed: 0,Product name in Flipkart,Retail Price in Flipkart,Discounted Price in Flipkart,Product name in Amazon,Retail Price in Amazon,Discounted Price in Amazon
0,FDT Women's Leggings,699.0,309.0,FDT WOMEN'S Leggings Pants,698,362


# Project completed by - Swetanshu Pandey