In [106]:
from flask import Flask, request, jsonify
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from flask_cors import CORS

In [107]:
order_history = pd.read_csv("OrderHistory.csv")
products_list = pd.read_csv("ProductsList.csv")

In [134]:
product_info = products_list[products_list['ProductId'] == 306].iloc[0]

In [135]:
product_info

ProductId                                                              306
product_url              http://www.flipkart.com/first-choice-cotton-em...
product_name                     First Choice Cotton Embroidered Diwan Set
product_category_tree    ["Home Furnishing >> Living Room Furnishing >>...
image                    ["http://img5a.flixcart.com/image/diwan-set/w/...
description              Key Features of First Choice Cotton Embroidere...
brand                                                         First Choice
Name: 305, dtype: object

In [136]:
pd.concat([order_history, product_info], ignore_index=True)
product_descriptions = products_list.set_index('ProductId')['description'].to_dict()

# Sample a subset of your data (assuming df1 is your DataFrame)
sample_size = 2600  # Adjust as needed
df2 = products_list.sample(n=sample_size, random_state=42)

In [137]:
# df2 = df2.merge(products_list[['ProductId', 'description']], on='ProductId', how='left')

    # Fill missing descriptions with an empty string
df2['description'] = df2['description'].fillna('')

product_info_df = pd.DataFrame(product_info).transpose()

In [138]:
product_info_df

Unnamed: 0,ProductId,product_url,product_name,product_category_tree,image,description,brand
305,306,http://www.flipkart.com/first-choice-cotton-em...,First Choice Cotton Embroidered Diwan Set,"[""Home Furnishing >> Living Room Furnishing >>...","[""http://img5a.flixcart.com/image/diwan-set/w/...",Key Features of First Choice Cotton Embroidere...,First Choice


In [139]:
insert_index = 459  # Adjust this to the desired index
df2 = pd.concat([df2.iloc[:insert_index], product_info_df, df2.iloc[insert_index:]], axis=0)
df2.index = range(1, len(df2) + 1)

In [140]:
df2


Unnamed: 0,ProductId,product_url,product_name,product_category_tree,image,description,brand
1,10651,http://www.flipkart.com/avaron-projekt-moustac...,Avaron Projekt Moustache Brooch,"[""Jewellery >> Accessories >> Brooches >> Avar...","[""http://img6a.flixcart.com/image/brooch/s/r/7...",Specifications of Avaron Projekt Moustache Bro...,Avaron Projekt
2,2042,http://www.flipkart.com/grafion-comfort-feel-w...,Grafion by Grafion - Comfort Feel Women's Tube...,"[""Clothing >> Women's Clothing >> Lingerie, Sl...","[""http://img5a.flixcart.com/image/bra/b/3/q/ln...",Grafion by Grafion - Comfort Feel Women's Tube...,Black
3,8669,http://www.flipkart.com/blessed-ring-plant-con...,Blessed Ring Plant Container Set,"[""Tools & Hardware >> Tools >> Gardening Tools...","[""http://img6a.flixcart.com/image/plant-contai...",Buy Blessed Ring Plant Container Set for Rs.84...,Blessed
4,1115,http://www.flipkart.com/jrb-1042-smallest-mobi...,JRB 1042 Smallest Mobile Powered By OTG Enable...,"[""Computers >> Laptop Accessories >> USB Gadge...","[""http://img5a.flixcart.com/image/usb-gadget/b...",Key Features of JRB 1042 Smallest Mobile Power...,JRB
5,13903,http://www.flipkart.com/northern-lights-stripe...,Northern Lights Striped Men's Polo Neck T-Shirt,"[""Clothing >> Men's Clothing >> T-Shirts >> No...","[""http://img6a.flixcart.com/image/t-shirt/b/2/...",Northern Lights Striped Men's Polo Neck T-Shir...,
...,...,...,...,...,...,...,...
2597,19364,http://www.flipkart.com/eaglemoss-publications...,Eaglemoss Publications Dc Superhero Chess & Ma...,"[""Toys & School Supplies >> Action Figures >> ...","[""http://img5a.flixcart.com/image/action-figur...",Specifications of Eaglemoss Publications Dc Su...,Eaglemoss Publications
2598,10841,http://www.flipkart.com/lab-no-4-analog-30-cm-...,Lab No. 4 Analog 30 cm Dia Wall Clock,"[""Home Decor & Festive Needs >> Wall Decor & C...","[""http://img5a.flixcart.com/image/wall-clock/a...",Key Features of Lab No. 4 Analog 30 cm Dia Wal...,Lab No. 4
2599,931,http://www.flipkart.com/rosemoore-home-gel-air...,ROSEMOORE Home Gel Air Freshener,"[""Home Decor & Festive Needs >> Candles & Frag...","[""http://img6a.flixcart.com/image/air-freshene...",Key Features of ROSEMOORE Home Gel Air Freshen...,ROSEMOORE
2600,5602,http://www.flipkart.com/gadget-wagon-consistan...,Gadget-Wagon Consistant 2.1 Channel Home Theat...,"[""Home Entertainment >> Audio Players >> Home ...","[""http://img6a.flixcart.com/image/hifi-system/...",Buy Gadget-Wagon Consistant 2.1 Channel Home T...,Gadget-Wagon


In [141]:
tfidf = TfidfVectorizer(stop_words='english')

    # Construct the required TF-IDF matrix by fitting and transforming the data
tfidf_matrix = tfidf.fit_transform(df2['description'])

    # Compute the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
indices = pd.Series(df2.index, index=df2['ProductId']).drop_duplicates()

In [142]:
cosine_sim

array([[1.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        6.84441968e-02, 0.00000000e+00, 1.86694271e-02],
       [0.00000000e+00, 1.00000000e+00, 1.51776688e-02, ...,
        6.08571524e-03, 1.04826155e-02, 6.50440222e-04],
       [0.00000000e+00, 1.51776688e-02, 1.00000000e+00, ...,
        7.11290722e-04, 5.68909146e-02, 7.51246667e-03],
       ...,
       [6.84441968e-02, 6.08571524e-03, 7.11290722e-04, ...,
        1.00000000e+00, 3.09332950e-02, 2.08759035e-02],
       [0.00000000e+00, 1.04826155e-02, 5.68909146e-02, ...,
        3.09332950e-02, 1.00000000e+00, 7.60336968e-04],
       [1.86694271e-02, 6.50440222e-04, 7.51246667e-03, ...,
        2.08759035e-02, 7.60336968e-04, 1.00000000e+00]])

KeyError: 306

In [143]:
import numpy as np

In [144]:
indices

ProductId
10651       1
2042        2
8669        3
1115        4
13903       5
         ... 
19364    2597
10841    2598
931      2599
5602     2600
9467     2601
Length: 2601, dtype: int64

In [145]:
def get_recommendations(product_id, cosine_sim=cosine_sim):
    idx = indices[product_id]  # Use the mapped index from indices
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    movie_indices = [i[0] for i in sim_scores]
    return df2['ProductId'].iloc[movie_indices].tolist()


recommendations = get_recommendations(306)
# Append the product information to the order_history DataFrame
# order_history = order_history.append(product_info, ignore_index=True)
# print(order_history)


# Get product names based on product IDs
product_names = [products_list.loc[products_list['ProductId'] == pid, 'product_name'].values[0] for pid in recommendations]

In [146]:
product_names

['First Choice Cotton Embroidered Diwan Set',
 'Hemden Embroidered Cushions Cover',
 'Adishma Printed Cushions Cover',
 'Homec Geometric Cushions Cover',
 'SYON Cotton Floral Double Bedsheet',
 'Shopkeeda Abstract Cushions Cover',
 'JBC Collection Cotton Printed Single Bedsheet',
 'JBC Collection Cotton Printed Single Bedsheet',
 'Lushomes Plain Cushions Cover',
 "Ruhi's Creations Polyester Silk Blend Cartoon King sized Double Bedsheet"]

: 