In [1]:
#import library
import pandas as pd
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.spatial.distance import euclidean, cityblock, cosine
from scipy.spatial.distance import hamming
from collections import Counter



In [2]:
#import data
df = pd.read_csv('df_20220323ver.csv')
#change column names
df.columns=['ID', 'Gender', 'Age_Group', 'Height', 'Weight', 'Foot_Size',
       'Product_Size', 'Rating', 'Item_Code','Sum_Code','Item_Name','Tags','Dept']

In [3]:
# Data Cleaning 
df['Height']=df['Height'].apply(lambda x:str(x).replace('身長: ','')).apply(lambda x:str(x).replace('"',''))
df['Weight']=df['Weight'].apply(lambda x:str(x).replace('体重: ',''))
df['Product_Size']=df['Product_Size'].apply(lambda x:str(x).replace('購入サイズ: ',''))
df['Foot_Size']=df['Foot_Size'].apply(lambda x:str(x).replace('足のサイズ: ',''))
df=df.replace('回答しない', np.nan)
df=df.replace('その他', np.nan)
df=df.replace('nan', np.nan)



In [4]:
# making dataframe for non foot size

df_nfs=df.copy()
df_nfs.drop('Foot_Size',axis=1,inplace=True)
print(len(df_nfs))
print(len(df_nfs.dropna()))
print(len(df_nfs.dropna())/len(df_nfs))
df_nfs.dropna(inplace=True)

47219
33712
0.7139498930515259


In [5]:
# dropna for dataframe with foot size
print(len(df))
print(len(df.dropna()))
print(len(df.dropna())/len(df))
df.dropna(inplace=True)


47219
28663
0.6070225968360194


In [6]:
# Data Cleaning with functions

# Gender

def gender_eng(x):
    if x=='男性':
        return 'Male'
    if x=='女性':
        return 'Female'


df['Gender']=df['Gender'].apply(lambda x:gender_eng(x))
df_nfs['Gender']=df_nfs['Gender'].apply(lambda x:gender_eng(x))

df=df[(df['Age_Group']!='10歳以下')&(df['Age_Group']!='0 - 6ヶ月')&(df['Age_Group']!='0 - 3ヶ月')&(df['Age_Group']!='7 - 9ヶ月')&(df['Age_Group']!='10 - 12ヶ月')&(df['Age_Group']!='7 - 12ヶ月')&(df['Age_Group']!='4 - 6ヶ月')]
df_nfs=df_nfs[(df_nfs['Age_Group']!='10歳以下')&(df_nfs['Age_Group']!='0 - 6ヶ月')&(df_nfs['Age_Group']!='0 - 3ヶ月')&(df_nfs['Age_Group']!='7 - 9ヶ月')&(df_nfs['Age_Group']!='10 - 12ヶ月')&(df_nfs['Age_Group']!='7 - 12ヶ月')&(df_nfs['Age_Group']!='4 - 6ヶ月')]

# Age

def Age_impute(x,y,z):
    if x=='10代':
        if z=='Female':
            if y in ['141 - 150cm', '151 - 155cm', '131 - 140cm','121 - 130cm']:
                return '10 - 14歳'
            else:
                return '15 - 19歳'
        if z=='Male':
            if y in ['91 - 95cm','141 - 150cm', '151 - 155cm', '131 - 140cm','121 - 130cm','156 - 160cm']:
                return '10 - 14歳'
            else:
                return '15 - 19歳'
            
    if x=='10歳以下':
        if z=='Female':
            if y in ['141 - 150cm', '151 - 155cm', '131 - 140cm','121 - 130cm']:
                return '10 - 14歳'
            else:
                return '15 - 19歳'
        if z=='Male':
            if y in ['91 - 95cm','141 - 150cm', '151 - 155cm', '131 - 140cm','121 - 130cm','156 - 160cm']:
                return '10 - 14歳'
            else:
                return '15 - 19歳'
    
    
    
            
    elif x=='3 - 6歳':
        return '4 - 6歳'
    
    else:
        return x

df['Age_Group']=df.apply(lambda df:Age_impute(df['Age_Group'],df['Height'],df['Gender']), axis=1)
df_nfs['Age_Group']=df_nfs.apply(lambda df_nfs:Age_impute(df_nfs['Age_Group'],df_nfs['Height'],df_nfs['Gender']), axis=1)


def Age_eng(x):
    if x=='10 - 14歳':
        return '10-14'
    elif x=='7 - 9歳':
        return '7-9'
    elif x=='4 - 6歳':
        return '4-6'
    elif x=='30代':
        return '30-39'
    elif x=='2 - 3歳':
        return '2-3'
    elif x=='40代':
        return '40-49'
    elif x=='20代':
        return '20-29'
    elif x=='15 - 19歳':
        return '15-19'
    elif x=='50代':
        return '50-59'
    elif x=='60代以上':
        return '60<'
    elif x=='13 - 24ヶ月':
        return '<2'
    else:
        return x
    
df['Age_Group']=df['Age_Group'].apply(lambda x:Age_eng(x))    
df_nfs['Age_Group']=df_nfs['Age_Group'].apply(lambda x:Age_eng(x))        


# Height 


df=df[(df['Height']!='81 - 85cm')&(df['Height']!='50cm以下')&(df['Height']!='65cm以下')&(df['Height']!='61 - 70cm')&(df['Height']!='51 - 60cm')&(df['Height']!='71 - 80cm')]
df_nfs=df_nfs[(df_nfs['Height']!='81 - 85cm')&(df_nfs['Height']!='50cm以下')&(df_nfs['Height']!='65cm以下')&(df_nfs['Height']!='61 - 70cm')&(df_nfs['Height']!='51 - 60cm')&(df_nfs['Height']!='71 - 80cm')]

def Height_impute(x):
    if x in ['91 - 100cm','81 - 90cm','96 - 100cm','91 - 95cm','86 - 90cm', '66 - 70cm','71 - 75cm']:
        return '100 cm >='
    elif x in ['181cm以上','191cm以上', '186 - 190cm','181 - 185cm']:
        return '>180 cm '
    elif x=='76 - 80cm': 
        return '176 - 180cm'
    else:
        return x
   
    
df['Height']=df['Height'].apply(lambda x: Height_impute(x))
df_nfs['Height']=df_nfs['Height'].apply(lambda x: Height_impute(x))

# Weight

df=df[(df['Weight']!='5kg以下')]
df_nfs=df_nfs[(df_nfs['Weight']!='5kg以下')]



def Weight_impute(x):
    if x =='9 - 12kg':
        return '13kg >'
    else:
        return x
        
df['Weight']=df['Weight'].apply(lambda x: Weight_impute(x))
df_nfs['Weight']=df_nfs['Weight'].apply(lambda x: Weight_impute(x))

#Foot Size

def Foot_Size_impute(x):
    if x in ['21.5cm以下','22.0cm以下']:
        return '22.0cm >'
    if x in ['28.0cm以上','30.0cm以上','29.0cm','28.5cm','29.5cm']:
        return '>28.0cm'
    
    else:
        return x
    
df['Foot_Size']=df['Foot_Size'].apply(lambda x: Foot_Size_impute(x))

# reset_index for dataframe
df_nfs.reset_index(drop=True,inplace=True)
df.reset_index(drop=True,inplace=True)

In [7]:
# Create Item_Sex Columns

df['Item_Sex']=df['Item_Name'].apply(lambda x:x[:1])
df_nfs['Item_Sex']=df_nfs['Item_Name'].apply(lambda x:x[:1])

In [8]:
# one hot for tags

# df
Set_Tags=[]
for i in df['Tags'].unique():
    Set_Tags=Set_Tags+i.split(', ')
Set_Tags=list(set(Set_Tags))


dict_tag=dict()
list_tag=[]
for j in Set_Tags:
    dict_tag['{}'.format(j)]=[]
    list_tag.append(dict_tag['{}'.format(j)])

for i in df['Tags']:
    for j in i.split(', '):
        dict_tag['{}'.format(j)].append(1)
    for k in list_tag:
        if len(k)!=len(dict_tag['{}'.format(j)]):
            k.append(0)
df_tag=pd.DataFrame(dict_tag)
df=pd.concat([df,df_tag],axis=1)
df.drop('Tags',axis=1,inplace=True)

#df_nfs

dict_tag=dict()
list_tag=[]
for j in Set_Tags:
    dict_tag['{}'.format(j)]=[]
    list_tag.append(dict_tag['{}'.format(j)])

for i in df_nfs['Tags']:
    for j in i.split(', '):
        dict_tag['{}'.format(j)].append(1)
    for k in list_tag:
        if len(k)!=len(dict_tag['{}'.format(j)]):
            k.append(0)
df_tag=pd.DataFrame(dict_tag)
df_nfs=pd.concat([df_nfs,df_tag],axis=1)
df_nfs.drop('Tags',axis=1,inplace=True)



In [9]:
# sneakers product size decision by foot size

print(df[(df['sneakers']==1)&(df['Rating']>3)].groupby('Foot_Size')['Product_Size'].value_counts().to_frame())

dict_size={}
list_Foot_Size=[]
for j in df[(df['sneakers']==1)&(df['Rating']>3)]['Product_Size'].unique():
    list_Foot_Size.append(j)
list_Foot_Size.sort()
for i in list_Foot_Size:
    if (i != '29')&(i != '28.5'):
        dict_size['{}'.format(i)]=[]
        for j in df[(df['sneakers']==1)&(df['Rating']>3)&(df['Product_Size']==i)]['Foot_Size']:
            if (j != '>28.0cm') & (j != '22.0cm >'):
                size_j=float(re.search('[0-9]+[/.]*[0-9]*',j).group())
                #print(size_j)
                dict_size['{}'.format(i)].append(size_j)
    
        percent=dict_size['{}'.format(i)].count(float(i))/len(dict_size['{}'.format(i)])
        print(i,' is recommended for',j,' by ',percent,'percent')
        
# sneakers is a fit size products in the range(22.5cm to 28.0cm)    

                        Product_Size
Foot_Size Product_Size              
23.0cm    23                       3
23.5cm    23.5                     2
          23                       1
          24                       1
24.0cm    24                       3
          23.5                     2
24.5cm    24.5                     4
          24                       2
25.0cm    26.5                     1
25.5cm    25.5                     7
          26                       2
          27.5                     1
26.0cm    26                       5
26.5cm    25.5                     1
          26                       1
          26.5                     1
          27                       1
27.0cm    27                       1
27.5cm    27.5                     2
28.0cm    27                       1
          28                       1
>28.0cm   29                       2
          28.5                     1
23  is recommended for 23.0cm  by  0.75 percent
23.5  is recommended for 24

In [10]:
# room shoes product size decision by foot size

print(df[(df['room shoes']==1)&(df['Rating']>3)].groupby('Foot_Size')['Product_Size'].value_counts().to_frame())

list_Foot_Size=[]
for j in df[(df['room shoes']==1)&(df['Rating']>3)]['Foot_Size'].unique():
    list_Foot_Size.append(j)
list_Foot_Size.sort()

for i in list_Foot_Size:
    M=len(df[(df['room shoes']==1)&(df['Rating']>3)&(df['Foot_Size']==i)&(df['Product_Size']=='M')])/len(df[(df['room shoes']==1)&(df['Rating']>3)&(df['Foot_Size']==i)])
    L=len(df[(df['room shoes']==1)&(df['Rating']>3)&(df['Foot_Size']==i)&(df['Product_Size']=='L')])/len(df[(df['room shoes']==1)&(df['Rating']>3)&(df['Foot_Size']==i)])
    XL=len(df[(df['room shoes']==1)&(df['Rating']>3)&(df['Foot_Size']==i)&(df['Product_Size']=='XL')])/len(df[(df['room shoes']==1)&(df['Rating']>3)&(df['Foot_Size']==i)])
    
    if M==max([M,L,XL]):
        print('M is recommended for',i ,'with',M,'percent of reviews')
    elif L==max([M,L,XL]):
        print('L is recommended for',i ,'with',L,'percent of reviews')
    else:
        print('XL is recommended for',i ,'with',XL,'percent of reviews')

# Size cut-off line
#M: <25cm
#L: 25cm - 28cm
#XL:28cm>


                        Product_Size
Foot_Size Product_Size              
22.5cm    M                        3
23.0cm    M                        6
          L                        1
23.5cm    M                       17
          L                        2
          XL                       1
24.0cm    M                       12
24.5cm    M                        8
          L                        2
25.0cm    L                        8
          M                        7
          XL                       1
25.5cm    L                        6
          M                        4
          XL                       1
26.0cm    L                       16
26.5cm    L                       14
          M                        1
          XL                       1
27.0cm    L                       15
          XL                       2
27.5cm    L                       10
          XL                       1
28.0cm    L                        6
          XL                       3
>

In [11]:
# pre-set PredictID data

item_code_input='434176'
Gender_input='Male'
Age_Group_input='41-49'
Height_input='171 - 175cm'
Weight_input='56 - 60kg'
bra_size='NA'
foot_size_re='25.5cm'

item_code_input=int(item_code_input)

In [12]:
df['Foot_Size'].unique()

array(['22.0cm >', '23.0cm', '22.5cm', '25.0cm', '22.0cm', '24.5cm',
       '23.5cm', '24.0cm', '25.5cm', '26.5cm', '27.5cm', '27.0cm',
       '26.0cm', '>28.0cm', '28.0cm'], dtype=object)

In [13]:
#Bra and Shoes functions
    
          
def bra_size_rec(item_code_input,x):
    
    if item_code_input==445383:
        #['M', 'XL', 'L', 'S', 'XS', 'XXL']
    
        if x in ['65AA','70AA']:
            return 'XS'
        elif x in ['65A','65B','65C','70A']:
            return 'S'
        elif x in ['65D','70B','70C','70D','75A','75B']:
            return 'M'
        elif x in ['70E','75C','75D','75E','80B','80C']:
            return 'L'
        elif x in ['80D','80E','80F','85B','85C','85D']:
            return 'XL'
        elif x in ['85E','85F','80F','90B','90C','90D']:
            return 'XXL'
        else:
            return 'No suitable size yet'
    else:
        #438961
        #['ABC 65/70', 'ABC 85/90', 'ABC 75/80', 'DEF 65/70', 'DEF 75/80', 'DEF 85/90', 'AA 65/70']
        if x in ['65AA','70AA']:
            return 'AA 65/70'
        elif x in ['65A','65B','65C','70A','70B','70C']:
            return 'ABC 65/70'
        elif x in ['65D','65E','65F','70D','70E','70F']:
            return 'DEF 65/70'
        elif x in ['75A','75B','75C','80A','80B','80C']:
            return 'ABC 75/80'
        elif x in ['75D','75E','75F','80D','80E','80F']:
            return 'DEF 75/80'
        elif x in ['85A','85B','85C','90A','90B','90C']:
            return 'ABC 85/90'
        elif x in ['85D','85E','85F','90D','90E','90F']:
            return 'DEF 85/90'
        else:
            return 'No suitable size yet'


        
def shoes_size_rec(item_code_input,x):
    
    if item_code_input==445086:
        if x in ['22.5cm','23.0cm','23.5cm','24.0cm','24.5cm','25.0cm','25.5cm','26.0cm','26.5cm','27.0cm','27.5cm','28.0cm']:
            return re.search('[0-9]+(.5)*',x).group()
        else:
            return 'No suitable size yet'
    else:
        if x in ['22.5cm','23.0cm','23.5cm','24.0cm','24.5cm']:
            return 'M'
        elif x in ['25.0cm','25.5cm','26.0cm','26.5cm','27.0cm','27.5cm','28.0cm']:
            return 'L'
        elif x in ['>28.0cm']:
            return 'XL'
        else:
            return 'No suitable size yet'

#use which functions
#if df_nfs[df_nfs['Item_Code']==item_code_input]['bra'].unique()==1:
    #print(bra_size_rec(item_code_input,bra_size))
    
        
#elif (df[df['Item_Code']==item_code_input]['room shoes'].unique()==1)or(df[df['Item_Code']==item_code_input]['sneakers'].unique()==1):
    #print(shoes_size_rec(item_code_input,foot_size_re))    
    

In [14]:
#initial prefill input for recommendation system

def general_rec(item_code_input,Gender_input,Age_Group_input,Height_input,Weight_input):

    #data frame for recommendation system
    df_nfs_nb=df_nfs[(df_nfs['bra']!=1)&(df_nfs['Rating']>3)]
    df_nfs_nb.drop('bra',axis=1,inplace=True)
    df_nfs_nb.drop('Rating',axis=1,inplace=True)
    df_nfs_nb.reset_index(drop=True,inplace=True)
    
    def prefill_data(item_code_input,Gender_input,Age_Group_input,Height_input,Weight_input):
        dict_input={}
        for i in df_nfs_nb.columns:
            dict_input['{}'.format(i)]=[]
        dict_input['ID'].append('PredictID')
        dict_input['Item_Code'].append(item_code_input)
        dict_input['Gender'].append(Gender_input)
        dict_input['Age_Group'].append(Age_Group_input)
        dict_input['Height'].append(Height_input)
        dict_input['Weight'].append(Weight_input)
        dict_input['Product_Size'].append('M')
        for j in ['Sum_Code', 'Item_Name', 'Dept', 'Item_Sex', 'cropped',  'easy shorts', 'chinos', 'rayon', 'room shoes', 'long', 'suw',       't-shirts', 'skinny', 'shirts', 'uv cut', 'blouse', 'outer', 'chino', '2way stretch', 'ultra light', 'oversized', 'lounge', 'bottoms',       'relaco', 'polo', 'shorts', 'short', 'legging pants', 'joggers','dry-ex', 'regular', 'cardigan', 'jeans', 'sweat', 'easy pants','ankle pants', 'trousers', 'ultra stretch', 'relaxed', 'outer,uv cut',       'nylon', 'airism cotton', 'leggings', 'jersey', 'fashion','lounge pants', 'cotton', 'formal', 'inner', 'wide', 'unisex', 'slim',       'inner bottoms', 'lounge set', 'boxers', 'airism', 'bratop','high rise', 'sneakers']:
            dict_input['{}'.format(j)].append(df_nfs_nb[df_nfs_nb['Item_Code']==item_code_input]['{}'.format(j)].unique().tolist()[0])

        return dict_input
    
    df_nfs_nb=pd.concat([df_nfs_nb,pd.DataFrame(prefill_data(item_code_input,Gender_input,Age_Group_input,Height_input,Weight_input) )],axis=0)
    df_nfs_nb.reset_index(drop=True,inplace=True)

    # get dummies
    dummies_list=df_nfs_nb[['Item_Code','Gender', 'Age_Group', 'Height', 'Weight','Sum_Code','Item_Sex','Dept']]
    df_nfs_nb_dummies=pd.get_dummies(dummies_list, prefix='',prefix_sep='')
    df_nfs_nb=pd.concat([df_nfs_nb,df_nfs_nb_dummies],axis=1)
    df_nfs_nb.drop(['Item_Code','Gender', 'Age_Group', 'Height', 'Weight','Sum_Code','Item_Sex','Dept'],axis=1,inplace=True)
    df_nfs_nb.reset_index(drop=True,inplace=True)
    df_nfs_nb.set_index('ID',inplace=True)

    df_Model=df_nfs_nb.copy()
    df_Model.drop(['Product_Size','Item_Name'],axis=1,inplace=True)
    
    
    def size_predictor(distance_method, ID, N):
        # create dataframe used to store distances between recipes
        df_distance = pd.DataFrame(data=df_Model.index)

        # remove rows where index is equal to the inputted recipe_id
        df_distance = df_distance[df_Model.index != ID]

        # add a distance column that states the inputted recipe's distance with every other recipe
        df_distance['distance'] = df_distance['ID'].apply(lambda x: distance_method(df_Model.loc[ID],df_Model.loc[x]))

        # sort the allRecipes by distance and take N closes number of rows to put in the TopNRecommendation as the recommendations
        df_distance.sort_values(by='distance',inplace=True)

        # for each recipe in TopNRecommendation, input to defined lists

        # return dataframe with the inputted recipe and the recommended recipe's normalized nutritional values
        return(df_distance.head(N))

    size_rec_id=[i for i in size_predictor(euclidean, 'PredictID', 10)['ID']]

    size_rec=[]

    for i in size_rec_id:
        size_rec.append(df_nfs_nb.loc[[i]]['Product_Size'][0])
        
    def most_frequent(List):
        return max(set(List), key = List.count)


    return most_frequent(size_rec)

In [15]:
# Whole function
def size_recommender(item_code_input,Gender_input,Age_Group_input,Height_input,Weight_input):
    item_code_input=int(item_code_input)
    if df_nfs[df_nfs['Item_Code']==item_code_input]['bra'].unique()==1:
        return bra_size_rec(item_code_input,bra_size)
    elif (df[df['Item_Code']==item_code_input]['room shoes'].unique()==1)or(df[df['Item_Code']==item_code_input]['sneakers'].unique()==1):
        return shoes_size_rec(item_code_input,foot_size_re)   
    else:
        return general_rec(item_code_input,Gender_input,Age_Group_input,Height_input,Weight_input)
        
predicted_size=size_recommender(item_code_input,Gender_input,Age_Group_input,Height_input,Weight_input)    

print(predicted_size)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


M


In [16]:
# product recommender

def product_recommender(item_code_input,Gender_input,Age_Group_input,Height_input,Weight_input,predicted_size):
    item_code_input=int(item_code_input)
    df_nfs_rec=df_nfs[(df_nfs['Rating']>3)]
    df_nfs_rec.drop('Rating',axis=1,inplace=True)
    
    df_nfs_rec.reset_index(drop=True,inplace=True)
    
    
    def prefill_data(item_code_input,Gender_input,Age_Group_input,Height_input,Weight_input,predicted_size):
        dict_input={}
        for i in df_nfs_rec.columns:
            dict_input['{}'.format(i)]=[]
        dict_input['ID'].append('PredictID')
        dict_input['Item_Code'].append(item_code_input)
        dict_input['Gender'].append(Gender_input)
        dict_input['Age_Group'].append(Age_Group_input)
        dict_input['Height'].append(Height_input)
        dict_input['Weight'].append(Weight_input)
        dict_input['Product_Size'].append(predicted_size)
        for j in ['Sum_Code', 'Item_Name', 'Dept', 'Item_Sex',
       'easy shorts', 'leggings', 'jersey', 'trousers', 'suw', 'airism',
       'regular', 'outer,uv cut', 'slim', 'fashion', 'lounge pants',
       '2way stretch', 'long', 'cardigan', 'lounge set', 'sneakers', 'joggers',
       'lounge', 'chinos', 'relaxed', 'skinny', 'dry-ex', 'short', 'unisex',
       't-shirts', 'ultra stretch', 'high rise', 'bratop', 'cotton', 'nylon',
       'room shoes', 'blouse', 'bra', 'formal', 'inner bottoms', 'shorts',
       'relaco', 'easy pants', 'shirts', 'airism cotton', 'chino',
       'ultra light', 'outer', 'boxers', 'oversized', 'wide', 'cropped',
       'uv cut', 'ankle pants', 'sweat', 'polo', 'bottoms', 'rayon', 'jeans',
       'legging pants', 'inner']:
            dict_input['{}'.format(j)].append(df_nfs_rec[df_nfs_rec['Item_Code']==item_code_input]['{}'.format(j)].unique().tolist()[0])

        return dict_input
    
    df_nfs_rec=pd.concat([df_nfs_rec,pd.DataFrame(prefill_data(item_code_input,Gender_input,Age_Group_input,Height_input,Weight_input,predicted_size) )],axis=0)
    df_nfs_rec.reset_index(drop=True,inplace=True)
    
    # get dummies
    dummies_list=df_nfs_rec[['Gender', 'Age_Group', 'Height', 'Weight','Item_Sex','Dept']]
    df_nfs_rec_dummies=pd.get_dummies(dummies_list, prefix='',prefix_sep='')
    df_nfs_rec=pd.concat([df_nfs_rec,df_nfs_rec_dummies],axis=1)
    df_pre_row=df_nfs_rec[(df_nfs_rec['ID']=='PredictID')].copy()
    df_nfs_rec=df_nfs_rec[(df_nfs_rec['Item_Code']!=item_code_input)]
    df_nfs_rec=pd.concat([df_nfs_rec,df_pre_row],axis=0)
    
    
    df_nfs_rec.drop(['Item_Code','Gender', 'Age_Group', 'Height', 'Weight','Sum_Code','Item_Sex','Dept'],axis=1,inplace=True)
    
    df_nfs_rec.reset_index(drop=True,inplace=True)
    df_nfs_rec.set_index('ID',inplace=True)

    df_Model=df_nfs_rec.copy()
    df_Model.drop(['Product_Size','Item_Name'],axis=1,inplace=True)
    
    
    
    def product_predictor(distance_method, ID, N):
        # create dataframe used to store distances between recipes
        df_distance = pd.DataFrame(data=df_Model.index)

        # remove rows where index is equal to the inputted recipe_id
        df_distance = df_distance[df_Model.index != ID]

        # add a distance column that states the inputted recipe's distance with every other recipe
        df_distance['distance'] = df_distance['ID'].apply(lambda x: distance_method(df_Model.loc[ID],df_Model.loc[x]))

        # sort the allRecipes by distance and take N closes number of rows to put in the TopNRecommendation as the recommendations
        df_distance.sort_values(by='distance',inplace=True)

        # for each recipe in TopNRecommendation, input to defined lists

        # return dataframe with the inputted recipe and the recommended recipe's normalized nutritional values
        return(df_distance.head(N))

    product_rec_id=[i for i in product_predictor(euclidean, 'PredictID', 10)['ID']]
    
    product_rec=[]
    df_nfs.set_index('ID',inplace=True)
    for i in product_rec_id:
        product_rec.append(df_nfs.loc[[i]]['Item_Code'][0])
     
    #def most_frequent(List):
        #return max(set(List), key = List.count)

    #return most_frequent(product_rec)

    products = Counter(product_rec)
    rec2 = products.most_common(2)
    return rec2





In [17]:
predicted_product=product_recommender(item_code_input,Gender_input,Age_Group_input,Height_input,Weight_input,predicted_size)    

print('Recommendaed Product: ',predicted_product[0][0],'  Name:  ',df_nfs[df_nfs['Item_Code']==predicted_product[0][0]]['Item_Name'][0])
print('Recommendaed Product: ',predicted_product[1][0],'  Name:  ',df_nfs[df_nfs['Item_Code']==predicted_product[1][0]]['Item_Name'][0])


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Recommendaed Product:  434172   Name:   M's airism boxer briefs
Recommendaed Product:  434174   Name:   M's airism trunks


In [18]:

# save input
dict_input={'Item_Code':item_code_input,'Searched_Product_Name':df_nfs[df_nfs['Item_Code']==item_code_input]['Item_Name'].unique()[0],'Gender':Gender_input,'Age_Group':Age_Group_input,'Height':Height_input,'Weight':Weight_input,'Bra_Size':bra_size,'Foot_Size':foot_size_re,'Predicted_Size':predicted_size,'Rec_P1_Code':predicted_product[0][0],'Rec_P1_Name':df_nfs[df_nfs['Item_Code']==predicted_product[0][0]]['Item_Name'][0],'Rec_P2_Code':predicted_product[1][0],'Rec_P2_Name':df_nfs[df_nfs['Item_Code']==predicted_product[1][0]]['Item_Name'][0]}
df_input_new=pd.DataFrame(dict_input, index=[0])
#df_input_new.to_csv('Input_Data.csv',index=False)

try:
    df_input=pd.read_csv('Input_Data.csv')

    if sum(sum([k==v for (k,v) in zip(df_input.iloc[-1:,2:6].to_numpy(),df_input_new.iloc[-1:,2:6].to_numpy())]))==4:
        df_input_new['customer_id']=df_input['customer_id'].iloc[-1]
    else:
        df_input_new['customer_id']=df_input['customer_id'].iloc[-1]+1
    df_input=pd.concat([df_input,df_input_new],axis=0)
    df_input.reset_index(drop=True, inplace=True)
    df_input.to_csv('Input_Data.csv',index=False)

except:
    df_input_new['customer_id']=1
    df_input_new.to_csv('Input_Data.csv',index=False)

In [19]:
# previous search record

try:
    print(df_input[df_input['customer_id']==df_input_new['customer_id'].iloc[-1]].iloc[:,:2])
except:
    print('None')

    Item_Code           Searched_Product_Name
31     441829           M's sweat ankle pants
32     441829           M's sweat ankle pants
33     434176  M's supima cotton boxer briefs


In [20]:
df_img=pd.read_csv('Sample_img.csv')
df_img.drop_duplicates(subset=['item_code'],inplace=True)
df_img.reset_index(drop=True,inplace=True)
item_list=pd.read_excel('item list.xlsx')

In [21]:
his_list=df_input[df_input['customer_id']==df_input_new['customer_id'].iloc[-1]]['Item_Code']
his_list=his_list.tolist()
his_list=list(set(his_list))
his_list.remove(item_code_input)
his_list.reverse()
for i in range(0,len(set(his_list)),4):
    #if len(set(his_list))>=4:
    count=0
    for j in range(i,i+4):
        try:
            print(df_img[df_img['item_code']==his_list[j]]['img_1'].unique()[0])
            print((df_nfs[df_nfs['Item_Code']==his_list[j]]['Item_Name'][0]).upper())
            print('Item code: {}'.format(his_list[j]))  
            print('Product Size: {}'.format(his_list[j]))   
        except:
            print(' ')
        count+=1

https://image.uniqlo.com/UQ/ST3/AsianCommon/imagesgoods/438174/item/goods_68_438174.jpg
M'S SWEAT ANKLE PANTS
Item code: 441829
Product Size: 441829
 
 
 
