# Kurly Hackerton 2022 - 나만의 상점 만들기 
## Data simulation Code

### written by. DS 담당 이은표(Team Markurly)





In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats

import seaborn as sns
import matplotlib.pyplot as plt

from datetime import datetime

# user data generation

In [2]:
# functions to use

def min_max_scaler(x, m= None, M= None) : 
    if m is None : 
        m = min(x)
    if M is None :
        M = max(x)
    return np.array((x-m)/(M-m))

In [3]:
n_users = 1000 # sample
seed_num = 25

In [4]:
# user informations
np.random.seed(seed=seed_num)

## user-related covariates(deterministic)
gender = np.random.choice(['m','f'], n_users, p =[0.3,0.7]) # gender ~ bin(0.5), 0: male, 1 : female
age = np.random.normal(35, 15, n_users*10); age = age[(age>=20)&(age<80)][:n_users] # age ~ truncated normal(35,15) on [20, 80]
# job, region ~ multinom(0.05, 0.2, 0.2, 0.4, 0.15) - 높을수록 상급지 | 좋은 직장
job = np.random.choice(['a','b','c','d','e'], n_users, p =[0.05, 0.2, 0.2, 0.4, 0.15])  
region = np.random.choice(['a','b','c'], n_users, p =[0.05, 0.35, 0.6])
device = np.random.choice(['a','i'], n_users, p =[0.5, 0.5]) # a - andriod, i - iphone

### minmax scaling value to make further variables
gender_2 = min_max_scaler(pd.Series(gender).map({'m':0, 'f':1}))
age_2 = min_max_scaler(age, m = 20, M = 80)
job_2 = min_max_scaler(pd.Series(job).map({'a':4, 'b':3,'c':2, 'd':1,'e':0}))
region_2 = min_max_scaler(pd.Series(region).map({'a':2, 'b':1,'c':0}))
device_2 = min_max_scaler(pd.Series(device).map({'a':0, 'i':1}))


##
p_marry = 0.8
married = np.random.choice([0,1], n_users, p =[1-p_marry,p_marry]) # 5명 중 1명은 평생 결혼 안 함.
married2 = [np.random.choice([0,1], 1, p =[1-p,p])[0] for p in stats.norm.cdf(age, loc=32, scale=8) ] # 결혼한다면, 평균 결혼 32세

is_married = married* married2
is_children = married*married2 * np.random.choice([0,1], n_users)

## user-related variables(stochastic) -- latent
np.random.seed(seed=seed_num)
consumption_propensity_eps = 0.01*np.random.normal(0, 1, n_users)
consumption_propensity = min_max_scaler(0.1 * gender_2  + 0.3 * -(age_2-0.5)**2 + 0.3*(job_2**3) + 0.2*(region_2**2) + 0.1*device_2 + consumption_propensity_eps)

consumption_propensity_2 = min_max_scaler(0.2 * gender_2 + 0.3 * age_2 - 0.5 * region_2 + 0.1*np.random.normal(0, 1, n_users))

is_diet = min_max_scaler(0.5*(gender_2 + np.random.normal(0,1,n_users)) - np.exp(age_2) +np.exp(1) + np.random.normal(0, 1, n_users))
is_drinker = min_max_scaler(-0.2*(gender_2 + np.random.normal(0,1,n_users)) + np.exp(age_2) + np.exp(1) + np.random.normal(0, 1, n_users))



In [5]:
df_user = pd.DataFrame({'gender':gender,'gender2':gender_2,
                   'age':age, 'age2' : age_2,
                   'job':job, 'job_2':job_2,
                   'region':region, 'region_2': region_2,
                   'device':device, 'device2' : device_2,
                   'consumption_propensity':consumption_propensity, 
                   'consumption_propensity2': consumption_propensity_2,
                   'is_maried' : is_married,
                   'is_children' : is_children,
                   'is_diet' : is_diet,
                   'is_drinker' : is_drinker})


In [6]:
df_user.head(10)

Unnamed: 0,gender,gender2,age,age2,job,job_2,region,region_2,device,device2,consumption_propensity,consumption_propensity2,is_maried,is_children,is_diet,is_drinker
0,f,1.0,55.775935,0.596266,e,0.0,b,0.5,i,1.0,0.445809,0.547771,1,1,0.461598,0.277868
1,f,1.0,55.453282,0.590888,d,0.25,c,0.0,i,1.0,0.395785,0.716831,0,0,0.399561,0.380728
2,m,0.0,26.752491,0.112542,e,0.0,c,0.0,i,1.0,0.171622,0.444485,1,0,0.22608,0.319357
3,m,0.0,45.333443,0.422224,d,0.25,c,0.0,a,0.0,0.104599,0.85739,1,0,0.423203,0.443409
4,f,1.0,55.419324,0.590322,d,0.25,c,0.0,i,1.0,0.369024,0.683136,1,1,0.282578,0.478363
5,m,0.0,29.282938,0.154716,b,0.75,c,0.0,a,0.0,0.228376,0.512737,1,0,0.550401,0.343757
6,f,1.0,46.967176,0.449453,e,0.0,c,0.0,i,1.0,0.369515,0.75457,1,1,0.307365,0.205448
7,f,1.0,40.538116,0.342302,d,0.25,c,0.0,a,0.0,0.264901,0.888811,0,0,0.469554,0.439791
8,f,1.0,43.101983,0.385033,c,0.5,c,0.0,a,0.0,0.261383,0.481647,1,1,0.244083,0.541059
9,f,1.0,41.2078,0.353463,d,0.25,c,0.0,a,0.0,0.253181,0.594984,1,0,0.484574,0.681584


# item data generation

In [7]:
df_item = pd.read_csv('/Users/pio/Google 드라이브/kurly_hackerton/item_input_data.csv', encoding='CP949')



In [8]:
df_item

Unnamed: 0,name,price,img_url,category
0,[탄단지] 소스 듬뿍 스테이크 3종,8900,https://product-image.kurly.com/product/image/...,샐러드·닭가슴살
1,[스윗밸런스] 시그니처 샐러드 4종,6500,https://product-image.kurly.com/product/image/...,샐러드·닭가슴살
2,[Better me] 냉동 닭가슴살 4종 (3개입),6990,https://product-image.kurly.com/product/image/...,샐러드·닭가슴살
3,[굽네] 닭가슴살 야채볶음밥(4개입),11600,https://product-image.kurly.com/product/image/...,샐러드·닭가슴살
4,[홍루이젠] 에그마요 샌드위치 2종,3000,https://img-cf.kurly.com/shop/data/goods/16500...,샐러드·닭가슴살
...,...,...,...,...
137,[기린] 오후의 홍차 밀크티 1.5L,5980,https://img-cf.kurly.com/cdn-cgi/image/width=6...,차
138,[소다미] 하동 매실원액 2종 (500ml/1000ml),7900,https://product-image.kurly.com/cdn-cgi/image/...,차
139,[할리스] 복숭아 얼그레이 파우치 (10개입),15900,https://product-image.kurly.com/cdn-cgi/image/...,차
140,[문경몰] 프리미엄 오미자 자일로스청 원액 1L,19000,https://img-cf.kurly.com/cdn-cgi/image/width=6...,차


## Rating generation

In [9]:

#price1_m
#price2_m

latent_mat1 = []
for i in range(df_item.shape[0]) : 
    cat_temp = df_item.loc[i,'category']

#    price_1_temp = df_item.loc[i,'price'] - price1_m
#    price_2_temp = 


    # R1
    if cat_temp == '과실주·리큐르' : 
        latent_mean = [1,0,1,0,0,0,0,0,1]
    elif cat_temp == '도시락·밥류'  : 
        latent_mean = [0,0,0,0,0,1,1,0,0]
    elif cat_temp == '떡볶이·튀김·순대' : 
        latent_mean = [1,0,0,0,0,1,1,0,0]
    elif cat_temp == '레드와인' : 
        latent_mean = [0.5,0,1,0,0,0,0,0,1]
    elif cat_temp == '막걸리·탁주' : 
        latent_mean = [0,1,0,0,0,0,0,0,1]
    elif cat_temp == '샐러드·닭가슴살' : 
        latent_mean = [0.5,0,0,0,0,0,0,1,0]
    elif cat_temp == '생수·탄산수' : 
        latent_mean = [0,0,1,0,0,0,0,1,0]
    elif cat_temp == '샴페인' : 
        latent_mean = [0.5,0,1,0,0,0,0,0,1]
    elif cat_temp == '선식·시리얼' : 
        latent_mean = [0.25,0.5,0,0,0,1,1,1,0]
    elif cat_temp == '우유·두유·요거트' : 
        latent_mean = [0,0,0,0,0,1,1,0,0]
    elif cat_temp == '음료·주스' : 
        latent_mean = [0,0,0,0,0,1,1,0,0]
    elif cat_temp == '죽·스프·카레' : 
        latent_mean = [0,0,0,0,0,0,0,0,0]
    elif cat_temp == '증류주·약주·청주' : 
        latent_mean = [-1,0,0,0,0,0,0,0,0]
    elif cat_temp == '차' : 
        latent_mean = [0,0,0,0,0,1,1,1,0]
    elif cat_temp == '커피' : 
        latent_mean = [0,1,0,0,0,1,1,0,0]
    elif cat_temp == '파스타·면류' :
        latent_mean = [1,0,0,1,0,0,0,0,1]
    elif cat_temp == '폭립·떡갈비·안주' : 
        latent_mean = [0,0,0,1,0,0,0,-1,1]
    elif cat_temp == '피자·핫도그·만두' : 
        latent_mean = [0,0,0,1,0,0,1,-1,1]
    else :  # cat_temp == '화이트와인'
        latent_mean = [1,0,1,0,0,0,0,0,1]
            
    latent_vec1 = [np.random.normal(loc= m, scale = 0.5, size=1)[0] for m in latent_mean]        
            
    latent_mat1.append(latent_vec1)
    # R2

#    price1   price2

In [10]:
m=0
np.random.normal(loc= m, scale = 0.5,  size=1)[0]

-1.4858426409864491

## latent vector for item

In [11]:
latent_item = pd.DataFrame(latent_mat1,  # y
                           columns = ['for_women','for_older','for_job2','for_region','for_device','for_married','for_children','for_diet','for_drinker'])

# temp df manipulation for consumption2 latent variable
_ = df_item.merge(df_item.groupby('category').mean('price'), how='left', on='category')
# 

latent_item['consumption1'] = min_max_scaler(_.price_x)
latent_item['consumption2'] = min_max_scaler((_['price_x'] - _['price_y'])/_['price_y'])

In [12]:
latent_item

Unnamed: 0,for_women,for_older,for_job2,for_region,for_device,for_married,for_children,for_diet,for_drinker,consumption1,consumption2
0,-0.595090,0.170021,-0.621739,0.544024,0.633539,-0.741115,0.360673,1.267386,0.369693,0.019429,0.214213
1,0.764507,0.143550,-0.228264,-0.233603,0.755340,0.190393,-0.578905,2.012538,-1.161689,0.012997,0.144427
2,1.088341,0.285551,-0.793594,-0.519338,0.103626,-0.095409,0.300996,0.684484,-0.577082,0.014311,0.158675
3,0.182644,-0.280558,-0.910357,-0.087847,-0.284918,0.289763,0.533088,0.710049,0.438786,0.026665,0.292721
4,0.585087,-0.241180,-0.579748,-0.098253,-0.357401,-0.186563,-0.024657,0.954264,-1.039549,0.003618,0.042657
...,...,...,...,...,...,...,...,...,...,...,...
137,-0.307864,-0.281988,-0.067196,-0.288906,-0.747314,1.146092,0.683587,0.266063,-0.050354,0.011604,0.001696
138,0.242312,0.122084,-0.062527,0.288610,-0.194908,0.981495,1.311624,1.321837,-0.117021,0.016749,0.016552
139,0.275993,-1.451121,-0.475763,0.247796,-1.127772,0.732655,0.539521,0.688931,0.012187,0.038188,0.078453
140,0.073979,0.626625,0.284336,0.680679,-0.006431,1.146054,1.199017,1.181443,0.291696,0.046496,0.102439


## latent vector for user

In [13]:
latent_user = df_user[['gender2','age2','job_2','region_2','device2','is_maried','is_children','is_diet','is_drinker','consumption_propensity','consumption_propensity2']]
latent_user.head()

Unnamed: 0,gender2,age2,job_2,region_2,device2,is_maried,is_children,is_diet,is_drinker,consumption_propensity,consumption_propensity2
0,1.0,0.596266,0.0,0.5,1.0,1,1,0.461598,0.277868,0.445809,0.547771
1,1.0,0.590888,0.25,0.0,1.0,0,0,0.399561,0.380728,0.395785,0.716831
2,0.0,0.112542,0.0,0.0,1.0,1,0,0.22608,0.319357,0.171622,0.444485
3,0.0,0.422224,0.25,0.0,0.0,1,0,0.423203,0.443409,0.104599,0.85739
4,1.0,0.590322,0.25,0.0,1.0,1,1,0.282578,0.478363,0.369024,0.683136


# Gen R

### R represents Rating(or preference) of user on items. 
### R will influence User-Item interactions

### R = x_latent @ y_latent

In [14]:
latent_user.shape

(1000, 11)

In [15]:
latent_item.shape

(142, 11)

In [16]:
R = np.array(latent_user) @ np.array(latent_item).T
R.shape

(1000, 142)

In [17]:
pd.DataFrame(R)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,132,133,134,135,136,137,138,139,140,141
0,0.845146,1.791222,1.557050,1.131279,0.000140,1.744838,1.600112,1.331829,1.177069,0.608976,...,3.414756,2.890516,3.694936,4.296284,2.547511,0.576834,3.151797,0.060434,3.829840,2.680149
1,0.791872,2.018122,1.335486,0.175510,-0.042251,1.917606,0.747762,0.459879,0.187481,-1.176615,...,0.501041,0.579963,1.128092,1.541533,-0.084450,-1.145654,0.606005,-1.476909,1.183847,0.586538
2,0.414702,1.112316,0.083791,0.408615,-0.667772,1.499757,-0.465586,-0.337899,0.621486,0.077588,...,1.258663,2.048984,1.881141,2.092532,1.536743,0.413859,1.072028,-0.357358,1.623913,1.149308
3,0.061220,0.655736,-0.001904,0.692537,-0.453477,0.495887,0.412507,0.607560,0.332645,0.426879,...,1.700264,2.346829,2.435096,2.224899,2.440664,1.103170,1.540871,0.369236,2.203740,1.010661
4,0.291429,1.275460,1.398766,0.947720,-0.467997,1.714509,1.516624,0.845557,0.677735,0.605722,...,3.388205,2.595685,3.437509,3.661725,2.137269,0.647776,2.731992,-0.286981,3.414139,2.118351
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.208464,0.666093,0.919051,0.663785,-0.101919,0.732480,1.123902,0.863959,-0.534580,-0.779125,...,0.243458,0.493642,1.106118,0.681123,0.252524,-0.381281,0.650219,-0.104903,1.114114,0.081268
996,1.122649,2.553476,1.528492,0.347290,0.205210,2.134103,0.928784,0.686574,0.552400,-1.158402,...,0.656550,0.841069,1.519386,1.899205,0.216291,-1.086590,0.951595,-1.349571,1.507426,0.866469
997,1.347165,0.399119,-0.396212,0.445548,-1.192485,0.766447,1.465859,0.397008,-0.133571,0.212784,...,3.704158,3.236565,2.954321,4.331874,2.849275,0.695079,2.913675,-0.312706,4.181578,2.867018
998,0.255961,0.960736,0.911414,1.382091,0.048061,0.339926,2.348332,1.696711,0.387526,0.856915,...,3.087913,3.237781,3.788608,3.666138,3.480574,1.449686,3.545527,1.695651,4.103858,2.522632


---

# Gen interaction

In [18]:
# Gen p_everyweek
eps = 0.1; down_ratio = 0.5; n_weeks = 52
p_everyweek = (eps + min_max_scaler(R.sum(axis=1)) * 0.4 + np.random.uniform(0, 0.5, R.shape[0]))*down_ratio

pd.DataFrame(p_everyweek).describe()

Unnamed: 0,0
count,1000.0
mean,0.26865
std,0.090564
min,0.067072
25%,0.198824
50%,0.266684
75%,0.33158
max,0.477102


In [19]:
# Gen week of the day
purchase_yes = []
for p_u in p_everyweek : 
    purchase_yes.append(np.random.choice([0,1], p= [1-p_u, p_u], size=n_weeks))
    
purchase_yes = pd.DataFrame(purchase_yes)

purchase_yes.columns = list('shop_'+str(s) for s in purchase_yes.columns)
purchase_yes = purchase_yes.reset_index()

interaction = pd.wide_to_long(purchase_yes, stubnames='shop_', i='index', j='week').reset_index()

interaction.columns =['user_id', 'week', 'is_shopping']
interaction = interaction[interaction['is_shopping']==1 ]

interaction['year'] = 2020 # gen year
interaction['day'] = np.random.choice(list(range(7)), size = interaction.shape[0]) # gen hour
interaction['hour'] = np.random.choice(list(range(24)), size = interaction.shape[0]) # gen hour
interaction['minuate'] = np.random.choice(list(range(60)), size = interaction.shape[0]) # gen minuate

interaction = interaction[['user_id','year','week','day','hour','minuate']]

In [20]:
# Gen week of the day
purchase_yes = []
for p_u in p_everyweek : 
    purchase_yes.append(np.random.choice([0,1], p= [1-p_u, p_u], size=n_weeks))
    
purchase_yes = pd.DataFrame(purchase_yes)

purchase_yes.columns = list('shop_'+str(s) for s in purchase_yes.columns)
purchase_yes = purchase_yes.reset_index()

interaction2 = pd.wide_to_long(purchase_yes, stubnames='shop_', i='index', j='week').reset_index()

interaction2.columns =['user_id', 'week', 'is_shopping']
interaction2 = interaction2[interaction2['is_shopping']==1 ]

interaction2['year'] = 2021 # gen year
interaction2['day'] = np.random.choice(list(range(7)), size = interaction2.shape[0]) # gen hour
interaction2['hour'] = np.random.choice(list(range(24)), size = interaction2.shape[0]) # gen hour
interaction2['minuate'] = np.random.choice(list(range(60)), size = interaction2.shape[0]) # gen minuate

interaction2 = interaction2[['user_id','year','week','day','hour','minuate']]

In [21]:
interaction_all = pd.concat([interaction, interaction2], axis=0).reset_index(drop=True)

# 시간 timestamp 달기

In [22]:
dt_l = []
for i in range(interaction_all.shape[0]) : 
    Y = interaction_all.loc[i,'year']
    W = interaction_all.loc[i,'week']
    D = interaction_all.loc[i,'day']
    H = interaction_all.loc[i,'hour']
    M = interaction_all.loc[i,'minuate']
    
    date = "{}/{}/{} {}:{}".format(Y, W, D, H, M)
    dt = datetime.strptime(date, "%Y/%W/%w %H:%M")
    dt_l.append(dt)



In [23]:
interaction_all['datetime'] = dt_l

In [24]:
interaction_all = interaction_all.reset_index()


In [25]:
interaction_all

Unnamed: 0,index,user_id,year,week,day,hour,minuate,datetime
0,0,6,2020,0,2,17,51,2019-12-31 17:51:00
1,1,7,2020,0,6,12,57,2020-01-04 12:57:00
2,2,8,2020,0,4,15,57,2020-01-02 15:57:00
3,3,9,2020,0,4,14,12,2020-01-02 14:12:00
4,4,13,2020,0,3,7,16,2020-01-01 07:16:00
...,...,...,...,...,...,...,...,...
27979,27979,994,2021,51,6,6,10,2021-12-25 06:10:00
27980,27980,995,2021,51,3,22,53,2021-12-22 22:53:00
27981,27981,996,2021,51,1,22,4,2021-12-20 22:04:00
27982,27982,997,2021,51,5,3,7,2021-12-24 03:07:00


# Random draw (interaction item)

In [26]:
def random_draw(R, interaction) :
    interaction_item_df_l = [ ]
#    interaction_idx = interaction['index']
    for interaction_idx in range(interaction.shape[0]) : 
        user_temp =interaction.loc[interaction_idx, 'user_id']
        R_temp = R[user_temp]
        
        # N_draw from N_bag
        N_bag = 20 ; N_draw = 1+int(np.sqrt(np.random.chisquare(df = 5, size=1))[0])
        
        top_N_item_idx = (-R_temp).argsort()[:N_bag]
        top_N_item_score = R_temp[top_N_item_idx]
        
        top_N_p = min_max_scaler(top_N_item_score) / sum(min_max_scaler(top_N_item_score))
        
        # draw
        draw = np.random.choice(top_N_item_idx, p = top_N_p, size = N_draw)
        
        # to df
        temp_product_quantity = pd.Series(draw).value_counts().reset_index()
        temp_product_quantity.columns = ['item_id','quantity']
        
        temp_product_quantity['interaction_id'] = interaction_idx
        interaction_item_df_l.append(temp_product_quantity)
    
    return pd.concat(interaction_item_df_l , axis=0)

In [27]:
interaction_item = random_draw(R, interaction_all)

---

# 정리 to send up to db

### customer table

In [28]:
df_user_out = df_user[['gender', 'age', 'region', 'device']].reset_index()
# df_user_out.columns = ['user_id','name','gender', 'age', "job", 'region', 'device']


# add  name column
df_user_out['name'] = 'ddddd'


# add birthdate column
birth_year = (2020 - df_user_out['age']).astype('int')
dt_l= []
for b in birth_year : 
    dt = datetime.strptime(str(b), '%Y')
    dt_l.append(dt)

df_user_out['birth_date'] = dt_l


# sign-in date
sign_in_date = datetime.strptime(str(2018),'%Y')
df_user_out['sign_in_date'] = sign_in_date


# add detailed address
df_user_out['detailed_adress'] = 'details'


In [29]:
df_user_out.columns = ['id', 'gender', 'age', 'main_address', 'device', 'name', 'birth_date','sign_in_date', 'detailed_address']

In [30]:
table_customer = df_user_out[['id','name','gender', 'birth_date','device','main_address','detailed_address', 'sign_in_date']]

In [31]:
table_customer

Unnamed: 0,id,name,gender,birth_date,device,main_address,detailed_address,sign_in_date
0,0,ddddd,f,1964-01-01,i,b,details,2018-01-01
1,1,ddddd,f,1964-01-01,i,c,details,2018-01-01
2,2,ddddd,m,1993-01-01,i,c,details,2018-01-01
3,3,ddddd,m,1974-01-01,a,c,details,2018-01-01
4,4,ddddd,f,1964-01-01,i,c,details,2018-01-01
...,...,...,...,...,...,...,...,...
995,995,ddddd,f,1976-01-01,a,c,details,2018-01-01
996,996,ddddd,f,1962-01-01,i,c,details,2018-01-01
997,997,ddddd,m,1967-01-01,i,a,details,2018-01-01
998,998,ddddd,f,1988-01-01,a,b,details,2018-01-01


### item table

In [33]:
df_item_out = df_item.reset_index()

In [34]:
df_item_out.columns = ['id','name','price','img_url','category']
table_item = df_item_out[['id','name','category','price','img_url']]

In [35]:
table_item

Unnamed: 0,id,name,category,price,img_url
0,0,[탄단지] 소스 듬뿍 스테이크 3종,샐러드·닭가슴살,8900,https://product-image.kurly.com/product/image/...
1,1,[스윗밸런스] 시그니처 샐러드 4종,샐러드·닭가슴살,6500,https://product-image.kurly.com/product/image/...
2,2,[Better me] 냉동 닭가슴살 4종 (3개입),샐러드·닭가슴살,6990,https://product-image.kurly.com/product/image/...
3,3,[굽네] 닭가슴살 야채볶음밥(4개입),샐러드·닭가슴살,11600,https://product-image.kurly.com/product/image/...
4,4,[홍루이젠] 에그마요 샌드위치 2종,샐러드·닭가슴살,3000,https://img-cf.kurly.com/shop/data/goods/16500...
...,...,...,...,...,...
137,137,[기린] 오후의 홍차 밀크티 1.5L,차,5980,https://img-cf.kurly.com/cdn-cgi/image/width=6...
138,138,[소다미] 하동 매실원액 2종 (500ml/1000ml),차,7900,https://product-image.kurly.com/cdn-cgi/image/...
139,139,[할리스] 복숭아 얼그레이 파우치 (10개입),차,15900,https://product-image.kurly.com/cdn-cgi/image/...
140,140,[문경몰] 프리미엄 오미자 자일로스청 원액 1L,차,19000,https://img-cf.kurly.com/cdn-cgi/image/width=6...


### interaction table

In [36]:
interaction_out = interaction_all[['index','user_id','datetime']]

In [37]:
interaction_out.columns = ['id','customer_id','order_date']


In [38]:
table_interaction = interaction_out

### interaction item table

In [39]:
interaction_item_out = interaction_item.reset_index()
interaction_item_out.columns = ['id','item_id','quantity', 'interaction_id']

table_interaction_item = interaction_item_out[['id','item_id','interaction_id','quantity']]

---


# Sending out


In [45]:
# table_customer.head()
table_customer.id = table_customer.id+1

In [49]:
table_item.id = table_item.id + 1

In [52]:
table_interaction.id = table_interaction.id +1 
table_interaction.customer_id = table_interaction.customer_id +1 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [55]:
table_interaction_item.id = table_interaction_item.id+1
table_interaction_item.item_id = table_interaction_item.item_id +1
table_interaction_item.interaction_id = table_interaction_item.interaction_id +1

In [58]:
table_interaction

Unnamed: 0,id,customer_id,order_date
0,1,7,2019-12-31 17:51:00
1,2,8,2020-01-04 12:57:00
2,3,9,2020-01-02 15:57:00
3,4,10,2020-01-02 14:12:00
4,5,14,2020-01-01 07:16:00
...,...,...,...
27979,27980,995,2021-12-25 06:10:00
27980,27981,996,2021-12-22 22:53:00
27981,27982,997,2021-12-20 22:04:00
27982,27983,998,2021-12-24 03:07:00


In [61]:
table_interaction_item

Unnamed: 0,id,item_id,interaction_id,quantity
0,1,65,1,1
1,2,135,1,1
2,3,124,1,1
3,1,97,2,1
4,2,74,2,1
...,...,...,...,...
68076,2,141,27983,1
68077,1,34,27984,1
68078,2,67,27984,1
68079,3,20,27984,1


In [64]:
table_interaction_item = table_interaction_item.reset_index()
# table_interaction_item.columns = ['id', ]

In [65]:
table_interaction_item

Unnamed: 0,index,id,item_id,interaction_id,quantity
0,0,1,65,1,1
1,1,2,135,1,1
2,2,3,124,1,1
3,3,1,97,2,1
4,4,2,74,2,1
...,...,...,...,...,...
68076,68076,2,141,27983,1
68077,68077,1,34,27984,1
68078,68078,2,67,27984,1
68079,68079,3,20,27984,1


In [68]:
table_interaction_item = table_interaction_item[['index','item_id','interaction_id','quantity']]
table_interaction_item.columns = ['id','item_id','interaction_id','quantity']

In [70]:
table_interaction_item.id = table_interaction_item.id + 1

In [71]:
table_customer.to_csv('/Users/pio/Google 드라이브/kurly_hackerton/simulated_data/' + 'table_customer.csv', index=False, encoding='utf-8')
table_item.to_csv('/Users/pio/Google 드라이브/kurly_hackerton/simulated_data/' + 'table_item.csv', index=False, encoding='utf-8')
table_interaction.to_csv('/Users/pio/Google 드라이브/kurly_hackerton/simulated_data/' + 'table_interaction.csv', index=False, encoding='utf-8')
table_interaction_item.to_csv('/Users/pio/Google 드라이브/kurly_hackerton/simulated_data/' + 'table_interaction_item.csv', index=False, encoding='utf-8')