# Import user data and item data to DynamoDB

### Generate user history data

In [10]:
import csv
import json 

# Build ./user_history.csv: for every user, a JSON list of the distinct items
# they interacted with, each enriched with the item's catalogue attributes.

# Maximum number of distinct items kept per user (previously a bare literal).
MAX_HISTORY_ITEMS = 11
# Gender codes used in the CSV files mapped to the labels stored downstream.
# Any other code is passed through unchanged instead of silently reusing the
# label computed for the previous row.
GENDER_LABELS = {'M': 'male', 'F': 'female'}

# newline='' is what the csv module asks for so that quoted fields containing
# line breaks are parsed (and written) correctly on every platform.
user_file = r'../retail_data/users.csv'
with open(user_file, mode='r', encoding='utf8', newline='') as csv_user_file:
    user_reader = list(csv.DictReader(csv_user_file, delimiter=','))

item_file = r'../retail_data/items.csv'
with open(item_file, mode='r', encoding='utf8', newline='') as csv_item_file:
    item_reader = list(csv.DictReader(csv_item_file, delimiter=','))

interactions_file = r'../retail_data/interactions.csv'
with open(interactions_file, mode='r', encoding='utf8', newline='') as csv_interaction_file:
    interaction_reader = list(csv.DictReader(csv_interaction_file, delimiter=','))
# Walk the interactions from the last CSV row to the first, so the rows at the
# end of the file win the per-user cap below.
# NOTE(review): presumably the file is in chronological order, which would make
# these the most recent events - confirm.
interaction_reader.reverse()


print('begin process user')
user_dict = {}
for user in user_reader:
    gender = user['GENDER']
    user_dict[user['USER_ID']] = [
        {'age': user['AGE'], 'gender': GENDER_LABELS.get(gender, gender)}
    ]


print('begin process item')
item_dict = {}
for item in item_reader:
    gender = item['GENDER']
    item_dict[item['ITEM_ID']] = {
        'price': item['PRICE'],
        'category_1': item['CATEGORY_L1'],
        'category_2': item['CATEGORY_L2'],
        'product_description': item['PRODUCT_DESCRIPTION'],
        'gender': GENDER_LABELS.get(gender, gender),
    }


print('begin process interaction')
user_history = {}
for interaction in interaction_reader:
    user_id = interaction['USER_ID']
    item_id = interaction['ITEM_ID']

    user_interaction = user_history.get(user_id, [])
    if len(user_interaction) >= MAX_HISTORY_ITEMS:
        continue

    # Interactions that reference an item missing from items.csv are skipped
    # rather than aborting the whole run with a KeyError.
    item_info = item_dict.get(item_id)
    if item_info is None:
        continue

    # Keep each item at most once per user.
    if any(entry.get('item_id') == item_id for entry in user_interaction):
        continue

    # Copy the catalogue entry before annotating it: the same item appears in
    # many users' histories, and mutating the shared dict would overwrite
    # 'event_type' for every one of them.
    history_entry = dict(item_info)
    history_entry['item_id'] = item_id
    history_entry['event_type'] = interaction['EVENT_TYPE']
    user_interaction.append(history_entry)
    user_history[user_id] = user_interaction

print('begin to write user history')
with open(r'./user_history.csv', 'w', encoding='utf8', newline='') as write_file:
    writer = csv.writer(write_file)
    writer.writerow(['user_id', 'user_history'])
    for key, history in user_history.items():
        writer.writerow([key, json.dumps(history)])
print('finish write user history')

        

begin process user
begin process item
begin process interaction
begin to write user info
finish write user info


### Define session function

In [None]:
import boto3
import json

def get_session_info(table_name, index, table_type):
    """Fetch and decode the JSON ``content`` stored for one user or item.

    Args:
        table_name: Name of the DynamoDB table to read from.
        index: Value of the table's key attribute (the user id or item id).
        table_type: ``'user'`` to look up by ``user_id`` or ``'item'`` to look
            up by ``item_id``.

    Returns:
        The object decoded from the record's ``content`` attribute, or ``""``
        when the table has no record for ``index``.

    Raises:
        ValueError: If ``table_type`` is neither ``'user'`` nor ``'item'``.
    """
    key_by_table_type = {'user': 'user_id', 'item': 'item_id'}
    # Validate before touching AWS so a bad argument fails fast with a clear
    # message instead of an UnboundLocalError on ``key`` further down.
    if table_type not in key_by_table_type:
        raise ValueError(
            "table_type must be 'user' or 'item', got {!r}".format(table_type)
        )
    key = key_by_table_type[table_type]

    dynamodb = boto3.resource('dynamodb')
    table = dynamodb.Table(table_name)

    response = table.get_item(Key={key: index})
    if "Item" in response:
        return json.loads(response["Item"]["content"])
    return ""


def update_session_info(table_name, index, index_info, table_type):
    """Write ``index_info`` as a JSON string into a user or item table.

    The record is stored with two attributes: the key attribute (``user_id``
    or ``item_id``) set to ``index``, and ``content`` holding
    ``json.dumps(index_info)``.

    Args:
        table_name: Name of the DynamoDB table to write to.
        index: Value of the table's key attribute (the user id or item id).
        index_info: JSON-serialisable object to store.
        table_type: ``'user'`` to key by ``user_id`` or ``'item'`` to key by
            ``item_id``.

    Returns:
        ``"success"`` when the response reports HTTP status 200, otherwise
        ``"failed"``.

    Raises:
        ValueError: If ``table_type`` is neither ``'user'`` nor ``'item'``.
    """
    key_by_table_type = {'user': 'user_id', 'item': 'item_id'}
    # Validate before touching AWS so a bad argument fails fast with a clear
    # message instead of an UnboundLocalError on ``key`` further down.
    if table_type not in key_by_table_type:
        raise ValueError(
            "table_type must be 'user' or 'item', got {!r}".format(table_type)
        )
    key = key_by_table_type[table_type]

    dynamodb = boto3.resource('dynamodb')
    table = dynamodb.Table(table_name)

    content = json.dumps(index_info)

    print('table_type:',table_type)
    print('key:',key)

    response = table.put_item(
        Item={
            key: index,
            'content': content
        }
    )

    # A missing ResponseMetadata block is treated the same as a non-200 status.
    status_code = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
    return "success" if status_code == 200 else "failed"


### Store user data in DynamoDB

Find the name of the user table in DynamoDB and set `table_name` in the cell below to that name.

In [None]:
import csv
import json 

# Upload one record per user to the DynamoDB user table. Each record combines
# the user's base attributes with the history list produced in
# ./user_history.csv. Users that already have a record are left untouched.

table_name = 'PersonalizeStack-retailusertable9836B5C5-HKAHZW7IOMWF'

# Gender codes used in the CSV mapped to the stored labels; any other code is
# passed through unchanged instead of reusing the previous row's label.
GENDER_LABELS = {'M': 'male', 'F': 'female'}

# NOTE(review): the first cell of this notebook reads users from
# ../retail_data/users.csv - confirm that ./users.csv is the intended path.
user_file = r'./users.csv'
with open(user_file, mode='r', encoding='utf8', newline='') as csv_user_file:
    user_reader = list(csv.DictReader(csv_user_file, delimiter=','))

user_history_file = r'./user_history.csv'
with open(user_history_file, mode='r', encoding='utf8', newline='') as csv_user_history_file:
    user_history_reader = list(csv.DictReader(csv_user_history_file, delimiter=','))

# Index the history rows by user id once instead of rescanning the whole list
# for every user. setdefault keeps the first row seen for a given user id.
history_json_by_user = {}
for history_row in user_history_reader:
    history_json_by_user.setdefault(history_row['user_id'], history_row['user_history'])

# The table handle does not change between users, so create it once.
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table(table_name)

i = 0
for user in user_reader:
    i += 1
    print(i)
    user_id = user['USER_ID']

    # Skip users that were already uploaded so the cell can be re-run.
    response = table.get_item(Key={'user_id': user_id})
    if "Item" in response:
        continue

    gender = user['GENDER']
    user_base = {'age': user['AGE'], 'gender': GENDER_LABELS.get(gender, gender)}

    # Users without any recorded interaction get an empty history.
    user_history_list = []
    if user_id in history_json_by_user:
        user_history_list = json.loads(history_json_by_user[user_id])

    # Store the parsed history list (the previous code stored the raw CSV row
    # of whichever user the lookup loop happened to stop on).
    user_info = {'user_base': user_base, 'user_history': user_history_list}
    try:
        update_result = update_session_info(table_name, user_id, user_info, "user")
    except Exception as err:
        # Best effort: report the failing user and carry on with the rest.
        print('update session info error', user_id, err)
    else:
        if update_result != "success":
            print('update session info error', user_id, update_result)
            

### Store item data in DynamoDB

Find the name of the item table in DynamoDB and set `item_table_name` in the cell below to that name.

In [None]:
import csv
import json 

# Upload one record per catalogue item to the DynamoDB item table. Items that
# already have a record are left untouched.

item_table_name='PersonalizeStack-retailitemtable624AB3CD-DBBU6LWB5U3A'

# Gender codes used in the CSV mapped to the stored labels; any other code is
# passed through unchanged instead of reusing the previous row's label.
GENDER_LABELS = {'M': 'male', 'F': 'female'}

# NOTE(review): the first cell of this notebook reads items from
# ../retail_data/items.csv - confirm that ./items.csv is the intended path.
item_file = r'./items.csv'
with open(item_file, mode='r', encoding='utf8', newline='') as csv_item_file:
    item_reader = list(csv.DictReader(csv_item_file, delimiter=','))

# The table handle does not change between items, so create it once.
dynamodb = boto3.resource('dynamodb')
table = dynamodb.Table(item_table_name)

i = 0
for item in item_reader:
    i += 1
    print(i)
    item_id = item['ITEM_ID']

    # Skip items that were already uploaded so the cell can be re-run.
    response = table.get_item(Key={'item_id': item_id})
    if "Item" in response:
        continue

    gender = item['GENDER']
    item_info = {
        'price': item['PRICE'],
        'category_1': item['CATEGORY_L1'],
        'category_2': item['CATEGORY_L2'],
        'product_description': item['PRODUCT_DESCRIPTION'],
        'gender': GENDER_LABELS.get(gender, gender),
    }
    try:
        update_result = update_session_info(item_table_name, item_id, item_info, "item")
    except Exception as err:
        # Best effort: report the failing item and carry on with the rest.
        print('update session info error', item_id, err)
    else:
        if update_result != "success":
            print('update session info error', item_id, update_result)
