### Import Libraries

In [1]:
import pandas as pd
import operator

### Load Dataset

In [2]:
# Read the raw order log from the working directory. The file carries a leading
# unnamed index column, which pandas loads as 'Unnamed: 0'.
df = pd.read_csv('order_brush_order.csv')
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,orderid,shopid,userid,event_time
0,31076582227611,93950878,30530270,2019-12-27 00:23:03
1,31118059853484,156423439,46057927,2019-12-27 11:54:20
2,31123355095755,173699291,67341739,2019-12-27 13:22:35
3,31122059872723,63674025,149380322,2019-12-27 13:01:00
4,31117075665123,127249066,149493217,2019-12-27 11:37:55


In [3]:
# Convert event_time to datetime so that subtracting two order times yields a
# Timedelta that can be compared against the one-hour window.
df['event_time'] = pd.to_datetime(df['event_time'])
# Convert userid to string to avoid userid.0
# NOTE(review): presumably this guards against ids being rendered as floats
# (e.g. '123.0') once rows are combined later on -- confirm.
df['userid'] = df['userid'].apply(str)

## Order Brushing

In [4]:
def get_subset_shop(df, shopid):
    '''
    Select the orders of one shop and put them in chronological order.

    input: df (all orders; needs 'shopid', 'event_time' and 'userid' columns), shopid
    output: rows of df whose shopid matches, sorted by event_time and then
            userid, re-indexed from 0
    '''
    is_target_shop = df['shopid'] == shopid
    return (
        df.loc[is_target_shop]
        .sort_values(by=['event_time', 'userid'])
        .reset_index(drop=True)
    )

In [5]:
def get_hour_marker(time_list):
    '''
    For every row, find the last row that still falls inside the one-hour
    window opening at that row.

    input: column for timestamp (sorted, ascending)
    output: list with one entry per row; entry i is the position of the last
            row whose time is at most one hour (inclusive) after row i.
            An empty input gives an empty list.
    '''

    time_list = list(time_list)
    num_rows = len(time_list)
    one_hour = pd.to_timedelta(1, unit='h')
    index_marker = []

    # Because the input is sorted, the window end never moves backwards as the
    # start advances, so a single forward-moving pointer is enough.
    end = 0
    for i in range(num_rows):
        if end < i:
            end = i
        limit = time_list[i] + one_hour

        # Advance past every row at or before the limit. Using <= keeps all
        # rows that share the timestamp exactly one hour after the start,
        # instead of stopping at the first of them.
        while end + 1 < num_rows and time_list[end + 1] <= limit:
            end += 1

        index_marker.append(end)

    return index_marker

In [6]:
def get_brush_period(df, start, marker):
    '''
    Cut one window of consecutive orders out of a shop's order table.

    input: df (orders of one shop), start (first index label), marker (last index label)
    output: rows of df from label start through label marker, both ends included
    '''
    window = slice(start, marker)
    return df.loc[window]

In [7]:
def get_suspicious_users(df_order_brushing):
    '''
    Pick the user or users responsible for the largest share of the orders.

    input: df with all the order-brushing rows for a specific shopid
           (needs a 'userid' column)
    output: string of the user id(s) with the highest order count, joined
            with '&'. Ties are listed in ascending order (numerically when the
            ids are whole numbers) so the result is the same on every run.
            An empty input gives ''.
    '''
    # The largest share of orders is the largest count, since every share is
    # divided by the same total.
    order_counts = df_order_brushing['userid'].value_counts()
    if order_counts.empty:
        return ''

    top_users = order_counts[order_counts == order_counts.max()].index

    def sort_key(user):
        # Whole-number ids sort by value ('3' before '20'); anything else
        # falls back to plain text order after the numeric ids.
        text = str(user)
        if text.isdecimal():
            return (0, int(text), text)
        return (1, 0, text)

    return '&'.join(str(user) for user in sorted(top_users, key=sort_key))

In [8]:
# Distinct shop ids; final_table starts with one row per shop.
shop_list = list(set(df['shopid']))
final_table = pd.DataFrame(shop_list).set_axis(['shopid'], axis='columns')

In [9]:
# Master Loop
# A one-hour window is flagged as order brushing when it holds at least this
# many orders per distinct buyer.
CONCENTRATION_THRESHOLD = 3

shop_status = []
suspicious_users = []

for shop in shop_list:
    df_shop = get_subset_shop(df, shop)
    index_marker = get_hour_marker(df_shop['event_time'])

    # Collect the flagged windows and combine them once at the end.
    # DataFrame.append was removed in pandas 2.0, and growing a frame inside a
    # loop copies it on every iteration.
    brushing_periods = []

    for i in range(len(index_marker)):
        df_period = get_brush_period(df_shop, i, index_marker[i])
        concentration_rate = df_period.shape[0] / df_period['userid'].nunique()

        if concentration_rate >= CONCENTRATION_THRESHOLD:
            brushing_periods.append(df_period)

    if brushing_periods:
        df_order_brushing = pd.concat(brushing_periods)
        # Windows starting at neighbouring rows overlap, so the same order can
        # be collected several times. df_shop has a unique 0..n-1 index, so
        # dropping repeated labels counts each order once.
        df_order_brushing = df_order_brushing[~df_order_brushing.index.duplicated()]

        shop_status.append('OB')
        suspicious_users.append(get_suspicious_users(df_order_brushing))

    else:
        shop_status.append('OK')
        suspicious_users.append(0)

In [10]:
# Add userid column:
# suspicious_users was filled by iterating shop_list, the same list that
# final_table was built from, so the values line up row by row with 'shopid'.
final_table['userid'] = suspicious_users

In [12]:
# Write only the shopid and userid columns. Without index=False the row index
# would be saved as an extra unnamed first column.
final_table.to_csv('submission.csv', index=False)

-----------------------