## Implementing the modules in this notebook

In [1]:
import os
# Move the working directory one level up; the pwd output of the next cell shows
# this lands in the project root, presumably so that `helper` and `connector` import.
# NOTE(review): a relative chdir is not idempotent — re-running this cell climbs another level.
os.chdir('..')

In [2]:
# Print the current working directory via the shell to confirm the chdir above.
!pwd

/Users/temporaryadmin/dh_projects/optimal-location-finder


In [3]:
%reload_ext autoreload



from helper.metrics_kpi import *

from connector.gcp_conn import *

import warnings
warnings.filterwarnings('ignore')

In [4]:
def get_input_parameters(polygon_file, warehouse_polygon_df, json_):
    """Derive the run parameters from the polygon file name and its contents.

    The file name is expected to end in
    ``...-<city_id>-drive-time-<n>-<driving_time>.<ext>`` (for example
    ``dmart-polygon-FP_SG-drive-time-4-15.json``): the last hyphen-separated
    token is read as the driving time and the fifth token from the end as
    the city id.

    Args:
        polygon_file: File name, local path or ``gs://`` URI of the polygon
            file. Only the final path component is parsed, so dots or
            hyphens in directory/bucket names cannot leak into the result.
        warehouse_polygon_df: DataFrame with ``latitude`` and ``longitude``
            columns; the number of distinct pairs is reported as the store
            count.
        json_: Polygon payload handed to ``get_drive_modes``.

    Returns:
        dict with the keys ``Number_of_stores``, ``Driving_time`` (int),
        ``Driving_mode`` and ``City_ID`` (str).

    Raises:
        ValueError: If the last token of the file name is not an integer.
        IndexError: If the file name has fewer than five hyphen-separated
            tokens, or ``get_drive_modes`` yields nothing.
    """
    driving_modes = get_drive_modes(json_)

    # Drop every directory component first: previously the whole path was split
    # on '.' and '-', so '../data/<file>' or a short file name inside a
    # hyphenated bucket produced a wrong driving time / city id.
    file_stem = polygon_file.rsplit('/', 1)[-1].split('.')[0]
    name_tokens = file_stem.split('-')

    store_coordinates = warehouse_polygon_df[['latitude', 'longitude']].drop_duplicates()

    return {
        'Number_of_stores': store_coordinates.shape[0],
        'Driving_time': int(name_tokens[-1]),
        # First mode reported by get_drive_modes. Per the original note, index 0
        # is cycling and index 1 is driving — TODO confirm the ordering is stable.
        'Driving_mode': list(driving_modes)[0],
        'City_ID': name_tokens[-5],
    }

def save_kpis(input_params, business_kpis):
    """Write the KPI table to the staging bucket as CSV and report where it went.

    The object name is assembled from the run parameters and the current
    timestamp. The destination folder is fixed (``.../metrics/FP_SG``) and
    does not depend on ``input_params``; only the file name carries the
    ``City_ID``.

    Args:
        input_params: Mapping providing ``City_ID``, ``Number_of_stores``,
            ``Driving_time`` and ``Driving_mode``.
        business_kpis: Object exposing ``to_csv(path)``.

    Returns:
        str: The ``gs://`` path that was written. If ``to_csv`` raises, the
        exception text is returned instead of being raised, so callers must
        not assume the result is always a path.
    """
    kpi_folder = 'metrics'
    target_dir = 'gs://' + '/'.join((
        "qc-store-growth-expansion-stg",  # bucket
        'DS_exploration',                 # domain
        'optimal_location_finder',        # project
        kpi_folder,
        'FP_SG',                          # fixed region folder
    ))

    timestamp = get_current_time()

    city_id = input_params['City_ID']
    store_count = input_params['Number_of_stores']
    drive_time = input_params['Driving_time']
    drive_mode = input_params['Driving_mode']

    object_name = f'{city_id}_{store_count}_{drive_mode}_1-{drive_time}_{timestamp}_{kpi_folder}.csv'
    destination = f'{target_dir}/{object_name}'

    try:
        business_kpis.to_csv(destination)
    except Exception as error:
        # Best effort: hand the failure text back rather than propagating it.
        return str(error)
    return destination


def get_order_data():
    """Load the stored order snapshot from the staging bucket.

    Reads one fixed CSV object
    (``evaluation/FP_SG/cluster_data_created_only_restaurant_11_11_2021.csv``
    under the ``optimal_location_finder`` project folder); the function takes
    no parameters, so city and date range cannot be varied here.

    Returns:
        Whatever ``pd.read_csv`` returns for that object.
    """
    path_segments = (
        "qc-store-growth-expansion-stg",  # bucket
        'DS_exploration',                 # domain
        'optimal_location_finder',        # project
        'evaluation',                     # folder
        'FP_SG',                          # region
        'cluster_data_created_only_restaurant_11_11_2021.csv',
    )
    return pd.read_csv('gs://' + '/'.join(path_segments))


def get_order_query(city_id, start_date, end_date, round_off=4):
    """Build the BigQuery SQL that aggregates restaurant orders per delivery point.

    Orders are grouped by store type and by delivery latitude/longitude
    rounded to ``round_off`` decimals; customers, rows, item quantity and GMV
    are aggregated per group.

    Args:
        city_id: Value compared against ``o.global_entity_id``.
        start_date: Inclusive lower bound for ``DATE(placed_at)``.
        end_date: Inclusive upper bound for ``DATE(placed_at)``.
        round_off: Number of decimals used when rounding the coordinates.

    Returns:
        str: The SQL text. Arguments are interpolated verbatim, so pass
        trusted values only.

    NOTE(review): the query cross-joins ``UNNEST(items)``, so ``COUNT(1)`` and
    ``SUM(value.gmv_local)`` are evaluated once per item row rather than once
    per order — confirm that this is the intended definition of
    ``num_orders`` and ``gmv``.
    """
    # The literal is returned as-is, including its surrounding blank lines.
    return f"""

        SELECT store_type_l2,
        ROUND(delivery_location.latitude, {round_off}) lat,
        ROUND(delivery_location.longitude, {round_off}) long,
        COUNT(distinct analytical_customer_id) num_customers,
        COUNT(1) num_orders,
        SUM(i.quantity) num_items,
        SUM(value.gmv_local) gmv
        FROM `fulfillment-dwh-production.curated_data_shared_central_dwh.orders` o, UNNEST(items) AS i
        LEFT JOIN `fulfillment-dwh-production.curated_data_shared_central_dwh.vendors` USING(vendor_id)
        WHERE o.global_entity_id = '{city_id}'
        AND DATE(placed_at) BETWEEN '{start_date}' AND '{end_date}'
        AND order_status = 'sent'
        AND o.is_own_delivery
        AND store_type_l2 in ('restaurants')
        AND delivery_location.latitude IS NOT NULL
        AND delivery_location.longitude IS NOT NULL
        GROUP BY 1, 2, 3

    """



In [5]:
# Point Google's Application Default Credentials lookup at a key file.
# NOTE(review): the path is relative, so it depends on the current working directory.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../../cred/credentials.json'
# NOTE(review): the constructed GCSOperations object is discarded — presumably built
# only for a side effect of its constructor; confirm in connector.gcp_conn.
GCSOperations("qc-store-growth-expansion-stg")

# Assemble the gs:// URI of the polygon file from bucket, folder and file name.
bucket_name = "qc-store-growth-expansion-stg"
folder_name = 'existing_dmart_bucket'
polygon_file = 'dmart-polygon-FP_SG-drive-time-4-15.json'
path_for_polygon = f'gs://{bucket_name}/{folder_name}/{polygon_file}'
# Bare expression: display the resulting URI.
path_for_polygon

'gs://qc-store-growth-expansion-stg/existing_dmart_bucket/dmart-polygon-FP_SG-drive-time-4-15.json'

In [6]:
# Show the current working directory (bare `pwd` relies on IPython automagic).
pwd

'/Users/temporaryadmin/dh_projects/optimal-location-finder'

In [8]:
# set the bigquery connection once ready

# bigquery_conn = BigQueryOperations("quick-commerce-data")
# sql_query = "SELECT * FROM `fulfillment-dwh-production.curated_data_shared_central_dwh.orders` LIMIT 100"
# resp = bigquery_conn.execute_query(sql_query)

# Until then, use the fixed CSV snapshot that get_order_data() reads from GCS.
queried_order_info = get_order_data()

In [7]:
# Fetch the polygon payload for the URI held in path_for_polygon.
json_ = get_polygons_for_locations(path_for_polygon)

In [9]:
# Fetch the polygon payload for the URI held in path_for_polygon.
json_ = get_polygons_for_locations(path_for_polygon)

# Tabular form of the payload; get_input_parameters reads its latitude/longitude columns.
warehouse_polygon_df = convert_json_to_dataframe(json_)

# Run parameters (store count, driving time, driving mode, city id) derived from path and payload.
input_params = get_input_parameters(path_for_polygon, warehouse_polygon_df, json_)

# presumably keeps only the polygons matching input_params — confirm in helper.metrics_kpi
polygons_based_on_driving = select_polygons_based_on_input_params(warehouse_polygon_df, input_params)

In [10]:
# Combine the order data with the selected polygons; the helper returns two results
# (named nested / unnest here) — their exact shape is not visible in this notebook.
constrained_locations_nested, constrained_locations_unnest = generate_constraint_based_info(queried_order_info, polygons_based_on_driving)

In [9]:
# Column names handed to the baseline and store/network metric helpers.
columns_for_metrics = ['number_of_locations_covered',
                       'total_customers_list_coverage',
                       'total_orders_list_coverage']

# Baseline values for those columns computed from the full order data —
# presumably the reference totals for coverage percentages; confirm in helper.metrics_kpi.
baseline_metrics = get_all_baseline_metrics(queried_order_info, columns_for_metrics)

In [10]:
# Compute the metrics from the nested/unnested constraint results and the baselines;
# the helper returns a per-location result and the metric table (metric_df).
all_locations_for_all_time, metric_df = calculate_metrics_at_store_and_network_level(constrained_locations_nested,
                                                                                     constrained_locations_unnest,
                                                                                     columns_for_metrics,
                                                                                     baseline_metrics)

In [11]:
# Run the weighted driving-time helper and keep only the five columns used by the
# drive-time averaging steps.
average_driving_time_for_all_stores_all_time = \
        get_weighted_driving_time(all_locations_for_all_time)[['pred_lat', 'pred_long', 'driving_time',
                                                      'total_customers_list_coverage', 'avg_driving_time']]

In [12]:
# Network-level average drive time derived from the per-store table (helper semantics not visible here).
network_driving_average_at_different_times_df =  get_avg_drive_time_network(average_driving_time_for_all_stores_all_time)

In [13]:
# Attach the network-level drive-time columns to metric_df (join aligns on the index by default).
# NOTE(review): metric_df is overwritten, so re-running this cell joins onto the already-joined frame.
metric_df = metric_df.join(network_driving_average_at_different_times_df)

In [14]:
# Store-level drive-time metrics computed from metric_df and the per-store drive-time table.
store_level_new_metrics = get_avg_drive_time_store(metric_df,average_driving_time_for_all_stores_all_time)

In [15]:
# Fold the store-level metrics back into metric_df (the name is reassigned in place of the previous frame).
metric_df = update_metrics(metric_df, store_level_new_metrics)

In [16]:
# Reduce metric_df to the KPI columns chosen by select_relevant_kpis.
business_kpis = select_relevant_kpis(metric_df)

In [1]:
# Display the KPI table. NOTE(review): the captured output is a NameError from a fresh
# kernel (execution count 1) — the cells that define business_kpis must be run first.
business_kpis

NameError: name 'business_kpis' is not defined

In [18]:
# saved_path = save_kpis(input_params, business_kpis)

# print(f'saved at {saved_path}')

## The Final KPI Run

In [5]:
# Point Google's Application Default Credentials lookup at a key file.
# NOTE(review): the path is relative, so it depends on the current working directory.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = '../../cred/credentials.json'
# NOTE(review): the constructed GCSOperations object is discarded — presumably built
# only for a side effect of its constructor; confirm in connector.gcp_conn.
GCSOperations("qc-store-growth-expansion-stg")

bucket_name = "qc-store-growth-expansion-stg"

# Assemble the gs:// URI of the polygon file from bucket, folder and file name.
folder_name = 'existing_dmart_bucket'
polygon_file = 'dmart-polygon-FP_SG-drive-time-4-15.json'
path_for_polygon = f'gs://{bucket_name}/{folder_name}/{polygon_file}'
# Bare expression: display the resulting URI.
path_for_polygon

'gs://qc-store-growth-expansion-stg/existing_dmart_bucket/dmart-polygon-FP_SG-drive-time-4-15.json'

In [6]:
# Fetch the polygon payload for the URI held in path_for_polygon.
json_ = get_polygons_for_locations(path_for_polygon)

In [7]:
# Tabular form of the polygon payload; get_input_parameters reads its latitude/longitude columns.
warehouse_polygon_df = convert_json_to_dataframe(json_)

In [8]:
# Derive store count, driving time, driving mode and city id from the path and payload.
input_params = get_input_parameters(path_for_polygon, warehouse_polygon_df, json_)

In [9]:
# Display the derived run parameters as a sanity check.
input_params

{'Number_of_stores': 15,
 'Driving_time': 15,
 'Driving_mode': 'cycling',
 'City_ID': 'FP_SG'}

In [10]:
from connector.bigquery_conn import *

In [11]:
# Project identifier handed to BigQueryOperations when the connection is created.
project_id = 'dh-darkstores-stg'

In [12]:
# BigQuery helper object bound to project_id (class comes from the connector star import).
bq_conn = BigQueryOperations(project_id)

In [13]:
# Query the customer order data. Arguments are presumably city id, start date, end date
# and coordinate rounding (same order as get_order_query) — TODO confirm.
# NOTE(review): start and end date are the same day; verify that is the intended window.
queried_order_info = bq_conn.get_customer_order_data('FP_SG',"2021-01-01","2021-01-01",4)

bigQuery job done, downloading...


In [14]:
## big query
# queried_order_info = get_order_data()

In [15]:
# Display the queried order data (the notebook shows a truncated preview of the frame).
queried_order_info

Unnamed: 0,store_type_l2,lat,long,num_customers,num_orders,num_items,gmv
0,restaurants,1.3329,103.7210,3,60,78,1009.800000000
1,restaurants,1.3892,103.8979,8,83,85,1895.770000000
2,restaurants,1.3944,103.7446,3,52,56,1472.280000000
3,restaurants,1.3665,103.8723,3,41,41,1130.080000000
4,restaurants,1.2995,103.8600,8,56,59,1839.550000000
...,...,...,...,...,...,...,...
33333,restaurants,1.4365,103.7816,4,48,48,1448.820000000
33334,restaurants,1.3759,103.9440,2,48,48,2590.320000000
33335,restaurants,1.3788,103.7359,3,48,68,1013.620000000
33336,restaurants,1.3527,103.7194,4,48,55,746.720000000


In [23]:
%load_ext autoreload
%autoreload 2



from helper.metrics_kpi import *

from connector.gcp_conn import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
%%time
# Single-call KPI computation from the polygon frame, the order data and the run
# parameters; timed by the %%time cell magic on the first line of this cell.
business_kp = get_kpi_metrics(warehouse_polygon_df,
                              queried_order_info,
                              input_params)

CPU times: user 36.5 s, sys: 503 ms, total: 37 s
Wall time: 37.6 s


In [18]:
# Display the KPI table returned by get_kpi_metrics (indexed by driving_time in the output below).
business_kp

Unnamed: 0_level_0,customer_penetration_absolute,customer_penetration %,overlap %,avg_drive_time,store_level_metrics
driving_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
5,11139,10.042916,0.0,5.0,"{ 'pred_lat' : 1.32620892, 'pred..."
7,21706,19.570117,0.0,6.321723,"{ 'pred_lat' : 1.32620892, 'pred..."
10,42199,38.046595,1.685034,8.390104,"{ 'pred_lat' : 1.32620892, 'pred..."
15,75539,68.10592,13.831656,11.705916,"{ 'pred_lat' : 1.32620892, 'pred..."


In [20]:
# Expand the store-level metrics stored as text in the driving_time == 5 row into a DataFrame.
# NOTE(review): eval() executes whatever code that cell value contains; if the text is a
# plain Python literal, ast.literal_eval would be the safer parser — confirm the stored format.
pd.DataFrame(eval(business_kp.loc[5]['store_level_metrics']))

Unnamed: 0,pred_lat,pred_long,customer_penetration,customer_penetration %,overlap coverage %,avg_driving_time
0,1.326209,103.853011,841.0,17.524484,0.0,5.0
1,1.322911,103.920874,688.0,14.33632,0.0,5.0
2,1.420482,103.912009,16.0,0.333403,0.0,5.0
3,1.28537,103.834724,852.0,17.753699,0.0,5.0
4,1.360945,103.857244,377.0,7.855803,0.0,5.0
5,1.406025,103.758371,20.0,0.416753,0.0,5.0
6,1.290559,103.814227,912.0,19.003959,0.0,5.0
7,1.372423,103.933873,27.0,0.562617,0.0,5.0
8,1.437959,103.844939,975.0,20.316733,0.0,5.0
9,1.334231,103.699479,252.0,5.251094,0.0,5.0
