In [None]:
# Imports and useful definitions
import random
import numpy as np
import pandas as pd
from IPython.display import display
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# Notebook-wide settings.
# Seed Python's built-in RNG (this call does not touch NumPy's generator).
random.seed(156)
# Render floats in pandas output with three decimals.
pd.set_option('display.float_format', lambda value: '%.3f' % value)

In [None]:
# Define the evaluation metric that we want to minimize
def rmsle(predicted, actual):
    """Root mean squared logarithmic error between two equally long sequences.

    Parameters
    ----------
    predicted, actual : array-like of numbers
        Predictions and ground truth, compared position by position.

    Returns
    -------
    float
        sqrt(mean((log1p(predicted) - log1p(actual)) ** 2)).

    Raises
    ------
    AssertionError
        If the two inputs differ in length.

    Notes
    -----
    Both inputs are converted to plain NumPy arrays before any arithmetic.
    Without that, two pandas Series would be aligned on their index labels
    instead of by position, which silently changes the score (or yields NaN)
    whenever the indexes differ. As a consequence a NaN in either input now
    propagates to the result instead of being skipped.
    """
    assert len(predicted) == len(actual), "predicted and actual must have the same length"
    predicted_values = np.asarray(predicted, dtype=float)
    actual_values = np.asarray(actual, dtype=float)
    log_gap = np.log1p(predicted_values) - np.log1p(actual_values)
    return np.sqrt(np.mean(np.power(log_gap, 2)))

In [None]:
# Read the training and validation splits from disk.
# Each target vector is simply the "adj_demand" column of its frame
# (the column is not removed from x_train / x_valid).
x_train = pd.read_csv("temp_data/x_train10.csv")
x_valid = pd.read_csv("temp_data/x_valid10.csv")
y_train, y_valid = x_train["adj_demand"], x_valid["adj_demand"]

In [None]:
# Baseline prediction: the overall mean of adj_demand in the training data
# (only the mean is computed here; there is no global median).
# The trailing string literal is a disabled scoring experiment, kept as-is.
global_mean = x_train["adj_demand"].mean()
'''x_valid["glob1"] = global_mean
display(str(rmsle(x_valid["glob1"], y_valid)))'''

In [None]:
### Level 1 ###

# Disabled: the definition below is wrapped in a string literal, so it never
# executes and neither sProd_means nor sProd_mean exists at runtime.
# It would compute the mean adj_demand per prod_cluster from x_train.
'''# Create prediction using data for similar products
sProd_means = x_train.groupby("prod_cluster").agg({"adj_demand" : np.mean}).to_dict()
def sProd_mean(prod_cluster):
    return sProd_means["adj_demand"][prod_cluster]'''

# Create prediction using data for product_id
# Mean adj_demand of every product_id in the training data, stored as
# {"adj_demand": {product_id: mean}}. The aggregation is named by string
# because recent pandas releases deprecate passing NumPy callables to agg().
prod_means = x_train.groupby("product_id").agg({"adj_demand": "mean"}).to_dict()
def prod_mean(product_id):
    """Return the training-set mean of adj_demand for ``product_id``.

    Raises KeyError when the product has no entry in ``prod_means``.
    """
    return prod_means["adj_demand"][product_id]

In [None]:
### Level 2 ###

# Disabled: both definitions below sit inside one string literal, so
# sProd_sStore_median and sProd_store_median are never defined at runtime.
# They would look up the median adj_demand per (store_cluster, prod_cluster)
# and per (store_id, prod_cluster) respectively.
'''# Create prediction using data for similar products in similar stores
sProd_sStore_medians = x_train.groupby(["store_cluster", "prod_cluster"]).agg({"adj_demand" : np.median}).to_dict()
def sProd_sStore_median(prod_cluster, store_cluster):
    return sProd_sStore_medians["adj_demand"][store_cluster, prod_cluster]

# Create prediction using data for similar products in store_id
sProd_store_medians = x_train.groupby(["store_id", "prod_cluster"]).agg({"adj_demand" : np.median}).to_dict()
def sProd_store_median(prod_cluster, store_id):
    return sProd_store_medians["adj_demand"][store_id, prod_cluster]'''

# Create prediction using data for product_id in similar stores
# Mean adj_demand per (store_cluster, product_id) in the training data, stored
# as {"adj_demand": {(store_cluster, product_id): mean}}. The aggregation is
# named by string because recent pandas releases deprecate NumPy callables in agg().
prod_sStore_means = (
    x_train.groupby(["store_cluster", "product_id"])
    .agg({"adj_demand": "mean"})
    .to_dict()
)
def prod_sStore_mean(product_id, store_cluster):
    """Return the training-set mean of adj_demand for a product within a store cluster.

    Note that the dictionary key order (store_cluster, product_id) follows the
    groupby column order, not the parameter order of this function.
    Raises KeyError when the combination has no entry.
    """
    return prod_sStore_means["adj_demand"][store_cluster, product_id]

# Create prediction using data for product_id in store_id
# Mean adj_demand per (store_id, product_id) in the training data, stored as
# {"adj_demand": {(store_id, product_id): mean}}. The aggregation is named by
# string because recent pandas releases deprecate NumPy callables in agg().
prod_store_means = (
    x_train.groupby(["store_id", "product_id"])
    .agg({"adj_demand": "mean"})
    .to_dict()
)
def prod_store_mean(product_id, store_id):
    """Return the training-set mean of adj_demand for a product in one store.

    Note that the dictionary key order (store_id, product_id) follows the
    groupby column order, not the parameter order of this function.
    Raises KeyError when the combination has no entry.
    """
    return prod_store_means["adj_demand"][store_id, product_id]

In [None]:
### Level 3 ###

# Disabled: the four definitions below sit inside one string literal, so none
# of the sProd_*_median lookups keyed on prod_cluster exists at runtime.
# Each would look up a median adj_demand for a three-column key combining
# depot (id or cluster), store (id or cluster) and prod_cluster.
'''# Create prediction using data for similar products in similar stores from similar depots
sProd_sStore_sDepot_medians = x_train.groupby(["depot_cluster", "store_cluster", 
                                               "prod_cluster"]).agg({"adj_demand" : np.median}).to_dict()
def sProd_sStore_sDepot_median(prod_cluster, store_cluster, depot_cluster):
    return sProd_sStore_sDepot_medians["adj_demand"][depot_cluster, store_cluster, prod_cluster]

# Create prediction using data for similar products in similar stores from depot_id
sProd_sStore_depot_medians = x_train.groupby(["depot_id", "store_cluster", 
                                              "prod_cluster"]).agg({"adj_demand" : np.median}).to_dict()
def sProd_sStore_depot_median(prod_cluster, store_cluster, depot_id):
    return sProd_sStore_depot_medians["adj_demand"][depot_id, store_cluster, prod_cluster]

# Create prediction using data for similar products in store_id from similar depots
sProd_store_sDepot_medians = x_train.groupby(["depot_cluster", "store_id", 
                                              "prod_cluster"]).agg({"adj_demand" : np.median}).to_dict()
def sProd_store_sDepot_median(prod_cluster, store_id, depot_cluster):
    return sProd_store_sDepot_medians["adj_demand"][depot_cluster, store_id, prod_cluster]

# Create prediction using data for similar products in store_id from depot_id
sProd_store_depot_medians = x_train.groupby(["depot_id", "store_id", 
                                             "prod_cluster"]).agg({"adj_demand" : np.median}).to_dict()
def sProd_store_depot_median(prod_cluster, store_id, depot_id):
    return sProd_store_depot_medians["adj_demand"][depot_id, store_id, prod_cluster]'''

# Create prediction using data for product_id in similar stores from similar depots
# Mean adj_demand per (depot_cluster, store_cluster, product_id) in the training
# data, stored as {"adj_demand": {(depot_cluster, store_cluster, product_id): mean}}.
# The aggregation is named by string because recent pandas releases deprecate
# NumPy callables in agg().
prod_sStore_sDepot_means = (
    x_train.groupby(["depot_cluster", "store_cluster", "product_id"])
    .agg({"adj_demand": "mean"})
    .to_dict()
)
def prod_sStore_sDepot_mean(product_id, store_cluster, depot_cluster):
    """Return the training-set mean of adj_demand for a product within a
    store cluster and depot cluster.

    The dictionary key order (depot_cluster, store_cluster, product_id) follows
    the groupby column order, which is the reverse of the parameter order.
    Raises KeyError when the combination has no entry.
    """
    return prod_sStore_sDepot_means["adj_demand"][depot_cluster, store_cluster, product_id]

# Create prediction using data for product_id in similar stores from depot_id
# Mean adj_demand per (depot_id, store_cluster, product_id) in the training
# data, stored as {"adj_demand": {(depot_id, store_cluster, product_id): mean}}.
# The aggregation is named by string because recent pandas releases deprecate
# NumPy callables in agg().
prod_sStore_depot_means = (
    x_train.groupby(["depot_id", "store_cluster", "product_id"])
    .agg({"adj_demand": "mean"})
    .to_dict()
)
def prod_sStore_depot_mean(product_id, store_cluster, depot_id):
    """Return the training-set mean of adj_demand for a product within a
    store cluster served by one depot.

    The dictionary key order (depot_id, store_cluster, product_id) follows the
    groupby column order, which is the reverse of the parameter order.
    Raises KeyError when the combination has no entry.
    """
    return prod_sStore_depot_means["adj_demand"][depot_id, store_cluster, product_id]

# Create prediction using data for product_id in store_id from similar depots
# Mean adj_demand per (depot_cluster, store_id, product_id) in the training
# data, stored as {"adj_demand": {(depot_cluster, store_id, product_id): mean}}.
# The aggregation is named by string because recent pandas releases deprecate
# NumPy callables in agg().
prod_store_sDepot_means = (
    x_train.groupby(["depot_cluster", "store_id", "product_id"])
    .agg({"adj_demand": "mean"})
    .to_dict()
)
def prod_store_sDepot_mean(product_id, store_id, depot_cluster):
    """Return the training-set mean of adj_demand for a product in one store
    within a depot cluster.

    The dictionary key order (depot_cluster, store_id, product_id) follows the
    groupby column order, which is the reverse of the parameter order.
    Raises KeyError when the combination has no entry.
    """
    return prod_store_sDepot_means["adj_demand"][depot_cluster, store_id, product_id]

# Create prediction using data for product_id in store_id from depot_id
# Mean adj_demand per (depot_id, store_id, product_id) in the training data,
# stored as {"adj_demand": {(depot_id, store_id, product_id): mean}}.
# The aggregation is named by string because recent pandas releases deprecate
# NumPy callables in agg().
prod_store_depot_means = (
    x_train.groupby(["depot_id", "store_id", "product_id"])
    .agg({"adj_demand": "mean"})
    .to_dict()
)
def prod_store_depot_mean(product_id, store_id, depot_id):
    """Return the training-set mean of adj_demand for a product in one store
    served by one depot.

    The dictionary key order (depot_id, store_id, product_id) follows the
    groupby column order, which is the reverse of the parameter order.
    Raises KeyError when the combination has no entry.
    """
    return prod_store_depot_means["adj_demand"][depot_id, store_id, product_id]

In [None]:
def solution(key):
    """Predict demand for one row by backing off through the mean lookups.

    Parameters
    ----------
    key : iterable
        Values in the order (depot_id, store_id, product_id, depot_cluster,
        store_cluster, prod_cluster). The trailing prod_cluster is accepted
        but not used by any of the active lookups.

    Returns
    -------
    The value of the first lookup that has an entry for this row, tried in
    this order:

    1. product / store / depot
    2. product / store / depot cluster
    3. product / store
    4. product / store cluster / depot
    5. product / store cluster / depot cluster
    6. product / store cluster
    7. product
    8. ``global_mean`` when none of the above has an entry

    Only a missing entry (KeyError) moves on to the next lookup. Any other
    exception, for example a NameError because a lookup cell was never run,
    is raised instead of being silently replaced by a coarser estimate.
    """
    depot_id, store_id, prod_id, depot_cluster, store_cluster = tuple(key)[:5]

    # Wrapped in lambdas so each lookup is only evaluated when it is reached.
    lookups = (
        lambda: prod_store_depot_mean(prod_id, store_id, depot_id),
        lambda: prod_store_sDepot_mean(prod_id, store_id, depot_cluster),
        lambda: prod_store_mean(prod_id, store_id),
        lambda: prod_sStore_depot_mean(prod_id, store_cluster, depot_id),
        lambda: prod_sStore_sDepot_mean(prod_id, store_cluster, depot_cluster),
        lambda: prod_sStore_mean(prod_id, store_cluster),
        lambda: prod_mean(prod_id),
    )
    for lookup in lookups:
        try:
            return lookup()
        except KeyError:
            # No training data at this granularity; try the next, coarser one.
            continue

    return global_mean


# Predict every validation row with the back-off lookup, store the result in
# a new "solution" column of x_valid, and show the resulting RMSLE.
x_valid["solution"] = x_valid[
    ["depot_id", "store_id", "product_id", "depot_cluster", "store_cluster", "prod_cluster"]
].apply(solution, axis=1)
display(str(rmsle(x_valid["solution"], y_valid)))


In [None]:
# Disabled experiment: everything between the triple quotes is a single string
# literal, so none of it executes. The text scores two single-key lookups
# (by prod_cluster, then by product_id) against y_valid with rmsle.
# NOTE(review): it refers to sProd_median, prod_median and global_median, none
# of which has a live definition in the cells above; those would have to be
# restored before this cell can be re-enabled.
'''def sProd_median_test(key):
    key = tuple(key)
    prod_cluster = key[0]
    try:
        val = sProd_median(prod_cluster)
    except:
        val = global_median
    return val

x_valid["sProd"] = x_valid[["prod_cluster"]].apply(lambda x:sProd_median_test(x), axis = 1)
display(str(rmsle(x_valid["sProd"], y_valid)))

def prod_median_test(key):
    key = tuple(key)
    product_id = key[0]
    try:
        val = prod_median(product_id)
    except:
        val = global_median
    return val

x_valid["prod"] = x_valid[["product_id"]].apply(lambda x:prod_median_test(x), axis = 1)
display(str(rmsle(x_valid["prod"], y_valid)))'''

In [None]:
# Disabled experiment: everything between the triple quotes is a single string
# literal, so none of it executes. The text scores each two-key lookup
# (store id or cluster combined with product id or cluster) against y_valid.
# NOTE(review): it refers to *_median lookups and to global_median. The
# sProd_* ones are defined only inside another disabled string, and
# prod_sStore_median / prod_store_median / global_median have no definition in
# the cells above (the live lookups are the *_mean variants).
'''def sProd_sStore_median_test(key):
    key = tuple(key)
    store_cluster = key[0]
    prod_cluster = key[1]
    try:
        val = sProd_sStore_median(prod_cluster, store_cluster)
    except:
        val = global_median
    return val

x_valid["sProd_sStore"] = x_valid[["store_cluster", "prod_cluster"]].apply(lambda x:sProd_sStore_median_test(x), axis = 1)
display(str(rmsle(x_valid["sProd_sStore"], y_valid)))

def sProd_store_median_test(key):
    key = tuple(key)
    store_id = key[0]
    prod_cluster = key[1]
    try:
        val = sProd_store_median(prod_cluster, store_id)
    except:
        val = global_median
    return val

x_valid["sProd_store"] = x_valid[["store_id", "prod_cluster"]].apply(lambda x:sProd_store_median_test(x), axis = 1)
display(str(rmsle(x_valid["sProd_store"], y_valid)))

def prod_sStore_median_test(key):
    key = tuple(key)
    store_cluster = key[0]
    product_id = key[1]
    try:
        val = prod_sStore_median(product_id, store_cluster)
    except:
        val = global_median
    return val

x_valid["prod_sStore"] = x_valid[["store_cluster", "product_id"]].apply(lambda x:prod_sStore_median_test(x), axis = 1)
display(str(rmsle(x_valid["prod_sStore"], y_valid)))

def prod_store_median_test(key):
    key = tuple(key)
    store_id = key[0]
    product_id = key[1]
    try:
        val = prod_store_median(product_id, store_id)
    except:
        val = global_median
    return val

x_valid["prod_store"] = x_valid[["store_id", "product_id"]].apply(lambda x:prod_store_median_test(x), axis = 1)
display(str(rmsle(x_valid["prod_store"], y_valid)))'''

In [None]:
# Disabled experiment: everything between the triple quotes is a single string
# literal, so none of it executes. The text scores each three-key lookup
# (depot, store and product, each by id or by cluster) against y_valid.
# NOTE(review): it refers to *_median lookups and to global_median. The
# sProd_* ones are defined only inside another disabled string, and the
# prod_*_median ones and global_median have no definition in the cells above
# (the live lookups are the *_mean variants).
'''def sProd_sStore_sDepot_median_test(key):
    key = tuple(key)
    depot_cluster = key[0]
    store_cluster = key[1]
    prod_cluster = key[2]
    try:
        val = sProd_sStore_sDepot_median(prod_cluster, store_cluster, depot_cluster)
    except:
        val = global_median
    return val

x_valid["sProd_sStore_sDepot"] = x_valid[["depot_cluster", "store_cluster", 
                                          "prod_cluster"]].apply(lambda x:sProd_sStore_sDepot_median_test(x), axis = 1)
display(str(rmsle(x_valid["sProd_sStore_sDepot"], y_valid)))

def sProd_sStore_depot_median_test(key):
    key = tuple(key)
    depot_id = key[0]
    store_cluster = key[1]
    prod_cluster = key[2]
    try:
        val = sProd_sStore_depot_median(prod_cluster, store_cluster, depot_id)
    except:
        val = global_median
    return val

x_valid["sProd_sStore_depot"] = x_valid[["depot_id", "store_cluster", 
                                          "prod_cluster"]].apply(lambda x:sProd_sStore_depot_median_test(x), axis = 1)
display(str(rmsle(x_valid["sProd_sStore_depot"], y_valid)))

def sProd_store_sDepot_median_test(key):
    key = tuple(key)
    depot_cluster = key[0]
    store_id = key[1]
    prod_cluster = key[2]
    try:
        val = sProd_store_sDepot_median(prod_cluster, store_id, depot_cluster)
    except:
        val = global_median
    return val

x_valid["sProd_store_sDepot"] = x_valid[["depot_cluster", "store_id", 
                                          "prod_cluster"]].apply(lambda x:sProd_store_sDepot_median_test(x), axis = 1)
display(str(rmsle(x_valid["sProd_store_sDepot"], y_valid)))

def sProd_store_depot_median_test(key):
    key = tuple(key)
    depot_id = key[0]
    store_id = key[1]
    prod_cluster = key[2]
    try:
        val = sProd_store_depot_median(prod_cluster, store_id, depot_id)
    except:
        val = global_median
    return val

x_valid["sProd_store_depot"] = x_valid[["depot_id", "store_id", 
                                          "prod_cluster"]].apply(lambda x:sProd_store_depot_median_test(x), axis = 1)
display(str(rmsle(x_valid["sProd_store_depot"], y_valid)))

def prod_sStore_sDepot_median_test(key):
    key = tuple(key)
    depot_cluster = key[0]
    store_cluster = key[1]
    product_id = key[2]
    try:
        val = prod_sStore_sDepot_median(product_id, store_cluster, depot_cluster)
    except:
        val = global_median
    return val

x_valid["prod_sStore_sDepot"] = x_valid[["depot_cluster", "store_cluster", 
                                          "product_id"]].apply(lambda x:prod_sStore_sDepot_median_test(x), axis = 1)
display(str(rmsle(x_valid["prod_sStore_sDepot"], y_valid)))

def prod_sStore_depot_median_test(key):
    key = tuple(key)
    depot_id = key[0]
    store_cluster = key[1]
    product_id = key[2]
    try:
        val = prod_sStore_depot_median(product_id, store_cluster, depot_id)
    except:
        val = global_median
    return val

x_valid["prod_sStore_depot"] = x_valid[["depot_id", "store_cluster", 
                                          "product_id"]].apply(lambda x:prod_sStore_depot_median_test(x), axis = 1)
display(str(rmsle(x_valid["prod_sStore_depot"], y_valid)))

def prod_store_sDepot_median_test(key):
    key = tuple(key)
    depot_cluster = key[0]
    store_id = key[1]
    product_id = key[2]
    try:
        val = prod_store_sDepot_median(product_id, store_id, depot_cluster)
    except:
        val = global_median
    return val

x_valid["prod_store_sDepot"] = x_valid[["depot_cluster", "store_id", 
                                          "product_id"]].apply(lambda x:prod_store_sDepot_median_test(x), axis = 1)
display(str(rmsle(x_valid["prod_store_sDepot"], y_valid)))

def prod_store_depot_median_test(key):
    key = tuple(key)
    depot_id = key[0]
    store_id = key[1]
    product_id = key[2]
    try:
        val = prod_store_depot_median(product_id, store_id, depot_id)
    except:
        val = global_median
    return val

x_valid["prod_store_depot"] = x_valid[["depot_id", "store_id", 
                                          "product_id"]].apply(lambda x:prod_store_depot_median_test(x), axis = 1)
display(str(rmsle(x_valid["prod_store_depot"], y_valid)))'''