In [None]:
import os
import json
import numpy as np
import pandas as pd
from IPython.display import display
from matplotlib import pyplot as plt
from plotly import offline as py
from plotly import graph_objs as go

In [None]:
# List the entries of the input directory; as the cell's last expression the
# resulting list is shown as the cell output.
os.listdir("../input")

In [None]:
def read_json(path):
    """Load a metadata JSON file into a DataFrame.

    The value stored under the ``'seriess'`` key is expected to be a list;
    only its first element is kept before the dictionary is handed to
    ``pd.DataFrame``.

    Parameters
    ----------
    path : str
        Location of the JSON file.

    Returns
    -------
    pandas.DataFrame
        Frame built from the loaded dictionary.

    Raises
    ------
    KeyError
        If the document has no ``'seriess'`` key.
    IndexError
        If the ``'seriess'`` list is empty.
    """
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's default encoding, which is not UTF-8 everywhere.
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Keep only the first entry of the 'seriess' list.
    data['seriess'] = data['seriess'][0]

    return pd.DataFrame(data = data)


def display_data_meta_pair(metadata, data, ref):
    """Show a metadata object and its data object under a common label.

    Prints ``ref`` as a heading, then a numbered "(1) ... metadata" section
    followed by a numbered "(2) ... data" section, rendering each object
    with ``display``.
    """
    print(ref)
    print()

    # (prefix, suffix, object to render, text printed after the object)
    sections = (
        ("(1) ", " metadata", metadata, ""),
        ("(2) ", " data", data, "\n"),
    )
    for prefix, suffix, shown, trailer in sections:
        print(prefix + ref + suffix)
        display(shown)
        print(trailer)
    
    
def display_data_list(data_list, ref_list):
    """Render each object in ``data_list`` beneath its label from ``ref_list``.

    Both sequences must have the same length; an ``Exception`` is raised
    before anything is shown when they do not.
    """
    data_count = len(data_list)
    ref_count = len(ref_list)
    if data_count != ref_count:
        raise Exception("data and ref legnth are not equal")

    for position in range(ref_count):
        print(ref_list[position])
        display(data_list[position])
        print("\n\n")

        
def data_refine(data):
    """Clean a raw series frame and index it by date.

    Steps:
      1. Drop every row that contains a missing value.
      2. If the ``value`` column is not numeric, drop rows whose value is
         the placeholder ``"."`` and convert the column to numbers.
      3. Use the parsed ``date`` column as the index.
      4. Drop the ``realtime_end``, ``realtime_start`` and ``date`` columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``date``, ``value``, ``realtime_start`` and
        ``realtime_end`` columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by datetime with the remaining columns.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    ValueError
        If a non-placeholder value cannot be parsed as a number.
    """
    # dropna returns a new frame, so the caller's frame is left untouched.
    refined = data.dropna(axis = 0, how = "any")

    # Check for "not numeric" rather than "object": np.object no longer exists
    # in current NumPy, and text columns are not always stored as object dtype.
    if not pd.api.types.is_numeric_dtype(refined["value"]):
        # Copy the filtered rows so the assignment below does not write into
        # a slice of another frame (SettingWithCopyWarning).
        refined = refined.loc[refined["value"] != "."].copy()
        refined["value"] = pd.to_numeric(refined["value"])

    # The date column becomes a datetime index.
    refined.index = pd.to_datetime(refined["date"])

    # The realtime bounds and the now-redundant date column are not needed.
    return refined.drop(["realtime_end", "realtime_start", "date"], axis = 1)


def moving_sum(value_arr, months, backward = True, average = False):
    """Moving sum or moving average over a window of ``months`` elements.

    The window for element ``i`` covers ``i`` and the ``months - 1`` elements
    before it (``backward=True``) or after it (``backward=False``). Near the
    array edge the window holds fewer than ``months`` elements:

    * ``average=True``  -> the mean of the elements actually in the window.
    * ``average=False`` -> the partial sum scaled by
      ``months / elements_in_window``, i.e. extrapolated to a full window.

    Parameters
    ----------
    value_arr : array-like
        One-dimensional values, or a 2-D array with a single row or column.
    months : int
        Window length, at least 1.
    backward : bool, default True
        Look backward (True) or forward (False) from each element.
    average : bool, default False
        Return window averages instead of window sums.

    Returns
    -------
    numpy.ndarray
        1-D array with one result per input element.

    Raises
    ------
    TypeError
        If ``months`` is not an integer.
    ValueError
        If ``months`` is smaller than 1 or ``value_arr`` has an unsupported
        shape.
    """
    # check months (bool is rejected even though it subclasses int)
    if isinstance(months, bool) or not isinstance(months, (int, np.integer)):
        raise TypeError("input months value shoud be int, given month value: ({}) {}".format(type(months), months))

    # A window shorter than one element selects nothing and would yield 0/0.
    if months < 1:
        raise ValueError("input months value must be at least 1, given month value: {}".format(months))

    # check value array shape: 1-D, or 2-D with a single row / column
    value_arr = np.asarray(value_arr)
    shape = value_arr.shape

    if len(shape) == 1 or (len(shape) == 2 and 1 in shape):
        target_arr = value_arr.reshape(-1)
    else:
        raise ValueError("value array shape error, input shape: {}".format(shape))

    n = target_arr.shape[0]

    # make sum matrix: row i holds ones at the positions inside window i
    mat = np.ones(shape = (n, n), dtype = float)

    if backward:
        # keep the diagonal and the (months - 1) sub-diagonals
        mat = np.tril(np.triu(mat, k = -(months - 1)))
    else:
        # keep the diagonal and the (months - 1) super-diagonals
        mat = np.triu(np.tril(mat, k = (months - 1)))

    # summing up: each row of the product is the sum over that window
    v = np.dot(mat, target_arr)

    # denominator term
    # if average: divide by the number of elements in each window
    # if sum: scale truncated windows up to a full `months` window
    window_sizes = mat.sum(axis = 1)
    if average:
        d = window_sizes
    else:
        d = window_sizes / months

    return v / d


def data_expand_moving(data):
    """Return a copy of ``data`` with moving sum/average columns added.

    For every combination of sum/average, backward/forward and a window of
    1, 3, 6, 9, 12 or 24 elements, ``moving_sum`` is applied to the ``value``
    column and the result is stored in a column named
    ``{sum|avg}_{backward|forward}_months_{n}``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``value`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with the 24 additional columns.
    """
    # Work on an explicit copy: data.iloc[:] is only a slice of the caller's
    # frame, and adding columns to such a slice can emit SettingWithCopyWarning.
    target_data = data.copy()
    month_value = target_data["value"].values

    for average in (False, True):
        head = "avg_" if average else "sum_"

        for backward in (True, False):
            mid = "backward_" if backward else "forward_"

            for months in (1, 3, 6, 9, 12, 24):
                col_name = head + mid + "months_{}".format(months)
                target_data[col_name] = moving_sum(
                    value_arr = month_value,
                    months = months,
                    backward = backward,
                    average = average,
                )

    return target_data

In [None]:
# Load the CSV series: three sales files, three inventories files and two
# inventories-to-sales-ratio files, in that order.
sales, sales1, sales2 = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/total-business-sales.csv",
        "../input/total-business-sales_1.csv",
        "../input/total-business-sales_2.csv",
    )
]

invs, invs1, invs2 = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/total-business-inventories.csv",
        "../input/total-business-inventories_1.csv",
        "../input/total-business-inventories_2.csv",
    )
]

invs_sales_ratio, invs_sales_ratio1 = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/total-business-inventories-to-sales-ratio.csv",
        "../input/total-business-inventories-to-sales-ratio_1.csv",
    )
]

# All CSV frames plus a display label for each, in matching order.
data = [sales, sales1, sales2, invs, invs1, invs2, invs_sales_ratio, invs_sales_ratio1]
data_refs = ["sales", "sales 1", "sales 2", "invs", "invs 1", "invs 2", "invs sales ratio", "invs sales ratio 1"]


# Load the metadata JSON that accompanies each CSV file, in the same order.
sales_meta, sales_meta1, sales_meta2 = [
    read_json(json_path)
    for json_path in (
        "../input/total-business-sales_metadata.json",
        "../input/total-business-sales_metadata_1.json",
        "../input/total-business-sales_metadata_2.json",
    )
]

invs_meta, invs_meta1, invs_meta2 = [
    read_json(json_path)
    for json_path in (
        "../input/total-business-inventories_metadata.json",
        "../input/total-business-inventories_metadata_1.json",
        "../input/total-business-inventories_metadata_2.json",
    )
]

invs_sales_ratio_meta, invs_sales_ratio_meta1 = [
    read_json(json_path)
    for json_path in (
        "../input/total-business-inventories-to-sales-ratio_metadata.json",
        "../input/total-business-inventories-to-sales-ratio_metadata_1.json",
    )
]

# All metadata frames plus their labels ("meta " + the matching data label).
metadata = [sales_meta, sales_meta1, sales_meta2, invs_meta, invs_meta1, invs_meta2, invs_sales_ratio_meta, invs_sales_ratio_meta1]
metadata_refs = ["meta " + label for label in data_refs]

In [None]:
# Cap the number of rows pandas renders per frame at 14.
# The fully qualified key is required: the short pattern "max_rows" also
# matches "styler.render.max_rows" on newer pandas, which makes set_option
# raise OptionError because the pattern is ambiguous.
pd.set_option("display.max_rows", 14)

# Show each metadata frame next to the CSV frame it describes.
display_data_meta_pair(sales_meta, sales, "sales")
display_data_meta_pair(sales_meta1, sales1, "sales 1")
display_data_meta_pair(sales_meta2, sales2, "sales 2")
print("\n\n")

display_data_meta_pair(invs_meta, invs, "invs")
display_data_meta_pair(invs_meta1, invs1, "invs 1")
display_data_meta_pair(invs_meta2, invs2, "invs 2")
print("\n\n")

display_data_meta_pair(invs_sales_ratio_meta, invs_sales_ratio, "invs sales ratio")
display_data_meta_pair(invs_sales_ratio_meta1, invs_sales_ratio1, "invs sales ratio 1")

In [None]:
# Show every metadata frame in turn, each preceded by its "meta ..." label.
display_data_list(metadata, metadata_refs)

In [None]:
# Print the column dtypes of four of the raw CSV frames, with a starred rule
# and a blank line between consecutive reports (none after the last one).
dtype_reports = [
    ("sales dtype", sales),
    ("sales 1 dtype", sales1),
    ("invs dtype", invs),
    ("invs 1 dtype", invs1),
]
final_position = len(dtype_reports) - 1

for position, (heading, frame) in enumerate(dtype_reports):
    print(heading)
    print(frame.dtypes)
    if position < final_position:
        print("*************************")
        print()

In [None]:
# Build the analysis frames from the raw CSV frames, under new names:
#   sales / invs   -> *_sa_n
#   sales1 / invs1 -> *_sa_y
# (presumably "seasonally adjusted: no / yes" -- confirm against the metadata)
#
# data_refine drops the realtime_end & realtime_start columns and sets the
# date as index; data_expand_moving then adds the forward/backward
# sum/average of n-months columns.
#
# The frames with units of ratio are not used, because they are rounded.

sales_sa_n = data_expand_moving(data_refine(sales))
sales_sa_y = data_expand_moving(data_refine(sales1))

invs_sa_n = data_expand_moving(data_refine(invs))
invs_sa_y = data_expand_moving(data_refine(invs1))

In [None]:
# Plot the columns of sales_sa_n (the value column plus the moving
# sum/average columns added above) with a figsize of (25, 15).
sales_sa_n.plot(figsize = (25, 15))