In [7]:
import pandas as pd
import pandas as pd
from datetime import date
from dateutil.relativedelta import relativedelta


In [23]:
sales_file = pd.read_csv('data/sales_data_aaa.csv', delimiter=';')
stock_file = pd.read_csv('data/stock_data_aaa.csv', delimiter=';')

# VARIABLES
predict_month = 12

# FORMATS
date_format = '%y/%m'  # date formatting

# LOCAL VARIABLES
# date_format = variables.date_format
# predict_month = variables.predict_month
start_date = date.today() - relativedelta(years=1)
end_date = start_date + relativedelta(months=predict_month + 1) - relativedelta(days=1)
date_start_txt = start_date.strftime(date_format)
date_end_txt = end_date.strftime(date_format)

In [31]:
selected_supplier = 'ACME Corp'

In [32]:
def cleanup_sales_stock_data(sales_file_d, start_date_d, end_date_d, supplier_d, stocks_file, date_format_d):
    # change type of date column
    sales_file_d['datatempo'] = pd.to_datetime(sales_file_d['datatempo']).dt.date

    # supplier filtering
    sales_file_d = sales_file_d[(sales_file_d['supplier'] == supplier_d)]

    # filter data for past year
    sales_file_d = sales_file_d[(sales_file_d['datatempo'] >= start_date_d) & (sales_file_d['datatempo'] <= end_date_d)]

    # create marker for year and month only
    sales_file_d['dateF'] = sales_file_d.apply(lambda x: x['datatempo'].strftime(date_format_d), axis=1)

    # total sales
    sales_file_d['total'] = sales_file_d['qtt'] * sales_file_d['unit_price']

    # supplier filtering on stocks
    stock_file_d = stocks_file[(stocks_file['suppliers'] == supplier_d)]
    
    # cleanup stock data
    stock_file_d = stock_file_d[['ref', 'design', 'stock', 'unit_price']]

    return sales_file_d, stock_file_d

sales, stock = cleanup_sales_stock_data(sales_file, start_date, end_date, selected_supplier, stock_file,  date_format)
stock

Unnamed: 0,ref,design,stock,unit_price
6,41124,Crunch Master Cracker,97.0,3
7,41126,Blue Diamond Almond Crackers,0.0,3
10,41131,Fairlife Fat Free Milk,20.0,4
11,41132,Califia Creamer Hazelnut,11.0,6
14,41136,Almond Breeze Unsweetened Vanilla,4.0,9
15,41137,Califia Almond Milk Unsweetened,6.0,9
26,41148,Clearly Organic Almond Butter,0.0,19
28,41150,Eating Well Gnocci with Vegetables,7.0,15
36,41163,Chilly Cow Frozen Bars,15.0,18
37,41166,Breyers Delight Raspberry,16.0,1


In [33]:
def prep_data_for_main_table(sales_file_d, predict_month_d):
    # only carry necessary cols
    sales_file_d = sales_file_d[['dateF', 'ref', 'design', 'qtt']]

    # create pivot table
    sales_data_d = sales_file_d.pivot_table(index=['ref', 'design'], columns='dateF', values='qtt',
                                            aggfunc='sum').reset_index().rename_axis(None, axis=1)

    # turn NaN to 0
    sales_data_d.loc[:, :] = sales_data_d.loc[:, :].fillna(0)

    # calculate time period sales
    name_of_col_f = 'sales_' + str(predict_month_d) + '_months'

    # sum all columns except the current months sales
    sales_data_d[name_of_col_f] = sales_data_d[[col for col in sales_data_d.columns
                                                if (col.startswith('2'))]].sum(axis=1)

    return sales_data_d, name_of_col_f

sales_data, name_of_col = prep_data_for_main_table(sales, predict_month)
sales_data

Unnamed: 0,ref,design,22/11,22/12,23/01,23/02,23/03,23/04,23/05,23/06,23/07,23/09,23/10,sales_12_months
0,41124,Crunch Master Cracker,0.0,1.0,26.0,9.0,4.0,0.0,0.0,0.0,2.0,11.0,19.0,72.0
1,41131,Fairlife Fat Free Milk,0.0,3.0,3.0,5.0,0.0,0.0,0.0,0.0,7.0,6.0,3.0,27.0
2,41132,Califia Creamer Hazelnut,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
3,41136,Almond Breeze Unsweetened Vanilla,0.0,12.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0
4,41137,Califia Almond Milk Unsweetened,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0
5,41163,Chilly Cow Frozen Bars,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0
6,41166,Breyers Delight Raspberry,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,7.0
7,41168,Bluebell No Sugar Added Low Fat Ice Cream,10.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,27.0
8,41180,Bumblebee Tuna Salad Ready to Eat,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0
9,41195,Chicken of the Sea Salmon Pouch,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0


In [34]:
# merge stocks with sales
def merge_stocks_sales(sales_data_d, stock_clean_d, name_of_col_d):
    # join sales table with stocks
    sales_data_d['ref'] = sales_data_d['ref'].apply(lambda x: x.strip())
    stock_clean_d['ref'] = stock_clean_d['ref'].apply(lambda x: x.strip())
    merged_stocks_sales_d = pd.merge(sales_data_d, stock_clean_d, on='ref', how='right')

    # drop useless columns
    merged_stocks_sales_d = merged_stocks_sales_d.drop(['design_x'], axis=1)

    # fill NA
    merged_stocks_sales_d.loc[:, :] = merged_stocks_sales_d.loc[:, :].fillna(0)

    # filter to products that had sales
    merged_stocks_sales_d = merged_stocks_sales_d[(merged_stocks_sales_d['sales_12_months'] > 0)]

    # calculate sales / stock ratio
    merged_stocks_sales_d['ratio'] = (merged_stocks_sales_d['stock'] / merged_stocks_sales_d[name_of_col_d]).round(2)
    
    # correct for months without sales for selected supplier
    columns_list = []
    for col in merged_stocks_sales_d.columns:
        if "2" in col:
            merged_stocks_sales_d.append({"name": ['20' + col[:2], col[-3:-1]], "id": col})
        else:
            merged_stocks_sales_d.append({"name": ['', col], "id": col})

    return merged_stocks_sales_d

merged_data = merge_stocks_sales(sales_data, stock, name_of_col)
merged_data

Unnamed: 0,ref,22/11,22/12,23/01,23/02,23/03,23/04,23/05,23/06,23/07,23/09,23/10,sales_12_months,design_y,stock,unit_price,ratio
0,41124,0.0,1.0,26.0,9.0,4.0,0.0,0.0,0.0,2.0,11.0,19.0,72.0,Crunch Master Cracker,97.0,3,1.35
2,41131,0.0,3.0,3.0,5.0,0.0,0.0,0.0,0.0,7.0,6.0,3.0,27.0,Fairlife Fat Free Milk,20.0,4,0.74
3,41132,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,Califia Creamer Hazelnut,11.0,6,5.5
4,41136,0.0,12.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,Almond Breeze Unsweetened Vanilla,4.0,9,0.31
5,41137,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,Califia Almond Milk Unsweetened,6.0,9,3.0
8,41163,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,Chilly Cow Frozen Bars,15.0,18,3.75
9,41166,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,7.0,Breyers Delight Raspberry,16.0,1,2.29
11,41168,10.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,27.0,Bluebell No Sugar Added Low Fat Ice Cream,45.0,19,1.67
12,41180,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,Bumblebee Tuna Salad Ready to Eat,7.0,10,2.33
14,41195,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,Chicken of the Sea Salmon Pouch,5.0,24,2.5


In [94]:
# correct for months without sales for selected supplier
names= merged_data.columns
first_col = start_date.strftime(date_format)
col_check=[first_col]
value = ''

print(type(str(int(col_check[0][-2:]))))

for i in range(12):
    # value to append
    if int(col_check[i-1][-1:]) == 12:  
        value = str(int(col_check[i-1][:2]) + 1) + str('/01')
    else:
        value = (str(col_check[i-1][:3])) + str(int(col_check[i-1][-2:]) + 1)
    
    col_check.append(value)

cycle = 0

for i in names:
    if i.startswith('2') and not i.endswith('e'):
        cycle+=1
#     if cycle !=12:
        
print(first_col)
print(names)
print(col_check)

<class 'str'>
22/11
Index(['ref', '22/11', '22/12', '23/01', '23/02', '23/03', '23/04', '23/05',
       '23/06', '23/07', '23/09', '23/10', 'sales_12_months', 'design_y',
       'stock', 'unit_price', 'ratio', '23/11e'],
      dtype='object')
['22/11', '22/12', '22/12', '22/13', '22/13', '22/14', '22/14', '22/15', '22/15', '22/16', '22/16', '22/17', '22/17']


In [48]:
# create predictions
def sales_predictions(final_df_d, date_start_txt_d, predict_month_d, date_format_d):
    # create prediction for end of current month
    try:
        final_df_d[(date.today().strftime(date_format_d) + 'e')] = final_df_d[date.today().strftime(date_format_d)] + \
                                                                 final_df_d[date_start_txt_d]
    except:
        try:
            final_df_d[(date.today().strftime(date_format_d) + 'e')] = final_df_d[date.today().strftime(date_format_d)]
        except:
            final_df_d[(date.today().strftime(date_format_d) + 'e')] = final_df_d[date_start_txt_d]

#     # drop current sales of this month
#     try:
#         final_df_d = final_df_d.drop([date.today().strftime(date_format_d)], axis=1)
#     except:
#         print("sales_predictions: drop sales for the current month - no sales for this month")

#     # correct first month
#     try:
#         final_df_d[date_start_txt_d] = final_df_d[date_start_txt_d] + final_df_d[(date_start_txt_d + 'e')]
#         # drop remaining of the first month
#         final_df_d = final_df_d.drop([(date_start_txt_d + 'e')], axis=1)
#     except:
#         final_df_d = final_df_d.rename(columns={(date_start_txt_d + 'e'): date_start_txt_d})

#     # start creating stock prediction
#     final_df_d[(date.today().strftime(date_format_d) + 'e')] = final_df_d['stock'] - final_df_d[
#         (date.today().strftime(date_format_d) + 'e')]

    # create month variables
    date_month_pred = date.today().replace(day=1) + relativedelta(months=1)

#     # create prediction months for stocks
#     for i in range(1, predict_month_d):
#         date_pred = date_month_pred + relativedelta(months=i - 1)
#         date_previous = date_month_pred + relativedelta(months=i - 2)
#         month_corresp = date_month_pred + relativedelta(months=i - 13)
#         col_name = (date_pred.strftime(date_format_d) + 'e')
#         col_name_anterior = (date_previous.strftime(date_format_d) + 'e')
#         col_name_mes_corresp = month_corresp.strftime(date_format_d)

#         final_df_d[col_name] = final_df_d[col_name_anterior] - final_df_d[col_name_mes_corresp]

#     final_df_d = final_df_d.sort_values(by=['ref'])

#     # move column 'design' and 'ref' to the beginning
#     final_df_d = final_df_d[['design_y'] + [col for col in final_df_d.columns if col != 'design_y']]
#     final_df_d = final_df_d[['ref'] + [col for col in final_df_d.columns if col != 'ref']]

#     # create list of dates on the table
#     col_list = []
#     for col in final_df_d.columns:
#         if ("2" in col) & ("e" not in col):
#             col_list.append(col)

#     # Drop historical sales months
#     final_df_d = final_df_d.drop(col_list, axis=1)

#     # rename columns
#     final_df_d = final_df_d.rename(columns={"design_y": "name", "sales_12_months": "sales", "ref": "code"})

    return final_df_d

final_table = sales_predictions(merged_data, date_start_txt, predict_month, date_format)
final_table

Unnamed: 0,ref,22/11,22/12,23/01,23/02,23/03,23/04,23/05,23/06,23/07,23/09,23/10,sales_12_months,design_y,stock,unit_price,ratio,23/11e
0,41124,0.0,1.0,26.0,9.0,4.0,0.0,0.0,0.0,2.0,11.0,19.0,72.0,Crunch Master Cracker,97.0,3,1.35,0.0
2,41131,0.0,3.0,3.0,5.0,0.0,0.0,0.0,0.0,7.0,6.0,3.0,27.0,Fairlife Fat Free Milk,20.0,4,0.74,0.0
3,41132,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,Califia Creamer Hazelnut,11.0,6,5.5,0.0
4,41136,0.0,12.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,Almond Breeze Unsweetened Vanilla,4.0,9,0.31,0.0
5,41137,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,Califia Almond Milk Unsweetened,6.0,9,3.0,0.0
8,41163,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0,Chilly Cow Frozen Bars,15.0,18,3.75,3.0
9,41166,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,7.0,Breyers Delight Raspberry,16.0,1,2.29,0.0
11,41168,10.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,27.0,Bluebell No Sugar Added Low Fat Ice Cream,45.0,19,1.67,10.0
12,41180,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,Bumblebee Tuna Salad Ready to Eat,7.0,10,2.33,0.0
14,41195,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,Chicken of the Sea Salmon Pouch,5.0,24,2.5,0.0


In [49]:
sales_data

Unnamed: 0,ref,design,22/11,22/12,23/01,23/02,23/03,23/04,23/05,23/06,23/07,23/09,23/10,sales_12_months
0,41124,Crunch Master Cracker,0.0,1.0,26.0,9.0,4.0,0.0,0.0,0.0,2.0,11.0,19.0,72.0
1,41131,Fairlife Fat Free Milk,0.0,3.0,3.0,5.0,0.0,0.0,0.0,0.0,7.0,6.0,3.0,27.0
2,41132,Califia Creamer Hazelnut,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
3,41136,Almond Breeze Unsweetened Vanilla,0.0,12.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0
4,41137,Califia Almond Milk Unsweetened,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0
5,41163,Chilly Cow Frozen Bars,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.0
6,41166,Breyers Delight Raspberry,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,7.0
7,41168,Bluebell No Sugar Added Low Fat Ice Cream,10.0,1.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,27.0
8,41180,Bumblebee Tuna Salad Ready to Eat,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0
9,41195,Chicken of the Sea Salmon Pouch,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
