In [9]:
import pandas as pd
import numpy as np 
import datetime
inventory = pd.read_csv("data/inventory.csv")
products = pd.read_csv("data/products_ext.csv")
promotions = pd.read_csv("data/promotions.csv")
transactions = pd.read_csv("data/transactions.csv")

#create a reference dataframe
dayofyear = list(range(365))
date = [datetime.datetime(2018, 1, 1) + datetime.timedelta(day) for day in dayofyear]
df_date = pd.DataFrame({"day":dayofyear, "date":date})

import ast
for colname in inventory.columns[2:]:
    inventory[colname] = [ast.literal_eval(i) for i in inventory[colname]]
    #converting inventory rows to list of tuples from string

In [10]:
products.head()

Unnamed: 0,category,product_id,description,size,std_sales_price,bio,basic,purchase_price
0,vegetable,v_1,Komkommer,1st,0.69,0,0,0.18
1,vegetable,v_2,Courgette,1st,0.75,0,0,0.17
2,vegetable,v_3,SnoepVegetable tomaat,500g,1.99,0,0,0.5
3,vegetable,v_4,Paprika Mix,3st,1.79,0,0,0.4
4,vegetable,v_5,Paprika,1st,0.99,0,0,0.23


In [11]:
inventory[['day', 'before or after delivery','Blauwe bessen']].head()

Unnamed: 0,day,before or after delivery,Blauwe bessen
0,0,before,[]
1,0,after,"[(20, 50)]"
2,3,before,"[(20, 14)]"
3,3,after,"[(20, 14), (23, 50)]"
4,7,before,"[(23, 32)]"


In [12]:
product = 'Biologische knoflook' #select the product to analyze
transactions[transactions["description"] == product] #transactions for that product

Unnamed: 0,day,time,customer,bank acount,category,product_id,description,size,std_sales_price,purchase_price,bio,basic
370,1/1/2018,13:52:41,23.0,53801240.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.48,1.0,0.0
504,1/1/2018,14:41:10,28.0,29159112.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.48,1.0,0.0
631,1/1/2018,16:13:24,37.0,77943433.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.48,1.0,0.0
644,1/1/2018,16:13:24,37.0,77943433.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.48,1.0,0.0
869,1/1/2018,18:41:40,48.0,,vegetable,v_14,Biologische knoflook,100g,0.69,0.48,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
352607,28-12-2018,13:58:45,20567.0,80448895.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.69,1.0,0.0
353181,29-12-2018,11:20:57,20599.0,13185251.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.69,1.0,0.0
353411,29-12-2018,13:54:53,20612.0,62257065.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.69,1.0,0.0
353940,30-12-2018,9:14:20,20641.0,423613.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.69,1.0,0.0


In [13]:
def date_converter(row):
    converted_date = datetime.datetime(2018, 1, 1) + datetime.timedelta(row["day"])
    return converted_date

inventory["date"] = inventory.apply(date_converter, axis = 1) #convert day of year to actual timestamp

In [14]:
def week_of_year(row):
    week_nr = datetime.date(row["date"].year, row["date"].month, row["date"].day).isocalendar()[1]
    return week_nr

inventory["week"] = inventory.apply(week_of_year, axis = 1)
#add nr week of year as an extra column to inventory

In [15]:
#only keep relevant columns in inventory
filter_inventory = inventory[["day", "before or after delivery", "date", "week", product]]
#filters promotion table to only contain discounts for chosen product
promotions_filter = promotions[promotions["description"] == product]

def check_discount(row):
    if row["week"] in promotions_filter["week"].values:
        discount_amount = promotions_filter[promotions_filter["week"] == row["week"]]["discount"]
        return discount_amount.values[0]
    else:
        return 0
#adds extra column to filter inventory containing discount amount
filter_inventory["discount"] = filter_inventory.apply(check_discount, axis = 1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [16]:
def total_inv(row):
    total = 0
    if len(row[product]) > 0:
        for batch in row[product]:
            total += batch[1]
    return total
#adds total nr items in inventory for the filtered dataframe
filter_inventory["total inventory"] = filter_inventory.apply(total_inv, axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [17]:
best_before = []
for element_list in filter_inventory[product]:
    for batch in element_list:
        best_before.append(batch[0])
bb_dates = np.unique(best_before)
#list of each unique best before dates
bb_dates

array([ 14,  17,  21,  24,  28,  31,  35,  38,  42,  45,  49,  52,  56,
        59,  63,  66,  70,  73,  77,  80,  84,  87,  91,  94,  98, 101,
       105, 108, 112, 115, 119, 122, 126, 129, 133, 136, 140, 143, 147,
       150, 154, 157, 161, 164, 168, 171, 175, 178, 182, 185, 189, 192,
       196, 199, 203, 206, 210, 213, 217, 220, 224, 227, 231, 234, 238,
       241, 245, 248, 252, 255, 259, 262, 266, 269, 273, 276, 280, 283,
       287, 290, 294, 297, 301, 304, 308, 311, 315, 318, 322, 325, 329,
       332, 336, 339, 343, 346, 350, 353, 357, 360, 364, 367, 371, 374])

In [18]:
df_waste = pd.DataFrame({"best before":bb_dates})
df_waste #build initial df with points where waste can occur

Unnamed: 0,best before
0,14
1,17
2,21
3,24
4,28
...,...
99,360
100,364
101,367
102,371


In [19]:
def input_inv(row):
    for element_list in filter_inventory[product]:
        if len(element_list) > 0:
            for batch in element_list:
                if batch[0] == row["best before"]:
                    return batch[1]

df_waste["amount"] = df_waste.apply(input_inv, axis = 1)
df_waste #amount that was received with particular expiry date

Unnamed: 0,best before,amount
0,14,30
1,17,30
2,21,25
3,24,25
4,28,25
...,...,...
99,360,25
100,364,25
101,367,25
102,371,25


In [20]:
df_waste = df_waste[df_waste["best before"] < 365] #only keep rows where expiry date is in same year
df_waste

Unnamed: 0,best before,amount
0,14,30
1,17,30
2,21,25
3,24,25
4,28,25
...,...,...
96,350,25
97,353,25
98,357,25
99,360,25


In [21]:
transactions["day"] = pd.to_datetime(transactions["day"])
filter_transactions = transactions[transactions["description"] == product]
#converts date then filters transactions to contain selected product purchases

first_restock = df_date[df_date["day"] == 3]["date"].values[0]
#first_restock check purchases mde before the first restock
filter_transactions[filter_transactions["day"] < first_restock]
#PROBLEM: IF A PRODUCT HASN'T BEEN BOUGHT IN ONE DAY IT HAS A ZERO, CAUSES AN ERROR FEW CELLS 

Unnamed: 0,day,time,customer,bank acount,category,product_id,description,size,std_sales_price,purchase_price,bio,basic
370,2018-01-01,13:52:41,23.0,53801240.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.48,1.0,0.0
504,2018-01-01,14:41:10,28.0,29159112.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.48,1.0,0.0
631,2018-01-01,16:13:24,37.0,77943433.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.48,1.0,0.0
644,2018-01-01,16:13:24,37.0,77943433.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.48,1.0,0.0
869,2018-01-01,18:41:40,48.0,,vegetable,v_14,Biologische knoflook,100g,0.69,0.48,1.0,0.0
1050,2018-01-01,19:59:10,58.0,53245671.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.48,1.0,0.0
55935,2018-01-03,9:22:31,3326.0,32342952.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.69,1.0,0.0
55943,2018-01-03,9:22:31,3326.0,32342952.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.69,1.0,0.0
56088,2018-01-03,11:30:50,3333.0,73458689.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.69,1.0,0.0
56303,2018-01-03,14:02:46,3348.0,33780213.0,vegetable,v_14,Biologische knoflook,100g,0.69,0.69,1.0,0.0


In [22]:
#cumulative sum of purchases from that product
purchases_per_day = filter_transactions.groupby("day").count()["product_id"].cumsum()
#purchases made per day from that product
purchases_per_day_plotting = filter_transactions.groupby("day").count()["product_id"]
#idk know what is this
purchases_per_day_plotting.index = purchases_per_day_plotting.index.dayofyear/7
purchases_per_day_plotting

day
0.142857     6
0.428571     6
0.571429     1
0.714286     6
0.857143     5
            ..
51.428571    7
51.571429    1
51.714286    4
51.857143    2
52.000000    2
Name: product_id, Length: 348, dtype: int64

In [23]:
purchases_per_day.index[0].year
weeknr = []
for ind in purchases_per_day.index:
    week = datetime.date(ind.year, ind.month, ind.day).isocalendar()[1]
    weeknr.append(week)
#week number for each purchase day


In [24]:
df_waste.drop(index=df_waste.tail(9).index, inplace=True)
#drop the last rows not to cause index issues
#STILL NEED TO BE FIXED
df_waste

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,best before,amount
0,14,30
1,17,30
2,21,25
3,24,25
4,28,25
...,...,...
87,318,25
88,322,25
89,325,25
90,329,25


In [25]:
df_waste["purchases"] = [purchases_per_day[i-1] for i in df_waste["best before"]]
#add cumulative sum of purchases up until that day (based on expiry dates)
df_waste

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,best before,amount,purchases
0,14,30,70
1,17,30,79
2,21,25,90
3,24,25,102
4,28,25,121
...,...,...,...
87,318,25,1307
88,322,25,1324
89,325,25,1330
90,329,25,1350


In [26]:
df_waste["prev purchases"] = df_waste["purchases"].diff()
df_waste #difference in purchases since a previous batch of product expired

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,best before,amount,purchases,prev purchases
0,14,30,70,
1,17,30,79,9.0
2,21,25,90,11.0
3,24,25,102,12.0
4,28,25,121,19.0
...,...,...,...,...
87,318,25,1307,10.0
88,322,25,1324,17.0
89,325,25,1330,6.0
90,329,25,1350,20.0


In [30]:
length = df_waste.shape[0]
lst_waste = [] #nr rows in waste dataframe
#amount left from the item
df_waste["remaining"] = df_waste["amount"] - df_waste["prev purchases"] 
#the first row is different as there is no prev pruchases available
df_waste["remaining"].iloc[0] = df_waste["amount"].iloc[0] - df_waste["purchases"].iloc[0]

for i in range(length):
    #iterates through the rows of the waste dataframe
    current_row = df_waste.iloc[i]
    if i == 0:
        #
        lst_waste.append(current_row["remaining"])
        continue     
    else:
        prev_row = df_waste.iloc[i-1] #previous row in dataframe
        if lst_waste[-1] < 0:
            #no idea whats happening here, I've had enough
            waste = lst_waste[-1] + current_row["remaining"]
            lst_waste.append(lst_waste[-1] + current_row["remaining"])
        else:
            lst_waste.append(current_row["remaining"])
    
df_waste["waste"] = lst_waste
def date_converter(row):
    converted_date = datetime.datetime(2018, 1, 1) + datetime.timedelta(row["best before"])
    return converted_date
purchase_price = products[products['description'] == product]['purchase_price']
df_waste["date"] = df_waste.apply(date_converter, axis = 1)
df_waste["week"] = df_waste.apply(week_of_year, axis = 1)
df_waste[df_waste["waste"] < 0]["waste"] = 0
df_waste['weekPrice'] = [float(products[products['description'] == product]['std_sales_price'])] * len(df_waste)
#df_waste.set_index('week', inplace = True)
#print(df_waste.head())
for week_ind in list(promotions[promotions['description'] == product].index):
    week = promotions.at[week_ind, 'week']
    print(week_ind)
    # print(df_waste[['week']].dtypes)
    if week in list(np.unique(df_waste.week)):
        print(week)
        df_waste.loc[df_waste[df_waste['week'] == week]['weekPrice'].index, 'weekPrice'] = (1 - (promotions.at[week_ind, 'discount'] / 100)) * df_waste.loc[df_waste[df_waste['week'] == week]['weekPrice'].index, 'weekPrice']
        #break
df_waste['lost_Revenue'] = df_waste.waste * df_waste.weekPrice
df_waste['purc_price'] = [float(purchase_price)] * len(df_waste)
df_waste['lost_profit'] = df_waste['waste'] * (df_waste['weekPrice'] - df_waste['purc_price'])
df_waste['cost_wasted'] = df_waste['waste'] * df_waste['purc_price']
df_waste.head(50)

0
107
8
260
18
388
26
701
47
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/u

Unnamed: 0,best before,amount,purchases,prev purchases,remaining,waste,date,week,weekPrice,lost_Revenue,lost_profit,cost_wasted,purc_price
0,14,30,70,,-40.0,-40.0,2018-01-15,3,0.69,-27.6,-21.6,-6.0,0.15
1,17,30,79,9.0,21.0,-19.0,2018-01-18,3,0.69,-13.11,-10.26,-2.85,0.15
2,21,25,90,11.0,14.0,-5.0,2018-01-22,4,0.69,-3.45,-2.7,-0.75,0.15
3,24,25,102,12.0,13.0,8.0,2018-01-25,4,0.69,5.52,4.32,1.2,0.15
4,28,25,121,19.0,6.0,6.0,2018-01-29,5,0.69,4.14,3.24,0.9,0.15
5,31,25,133,12.0,13.0,13.0,2018-02-01,5,0.69,8.97,7.02,1.95,0.15
6,35,25,155,22.0,3.0,3.0,2018-02-05,6,0.69,2.07,1.62,0.45,0.15
7,38,25,164,9.0,16.0,16.0,2018-02-08,6,0.69,11.04,8.64,2.4,0.15
8,42,25,173,9.0,16.0,16.0,2018-02-12,7,0.69,11.04,8.64,2.4,0.15
9,45,25,178,5.0,20.0,20.0,2018-02-15,7,0.69,13.8,10.8,3.0,0.15


In [232]:
#promotions[(promotions['description'] == product) & (promotions['week'] == week)]['discount']
#promotions[promotions['description'] == product]['week']
promotions[(promotions['description'] == product) ]

Unnamed: 0,week,category,product_id,description,discount
0,1,vegetable,v_14,Biologische knoflook,30
107,8,vegetable,v_14,Biologische knoflook,15
260,18,vegetable,v_14,Biologische knoflook,5
388,26,vegetable,v_14,Biologische knoflook,30
701,47,vegetable,v_14,Biologische knoflook,25


In [32]:
df_waste[df_waste['waste']>0][['waste', 'lost_Revenue', 'lost_profit', 'cost_wasted']].sum()

waste           897.000
lost_Revenue    612.168
lost_profit     477.618
cost_wasted     134.550
dtype: float64

In [200]:
print('The number of products wasted in the dataset for {} is {} items'.format(product, df_waste[df_waste['waste']>0]['waste'].sum()))

The number of products wasted in the dataset for Biologische knoflook is 897.0 items


In [131]:
df_waste.loc[:,:].where(df_waste.loc[:,'waste']>0)

Unnamed: 0,best before,amount,purchases,prev purchases,remaining,waste,date,week
0,,,,,,,NaT,
1,,,,,,,NaT,
2,,,,,,,NaT,
3,,,,,,,NaT,
4,28.0,25.0,113.0,9.0,16.0,12.0,2018-01-29,5.0
...,...,...,...,...,...,...,...,...
87,318.0,25.0,1236.0,10.0,15.0,15.0,2018-11-15,46.0
88,322.0,25.0,1252.0,16.0,9.0,9.0,2018-11-19,47.0
89,325.0,25.0,1261.0,9.0,16.0,16.0,2018-11-22,47.0
90,329.0,25.0,1271.0,10.0,15.0,15.0,2018-11-26,48.0


In [132]:
df_waste[df_waste['waste']>0]['waste'].sum()

985.0

In [31]:
transactions[transactions['description'] == 'Biologische prei'].groupby('day').count().iloc[0:14, :]

Unnamed: 0_level_0,time,customer,bank acount,category,product_id,description,size,std_sales_price,purchase_price,bio,basic
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-01-01,2,2,1,2,2,2,2,2,2,2,2
2018-01-02,1,1,0,1,1,1,1,1,1,1,1
2018-01-03,3,3,3,3,3,3,3,3,3,3,3
2018-01-04,9,9,9,9,9,9,9,9,9,9,9
2018-01-05,4,4,4,4,4,4,4,4,4,4,4
2018-01-06,3,3,1,3,3,3,3,3,3,3,3
2018-01-07,7,7,4,7,7,7,7,7,7,7,7
2018-01-08,5,5,5,5,5,5,5,5,5,5,5
2018-01-09,5,5,5,5,5,5,5,5,5,5,5
2018-01-10,1,1,1,1,1,1,1,1,1,1,1


In [131]:
df_waste["waste nn"] = [i if i > 0 else 0 for i in df_waste["waste"]]
#takes nonnegative values which is counted as waste
waste_group = df_waste.groupby("week").sum()
waste_group["weekly purchases"] = waste_group["purchases"].diff()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



## `df_waste` is prepared for plotting

In [132]:
waste_group

Unnamed: 0_level_0,best before,amount,purchases,prev purchases,remaining,waste,waste nn,weekly purchases
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3,31,52,119,5.0,-10.0,-41.0,0.0,
4,45,52,179,36.0,16.0,3.0,6.0,60.0
5,59,52,238,25.0,27.0,27.0,27.0,59.0
6,73,52,283,23.0,29.0,29.0,29.0,45.0
7,87,52,327,24.0,28.0,28.0,28.0,44.0
8,101,52,383,24.0,28.0,28.0,28.0,56.0
9,115,52,430,26.0,26.0,26.0,26.0,47.0
10,129,52,492,31.0,21.0,21.0,21.0,62.0
11,143,52,537,21.0,31.0,31.0,31.0,45.0
12,157,52,589,33.0,19.0,19.0,19.0,52.0


In [133]:
waste_group["week"] = waste_group.index

In [134]:
import plotly.express as px
fig = px.line(waste_group, x = "week", y = "waste nn", title="Absolute waste of {} per week".format(product))
fig.add_scatter(x=waste_group["week"], y=waste_group["amount"], mode="lines")
fig.add_scatter(x=waste_group["week"], y=waste_group["weekly purchases"], mode="lines")
fig.update_yaxes(title="Waste amount")
fig.show()

In [112]:
purchases_per_day

day
2018-01-01       8
2018-01-02      13
2018-01-03      18
2018-01-04      30
2018-01-05      40
              ... 
2018-12-26    2913
2018-12-27    2918
2018-12-28    2921
2018-12-29    2934
2018-12-30    2943
Name: product_id, Length: 364, dtype: int64

In [71]:
transactions["day"] = pd.to_datetime(transactions["day"])
filter_transactions = transactions[transactions["description"] == product]

In [86]:
daydate = df_date[df_date["day"] == 2]["date"].values[0]

filter_transactions[filter_transactions["day"] <= daydate]


Unnamed: 0,day,time,customer,bank acount,category,product_id,description,size,std_sales_price,purchase_price,bio,basic
89,2018-01-01,10:40:13,6.0,54211938.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
467,2018-01-01,14:21:35,26.0,51433968.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
573,2018-01-01,15:36:46,33.0,30794082.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
700,2018-01-01,16:56:20,39.0,15832817.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
808,2018-01-01,18:15:29,46.0,90399261.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
821,2018-01-01,18:15:29,46.0,90399261.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
824,2018-01-01,18:15:29,46.0,90399261.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
879,2018-01-01,18:46:48,49.0,15382238.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
29093,2018-01-02,12:52:06,1714.0,28719528.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0
29134,2018-01-02,13:44:14,1718.0,39309224.0,vegetable,v_1,Komkommer,1st,0.69,0.69,0.0,0.0


In [111]:
purchases_per_day = filter_transactions.groupby("day").count()["product_id"].cumsum()
purchases_per_day

day
2018-01-01       8
2018-01-02      13
2018-01-03      18
2018-01-04      30
2018-01-05      40
              ... 
2018-12-26    2913
2018-12-27    2918
2018-12-28    2921
2018-12-29    2934
2018-12-30    2943
Name: product_id, Length: 364, dtype: int64

In [52]:
daydate

numpy.datetime64('2018-01-15T00:00:00.000000000')

In [15]:
dayofyear = list(range(365))
date = [datetime.datetime(2018, 1, 1) + datetime.timedelta(day) for day in dayofyear]
df_date = pd.DataFrame({"day":dayofyear, "date":date})