# Lokad_Purchase Orders

### Reading CSV file

In [1]:
%matplotlib notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

purchase_data = pd.read_csv('Lokad_PurchaseOrders.csv')
purchase_data

Unnamed: 0,Id,Loc,Ref,DeliveryDate,Quantity,Received,NetAmount,Supplier,PONumber,Date,Currency
0,15641711/Los Angeles,Los Angeles,15641711,6/22/2016,50,50,164.999998,FHL,PO-200193,4/20/2016,USD
1,15641711/Los Angeles,Los Angeles,15641711,7/29/2016,47,47,155.099998,FHL,PO-200243,7/26/2016,USD
2,16075571/Los Angeles,Los Angeles,16075571,6/22/2016,100,100,300.000000,Logipro,PO-200193,4/20/2016,GBP
3,16075571/Los Angeles,Los Angeles,16075571,7/29/2016,300,300,900.000000,Logipro,PO-200243,7/26/2016,GBP
4,16075571/Los Angeles,Los Angeles,16075571,1/1/2001,121,0,363.000000,Logipro,PO-200271,8/24/2016,GBP
5,14759689/Los Angeles,Los Angeles,14759689,1/1/2001,103,0,336.809998,Drecom,PO-200271,8/24/2016,GBP
6,14095459/Los Angeles,Los Angeles,14095459,1/1/2001,100,0,4654.999920,Drecom,PO-200225,8/6/2016,USD
7,15586718/Los Angeles,Los Angeles,15586718,1/1/2001,200,0,11400.000000,Drecom,PO-200225,8/6/2016,GBP
8,10095232/Los Angeles,Los Angeles,10095232,5/8/2015,150,150,4002.000045,Office First,102196,5/27/2015,EUR
9,10095232/Los Angeles,Los Angeles,10095232,6/21/2016,100,100,2668.000030,Office First,PO-200069,11/25/2015,EUR


### Code to verify the Data types for purchase_data

In [2]:
purchase_data.dtypes

Id               object
Loc              object
Ref               int64
DeliveryDate     object
Quantity          int64
Received          int64
NetAmount       float64
Supplier         object
PONumber         object
Date             object
Currency         object
dtype: object

## Supply Delay

In [3]:
purchase_data['Date'] = pd.to_datetime(purchase_data['Date'])
purchase_data['DeliveryDate'] = pd.to_datetime(purchase_data['DeliveryDate'])

purchase_data.dtypes

Id                      object
Loc                     object
Ref                      int64
DeliveryDate    datetime64[ns]
Quantity                 int64
Received                 int64
NetAmount              float64
Supplier                object
PONumber                object
Date            datetime64[ns]
Currency                object
dtype: object

### Lead_Time = SupplyDelay + Constant(Reorder delay)
### Reorder delay is kept constant as it depends on the business agreement with the supplier. For our case right now taken reorder as zero

In [4]:
## supply_delay: is the delay in the delivery of the consignment(calculated in days)
## For the supply delay calculation we are going to calculate the difference between the Date column of Lokad_PurchaseOrders and 
## DeliveryDate of the same 
## Date: defines the date when the order was place
## DeliveryDate: defines when the order was delivered to the store.

import math

supply_delay = abs(purchase_data['DeliveryDate'] - purchase_data['Date'])

#print("Supply Delay: {}\n", format(purchase_data['supply_delay'].apply(np.int64)))

In [5]:
new_column = pd.DataFrame({"SupplyDelay": supply_delay})
purchase_data = purchase_data.merge(new_column, left_index = True, right_index = True)
#purchase_data = purchase_data.to_csv(SupplyDelay)
purchase_data.to_csv('Lokad_PurchaseOrders.csv')
purchase_data

Unnamed: 0,Id,Loc,Ref,DeliveryDate,Quantity,Received,NetAmount,Supplier,PONumber,Date,Currency,SupplyDelay
0,15641711/Los Angeles,Los Angeles,15641711,2016-06-22,50,50,164.999998,FHL,PO-200193,2016-04-20,USD,63 days
1,15641711/Los Angeles,Los Angeles,15641711,2016-07-29,47,47,155.099998,FHL,PO-200243,2016-07-26,USD,3 days
2,16075571/Los Angeles,Los Angeles,16075571,2016-06-22,100,100,300.000000,Logipro,PO-200193,2016-04-20,GBP,63 days
3,16075571/Los Angeles,Los Angeles,16075571,2016-07-29,300,300,900.000000,Logipro,PO-200243,2016-07-26,GBP,3 days
4,16075571/Los Angeles,Los Angeles,16075571,2001-01-01,121,0,363.000000,Logipro,PO-200271,2016-08-24,GBP,5714 days
5,14759689/Los Angeles,Los Angeles,14759689,2001-01-01,103,0,336.809998,Drecom,PO-200271,2016-08-24,GBP,5714 days
6,14095459/Los Angeles,Los Angeles,14095459,2001-01-01,100,0,4654.999920,Drecom,PO-200225,2016-08-06,USD,5696 days
7,15586718/Los Angeles,Los Angeles,15586718,2001-01-01,200,0,11400.000000,Drecom,PO-200225,2016-08-06,GBP,5696 days
8,10095232/Los Angeles,Los Angeles,10095232,2015-05-08,150,150,4002.000045,Office First,102196,2015-05-27,EUR,19 days
9,10095232/Los Angeles,Los Angeles,10095232,2016-06-21,100,100,2668.000030,Office First,PO-200069,2015-11-25,EUR,209 days


## Average Supply delay

In [6]:
## The average lead time calculates the av_lead time for the paticular products for all its instances

In [7]:
purchase_data['SupplyDelay'] = (purchase_data['SupplyDelay'] / np.timedelta64(1, 'D')).astype(int)
purchase_data.dtypes


Id                      object
Loc                     object
Ref                      int64
DeliveryDate    datetime64[ns]
Quantity                 int64
Received                 int64
NetAmount              float64
Supplier                object
PONumber                object
Date            datetime64[ns]
Currency                object
SupplyDelay              int32
dtype: object

In [8]:
purchase_data

Unnamed: 0,Id,Loc,Ref,DeliveryDate,Quantity,Received,NetAmount,Supplier,PONumber,Date,Currency,SupplyDelay
0,15641711/Los Angeles,Los Angeles,15641711,2016-06-22,50,50,164.999998,FHL,PO-200193,2016-04-20,USD,63
1,15641711/Los Angeles,Los Angeles,15641711,2016-07-29,47,47,155.099998,FHL,PO-200243,2016-07-26,USD,3
2,16075571/Los Angeles,Los Angeles,16075571,2016-06-22,100,100,300.000000,Logipro,PO-200193,2016-04-20,GBP,63
3,16075571/Los Angeles,Los Angeles,16075571,2016-07-29,300,300,900.000000,Logipro,PO-200243,2016-07-26,GBP,3
4,16075571/Los Angeles,Los Angeles,16075571,2001-01-01,121,0,363.000000,Logipro,PO-200271,2016-08-24,GBP,5714
5,14759689/Los Angeles,Los Angeles,14759689,2001-01-01,103,0,336.809998,Drecom,PO-200271,2016-08-24,GBP,5714
6,14095459/Los Angeles,Los Angeles,14095459,2001-01-01,100,0,4654.999920,Drecom,PO-200225,2016-08-06,USD,5696
7,15586718/Los Angeles,Los Angeles,15586718,2001-01-01,200,0,11400.000000,Drecom,PO-200225,2016-08-06,GBP,5696
8,10095232/Los Angeles,Los Angeles,10095232,2015-05-08,150,150,4002.000045,Office First,102196,2015-05-27,EUR,19
9,10095232/Los Angeles,Los Angeles,10095232,2016-06-21,100,100,2668.000030,Office First,PO-200069,2015-11-25,EUR,209


In [9]:
avg_supp = purchase_data.groupby('Ref', as_index = True)['SupplyDelay'].mean().astype(int)

In [10]:
#print(avg_supp)

In [11]:
colnames=['Ref', 'AvgSupplyDelay'] 
avg_supp.to_csv('Lokad_AverageSupplyDelay.csv')

data = pd.read_csv('Lokad_AverageSupplyDelay.csv', names = colnames, header = None)

data

Unnamed: 0,Ref,AvgSupplyDelay
0,9952426,58
1,10014552,1421
2,10095232,144
3,10099950,2213
4,10132892,669
5,10136626,885
6,10210677,1049
7,10255812,886
8,10422793,85
9,10592438,783


In [12]:
new_purchase_data = purchase_data.merge(data, on = 'Ref', right_index = True)

new_purchase_data

Unnamed: 0,Id,Loc,Ref,DeliveryDate,Quantity,Received,NetAmount,Supplier,PONumber,Date,Currency,SupplyDelay,AvgSupplyDelay
0,15641711/Los Angeles,Los Angeles,15641711,2016-06-22,50,50,164.999998,FHL,PO-200193,2016-04-20,USD,63,2277
1,15641711/Los Angeles,Los Angeles,15641711,2016-07-29,47,47,155.099998,FHL,PO-200243,2016-07-26,USD,3,2277
138,15641711/New-York,New-York,15641711,2016-12-04,800,800,2639.999960,FHL,PO-200121,2016-01-25,USD,314,2277
139,15641711/New-York,New-York,15641711,2001-01-01,100,0,329.999995,FHL,PO-200123,2016-01-26,USD,5503,2277
765,15641711/Chicago,Chicago,15641711,2001-01-01,100,0,329.999995,FHL,PO-200122,2016-01-26,USD,5503,2277
2,16075571/Los Angeles,Los Angeles,16075571,2016-06-22,100,100,300.000000,Logipro,PO-200193,2016-04-20,GBP,63,886
3,16075571/Los Angeles,Los Angeles,16075571,2016-07-29,300,300,900.000000,Logipro,PO-200243,2016-07-26,GBP,3,886
4,16075571/Los Angeles,Los Angeles,16075571,2001-01-01,121,0,363.000000,Logipro,PO-200271,2016-08-24,GBP,5714,886
140,16075571/New-York,New-York,16075571,2016-06-22,200,200,600.000000,Logipro,PO-200131,2016-09-02,GBP,72,886
141,16075571/New-York,New-York,16075571,2016-01-08,2400,2300,7200.000000,Logipro,PO-200240,2016-07-19,GBP,193,886


### DecisionTreeForecasting

In [13]:
## Forecasting the Leadtime for the product

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

X = new_purchase_data[['Date', 'DeliveryDate']]

y = new_purchase_data['AvgSupplyDelay']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0)

dt = DecisionTreeClassifier().fit(X_train, y_train)
# dt.score(X_train,y_train) * 100

In [14]:
pred = dt.predict(X_test)
print("Accuracy Score: ", accuracy_score(pred, y_test) * 100)

Accuracy Score:  4.08921933086


### Feature imporatnce

In [15]:
# Feature imporatnce using DecisionTreelassifier
print(dt.feature_importances_)

[ 0.60457215  0.39542785]


### Forecasting using EtraTreeClassifier

In [16]:

#from sklearn import datasets
from sklearn import metrics
from sklearn.ensemble import ExtraTreesClassifier
# load the iris datasets
X = new_purchase_data[['Date', 'DeliveryDate']]

y = new_purchase_data['AvgSupplyDelay']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0)
# fit an Extra Trees model to the data
model = ExtraTreesClassifier()
model.fit(X, y)
# display the relative importance of each attribute
print(model.feature_importances_)

[ 0.59455141  0.40544859]


In [None]:
model.score(X_test, y_test)

0.21189591078066913

In [None]:
# Svm forecasting

from sklearn.svm import SVC

#Load data from the Dataframe

X = new_purchase_data[['Date', 'DeliveryDate']]

y = new_purchase_data['AvgSupplyDelay']

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0)

#fit the SVM model to the data
clf = SVC(C = 10, kernel = 'linear').fit(X, y)
# print the score for the same
clf.score(X_test, y_test)