# Let's get this party started

In [1]:
#Dependencies
import os
import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
import re

# 1. Framing the Problem & Big Picture look (see readme file)

# 2. Get the Data

In [2]:
#set path to datasets
path = os.getcwd()
os.chdir("../datasets/")
filename = "sampledata.xlsx"
ser_path = "./serialized/"

In [3]:
#Importing company datasets
#prod_hist = pd.read_excel(filename, sheet_name="production_hist")
#ship_hist = pd.read_excel(filename, sheet_name="shipment_hist")
#atp_hist = pd.read_excel(filename, sheet_name="atp_hist")
#inventory = pd.read_excel(filename, sheet_name="inventory")

In [4]:
#setting file names
# data = [inventory, atp_hist, prod_hist, ship_hist]
# fnames = ["inventory", "atp_hist", "prod_hist","ship_hist"]
# count = 0
# for df in data:
#     df.name = fnames[count]
#     count+=1

In [5]:
#saving dataframes into pickle files
# for df in data:
#     pklfile = df.name + ".pkl"
#     df.to_pickle(ser_path + pklfile)

#saving dataframes into feather (not working)
for df in data:
    feafile = df.name
    df.to_feather(ser_path+feafile)

In [6]:
#see pklfile name order
pklfiles = os.listdir("./serialized")
pklfiles

['ship_hist.pkl', 'inventory.pkl', 'atp_hist.pkl', 'prod_hist.pkl']

In [7]:
#Fast Upload using pickle files
inventory = pd.read_pickle(ser_path+pklfiles[1])
atp_hist = pd.read_pickle(ser_path+pklfiles[2])
prod_hist = pd.read_pickle(ser_path+pklfiles[3])
ship_hist = pd.read_pickle(ser_path+pklfiles[0])

## Cleaning Up

In [8]:
def clean_columns(dataframe):
    for col in dataframe.columns:
        dataframe.rename(columns={col:re.sub(r'([a-z](?=[A-Z])|[A-Z](?=[A-Z][a-z]))', r'\1 ', col)}, inplace=True)
    dataframe.columns = dataframe.columns.str.strip().str.lower().str.replace(" ","_")
    try:
        dataframe["calendar_day"] = dataframe["calendar_day"].astype("datetime64")
    except:
        pass
    try:
        dataframe["snapshot_date"] = dataframe["snapshot_date"].astype("datetime64")
    finally:
        return dataframe.columns

In [9]:
# attempt to get rid of nulls, need to be more specific with fills to not loose valuable insight. 
# data = [inventory, atp_hist, prod_hist, ship_hist]
# for df in data:
#     df = df.fillna(0, inplace=True)

### Inventory

In [10]:
clean_columns(inventory)

Index(['calendar_day', 'plant', 'material', 'maktx', 'ph3', 'ph5', 'ph',
       'material_type', 'tons', 'shape', 'size_for_qual_cert', 'strgr',
       'block_resource', 'work_center', 'atp'],
      dtype='object')

In [11]:
inventory.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 850873 entries, 0 to 850872
Data columns (total 15 columns):
calendar_day          850873 non-null datetime64[ns]
plant                 850873 non-null int64
material              850873 non-null int64
maktx                 850873 non-null object
ph3                   850873 non-null object
ph5                   850873 non-null object
ph                    850873 non-null object
material_type         850873 non-null object
tons                  850873 non-null float64
shape                 848401 non-null object
size_for_qual_cert    848401 non-null object
strgr                 850873 non-null int64
block_resource        626217 non-null object
work_center           850866 non-null float64
atp                   175364 non-null float64
dtypes: datetime64[ns](1), float64(3), int64(3), object(8)
memory usage: 97.4+ MB


In [12]:
#Filling in empty atp rows with zero
inventory["atp"].fillna(0, inplace=True)

In [13]:
# inventory["work_center"] = inventory["work_center"].astype("int64")

In [14]:
inventory = inventory.loc[inventory["material_type"]=="ZERT"]
inventory["material_type"].unique()

array(['ZERT'], dtype=object)

In [15]:
inventory_reduced = inventory[["calendar_day","material","maktx", "ph5", "shape", "size_for_qual_cert", "block_resource", "tons", "atp"]]
inventory_reduced.rename(columns={"material":"sku", "maktx":"material_description", "size_for_qual_cert":"size"}, inplace=True)
inventory_reduced.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


Unnamed: 0,calendar_day,sku,material_description,ph5,shape,size,block_resource,tons,atp
0,2020-02-23,106022474,"L 2X2X1/4 A36/44W/A529-50 20'00""",LS-Mills-Mer-Mer-Lght Shp,SHAPE_L,2X2X1/4,"2"" ANG",0.009,0.0
1,2020-02-20,106022474,"L 2X2X1/4 A36/44W/A529-50 20'00""",LS-Mills-Mer-Mer-Lght Shp,SHAPE_L,2X2X1/4,"2"" ANG",0.009,0.0
2,2020-02-19,106022474,"L 2X2X1/4 A36/44W/A529-50 20'00""",LS-Mills-Mer-Mer-Lght Shp,SHAPE_L,2X2X1/4,"2"" ANG",0.009,0.0
3,2020-02-18,106022474,"L 2X2X1/4 A36/44W/A529-50 20'00""",LS-Mills-Mer-Mer-Lght Shp,SHAPE_L,2X2X1/4,"2"" ANG",0.009,0.0
4,2020-02-17,106022474,"L 2X2X1/4 A36/44W/A529-50 20'00""",LS-Mills-Mer-Mer-Lght Shp,SHAPE_L,2X2X1/4,"2"" ANG",0.009,0.0


In [16]:
inventory_reduced.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 649986 entries, 0 to 850872
Data columns (total 9 columns):
calendar_day            649986 non-null datetime64[ns]
sku                     649986 non-null int64
material_description    649986 non-null object
ph5                     649986 non-null object
shape                   647514 non-null object
size                    647514 non-null object
block_resource          626217 non-null object
tons                    649986 non-null float64
atp                     649986 non-null float64
dtypes: datetime64[ns](1), float64(2), int64(1), object(5)
memory usage: 49.6+ MB


In [17]:
inventory_reduced[inventory_reduced.isna().any(axis=1)]

Unnamed: 0,calendar_day,sku,material_description,ph5,shape,size,block_resource,tons,atp
3819,2020-02-23,109004211,"PR 3/8"" 1018 20'00"" CD",LS-Mills-Mer-Mer-MBQ,PENCIL_ROD,"3/8""",,-0.002,0.0
3820,2020-02-20,109004211,"PR 3/8"" 1018 20'00"" CD",LS-Mills-Mer-Mer-MBQ,PENCIL_ROD,"3/8""",,-0.002,0.0
3821,2020-02-19,109004211,"PR 3/8"" 1018 20'00"" CD",LS-Mills-Mer-Mer-MBQ,PENCIL_ROD,"3/8""",,-0.002,0.0
3822,2020-02-18,109004211,"PR 3/8"" 1018 20'00"" CD",LS-Mills-Mer-Mer-MBQ,PENCIL_ROD,"3/8""",,-0.002,0.0
3823,2020-02-17,109004211,"PR 3/8"" 1018 20'00"" CD",LS-Mills-Mer-Mer-MBQ,PENCIL_ROD,"3/8""",,-0.002,0.0
3824,2020-02-16,109004211,"PR 3/8"" 1018 20'00"" CD",LS-Mills-Mer-Mer-MBQ,PENCIL_ROD,"3/8""",,-0.002,0.0
3825,2020-02-13,109004211,"PR 3/8"" 1018 20'00"" CD",LS-Mills-Mer-Mer-MBQ,PENCIL_ROD,"3/8""",,-0.002,0.0
3826,2020-02-12,109004211,"PR 3/8"" 1018 20'00"" CD",LS-Mills-Mer-Mer-MBQ,PENCIL_ROD,"3/8""",,-0.002,0.0
3827,2020-02-11,109004211,"PR 3/8"" 1018 20'00"" CD",LS-Mills-Mer-Mer-MBQ,PENCIL_ROD,"3/8""",,-0.002,0.0
3828,2020-02-10,109004211,"PR 3/8"" 1018 20'00"" CD",LS-Mills-Mer-Mer-MBQ,PENCIL_ROD,"3/8""",,-0.002,0.0


### ATP History

In [18]:
clean_columns(atp_hist)

Index(['plant', 'plant_description', 'sku', 'atp_floor', 'snapshot_date'], dtype='object')

In [19]:
atp_hist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 235706 entries, 0 to 235705
Data columns (total 5 columns):
plant                235706 non-null int64
plant_description    235706 non-null object
sku                  235706 non-null object
atp_floor            235706 non-null float64
snapshot_date        235706 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(1), int64(1), object(2)
memory usage: 9.0+ MB


In [20]:
atp_reduced = atp_hist[["sku", "atp_floor", "snapshot_date"]]
atp_reduced.head()

Unnamed: 0,sku,atp_floor,snapshot_date
0,102003353,2.0,2019-11-01
1,102003353,2.0,2019-10-17
2,102003353,2.0,2019-10-16
3,102003353,2.0,2019-10-15
4,102003353,2.0,2019-10-14


### Production History

In [21]:
clean_columns(prod_hist)

Index(['calendar_day', 'plant', 'plant_description', 'production_process',
       'production_process_description', 'work_center', 'wc_description',
       'material', 'material_description', 'ph3', 'ph5', 'ton', 'block',
       'shape', 'size', 'stratedgy', 'month/year', 'year', 'monthyear'],
      dtype='object')

In [22]:
prod_hist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41935 entries, 0 to 41934
Data columns (total 19 columns):
calendar_day                      41935 non-null datetime64[ns]
plant                             41935 non-null int64
plant_description                 41935 non-null object
production_process                41935 non-null int64
production_process_description    41935 non-null object
work_center                       37833 non-null object
wc_description                    41935 non-null object
material                          41935 non-null int64
material_description              41935 non-null object
ph3                               36579 non-null object
ph5                               36579 non-null object
ton                               41935 non-null int64
block                             14851 non-null object
shape                             36115 non-null object
size                              36115 non-null object
stratedgy                         36129 non-nul

In [23]:
# prod_hist["stratedgy"] = prod_hist["stratedgy"].astype("int64")

In [24]:
prod_reduced = prod_hist[["calendar_day", "production_process", "production_process_description","stratedgy","work_center","wc_description","block","shape","size","material","material_description","ton"]]
prod_reduced.rename(columns={"material":"sku", "stratedgy":"strategy"}, inplace=True)
prod_reduced.head()

Unnamed: 0,calendar_day,production_process,production_process_description,strategy,work_center,wc_description,block,shape,size,sku,material_description,ton
0,2013-02-06,4310,Rolling,63.0,25000001,ROLLING - STRUCTURAL,"4"" ANG",SHAPE_L,4X4X3/8,106023315,"L 4X4X3/8 44W 60'00""",40
1,2013-02-06,4310,Rolling,63.0,25000001,ROLLING - STRUCTURAL,"4"" ANG",SHAPE_L,4X4X3/8,106022771,"L 4X4X3/8 A36/44W/A529-50 40'00""",390
2,2013-02-06,4310,Rolling,63.0,25000001,ROLLING - STRUCTURAL,"4"" ANG",SHAPE_L,4X4X3/8,106022760,"L 4X4X3/8 A36/44W/A529-50 20'00""",345
3,2013-02-06,4310,Rolling,63.0,25000001,ROLLING - STRUCTURAL,"4"" ANG",SHAPE_L,4X4X3/8,106022755,"L 4X4X3/8 55W 60'00""",205
4,2013-02-06,4310,Rolling,63.0,25000001,ROLLING - STRUCTURAL,"4"" ANG",SHAPE_L,4X4X3/8,106022749,"L 4X4X3/8 50W 60'00""",32


In [25]:
#Removing strategy 10 (Note: check assumption that these are all billets)
prod_reduced = prod_reduced.loc[(prod_reduced["strategy"] !=10)]

In [26]:
#Removing prod_processes up the supply chain: melt, scrap etc.
prod_reduced = prod_reduced.loc[(prod_reduced["production_process_description"] != "Melt shop") & (prod_reduced["production_process_description"] != "Scrap yard")]
prod_reduced = prod_reduced.loc[(prod_reduced["production_process_description"] != "Finishing production")]

In [27]:
#Identifying strategy=NaN rows
prod_reduced[prod_reduced["strategy"].isna()]

Unnamed: 0,calendar_day,production_process,production_process_description,strategy,work_center,wc_description,block,shape,size,sku,material_description,ton
36154,2015-02-27,4310,Rolling,,25000002,ROLLING - BAR,,,,40000148,COPROD - FG YIELD LOSS (ROLLING MILL),10
36155,2016-12-29,4310,Rolling,,25000002,ROLLING - BAR,,,,40000148,COPROD - FG YIELD LOSS (ROLLING MILL),-10


In [28]:
#Identifying size=NaN rows
#prod_reduced[prod_reduced["size"].isna()]

In [29]:
#Dropping selective NaN rows
prod_reduced.dropna(subset=["strategy"], inplace=True)

In [30]:
#Identifying unique inputs by column
unique_counts = pd.DataFrame.from_records([(col, prod_reduced[col].nunique()) for col in prod_reduced.columns],
                          columns=['Column_Name', 'Num_Unique']).sort_values(by=['Num_Unique'])
unique_counts

Unnamed: 0,Column_Name,Num_Unique
1,production_process,1
2,production_process_description,1
3,strategy,2
4,work_center,4
5,wc_description,4
7,shape,6
6,block,41
8,size,175
11,ton,1299
9,sku,1862


In [31]:
# Changing data types for colums, 
for col in prod_reduced.columns:
    if prod_reduced[col].nunique() < 10:
        prod_reduced[col] = prod_reduced[col].astype("category")
# not including "block" as it used as object elsewhere

prod_reduced["sku"] =prod_reduced["sku"].astype("object")

In [32]:
prod_reduced.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 14820 entries, 0 to 36135
Data columns (total 12 columns):
calendar_day                      14820 non-null datetime64[ns]
production_process                14820 non-null category
production_process_description    14820 non-null category
strategy                          14820 non-null category
work_center                       13450 non-null category
wc_description                    14820 non-null category
block                             14815 non-null object
shape                             14795 non-null category
size                              14795 non-null object
sku                               14820 non-null object
material_description              14820 non-null object
ton                               14820 non-null int64
dtypes: category(6), datetime64[ns](1), int64(1), object(4)
memory usage: 898.1+ KB


In [33]:
prod_reduced[prod_reduced["block"].isna()]["production_process_description"]

35617    Rolling
35618    Rolling
35631    Rolling
35632    Rolling
36135    Rolling
Name: production_process_description, dtype: category
Categories (1, object): [Rolling]

In [34]:
prod_reduced[prod_reduced["block"].isna()].groupby("production_process_description").count()

Unnamed: 0_level_0,calendar_day,production_process,strategy,work_center,wc_description,block,shape,size,sku,material_description,ton
production_process_description,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Rolling,5,5,5,3,5,0,5,5,5,5,5


### Ship History

In [35]:
clean_columns(ship_hist)

Index(['shipping_day', 'plant', 'plant_description', 'sales_organization',
       'sales_doc_type', 'outside_rep_ship-to', 'inside_rep_ship-to',
       'sold-to_party', 'sold-to_part_name', 'ship-to_party',
       'ship-to_party_name', 'postal_code', 'location', 'region',
       'region_description', 'country', 'country_description', 'material',
       'material_description', 'div_id', 'div', 'prod_hier_level_3',
       'prod_hier_level_5', 'shipment_tons', 'manager', 'segment',
       'plantcountry', 'businessdaycounter', 'block', 'lvorm', 'mrptype',
       'proctype', 'relativemonth', 'shipping_week', 'strgr', 'shape',
       'sizeforqualcert', 'dailyshiprate3', 'monthdelta', 'dailyshiprate6',
       'dailyshiprate12', 'workcenter', 'wc_desc', 'bayou_flag'],
      dtype='object')

In [36]:
ship_hist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156796 entries, 0 to 156795
Data columns (total 44 columns):
shipping_day            156796 non-null datetime64[ns]
plant                   156796 non-null int64
plant_description       156796 non-null object
sales_organization      156796 non-null object
sales_doc_type          156796 non-null object
outside_rep_ship-to     156796 non-null object
inside_rep_ship-to      156796 non-null object
sold-to_party           156796 non-null int64
sold-to_part_name       156796 non-null object
ship-to_party           156796 non-null int64
ship-to_party_name      156796 non-null object
postal_code             156796 non-null object
location                156796 non-null object
region                  156796 non-null object
region_description      156796 non-null object
country                 156796 non-null object
country_description     156796 non-null object
material                156796 non-null int64
material_description    156796 non-null

In [37]:
#Filling in NaN Shipment field rows to zero
ship_hist["shipment_tons"].fillna(0, inplace=True)

In [38]:
# Attempt to get correct data types, needs work.
# ship_hist["businessdaycounter"] = ship_hist["businessdaycounter"].astype("int64")
# ship_hist["strgr"] = ship_hist["strgr"].astype("int64")
# ship_hist["workcenter"] = ship_hist["workcenter"].astype("int64")

In [39]:
#Reducing main data frame
ship_reduced = ship_hist[["shipping_day", "block", "strgr", "shape", "sizeforqualcert", "div", "material", "material_description","shipment_tons", "segment", "sold-to_party","ship-to_party", "postal_code","region_description", "country"]]
ship_reduced.rename(columns={"material":"sku", "strgr":"strategy","sizeforqualcert":"size"}, inplace=True)
ship_reduced.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


Unnamed: 0,shipping_day,block,strategy,shape,size,div,sku,material_description,shipment_tons,segment,sold-to_party,ship-to_party,postal_code,region_description,country
0,2020-01-29,"8"" CHN",40.0,SHAPE_C,8 X 11.5#,Merchants,106021716,"C 8X11.5# GGMULTI 40'00""",0.0,NA - DISTRIBUTION,100092779,100099953,72301-7103,Arkansas,US
1,2020-01-07,"8"" CHN",40.0,SHAPE_C,8 X 11.5#,Merchants,106021716,"C 8X11.5# GGMULTI 40'00""",0.0,NA - DISTRIBUTION,100092779,100099953,72301-7103,Arkansas,US
2,2020-01-24,"8"" CHN",40.0,SHAPE_C,8 X 11.5#,Merchants,106021716,"C 8X11.5# GGMULTI 40'00""",0.0,NA - DISTRIBUTION,100092318,100099655,37218-3813,Tennessee,US
3,2020-01-29,"3 X 2"" ANG",40.0,SHAPE_L,3X2X1/2,Merchants,106022002,"L 3X2X1/2 GGMULTI 40'00""",0.0,NA - DISTRIBUTION,100092779,100099953,72301-7103,Arkansas,US
4,2020-01-27,"12"" CHN",40.0,SHAPE_C,12 X 20.7#,Merchants,106020260,"C 12X20.7# GGMULTI 40'00""",0.0,NA - DISTRIBUTION,100092309,100095766,35954,Alabama,US


In [40]:
#Check unique Divisions
ship_hist["div"].unique()

array(['Merchants', 'Rebar', 'Billets', 'SBQ Bar', 'Scrap'], dtype=object)

In [41]:
#Removing billets and scrap from dataframe
ship_reduced = ship_reduced.loc[(ship_reduced["div"] != "Billets") & (ship_reduced["div"] != "Scrap")]

In [42]:
ship_reduced.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 156711 entries, 0 to 156795
Data columns (total 15 columns):
shipping_day            156711 non-null datetime64[ns]
block                   153079 non-null object
strategy                156700 non-null float64
shape                   156700 non-null object
size                    156700 non-null object
div                     156711 non-null object
sku                     156711 non-null int64
material_description    156711 non-null object
shipment_tons           156711 non-null float64
segment                 154025 non-null object
sold-to_party           156711 non-null int64
ship-to_party           156711 non-null int64
postal_code             156711 non-null object
region_description      156711 non-null object
country                 156711 non-null object
dtypes: datetime64[ns](1), float64(2), int64(3), object(9)
memory usage: 19.1+ MB


In [43]:
#Show Sample of NaN fields
ship_reduced[ship_reduced.isna().any(axis=1)]

Unnamed: 0,shipping_day,block,strategy,shape,size,div,sku,material_description,shipment_tons,segment,sold-to_party,ship-to_party,postal_code,region_description,country
91,2020-01-28,"2-1/2"" ANG",40.0,SHAPE_L,2 1/2X2 1/2X1/4,Merchants,106028782,"L 2 1/2X2 1/2X1/4 GGMULTI 20'00""",0.0,,100051158,100051158,77029,Texas,US
152,2019-10-29,"2-1/2"" ANG",40.0,SHAPE_L,2 1/2X2 1/2X1/4,Merchants,106028782,"L 2 1/2X2 1/2X1/4 GGMULTI 20'00""",0.0,,100051158,100051158,77029,Texas,US
314,2020-01-06,,40.0,FLAT,1/4 X 5,Merchants,109005365,"F 1/4X5 GGMULTI 20'00""",0.0,NA - DISTRIBUTION,100091890,100094606,N2R 1E6,Ontario,CA
430,2020-02-04,"8"" FLT",40.0,FLAT,1/2 X 8,Merchants,109008964,"F 1/2X8 GGMULTI 20'00""",0.0,,100092155,100478827,S6V 5P9,Saskatchewan,CA
431,2020-02-04,"2"" ANG",40.0,SHAPE_L,2X2X1/8,Merchants,106029077,"L 2X2X1/8 GGMULTI 20'00""",0.0,,100092155,100478827,S6V 5P9,Saskatchewan,CA
432,2020-02-04,"2"" ANG",40.0,SHAPE_L,2X2X1/4,Merchants,106029023,"L 2X2X1/4 GGMULTI 20'00""",0.0,,100092155,100478827,S6V 5P9,Saskatchewan,CA
433,2020-02-19,"6"" FLT",40.0,FLAT,1/4 X 6,Merchants,109005464,"F 1/4X6 GGMULTI 20'00""",0.0,,100092155,100104016,T1H 6K3,Alberta,CA
434,2020-02-19,"2"" ANG",40.0,SHAPE_L,2X2X3/16,Merchants,106029053,"L 2X2X3/16 GGMULTI 20'00""",0.0,,100092155,100104016,T1H 6K3,Alberta,CA
435,2020-02-14,"2"" ANG",40.0,SHAPE_L,2X2X1/4,Merchants,106029025,"L 2X2X1/4 GGMULTI 40'00""",0.0,,100092155,100477013,T9E 0R8,Alberta,CA
436,2020-02-14,TRN,40.0,FLAT,1/4 X 8,Merchants,109005373,"F 1/4X8 GGMULTI 20'00""",0.0,,100092155,100095353,T2C 4W3,Alberta,CA


In [44]:
#Show the shape that show as null within the block
ship_reduced[ship_reduced["block"].isna()].groupby("shape").count()

Unnamed: 0_level_0,shipping_day,block,strategy,size,div,sku,material_description,shipment_tons,segment,sold-to_party,ship-to_party,postal_code,region_description,country
shape,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
ASDRAWING,2,0,2,2,2,2,2,2,2,2,2,2,2,2
FLAT,2457,0,2457,2457,2457,2457,2457,2457,2249,2457,2457,2457,2457,2457
PENCIL_ROD,81,0,81,81,81,81,81,81,81,81,81,81,81,81
ROUND,246,0,246,246,246,246,246,246,216,246,246,246,246,246
SHAPE_C,237,0,237,237,237,237,237,237,231,237,237,237,237,237
SHAPE_L,415,0,415,415,415,415,415,415,384,415,415,415,415,415
SHAPE_MC,31,0,31,31,31,31,31,31,31,31,31,31,31,31
SQUARE,152,0,152,152,152,152,152,152,139,152,152,152,152,152


### Cleaned Reduced Datasets

In [45]:
data_reduced = [ship_reduced, inventory_reduced, atp_reduced, prod_reduced]


In [46]:
#storing cleaned Prod
%store prod_reduced

#storing cleaned Ship
%store ship_reduced

#storing cleaned Inventory
%store inventory_reduced

#storing cleaned ATP
%store atp_reduced

Stored 'prod_reduced' (DataFrame)
Stored 'ship_reduced' (DataFrame)
Stored 'inventory_reduced' (DataFrame)
Stored 'atp_reduced' (DataFrame)


## Add Sales Order Data

# 3. Exploratory Data Analysis