# Concatenation and Pivoting of New Parts Usage

In [1]:
# Import needed Libraries.
from datetime import datetime as dt
import pandas as pd
import numpy as np
# import matplotlib.pyplot as plt
# %matplotlib inline

In [2]:
# Load the three raw usage exports from Excel.
# Reading these workbooks is slow; expect this cell to run for a few minutes.
report1, report2, report3 = (
    pd.read_excel(workbook)
    for workbook in (
        'report1.xlsx',  # 2015 usage part 1
        'report2.xlsx',  # 2015 usage part 2
        'report3.xlsx',  # 2016 usage part 1
    )
)

In [3]:
# Stack the three reports into one table; ignore_index renumbers the rows
# so the combined frame gets a single continuous index.
usage_reports = [report1, report2, report3]
new_usage = pd.concat(usage_reports, ignore_index=True)
new_usage.shape

(347905, 4)

In [4]:
# Keep only the rows whose transaction id ('Trans') is present (not NaN/blank).
has_trans_id = new_usage['Trans'].notnull()
new_usage = new_usage[has_trans_id]

In [5]:
# Store the transaction id as text: cast to int first so the float
# representation (e.g. 25429819.0) does not leak into the string.
trans_as_int = new_usage["Trans"].astype(int)
new_usage["Trans"] = trans_as_int.astype('str')
# The same conversion for a "Line" column is left disabled.
# new_usage["Line"] = new_usage["Line"].astype(int).astype('str')

In [6]:
# Derive calendar columns from "Date" so usage can be pivoted by week.
# Year is needed to keep the weeks of different years apart; Month is
# added as an extra sanity-check column.
#
# Series.dt.week was deprecated in pandas 1.1 and removed in 2.0;
# dt.isocalendar().week returns the same ISO-8601 week number.
# The nullable "Int64" dtype keeps a missing Date as a missing Week
# instead of raising during the cast.
usage_dates = new_usage["Date"]
new_usage = new_usage.assign(
    Week=usage_dates.dt.isocalendar().week.astype("Int64"),
    Month=usage_dates.dt.month,
    Year=usage_dates.dt.year,
)

In [7]:
# Show the last rows to confirm the Week, Month and Year columns were added.
new_usage.tail()

Unnamed: 0,Trans,Item Number,Qty Change,Date,Week,Month,Year
347898,25429819,2510847C91,-20.0,2016-06-13,24,6,2016
347899,25429822,1837731C1,-100.0,2016-06-13,24,6,2016
347900,25429823,1837732C1,-100.0,2016-06-13,24,6,2016
347901,25429825,1844474C1,-4.0,2016-06-13,24,6,2016
347902,25429829,1854006C1,-100.0,2016-06-13,24,6,2016


In [8]:
# Working name for the week-number adjustment below.
# Note: this binds a second name to the same DataFrame object; it is not a copy.
tmp_df = new_usage

In [9]:
# Split the data into three disjoint pieces so the 2016 week numbers can be
# adjusted separately:
#   * week_53        - 2016 rows that carry week number 53
#   * all_other_2015 - every 2015 row (left untouched)
#   * all_other_2016 - the remaining 2016 rows
# Each piece is an explicit copy (.loc + .copy()) rather than the result of
# chained indexing, so assigning to its columns later is unambiguous and does
# not raise SettingWithCopyWarning.
in_2015 = tmp_df["Year"] == 2015
in_2016 = tmp_df["Year"] == 2016

week_53 = tmp_df.loc[in_2016 & (tmp_df["Week"] == 53)].copy()
all_other_2015 = tmp_df.loc[in_2015].copy()
all_other_2016 = tmp_df.loc[in_2016 & (tmp_df["Week"] != 53)].copy()
all_other_2016.head()

Unnamed: 0,Trans,Item Number,Qty Change,Date,Week,Month,Year
240214,24679253,1848172C2OSP1,-2.0,2016-01-04,1,1,2016
240215,24679255,1848172C2OSP1,-2.0,2016-01-04,1,1,2016
240216,24679256,1824383C2,-20.0,2016-01-04,1,1,2016
240217,24679260,1824400C1,-10.0,2016-01-04,1,1,2016
240218,24679263,1817960C1,-80.0,2016-01-04,1,1,2016


In [10]:
# Renumber the 2016 weeks so that no 2016 row is labelled week 53:
# the week-53 rows become week 1 and every other 2016 week moves up by one.
# assign() builds new frames instead of writing into filtered slices, which
# avoids SettingWithCopyWarning regardless of how the slices were created.
# NOTE: running this cell twice shifts the 2016 weeks twice; re-run the
# split cell first if it needs to be repeated.
week_53 = week_53.assign(Week=1)
all_other_2016 = all_other_2016.assign(Week=all_other_2016["Week"] + 1)
all_other_2016.head()

Unnamed: 0,Trans,Item Number,Qty Change,Date,Week,Month,Year
240214,24679253,1848172C2OSP1,-2.0,2016-01-04,2,1,2016
240215,24679255,1848172C2OSP1,-2.0,2016-01-04,2,1,2016
240216,24679256,1824383C2,-20.0,2016-01-04,2,1,2016
240217,24679260,1824400C1,-10.0,2016-01-04,2,1,2016
240218,24679263,1817960C1,-80.0,2016-01-04,2,1,2016


In [12]:
# Put the three pieces back together (row order becomes: week-53 rows,
# then 2015, then the rest of 2016; original index labels are kept).
tmp_df = pd.concat([week_53, all_other_2015, all_other_2016])
# The split must not have lost or duplicated rows: only accept the adjusted
# frame when its shape matches the frame it was derived from.
if tmp_df.shape == new_usage.shape:
    new_usage = tmp_df
    print('Success!')
else:
    # .format() has to be applied to the string, not to print()'s return
    # value (None on Python 3), otherwise this branch raises AttributeError.
    print('new: {}, original: {}'.format(tmp_df.shape, new_usage.shape))

Success!


In [13]:
# Pivot to one row per item and one column per (Year, Week), summing the
# quantity change of all transactions that fall in the same cell.
# The aggregation is named as the string "sum" rather than np.sum: recent
# pandas warns that callables will be applied differently in the future,
# and the string form pins the current behaviour.
pivoted = pd.pivot_table(
    tmp_df,
    values="Qty Change",
    index="Item Number",
    columns=["Year", "Week"],
    aggfunc="sum",
)
pivoted.head()

Year,2015,2015,2015,2015,2015,2015,2015,2015,2015,2015,...,2016,2016,2016,2016,2016,2016,2016,2016,2016,2016
Week,1,2,3,4,5,6,7,8,9,10,...,16,17,18,19,20,21,22,23,24,25
Item Number,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0171446,,,,,,,,,,,...,,,,,,,,,,
033-6033,,,,,,,,,,,...,-604.0,-696.0,-600.0,,,-500.0,-916.0,,-489.0,
06032168304,,,,-80.0,,,-40.0,,-40.0,,...,,,,,,,,,,
06032168313,,,,,-60.0,,,,,,...,,,,,,,,,,
06032168333,,,,,-130.0,,,,,,...,,,,,,,,,,


In [19]:
# Write the weekly pivot to the "pivot" sheet of an Excel workbook,
# putting 0 in every cell that has no usage.
pivoted.to_excel(
    'pivoted_new.xlsx',
    sheet_name="pivot",
    na_rep=0,
)

In [20]:
# Save the row-level usage to Excel without the index column; missing values are written as 0.
# NOTE(review): when the shape check in the reassembly cell passes ("Success!"),
# new_usage is rebound to the week-adjusted frame, so this file contains the
# adjusted week numbers, not the original week 53 in Jan 2016. Confirm that is intended.
new_usage.to_excel('new_usage.xlsx', na_rep=0, index=False)
# Disabled: separate export of tmp_df, the week-adjusted frame (no week 53 in Jan 2016).
# tmp_df.to_excel('tmp_df.xlsx', na_rep=0)