# Using Python for Data Manipulation

In [None]:
# Import needed Libraries.
from datetime import datetime as dt
import numpy as np
import pandas as pd

In [None]:
# Load the three raw usage exports from Excel.
# Expect this step to be slow (anywhere from one to several minutes).
report1 = pd.read_excel(io='report1.xlsx')  # 2015 usage, part 1
report2 = pd.read_excel(io='report2.xlsx')  # 2015 usage, part 2
report3 = pd.read_excel(io='report3.xlsx')  # 2016 usage, part 1

In [None]:
# Inspect one of the loaded dataframes, starting with its first five rows.
report1.head(n=5)

In [None]:
# Inspect the last five rows of the same dataframe.
report1.tail(n=5)

In [None]:
# Stack the three reports into a single table; ignore_index=True discards the
# per-report indexes and renumbers the combined rows from 0.
new_usage = pd.concat(
    objs=[report1, report2, report3],
    ignore_index=True,
)
# Display (rows, columns) of the combined table.
new_usage.shape

In [None]:
# Discard every row whose transaction id ('Trans') is NaN, i.e. blank.
new_usage = new_usage.dropna(axis=0, subset=['Trans'])

In [None]:
# Show the last five rows of the table after the blank-id rows were dropped.
new_usage.tail(n=5)

In [None]:
# Display (rows, columns) again to see how many rows the dropna step removed.
new_usage.shape

In [None]:
# Store the transaction id as text: cast to int first, then to str.
# NOTE(review): the int step presumably strips a float ".0" suffix left by
# the Excel import - confirm against the raw data.
new_usage["Trans"] = (
    new_usage["Trans"]
    .astype(dtype=int)
    .astype(dtype='str')
)

In [None]:
# Check the column dtypes; "Trans" should now be reported as object (str).
new_usage.dtypes

In [None]:
# The pivot below is grouped by week, so add a "Week" column.  "Year" is added
# as well so weeks can be ordered across years, and "Month" for convenience.
#
# Series.dt.week was deprecated in pandas 1.1 and removed in pandas 2.0;
# dt.isocalendar().week yields the same ISO 8601 week number (as a nullable
# UInt32 column instead of int64).
# NOTE(review): assumes "Date" is already a datetime64 column - the .dt
# accessor raises AttributeError otherwise.
new_usage["Week"] = new_usage["Date"].dt.isocalendar().week
new_usage["Month"] = new_usage["Date"].dt.month
new_usage["Year"] = new_usage["Date"].dt.year

In [None]:
# Show the last five rows to confirm the Week, Month and Year columns exist.
new_usage.tail(n=5)

In [None]:
# Work on an independent (deep) copy so that the risky week renumbering that
# follows cannot damage new_usage.
tmp_df = new_usage.copy(deep=True)

In [None]:
# Split the data into three groups so the week numbers can be adjusted per
# group.  "Week" currently holds the ISO week number, which labels some rows
# dated in calendar year 2016 as week 53; that is confusing to readers, so
# those rows are isolated here and renumbered afterwards.
#
# NOTE: rows whose Year is neither 2015 nor 2016 land in none of the groups.
in_2015 = tmp_df["Year"] == 2015
in_2016 = tmp_df["Year"] == 2016

# Select with a single .loc call instead of the chained tmp_df[:][mask], and
# take explicit copies: the groups are modified later, and writing to a bare
# filtered slice raises SettingWithCopyWarning.
week_53 = tmp_df.loc[in_2016 & (tmp_df["Week"] == 53)].copy()
all_other_2015 = tmp_df.loc[in_2015].copy()
all_other_2016 = tmp_df.loc[in_2016 & (tmp_df["Week"] != 53)].copy()
week_53.head()

In [None]:
# Relabel the 2016 rows that carry week 53 as week 1.
# DataFrame.assign returns a new frame, so nothing is written into a filtered
# slice of tmp_df and no SettingWithCopyWarning is raised.
week_53 = week_53.assign(Week=1)
# Shift every remaining 2016 week up by one to make room for the new week 1.
all_other_2016 = all_other_2016.assign(Week=all_other_2016["Week"] + 1)
# Display the relabelled rows to confirm their Week is now 1.
week_53.head()

In [None]:
# Put the three groups back together.  The original row index is kept, so the
# rows are now ordered group by group rather than in their original order.
tmp_df = pd.concat([week_53, all_other_2015, all_other_2016])

# Sanity check: splitting and recombining must not lose or duplicate rows or
# columns.  Only adopt the corrected data when the shape is unchanged.
if tmp_df.shape == new_usage.shape:
    new_usage = tmp_df
    print('Success!')
else:
    # .format() must be applied to the string, inside the print() call;
    # print() returns None, so calling .format on its result raises
    # AttributeError under Python 3.
    print('new: {}, original: {}'.format(tmp_df.shape, new_usage.shape))

In [None]:
# Pivot the data: one row per item, one column per (Year, Week) pair, each
# cell holding the summed Qty_Change for that item in that week.
# The aggregation is named by the string "sum" rather than np.sum: recent
# pandas versions emit a FutureWarning when a NumPy reducer is passed here,
# and the string keeps the current behaviour.
pivoted = pd.pivot_table(
    tmp_df,
    values="Qty_Change",
    index="Item_Number",
    columns=["Year", "Week"],
    aggfunc="sum",
)
pivoted.head()

In [None]:
# Write the pivot table to an Excel workbook, on a sheet named "pivot".
# Missing cells (item/week combinations with no rows) are written as 0.
pivoted.to_excel(
    'pivoted_new.xlsx',
    na_rep=0,
    sheet_name="pivot",
)

In [None]:
# Write the row-level usage table to Excel without the index column; missing
# values are written as 0.
# NOTE: if the shape check after recombining printed 'Success!', new_usage was
# rebound to the week-corrected frame, so this file no longer contains a
# week 53 for 2016.  It only keeps the raw ISO week numbers if that check
# failed.
new_usage.to_excel(
    'new_usage.xlsx',
    index=False,
    na_rep=0,
)
# Optional: also export tmp_df, which always holds the week-corrected rows
# (this variant includes the index column).
# tmp_df.to_excel('tmp_df.xlsx', na_rep=0)