In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv("sales_data.csv")
data

Unnamed: 0,date,store,item,sales
0,01-01-13,1,1,13
1,02-01-13,1,1,11
2,03-01-13,1,1,14
3,04-01-13,1,1,13
4,05-01-13,1,1,10
...,...,...,...,...
912995,27-12-17,10,50,63
912996,28-12-17,10,50,59
912997,29-12-17,10,50,74
912998,30-12-17,10,50,62


In [3]:
print("There are {:,} transactions in the raw dataset.".format(len(data)))

There are 913,000 transactions in the raw dataset.


#### Format date

In [4]:
print("Start Processing ...")
data['date'] = pd.to_datetime(data['date'])
data['month'] = data['date'].dt.month
data['year'] = data['date'].dt.year
data['month-year'] = data[['month', 'year']].astype(str).apply(lambda t: t['month'].zfill(2) + '-' + t['year'], axis = 1)

Start Processing ...


#### month year columns

In [5]:
my_cols = [str(i).zfill(2) + '-' + str(j) for j in range(2013, 2018) for i in range(1, 13)]

#### pivot by store item

In [6]:
data_store = pd.pivot_table(data, values = 'sales', index = ['store', 'item'],
                           columns = ['month-year'], aggfunc=np.sum).fillna(0)
data_store

Unnamed: 0_level_0,month-year,01-2013,01-2014,01-2015,01-2016,01-2017,02-2013,02-2014,02-2015,02-2016,02-2017,...,11-2013,11-2014,11-2015,11-2016,11-2017,12-2013,12-2014,12-2015,12-2016,12-2017
store,item,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,1,408,459,504,557,556,364,460,441,520,518,...,557,619,695,639,647,421,486,497,560,576
1,2,1060,1231,1322,1502,1441,1028,1317,1305,1411,1472,...,1380,1586,1652,1724,1891,1188,1344,1343,1542,1615
1,3,679,738,823,878,954,680,781,727,848,912,...,874,1004,1073,1092,1056,752,845,898,944,978
1,4,384,439,485,526,585,392,455,467,551,526,...,506,635,611,656,695,432,513,499,582,618
1,5,319,380,425,490,468,309,379,375,466,476,...,425,491,509,547,572,383,419,441,458,503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,46,1390,1592,1621,1832,1794,1403,1541,1534,1717,1739,...,1780,1907,2136,2209,2269,1475,1638,1743,1949,1966
10,47,500,555,648,660,697,454,539,563,695,704,...,660,748,721,831,872,502,601,655,703,746
10,48,1212,1449,1485,1588,1641,1100,1350,1389,1594,1565,...,1492,1704,1822,1894,1944,1306,1454,1472,1686,1709
10,49,684,739,815,864,868,663,746,736,900,862,...,841,1030,1048,1070,1111,717,886,907,936,962


#### Sort by ascending order

In [7]:
data_store.sort_values(['store', 'item'], ascending = [True, True], inplace = True)
print("Processing is completed.")
print("{:,} lines in your final report".format(len(data_store)))

Processing is completed.
500 lines in your final report


#### Reorder columns (order by months then year)

In [8]:
data_store = data_store[my_cols]
data_store

Unnamed: 0_level_0,month-year,01-2013,02-2013,03-2013,04-2013,05-2013,06-2013,07-2013,08-2013,09-2013,10-2013,...,03-2017,04-2017,05-2017,06-2017,07-2017,08-2017,09-2017,10-2017,11-2017,12-2017
store,item,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,1,408,364,513,492,520,562,620,555,512,501,...,621,659,761,795,819,741,699,705,647,576
1,2,1060,1028,1314,1245,1466,1526,1666,1444,1335,1384,...,1684,1908,1989,1988,2177,2020,1887,1751,1891,1615
1,3,679,680,832,825,928,916,1014,937,831,832,...,1049,1165,1383,1226,1416,1253,1130,1124,1056,978
1,4,384,392,465,541,622,562,597,542,495,507,...,635,688,713,720,821,739,646,692,695,618
1,5,319,309,415,411,467,469,478,510,405,411,...,531,621,642,603,648,613,585,562,572,503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,46,1390,1403,1587,1800,1825,1904,1993,1840,1705,1745,...,2116,2345,2515,2488,2736,2497,2259,2238,2269,1966
10,47,500,454,623,585,666,740,708,677,632,679,...,756,880,934,893,1110,1011,819,837,872,746
10,48,1212,1100,1436,1507,1585,1678,1790,1602,1556,1436,...,1904,2072,2152,2141,2391,2191,2000,2037,1944,1709
10,49,684,663,851,857,866,969,1015,864,845,857,...,1029,1194,1226,1205,1382,1290,1124,1152,1111,962


#### Final report


In [9]:
print("Start saving report.")
data_store.to_excel("sales_report.xlsx")
print("Your report is saved.")

Start saving report.
Your report is saved.


### Summary
This script performs the following tasks:
1. Import data
2. Format date
3. Pivot table of sales by month for each pair store, item
4. Sort records by store and item
5. Save final report in an Excel file
---
After all, we will create an executable (.exe) that runs this script and can be used without having Python installed on the computer.

#### Use Pyinstaller 
Save the .py script in the same directory as the initial raw file. 
Once the exe file is built, everyone can use this file along with their raw sales_data.csv as long as they are both in the same directory.<br>

---
### Conclusion
The company's productivity is heighten and people can automate boring tasks. This can be coupled with cool visualization using Seaborn, connections to external APIs to fetch data, a ML model to forecast future sales. 
