In [1]:
import pandas as pd
from datetime import timedelta, datetime
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

import warnings
warnings.filterwarnings("ignore")

import acquire 
import prepare

In [2]:
# plotting defaults
plt.rc('figure', figsize=(13, 7))
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old names, so pick whichever whitegrid name this install ships.
# If neither exists the style step is skipped rather than raising OSError.
for whitegrid_style in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if whitegrid_style in plt.style.available:
        plt.style.use(whitegrid_style)
        break
plt.rc('font', size=16)


### `strftime` Format Cheat Sheet
| Units   | Specifier   | Description                                                    |
|:--------|:------------|:---------------------------------------------------------------|
| seconds | %S          | Second of the minute (00..60)                                  |
| minutes | %M          | Minute of the hour (00..59)                                    |
| hours   | %H          | Hour of the day, 24-hour clock (00..23)                        |
|         | %I          | Hour of the day, 12-hour clock (01..12)                        |
| days    | %d          | Day of the month (01..31)                                      |
|         | %a          | The abbreviated weekday name ("Sun")                           |
|         | %A          | The full weekday name ("Sunday")                               |
|         | %j          | Day of the year (001..366)                                     |
|         | %w          | Day of the week, Sunday is 0 (0..6)                            |
| weeks   | %U          | Week of the year, Sunday is the first day of the week (00..53) |
|         | %W          | Week of the year, Monday is the first day of the week (00..53) |
| months  | %b          | The abbreviated month name ("Jan")                             |
|         | %B          | The full month name ("January")                                |
|         | %d          | Day of the month (01..31)                                      |
|         | %m          | Month of the year (01..12)                                     |
| years   | %y          | Year without a century (00..99)                                |
|         | %Y          | Year with century (1999)                                       |
| misc    | %z          | Time zone offset (-0500)                                       |
|         | %Z          | Time zone name ("CDT")                                         |
|         | %p          | Meridian indicator ("AM" or "PM")                              |
|         | %c          | The preferred local date and time representation               |
|         | %x          | Preferred representation for the date alone, no time           |
|         | %X          | Preferred representation for the time alone, no date           |

Do your work for this exercise in a notebook named explore. Use the techniques in the lesson to explore the store item demand dataset and the opsd dataset.

For the store item demand data, you will need to choose a method of aggregating such that each observation is a unique date. 

For both datasets you should choose a method of splitting your data and only explore the training split.

In [3]:
# Load the power dataset via the project-local `prepare` module.
# NOTE(review): later cells read `power.index`, `.shape` and `.head()`, so this
# is presumably a date-indexed DataFrame — confirm against prepare.prep_power.
power = prepare.prep_power()
# Load the store item sales dataset via the project-local `prepare` module.
# NOTE(review): this cell prints a paged-URL progress line, so one of these
# calls appears to fetch from a remote API — confirm whether results are cached.
overall_sales = prepare.prep_overall_sales()

 3 of 3: https://python.zgulde.net/api/v1/items?page=3ge=183

### EDA of Power Data

In [5]:
# Quick look at the power data: span of the index, dimensions, first rows
print(f"Date Range: {power.index.min()!s} to {power.index.max()!s}")
print(f"Shape: {power.shape!s}")
power.head()

Date Range: 2006-01-01 00:00:00 to 2017-12-31 00:00:00
Shape: (4383, 6)


Unnamed: 0_level_0,Consumption,Wind,Solar,Wind+Solar,month_name,year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2006-01-01,1069.184,0.0,0.0,0.0,January,2006
2006-01-02,1380.521,0.0,0.0,0.0,January,2006
2006-01-03,1442.533,0.0,0.0,0.0,January,2006
2006-01-04,1457.217,0.0,0.0,0.0,January,2006
2006-01-05,1477.131,0.0,0.0,0.0,January,2006


#### Split Power into train and test samples

#### Exploration of the train data

### EDA of Sales data

In [6]:
# Quick look at the sales data: span of the index, dimensions, first rows
print(f"Date Range: {overall_sales.index.min()!s} to {overall_sales.index.max()!s}")
print(f"Shape: {overall_sales.shape!s}")
overall_sales.head()

Date Range: 2013-01-01 00:00:00+00:00 to 2017-12-31 00:00:00+00:00
Shape: (913000, 15)


Unnamed: 0_level_0,sale_id,sale_amount,item_id,item_name,item_price,item_upc12,item_upc14,store,store_address,store_city,store_state,store_zipcode,month_name,day_name,sales_total
sale_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2013-01-01 00:00:00+00:00,1,13.0,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,1,12125 Alamo Ranch Pkwy,San Antonio,TX,78253,January,Tuesday,10.92
2013-01-01 00:00:00+00:00,211817,26.0,12,Mueller Sport Care Basic Support Level Medium ...,8.4,74676640211,74676640211,7,12018 Perrin Beitel Rd,San Antonio,TX,78217,January,Tuesday,218.4
2013-01-01 00:00:00+00:00,832657,27.0,46,Pizza Sauce,4.65,35457770664,35457770664,7,12018 Perrin Beitel Rd,San Antonio,TX,78217,January,Tuesday,125.55
2013-01-01 00:00:00+00:00,213643,54.0,12,Mueller Sport Care Basic Support Level Medium ...,8.4,74676640211,74676640211,8,15000 San Pedro Ave,San Antonio,TX,78232,January,Tuesday,453.6
2013-01-01 00:00:00+00:00,215469,35.0,12,Mueller Sport Care Basic Support Level Medium ...,8.4,74676640211,74676640211,9,735 SW Military Dr,San Antonio,TX,78221,January,Tuesday,294.0


#### Split Sales into train and test samples

#### Exploration of the train data