In [4]:
import pandas as pd
from datetime import timedelta, datetime
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

from env import *
import acquire as acq
#import warnings
#warnings.filterwarnings("ignore")

In [5]:
def get_store_data():
    """
    Return the sales data joined with item and store details as a DataFrame.

    A CSV named 'tsa_item_demand.csv' in the current working directory acts
    as a cache:
      * if it exists, it is read with pandas and returned;
      * otherwise the 'tsa_item_demand' database is queried (sales left-joined
        to items and stores), the result is written to that CSV without the
        index, and the queried frame is returned.

    A status message is printed on either path.

    NOTE(review): get_db_url is not defined in this cell -- presumably it is
    supplied by `from env import *`; confirm.
    """
    filename = 'tsa_item_demand.csv'

    # Cached path first: nothing below needs to run when the file is present.
    if os.path.exists(filename):
        print(f"File {filename} already exists. Loading {filename}...")

        return pd.read_csv(filename, index_col=False)

    # No cache yet: pull everything from the database in a single query.
    sql_db = "tsa_item_demand"
    url = get_db_url(sql_db)
    query = '''
                select * 
                from sales
                left join items using(item_id)
                left join stores using(store_id)
                '''
    fetched = pd.read_sql(query, url)

    # Persist the result so later calls take the cached path above.
    fetched.to_csv(filename, index=False)
    print(f"Saving {filename}...")

    return fetched

In [8]:
def clean_store_data(df):
    """
    Reshape the frame returned by the acquire step into a date-indexed frame.

    Steps, in order:
      1. Drop the 'store_id', 'item_id' and 'sale_id' columns.
      2. Convert 'sale_date' with pd.to_datetime, make it the index, and
         sort the rows by that index.
      3. Add 'month' and 'day_of_week', both taken from the date index.
      4. Add 'sales_total', the product of 'sale_amount' and 'item_price'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns named above; a missing one raises KeyError.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by 'sale_date' with the three derived columns
        appended after the existing ones.
    """
    id_columns = ["store_id", "item_id", "sale_id"]

    # Strip the key columns, parse the dates, then index and order by date.
    by_date = (
        df.drop(columns=id_columns)
        .assign(sale_date=lambda frame: pd.to_datetime(frame["sale_date"]))
        .set_index("sale_date")
        .sort_index()
    )

    # Derived columns are appended in this order: month, day_of_week, sales_total.
    return by_date.assign(
        month=by_date.index.month,
        day_of_week=by_date.index.dayofweek,
        sales_total=by_date["sale_amount"] * by_date["item_price"],
    )

In [6]:
# Load the joined sales/items/stores data (read from the local CSV when it exists,
# otherwise queried from the database and saved to that CSV).
store_df = get_store_data()

File tsa_item_demand.csv already exists. Loading tsa_item_demand.csv...


In [9]:
# Clean the loaded frame: drop id columns, index by sale_date, add derived columns.
# NOTE(review): this rebinds store_df, so the cell is not idempotent -- running it
# a second time without re-running the load cell raises KeyError, because the id
# columns that clean_store_data drops are already gone.
store_df = clean_store_data(store_df)

In [11]:
# Preview the first three rows of store_df to check the result of the previous cell.
store_df.head(3)

Unnamed: 0_level_0,sale_amount,item_upc14,item_upc12,item_brand,item_name,item_price,store_address,store_zipcode,store_city,store_state,month,day_of_week,sales_total
sale_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2013-01-01,13,35200264013,35200264013,Riceland,Riceland American Jazmine Rice,0.84,12125 Alamo Ranch Pkwy,78253,San Antonio,TX,1,1,10.92
2013-01-01,26,74676640211,74676640211,Mueller,Mueller Sport Care Basic Support Level Medium ...,8.4,12018 Perrin Beitel Rd,78217,San Antonio,TX,1,1,218.4
2013-01-01,27,35457770664,35457770664,Mama Marys,Pizza Sauce,4.65,12018 Perrin Beitel Rd,78217,San Antonio,TX,1,1,125.55
