In [1]:
import pandas as pd
from env import host, user, password

In [28]:
def get_connection(db, user=user, host=host, password=password):
    '''
    This function uses my info from my env file to
    create a connection url to access the Codeup db.
    It takes in a string name of a database as an argument.
    '''
    return f'mysql+pymysql://{user}:{password}@{host}/{db}'
    

def new_store_item_sales_data():
    '''
    This function reads the zillow data from the Codeup db into a df.
    '''
    sql_query = """
                SELECT stores.*, items.*, sales.sale_date, sales.sale_amount \
                FROM sales \
                JOIN stores USING (store_id) \
                JOIN items USING (item_id);

                """
    # Included single family residential, Residential general, rural residence, mobile home, townhouse, cluster home, condominium, row house, bungalow, patio home
    # Read in DataFrame from Codeup db.
    df = pd.read_sql(sql_query, get_connection('tsa_item_demand'))
    
    return df

def get_store_item_sales_data():
    '''
    This function reads in iris data from Codeup database, writes data to
    a csv file if a local file does not exist, and returns a df.
    '''
    if os.path.isfile('store_item_sales.csv'):
        
        # If csv file exists read in data from csv file.
        df = pd.read_csv('store_item_sales.csv', index_col=0)
        
    else:
        
        # Read fresh data from db into a DataFrame
        df = new_store_item_sales_data()
        
        # Cache data
        df.to_csv('store_item_sales.csv')
        
    return df

In [29]:
df = new_store_item_sales_data()

In [30]:
df.head()

Unnamed: 0,store_id,store_address,store_zipcode,store_city,store_state,item_id,item_upc14,item_upc12,item_brand,item_name,item_price,sale_date,sale_amount
0,1,12125 Alamo Ranch Pkwy,78253,San Antonio,TX,1,35200264013,35200264013,Riceland,Riceland American Jazmine Rice,0.84,2013-01-01,13
1,1,12125 Alamo Ranch Pkwy,78253,San Antonio,TX,1,35200264013,35200264013,Riceland,Riceland American Jazmine Rice,0.84,2013-01-02,11
2,1,12125 Alamo Ranch Pkwy,78253,San Antonio,TX,1,35200264013,35200264013,Riceland,Riceland American Jazmine Rice,0.84,2013-01-03,14
3,1,12125 Alamo Ranch Pkwy,78253,San Antonio,TX,1,35200264013,35200264013,Riceland,Riceland American Jazmine Rice,0.84,2013-01-04,13
4,1,12125 Alamo Ranch Pkwy,78253,San Antonio,TX,1,35200264013,35200264013,Riceland,Riceland American Jazmine Rice,0.84,2013-01-05,10


In [32]:
df.to_csv('store_item_sales.csv')

In [8]:
df.sale_date = pd.to_datetime(df.sale_date)

In [10]:
df = df.set_index('sale_date').sort_index()

KeyError: "None of ['sale_date'] are in the columns"

In [11]:
df['month'] = df.index.month_name()
df['day_of_the_week'] = df.index.day_name()
df['sales_total'] = df.sale_amount * df.item_price

In [5]:
df.columns

Index(['sale_id', 'sale_date', 'store_id', 'item_id', 'sale_amount', 'item_id',
       'item_upc14', 'item_upc12', 'item_brand', 'item_name', 'item_price',
       'store_id', 'store_address', 'store_zipcode', 'store_city',
       'store_state'],
      dtype='object')

In [12]:
df.head()

Unnamed: 0_level_0,sale_id,store_id,item_id,sale_amount,item_id,item_upc14,item_upc12,item_brand,item_name,item_price,store_id,store_address,store_zipcode,store_city,store_state,month,day_of_the_week,sales_total
sale_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2013-01-01,1,1,1,13,1,35200264013,35200264013,Riceland,Riceland American Jazmine Rice,0.84,1,12125 Alamo Ranch Pkwy,78253,San Antonio,TX,January,Tuesday,10.92
2013-01-01,211817,7,12,26,12,74676640211,74676640211,Mueller,Mueller Sport Care Basic Support Level Medium ...,8.4,7,12018 Perrin Beitel Rd,78217,San Antonio,TX,January,Tuesday,218.4
2013-01-01,832657,7,46,27,46,35457770664,35457770664,Mama Marys,Pizza Sauce,4.65,7,12018 Perrin Beitel Rd,78217,San Antonio,TX,January,Tuesday,125.55
2013-01-01,213643,8,12,54,12,74676640211,74676640211,Mueller,Mueller Sport Care Basic Support Level Medium ...,8.4,8,15000 San Pedro Ave,78232,San Antonio,TX,January,Tuesday,453.6
2013-01-01,215469,9,12,35,12,74676640211,74676640211,Mueller,Mueller Sport Care Basic Support Level Medium ...,8.4,9,735 SW Military Dr,78221,San Antonio,TX,January,Tuesday,294.0


In [15]:
df.shape

(913000, 18)

In [27]:
df.drop(columns=["store_id", "item_id"], axis=1).shape

(913000, 14)

In [23]:
df.shape

(913000, 18)