In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from sklearn.linear_model import LinearRegression
import numpy as np
import json


# Relative path of the NPD sales extract used throughout the notebook.
data_path = "data/NPD_Data_2018_2019_V3.csv"

# Raw, unmodified load; cleaned/derived frames are built from this one.
npd_data = pd.read_csv(filepath_or_buffer=data_path)

In [None]:
# Keep only fully populated rows: a missing value in any column drops the row.
clean_data = npd_data.dropna(axis=0, how="any")


API Call

In [None]:
import requests
import json

# Google API Key
from api_key import api

In [None]:
# Query the Google Places Text Search endpoint and print the first few hits.
my_phrase = "Campgrounds near Shasta, CA"
target_url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
max_results = 5

params = {
    "query": my_phrase,
    "key": api
}

# A timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status surfaces HTTP errors instead of parsing an error page.
# Only the exception type is printed: requests' messages embed the full URL,
# which would leak the API key into the notebook output.
try:
    response = requests.get(target_url, params=params, timeout=10)
    response.raise_for_status()
    camps = response.json()
except (requests.RequestException, ValueError) as err:
    print(f"Places request failed: {type(err).__name__}")
    camps = {}

# The API reports problems such as a rejected key in the JSON body, so an
# empty result list alone does not reveal why nothing was printed.
status = camps.get("status")
if status not in (None, "OK", "ZERO_RESULTS"):
    print(f"Places API status {status}: {camps.get('error_message', '')}")

counter = 0
for place in camps.get("results", []):
    # Skip only the incomplete record instead of abandoning the whole list.
    try:
        place_name = place["name"]
        place_address = place["formatted_address"]
    except KeyError:
        print("Skipping")
        continue
    print(place_name)
    print(place_address)
    counter += 1
    if counter == max_results:
        break

Purchase Method Graph

In [None]:
# Average unit retail (TY AUR) of apparel per store type, 2019 vs 2020.

# Set before any column assignment so the option actually applies to this cell.
pd.set_option('mode.chained_assignment', None)

nineteen = clean_data[clean_data["Year"]==2019]
twenty = clean_data[clean_data["Year"]==2020]

# .copy() gives independent frames, so the column writes below do not target
# a slice of clean_data.
apparel_categories = ['PANT/CAPRI', 'JACKET/VEST']
nineteen_clothes = nineteen[nineteen['Category'].isin(apparel_categories)].copy()
twenty_clothes = twenty[twenty['Category'].isin(apparel_categories)].copy()

nineteen_clothes['TY AUR'] = nineteen_clothes['TY AUR'].astype(float)
twenty_clothes['TY AUR'] = twenty_clothes['TY AUR'].astype(float)

# numeric_only=True: the frames still carry text columns (Category, Month, ...),
# and averaging those raises TypeError on pandas >= 2.0.
nineteen_stores = nineteen_clothes.groupby(['StoreType']).mean(numeric_only=True).round(2)
twenty_stores = twenty_clothes.groupby(['StoreType']).mean(numeric_only=True).round(2)

store_type_aur = pd.concat([nineteen_stores, twenty_stores])

# NOTE(review): the tick labels are positional; this assumes the grouped
# StoreType index sorts into exactly this order for both years - confirm.
labels = ['B&M', 'ECOMMERCE', 'OUTLET']
x = np.arange(len(labels))
width = 0.35
fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, nineteen_stores["TY AUR"], width, label='2019')
rects2 = ax.bar(x + width/2, twenty_stores["TY AUR"], width, label='2020')

ax.set_title('AUR 2019 VS 2020')
ax.set_ylabel('AVERAGE PRICE ($)')
ax.set_xlabel('STORE TYPE')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
fig.tight_layout()
plt.savefig("Graphs/Purchase_Method")
plt.show()

Clothes Graph

In [None]:
# Monthly apparel units, 2019 vs 2020.

# Set before the frames are touched so the option is in effect for this cell.
pd.set_option('mode.chained_assignment', None)

# assign() returns new frames instead of writing into a slice, and the
# astype(str) step keeps the cell re-runnable once Units is already numeric.
nineteen_clothes = nineteen_clothes.assign(
    Units=nineteen_clothes['Units'].astype(str).str.replace(',', '', regex=False).astype(float)
)
twenty_clothes = twenty_clothes.assign(
    Units=twenty_clothes['Units'].astype(str).str.replace(',', '', regex=False).astype(float)
)

# Aggregate only Units; summing every column also tries to add text columns.
# sort=False keeps months in their order of first appearance.
a = nineteen_clothes.groupby(['Month'], sort=False)[['Units']].sum()
b = twenty_clothes.groupby(['Month'], sort=False)[['Units']].sum()

plt.plot(a["Units"], color="blue", label="2019")
plt.plot(b["Units"], color="orange", label="2020")

plt.title("YoY APPAREL SALES")
plt.xlabel("MONTHS")
plt.ylabel("UNITS")
plt.legend()
plt.savefig("Graphs/clothes_sales")
plt.show()

Daypack Graphs

In [None]:
# Build the DAYPACK working frame with a datetime Month and integer Units.
#
# NOTE(review): the original cell read from `df`, which is never defined in
# this notebook (NameError on a fresh kernel). clean_data is used here because
# it still carries Units as text and has no missing values - confirm this is
# the intended source.
daypack_rows = clean_data[clean_data['Category'] == 'DAYPACK']\
    .reset_index()\
    .copy()

# "(1,234)" is accounting notation for -1234. regex=False makes '(' and ')'
# literal on every pandas version.
daypack_units = daypack_rows['Units']\
    .str.replace('(', '-', regex=False)\
    .str.replace(')', '', regex=False)\
    .str.replace(',', '', regex=False)

df1 = daypack_rows\
    .drop(columns='TY AUR')\
    .assign(**{'Month': pd.to_datetime(daypack_rows['Month']),
              # Fill gaps before the integer cast; after the cast there is
              # nothing left to fill.
              'Units': pd.to_numeric(daypack_units).fillna(0).astype(int)})
df1.head()

In [None]:
# Daypack units per store type and month, one line per store type (log scale).
df2 = (
    df1.groupby(['StoreType', 'Month'])
    .agg({'Units': 'sum'})
    .reset_index()
)

fig, ax = plt.subplots(1, 1, figsize=(12, 6))

# NOTE(review): the window starts at 2019-02-01 while the title below says
# "From 2019-01" - confirm which one is intended.
in_window = (df2['Month'] >= '2019-02-01') & (df2['Month'] < '2020-12-31')
for storetype in df2['StoreType'].unique().tolist():
    of_this_type = df2['StoreType'] == storetype
    df3 = df2[of_this_type & in_window].copy().reset_index()
    ax.plot(df3['Month'], df3['Units'], label=storetype)

ax.legend()
ax.set_yscale('log')
ax.set_xlabel('Time')
ax.set_ylabel('Units')
ax.set_title('Daypack Units for different StoreType From 2019-01 to 2020-12')
plt.savefig("Graphs/daypack_sales_by")

In [None]:
# Split the datetime Month into text parts used for grouping and tick labels:
# four-digit year, two-digit month number, abbreviated month name.
month_stamp = df1['Month']
df1['Year'] = month_stamp.dt.strftime('%Y')
df1['Month1'] = month_stamp.dt.strftime('%m')
df1['Month2'] = month_stamp.dt.strftime('%b')
df1.head()

In [None]:
# Total daypack units per (year, month) for 2019-2020, ordered for plotting.
month_keys = ['Year', 'Month1', 'Month2']
in_range = (df1['Month'] >= '2019-01-01') & (df1['Month'] < '2021-01-01')

df4_2019 = (
    df1[in_range]
    .sort_values(month_keys)
    .groupby(month_keys, sort=False)
    .agg({'Units': 'sum'})
    .reset_index()
)

# Rotate the month number so February maps to 0 and January to 11, then order
# rows by that rotated position within each year.
df4_2019['Month1'] = (df4_2019['Month1'].astype(int) + 10) % 12
df4_2019 = df4_2019.sort_values(['Year', 'Month1']).reset_index(drop=True)

is_2019 = df4_2019['Year'] == '2019'
is_2020 = df4_2019['Year'] == '2020'

# Tick labels come from the 2019 rows only.
labels = df4_2019.loc[is_2019, 'Month2'].tolist()
df_19 = df4_2019.loc[is_2019, 'Units'].tolist()
df_20 = df4_2019.loc[is_2020, 'Units'].tolist()

In [None]:
# Grouped bars: one pair (2019, 2020) per month label.
x = np.arange(len(labels))  # one slot per month label
width = 0.35                # bar width; each pair is centred on its slot
offset = width / 2

fig, ax = plt.subplots()
rects1 = ax.bar(x - offset, df_19, width, label='2019')
rects2 = ax.bar(x + offset, df_20, width, label='2020')

# Titles and month names on the x axis.
ax.set_title('2019 VS 2020 Daypack Units comparison')
ax.set_ylabel('Units')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

fig.tight_layout()
plt.savefig("Graphs/daypack_units_year")
plt.show()

Camping Gear Graph

In [None]:
# Remove thousands separators from Units and convert the column to float so
# it can be aggregated (later cells read npd_data['Units'] as numbers).
npd_data['Units'] = npd_data['Units'].replace(',', '', regex=True).astype(float)

# Build the working frame after the conversion. pd.DataFrame(npd_data) created
# before it only saw the numeric Units when both objects shared storage, which
# newer pandas versions no longer do.
purchase_data_df = npd_data.copy()
purchase_data_df.describe()

In [None]:
# Narrow view with just the columns needed for the gear charts.
gear_columns = ['Month', 'Category', 'Units']
camping_gear = purchase_data_df.loc[:, gear_columns]
camping_gear.head(5)

In [None]:
# Group rows by month; the displayed sizes are row counts per month.
months = purchase_data_df.groupby(by='Month')
months.size()

In [None]:
# Bar chart of total units sold per month, one bar colour per year.
pd.set_option('mode.chained_assignment', None)

# numeric_only=True keeps the text columns out of the sum.
retail = months.sum(numeric_only=True)

# The chart is titled "Total Units Sold", so aggregate the Units values rather
# than counting rows. Units is coerced here so the cell works whether or not
# the column has already been converted to numbers.
units_sold = pd.to_numeric(
    purchase_data_df["Units"].astype(str).str.replace(",", "", regex=False)
)
data_points = units_sold.groupby(
    [purchase_data_df["Year"], purchase_data_df["Month"]]
).sum()

ax = data_points.unstack(0).plot(kind="bar", color=['blue','orange'])
ax.set_title("Total Units Sold by Month")
ax.set_ylabel("Total Units Sold")

# Save before show(): after show() pyplot no longer has this figure as the
# current one, so a later savefig writes an empty image.
plt.savefig('Graphs/camping_gear')
plt.show()

In [None]:
# Rows for the "Tents & Sleeping Bags" chart, one frame per year.
# Each original filter OR-ed a category with itself, so 2019 held only tents
# and 2020 only sleeping bags, which made the year-over-year lines
# incomparable. Both years now cover both categories. The variable names are
# kept because the plotting cell refers to them.
camping_categories = ['TENT', 'SLEEPING BAG']
nineteen_tent = nineteen[nineteen['Category'].isin(camping_categories)].copy()
twenty_bag = twenty[twenty['Category'].isin(camping_categories)].copy()

In [None]:
# Monthly units for the tent / sleeping-bag frames, 2019 vs 2020.
pd.set_option('mode.chained_assignment', None)

# assign() builds new frames instead of writing into a slice, and astype(str)
# keeps the cell re-runnable once Units is already numeric.
nineteen_tent = nineteen_tent.assign(
    Units=nineteen_tent['Units'].astype(str).str.replace(',', '', regex=False).astype(float)
)
twenty_bag = twenty_bag.assign(
    Units=twenty_bag['Units'].astype(str).str.replace(',', '', regex=False).astype(float)
)

# Sum only Units; sort=False keeps months in their order of first appearance.
a = nineteen_tent.groupby(['Month'], sort=False)[['Units']].sum()
b = twenty_bag.groupby(['Month'], sort=False)[['Units']].sum()

plt.plot(a["Units"], color="blue", label="2019")
plt.plot(b["Units"], color="orange", label="2020")
plt.legend()
plt.title("Tents & Sleeping Bags")
plt.xlabel("Month")
plt.ylabel("Units")

# Save before show(): calling savefig afterwards writes an empty figure.
plt.savefig('Graphs/Tents_sleeping')
plt.show()

Total Units Sold Graph

In [None]:
#Group by year and category
grp_cat = npd_data.groupby(["Year", "Category"])

#Get the sum of the units in the new groupby object
sum_by_cat = grp_cat['Units'].sum()

#Create bar plot: one group per category, one bar per year
ax = sum_by_cat.unstack(0).plot(kind="bar", rot=45, color=["blue", "orange"])
ax.set_xlabel("Category")
ax.set_ylabel("Total Units Sold")
ax.set_title("Total Units Sold by Category")

#Add the total values on the chart, anchored at the top-left of each bar
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.text(bar.get_x(), bar_height, str(round(bar_height, 2)),
            fontsize=8, color="black")

ax.grid(axis="y")

#Lay out before saving so the rotated tick labels are not clipped in the file;
#calling tight_layout after show() has no effect on this figure
plt.tight_layout()
plt.savefig('Graphs/total_units')
plt.show()

In [None]:
#Units per category (rows) and year (columns)
sum_table = sum_by_cat.unstack(0)
sum_table.columns.name = None

#Create a column for the percent change
sum_table['Percent Change'] = round((sum_table[2020] - sum_table[2019]) / sum_table[2019] * 100, 2)

#Get the totals of the two year columns only; summing the whole table would
#also add up the category names and the per-category percentages
ttotal = sum_table[[2019, 2020]].sum()

#Gets the total percentage change
total_change = round(((ttotal.loc[2020] - ttotal.loc[2019]) / ttotal.loc[2019]) * 100, 2)

#Adds a TOTAL row below everything. DataFrame.append no longer exists in
#pandas 2.x, and building the row by column name avoids hard-coding how many
#categories there are
total_row = pd.DataFrame(
    {2019: [ttotal.loc[2019]], 2020: [ttotal.loc[2020]], 'Percent Change': [total_change]},
    index=pd.Index(['TOTAL'], name=sum_table.index.name),
)
sum_table = pd.concat([sum_table, total_row]).rename_axis('Category')
sum_table

In [None]:
#Total Units of Camping Equipment Sold
#Group by year and month; only Units is summed so text columns are left out.
#sort=False keeps months in their order of first appearance.
month_df = npd_data.groupby(['Year', 'Month'], sort=False)[['Units']].sum()

#Create line chart: one line per year
ax = month_df.unstack(0).plot(kind="line", y='Units', color=['blue','orange'])
ax.set_title("Total Units Sold by Month")
ax.set_ylabel("Total Units Sold")

#Lay out before saving; tight_layout after show() has no effect on this figure
plt.tight_layout()
plt.savefig('Graphs/total_units_month')
plt.show()