In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.colors as pc


In [2]:
def save_fig(file_name, fig_to_save):
  """Write a figure to ``<file_name>.html`` as an embeddable HTML fragment.

  Args:
    file_name: Output path without the ``.html`` extension.
    fig_to_save: Object exposing ``to_html`` (e.g. a Plotly figure).

  The fragment is rendered with ``full_html=False`` and
  ``include_plotlyjs='cdn'``, i.e. without the surrounding ``<html>`` page.
  """
  # Render first so a failure in to_html() does not leave an empty file behind.
  html_fragment = fig_to_save.to_html(full_html=False, include_plotlyjs='cdn')
  # Explicit UTF-8: the platform default encoding (e.g. cp1252 on Windows)
  # cannot represent emoji that appear in labels and hover text.
  with open(f'{file_name}.html', 'w', encoding='utf-8') as f:
    f.write(html_fragment)

In [3]:
# Load the 2024 purchase log; the cells below all work from this frame.
purchases_csv = 'purchases_2024.csv'
df = pd.read_csv(purchases_csv)

In [4]:
# Parse purchase dates so the .dt accessors and date arithmetic below work.
df['Date purchased'] = pd.to_datetime(df['Date purchased'])
# diff() measures the gap to the previous *row*, so make sure the rows are in
# chronological order first. A stable sort keeps the file order of items bought
# on the same day, so an already-sorted CSV is left unchanged.
df = df.sort_values('Date purchased', kind='stable').reset_index(drop=True)
# Whole days since the previous purchase (NaN for the first row).
df["days_since_prev"] = df['Date purchased'].diff().dt.days
df['purchased_month'] = df['Date purchased'].dt.month
# Items without a utility label are grouped under "other".
df['Utility'] = df['Utility'].fillna("other")

In [5]:
# Peek at the first five rows to sanity-check the parsed and derived columns.
df.head(n=5)

Unnamed: 0,Item,Date purchased,On sale?,Price,Brand,Colour,Colour family,Utility,Category,Season,Satisfaction,Regret?,days_since_prev,purchased_month
0,Insulated parka,2024-01-03,Yes,174.71,Eddie Bauer,#0f0e2e,Blue,staple 👑,winter coat,winter,10.0,No,,1
1,Knit ribbed cardigan,2024-02-15,No,55.9,Uniqlo,#b29c91,Brown,staple 👑,cardigan,"fall, spring",10.0,No,43.0,2
2,Volume short sleeve shirt,2024-02-15,No,44.7,Uniqlo,#f5f5f0,White,other,summer blouse,summer,8.0,No,0.0,2
3,Cropped sleeveless V-neck bra top,2024-02-15,No,39.08,Uniqlo,#d2b1b5,Purple,versatile 🔮,bra top,"fall, spring, winter",8.0,No,0.0,2
4,Drapey denim pleated pants,2024-02-16,No,67.0,Uniqlo,#7197ad,Blue,staple 👑,pants,"fall, spring, summer",10.0,No,1.0,2


### price stats

In [6]:
# Fraction of all rows falling into each "On sale?" value.
on_sale_counts = df['On sale?'].value_counts()
on_sale_counts / len(df)

Unnamed: 0_level_0,count
On sale?,Unnamed: 1_level_1
No,0.653846
Yes,0.346154


In [7]:
# Total of the Price column across all purchases.
df['Price'].sum()

1367.1100000000001

In [8]:
# Summary statistics (count, mean, std, quartiles, extremes) of item prices.
df['Price'].describe()

Unnamed: 0,Price
count,26.0
mean,52.581154
std,37.140261
min,14.45
25%,28.4
50%,41.89
75%,67.0
max,174.71


In [9]:
# Histogram of item prices. This figure is only displayed, not exported.
fig = px.histogram(df, nbins=50, x="Price")
histogram_layout = dict(
    xaxis_title="Price ($CAD) of item",
    yaxis_title="Count",
    autosize=False,
    width=800,
    height=500,
    bargap=0.2,
    margin=dict(l=20, r=20, t=20, b=20),
)
fig.update_layout(**histogram_layout)
fig.show()


In [10]:
# Violin + box plot of item prices with every purchase drawn as a point;
# exported as 'price_distribution.html'.
fig = px.violin(df, y="Price", box=True, points='all')
# Hover only reacts to the individual points, not to the violin/box shapes.
fig.update_traces(hoveron="points")
violin_layout = dict(
    yaxis_title="Price ($CAD) of item",
    xaxis_title=None,
    xaxis=dict(showticklabels=False),
    margin=dict(l=20, r=20, t=20, b=20),
    autosize=False,
    width=500,
    height=400,
)
fig.update_layout(**violin_layout)
fig.show()
save_fig('price_distribution', fig)

### interval stats

In [11]:
def get_color(value, vmin, vmax):
    """Map ``value`` onto the Plotly "tempo" colorscale.

    ``value`` is rescaled linearly from ``[vmin, vmax]`` to ``[0, 1]`` and
    clamped to that range before the colorscale is sampled.

    Args:
        value: Number to colour.
        vmin: Value mapped to the start of the colorscale.
        vmax: Value mapped to the end of the colorscale.

    Returns:
        The result of ``pc.sample_colorscale`` for that single sample point
        (per the Plotly docs, a list of colours).
    """
    colorscale = pc.get_colorscale("tempo")
    span = vmax - vmin
    # A zero-width range has no meaningful position; pin it to the scale start
    # instead of dividing by zero.
    norm_value = (value - vmin) / span if span != 0 else 0.0
    # Clamp with float bounds and force a float: sample_colorscale interprets an
    # int argument as "number of evenly spaced samples", so an int 0 yields an
    # empty list and an int 1 divides by zero.
    norm_value = float(max(0.0, min(1.0, norm_value)))
    return pc.sample_colorscale(colorscale, norm_value)

In [12]:
# Timeline strip of the year: one stacked horizontal bar segment per gap
# between purchases, coloured by the length of the gap.
year_start = pd.Timestamp('2024-01-01')
year_end = pd.Timestamp('2025-01-01')
purchase_dates = df['Date purchased']

# Gap before the first purchase, then the gaps between consecutive rows
# (skipping the first row's NaN), then the gap after the last purchase.
vals = (
    [(purchase_dates.min() - year_start).days]
    + list(df["days_since_prev"].values[1:])
    + [(year_end - purchase_dates.max()).days]
)

fig = go.Figure()
for gap_days in vals:
  segment_marker = dict(
      color=get_color(gap_days, 0, 100),
      line=dict(color='black', width=2),
  )
  fig.add_trace(
      go.Bar(y=['days_elapsed'], x=[gap_days], orientation='h', marker=segment_marker)
  )

# Stack the segments end to end and strip axes, grid and legend so only the
# coloured strip remains.
fig.update_layout(
    barmode='stack',
    showlegend=False,
    yaxis_title=None,
    plot_bgcolor="white",
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    xaxis=dict(visible=False),
    yaxis=dict(visible=False),
    margin=dict(l=20, r=20, t=0, b=0),
)
fig.update_traces(width=0.25, hovertemplate='%{value} days<extra></extra>')
fig.update_layout(autosize=False, width=1050, height=150)

fig.show()
save_fig('intervals', fig)

In [13]:
# Longest gap, in days, between a purchase and the row before it.
df.days_since_prev.max()

73.0

### monthly plots

In [14]:
# Total spend per calendar month; months without purchases are shown as 0.
df_monthly = df.groupby('purchased_month')['Price'].sum().reset_index()
df_monthly_filled = pd.DataFrame({"Month": np.arange(1, 13)})
# The right join onto months 1-12 adds the missing months as NaN rows, which
# are then zero-filled.
df_monthly_merged = (
    df_monthly
    .merge(df_monthly_filled, how='right', left_on='purchased_month', right_on='Month')
    .fillna(0)
    .drop(columns=['purchased_month'])
)

fig1 = px.line(df_monthly_merged, x="Month", y="Price", markers=True)
fig1.update_traces(textposition="top center")
fig1.update_layout(
    yaxis_title="Price ($CAD) purchased",
    margin=dict(l=20, r=20, t=20, b=20),
    autosize=False,
    width=1000,
    height=300,
)
fig1.show()
save_fig('price_per_month', fig1)

In [15]:
# Number of items bought per calendar month; months without purchases are 0.
df_monthly = df.groupby('purchased_month')['Price'].count().reset_index()
df_monthly_filled = pd.DataFrame({"Month": np.arange(1, 13)})
# The right join onto months 1-12 adds the missing months as NaN rows, which
# are then zero-filled; the counted column is renamed for the plot.
df_monthly_merged = (
    df_monthly
    .merge(df_monthly_filled, how='right', left_on='purchased_month', right_on='Month')
    .fillna(0)
    .drop(columns=['purchased_month'])
    .rename(columns={'Price': 'Num items'})
)

fig2 = px.line(df_monthly_merged, x="Month", y="Num items", markers=True)
fig2.update_yaxes(title="Number of items purchased")
fig2.update_layout(
    margin=dict(l=20, r=20, t=20, b=20),
    autosize=False,
    width=1000,
    height=300,
)
fig2.show()
save_fig('items_per_month', fig2)


### pies and waffles

#### breakdowns by category

In [16]:
# Items per category, largest first. The final reset_index() renumbers the
# rows 0..n-1 in that order and keeps the pre-sort position in an 'index' column.
category_cnts = (
    df.groupby('Category')['Item']
    .count()
    .reset_index()
    .sort_values(by='Item', ascending=False)
    .reset_index()
)

fig3 = px.pie(
    category_cnts,
    names='Category',
    values='Item',
    title='Types of items bought',
    color_discrete_sequence=px.colors.qualitative.Set3,
)
fig3.update_layout(autosize=False, width=600, height=600)
fig3.show()

In [17]:
# Inputs for the category strip: one heatmap cell per purchased item.
z = []               # per-item colour index (row label of its category in category_cnts)
hover_text = []      # per-item category name, aligned with z
n = len(category_cnts)
category_scale = []  # [position, colour] colorscale stops, one per category
discrete_palette = px.colors.qualitative.Prism
for i,r in category_cnts.iterrows():
  v = r['Item']
  hover_text += [r['Category']] * v
  z += [i] * v
  # max(n - 1, 1) avoids dividing by zero when there is a single category; the
  # modulo reuses palette colours instead of raising IndexError when there are
  # more categories than palette entries.
  category_scale.append([i / max(n - 1, 1), discrete_palette[i % len(discrete_palette)]])

if len(category_scale) < 2:
  # With fewer than two categories the scale would not span 0..1; use a flat
  # two-stop scale in the first palette colour instead.
  category_scale = [[0.0, discrete_palette[0]], [1.0, discrete_palette[0]]]


In [18]:
# Draw the per-item category strip as a single-row heatmap with gaps between
# the cells; exported as 'category_count.html'.
category_strip = go.Heatmap(
    z=[z],
    text=[hover_text],
    hoverinfo='text',
    colorscale=category_scale,
    colorbar=None,
    xgap=5,
    ygap=5,
)
fig = go.Figure(data=category_strip)
fig.update_traces(showscale=False)
fig.update_layout(
    plot_bgcolor="white",
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    # Anchoring x to y with ratio 1 gives x and y units the same on-screen length.
    xaxis=dict(visible=False, scaleanchor="y", scaleratio=1),
    yaxis=dict(visible=False),
    hoverlabel=dict(bgcolor="white"),
    hovermode="closest",
    margin=dict(l=20, r=20, t=0, b=0),
    autosize=False,
    width=1000,
    height=150,
)
fig.show()
save_fig('category_count', fig)

#### breakdown by utility

In [19]:
# Items per utility label, largest first. The final reset_index() renumbers the
# rows 0..n-1 in that order and keeps the pre-sort position in an 'index' column.
util_cnts = (
    df.groupby('Utility')['Item']
    .count()
    .reset_index()
    .sort_values(by='Item', ascending=False)
    .reset_index()
)

fig4 = px.pie(
    util_cnts,
    names='Utility',
    values='Item',
    title='Utility of items bought',
    color_discrete_sequence=px.colors.qualitative.Set3,
)
fig4.update_layout(autosize=False, width=600, height=600)
fig4.show()

In [20]:
# Inputs for the utility strip: one heatmap cell per purchased item.
z = []           # per-item colour index (row label of its utility group in util_cnts)
hover_text = []  # per-item utility label, aligned with z
for i,r in util_cnts.iterrows():
  v = r['Item']
  hover_text += [r['Utility']] * v
  z += [i] * v

# One colour per utility group, in util_cnts order (largest group first).
utility_palette = ["#bec2be", "#f7d365", "#9ae6f5", "#cea8ed"]
n_utilities = len(util_cnts)
# Place the stops at k/(n-1) so that every integer z value lands exactly on
# its colour. Hard-coded 0.33/0.66 stops sit slightly off 1/3 and 2/3 (colours
# bleed into each other) and are only meaningful for exactly four groups.
utility_scale = [
    [k / max(n_utilities - 1, 1), utility_palette[k % len(utility_palette)]]
    for k in range(n_utilities)
]
if len(utility_scale) < 2:
  # With fewer than two groups the scale would not span 0..1; use a flat
  # two-stop scale in the first palette colour instead.
  utility_scale = [[0.0, utility_palette[0]], [1.0, utility_palette[0]]]


In [21]:
# Draw the per-item utility strip as a single-row heatmap with gaps between
# the cells; exported as 'utility_count.html'.
utility_strip = go.Heatmap(
    z=[z],
    text=[hover_text],
    hoverinfo='text',
    colorscale=utility_scale,
    colorbar=None,
    xgap=5,
    ygap=5,
)
fig = go.Figure(data=utility_strip)
fig.update_traces(showscale=False)
fig.update_layout(
    plot_bgcolor="white",
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    # Anchoring x to y with ratio 1 gives x and y units the same on-screen length.
    xaxis=dict(visible=False, scaleanchor="y", scaleratio=1),
    yaxis=dict(visible=False),
    hoverlabel=dict(bgcolor="white"),
    hovermode="closest",
    margin=dict(l=20, r=20, t=0, b=0),
    autosize=False,
    width=1000,
    height=150,
)
fig.show()
save_fig('utility_count', fig)


#### breakdown by brand

In [22]:
# Items per brand, largest first. The final reset_index() renumbers the rows
# 0..n-1 in that order and keeps the pre-sort position in an 'index' column.
brand_cnts = (
    df.groupby('Brand')['Item']
    .count()
    .reset_index()
    .sort_values(by='Item', ascending=False)
    .reset_index()
)

fig5 = px.pie(
    brand_cnts,
    names='Brand',
    values='Item',
    title='Brand of items bought',
    color_discrete_sequence=px.colors.qualitative.Set3,
)
fig5.update_layout(autosize=False, width=600, height=600)
fig5.show()

In [23]:
# Inputs for the brand strip: one heatmap cell per purchased item.
z = []            # per-item colour index (row label of its brand in brand_cnts)
hover_text = []   # per-item brand name, aligned with z
n = len(brand_cnts)
brand_scale = []  # [position, colour] colorscale stops, one per brand
discrete_palette = px.colors.qualitative.Pastel
for i,r in brand_cnts.iterrows():
  v = r['Item']
  hover_text += [r['Brand']] * v
  z += [i] * v
  # max(n - 1, 1) avoids dividing by zero when there is a single brand; the
  # modulo reuses palette colours instead of raising IndexError when there are
  # more brands than palette entries.
  brand_scale.append([i / max(n - 1, 1), discrete_palette[i % len(discrete_palette)]])

if len(brand_scale) < 2:
  # With fewer than two brands the scale would not span 0..1; use a flat
  # two-stop scale in the first palette colour instead.
  brand_scale = [[0.0, discrete_palette[0]], [1.0, discrete_palette[0]]]


In [24]:
# Draw the per-item brand strip as a single-row heatmap with gaps between the
# cells; exported as 'brand_count.html'.
brand_strip = go.Heatmap(
    z=[z],
    text=[hover_text],
    hoverinfo='text',
    colorscale=brand_scale,
    colorbar=None,
    xgap=5,
    ygap=5,
)
fig = go.Figure(data=brand_strip)
fig.update_traces(showscale=False)
fig.update_layout(
    plot_bgcolor="white",
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    # Anchoring x to y with ratio 1 gives x and y units the same on-screen length.
    xaxis=dict(visible=False, scaleanchor="y", scaleratio=1),
    yaxis=dict(visible=False),
    hoverlabel=dict(bgcolor="white"),
    hovermode="closest",
    margin=dict(l=20, r=20, t=0, b=0),
    autosize=False,
    width=1000,
    height=150,
)
fig.show()
save_fig('brand_count', fig)

### colour sunburst

In [25]:
# Seed the parallel lists describing the sunburst nodes with the root node;
# the following cells append the colour families and the individual items.
core_txt = 'Colours'
colour_families = sorted(df['Colour family'].unique())

label_list, parent_list = [core_txt], [""]
value_list, color_list = [len(df)], ['none']
hover_info = ['']

# Colour key -> display colour: 'none' is the root, the rest are the colour families.
cmap = dict(
    none='white',
    Brown='#85604d',
    Blue='#3c61c7',
    Multicolor='pink',
    Green="#5d875c",
    Gray="#909190",
    Beige="#d9cbb4",
    Black='#111214',
    Purple="#9d7ed6",
    White="#fcfcfa",
)

In [26]:
# Add one node per colour family under the root, sized by its number of items.
# NOTE: this appends to the lists seeded in the previous cell, so re-running
# this cell on its own duplicates the family nodes.
family_counts = df['Colour family'].value_counts()  # computed once, not per iteration
for cf in colour_families:
  label_list.append(cf)
  parent_list.append(core_txt)
  value_list.append(family_counts[cf])
  color_list.append(cf)
  hover_info.append(f"Count: {value_list[-1]}")

In [27]:
# Add one leaf per purchased item under its colour family. Items with a hex
# colour are drawn (and labelled) in that colour; the rest get a family colour.
for ind, r in df.iterrows():
  curr_c = r['Colour']
  parent_list.append(r['Colour family'])
  value_list.append(1)
  hover_info.append("")
  # A missing colour comes back from pandas as NaN, which is truthy, so
  # `not curr_c` alone never catches it; test for NaN explicitly. Blank strings
  # are still treated as missing.
  if pd.isna(curr_c) or not curr_c:
    color_list.append('Multicolor')
    label_list.append("")
  elif curr_c == "#000000":
    color_list.append('Black')
    label_list.append("")
  else:
    label_list.append(curr_c)
    color_list.append(curr_c)
    # Register the hex code as its own colour so the leaf is drawn in it.
    cmap[curr_c] = curr_c

In [28]:
# Assemble the node table from the parallel lists and draw the colour
# sunburst; exported as 'colour_wheel.html'.
data = dict(
    item_color=label_list,
    parent=parent_list,
    count=value_list,
    color=color_list,
    hover_info=hover_info,
)

sunburst_args = dict(
    names='item_color',
    parents='parent',
    values='count',
    color='color',
    color_discrete_map=cmap,
    branchvalues ='total',
    hover_data={'hover_info': False},
)
fig6 = px.sunburst(data, **sunburst_args)

fig6.update_layout(
    autosize=False,
    width=500,
    height=500,
    margin=dict(l=20, r=20, t=20, b=20),
)
# Replace the generated hover text and attach the per-node hover strings.
fig6.update_traces(hovertemplate='count: %{value}', customdata=data['hover_info'])
fig6.show()
save_fig('colour_wheel', fig6)

### season combos

In [29]:
# One small pie per distinct season combination. Each pie has four equal
# quarters (spring/summer/fall/winter); quarters not in the combination are
# drawn white.
season_cnts = df['Season'].value_counts(ascending=False)
labels = ["spring","summer","fall","winter"]
colors=['#c8f7a3', '#ffea82', '#c2a080', '#8b90cc']

# Size the subplot grid from the data instead of hard-coding 7 columns, so a
# different number of season combinations neither fails nor leaves empty slots.
n_combos = max(len(season_cnts), 1)
fig = make_subplots(1, n_combos, horizontal_spacing=0.0,
                    specs=[[{'type': 'domain'} for _ in range(n_combos)]])

for ind, (seasons, cnt) in enumerate(season_cnts.items()):
  s = seasons.split(", ")
  curr_colors = [colors[i] if label in s else 'white'
                 for i, label in enumerate(labels)]
  # Only a pie with all four quarters coloured gets legend entries; otherwise
  # the legend would pick up white swatches.
  show_legend = all(label in s for label in labels)

  fig.add_trace(go.Pie(labels=labels, values=[cnt]*4,
                       textinfo='none',
                       showlegend=show_legend,
                       marker=dict(colors=curr_colors,
                                   line=dict(color='#f5f5f5', width=0)
                                   ),
                       scalegroup='one',
                       hovertemplate='Items: %{value}<extra></extra>',
                       hoverlabel=dict(bgcolor="white"),
                       name=f"{seasons}"
                       ),
                1, ind+1)

fig.update_layout(
    margin=dict(l=20, r=20, t=20, b=20),
    legend=dict(
        orientation='h',  # Lay the legend entries out horizontally
        x=0.96,  # Horizontal position of the legend anchor
        xanchor='right',  # x refers to the legend's right edge
        y=0.8,  # Vertical position of the legend anchor
        yanchor='bottom',  # y refers to the legend's bottom edge
    )
)

fig.update_layout(autosize=False, height=300, width=1050)
fig.show()
save_fig('seasons', fig)