In [1]:
import pandas as pd
import altair as alt
import warnings
warnings.simplefilter(action= 'ignore', category = FutureWarning)

In [2]:
# Read the purchase records from the CSV that sits next to the notebook.
shopping_behavior = pd.read_csv(filepath_or_buffer="shopping_behavior_updated.csv")

# Preview the first five rows.
shopping_behavior.head(n=5)

Unnamed: 0,Customer ID,Age,Gender,Item Purchased,Category,Purchase Amount (USD),Location,Size,Color,Season,Review Rating,Subscription Status,Shipping Type,Discount Applied,Promo Code Used,Previous Purchases,Payment Method,Frequency of Purchases
0,1,55,Male,Blouse,Clothing,53,Kentucky,L,Gray,Winter,3.1,Yes,Express,Yes,Yes,14,Venmo,Fortnightly
1,2,19,Male,Sweater,Clothing,64,Maine,L,Maroon,Winter,3.1,Yes,Express,Yes,Yes,2,Cash,Fortnightly
2,3,50,Male,Jeans,Clothing,73,Massachusetts,S,Maroon,Spring,3.1,Yes,Free Shipping,Yes,Yes,23,Credit Card,Weekly
3,4,21,Male,Sandals,Footwear,90,Rhode Island,M,Maroon,Spring,3.5,Yes,Next Day Air,Yes,Yes,49,PayPal,Weekly
4,5,45,Male,Blouse,Clothing,49,Oregon,M,Turquoise,Spring,2.7,Yes,Free Shipping,Yes,Yes,31,PayPal,Annually


In [3]:
# Lookup from state name (as spelled in the `Location` column) to its
# U.S. Census Bureau region. Covers the 50 states plus the District of
# Columbia: 9 Northeast, 12 Midwest, 17 South, 13 West.
state_to_region = {
    # Northeast
    'Connecticut': 'Northeast',
    'Maine': 'Northeast',
    'Massachusetts': 'Northeast',
    'New Hampshire': 'Northeast',
    'Rhode Island': 'Northeast',
    'Vermont': 'Northeast',
    'New Jersey': 'Northeast',
    'New York': 'Northeast',
    'Pennsylvania': 'Northeast',

    # Midwest
    'Illinois': 'Midwest',
    'Indiana': 'Midwest',
    'Michigan': 'Midwest',
    'Ohio': 'Midwest',
    'Wisconsin': 'Midwest',
    'Iowa': 'Midwest',
    'Kansas': 'Midwest',
    'Minnesota': 'Midwest',
    'Missouri': 'Midwest',
    'Nebraska': 'Midwest',
    'North Dakota': 'Midwest',
    'South Dakota': 'Midwest',

    # South
    'Delaware': 'South',
    'Florida': 'South',
    'Georgia': 'South',
    'Maryland': 'South',
    'North Carolina': 'South',
    'South Carolina': 'South',
    'Virginia': 'South',
    'West Virginia': 'South',
    'Alabama': 'South',
    # Kentucky belongs to the South (East South Central division) in the
    # Census Bureau scheme, not the Midwest.
    'Kentucky': 'South',
    'Mississippi': 'South',
    'Tennessee': 'South',
    'Arkansas': 'South',
    'Louisiana': 'South',
    'Oklahoma': 'South',
    'Texas': 'South',
    'District of Columbia': 'South',

    # West
    'Arizona': 'West',
    'Colorado': 'West',
    'Idaho': 'West',
    'Montana': 'West',
    'Nevada': 'West',
    'New Mexico': 'West',
    'Utah': 'West',
    'Wyoming': 'West',
    'Alaska': 'West',
    'California': 'West',
    'Hawaii': 'West',
    'Oregon': 'West',
    'Washington': 'West',
}


# Attach each row's region by looking its state up in state_to_region;
# states missing from the lookup end up with a missing Region.
shopping_behavior = shopping_behavior.assign(
    Region=shopping_behavior['Location'].map(state_to_region)
)

In [4]:
# Mean purchase amount per state, with the state's region kept alongside
# so the dropdown below has a `Region` field to filter on.
df_grouped = (
    shopping_behavior
    .groupby(['Location', 'Region'], as_index=False)['Purchase Amount (USD)']
    .mean()
)

# Dropdown entries: the None option (labelled "All") clears the selection,
# which lets every state through the filter.
labels = ['Midwest', 'West', 'South', 'Northeast', 'All']
region_options = ['Midwest', 'West', 'South', 'Northeast', None]

region_dropdown = alt.binding_select(
    name='Region: ',
    options=region_options,
    labels=labels,
)
region_selection = alt.selection_point(bind=region_dropdown, fields=['Region'])

# One bar per state, coloured by region and restricted to the chosen region.
chart = (
    alt.Chart(df_grouped)
    .mark_bar()
    .encode(
        x=alt.X('Location:N'),
        y=alt.Y('Purchase Amount (USD):Q'),
        color=alt.Color('Region:N'),
        tooltip=['Location', 'Purchase Amount (USD)', 'Region'],
    )
    .properties(
        title="Average Basket Size by State",
        width=600,
        height=400,
    )
    .add_params(region_selection)
    .transform_filter(region_selection)
)

chart

In [5]:
# Radio buttons for the gender filter; the None option (labelled "All")
# clears the selection so every row passes the filter.
labels = ['All', 'Male', 'Female']

gender_radio = alt.binding_radio(options=[None, 'Male', 'Female'], name='Gender:', labels=labels)
gender_selection = alt.selection_point(fields=['Gender'], bind=gender_radio)

# Count purchases per (Category, Gender). Gender must stay in the frame:
# the selection filters on the `Gender` field, and if that field is absent
# from the chart data, picking Male or Female removes every row.
category_counts = (
    shopping_behavior
    .groupby(['Category', 'Gender'])
    .size()
    .reset_index(name='Count')
)

chart = alt.Chart(category_counts).mark_bar().encode(
    x=alt.X('Category:N', title='Product Category'),
    # Sum over whatever genders survive the filter, so "All" shows a single
    # bar per category holding the combined count.
    y=alt.Y('sum(Count):Q', title='Count of Purchases'),
    color='Category:N',  # Color by category
    tooltip=['Category', alt.Tooltip('sum(Count):Q', title='Count')]  # Category and its (filtered) total
).properties(
    width=600,
    height=400,
    title='Most Common Categories in Purchases By Gender'
).add_params(
    gender_selection
).transform_filter(
    gender_selection
)

chart

In [6]:
# Dropdown for the season filter; the None option (labelled "All") clears
# the selection so rows from every season pass the filter.
season_dropdown = alt.binding_select(
    options=['Winter', 'Fall', 'Summer', 'Spring', None],
    labels=['Winter', 'Fall', 'Summer', 'Spring', 'All'],
    name='Season: ',
)
season_selection = alt.selection_point(fields=['Season'], bind=season_dropdown)

# Count purchases per (item, season, category). Season stays in the frame
# because the selection filters on it.
item_counts = (
    shopping_behavior
    .groupby(['Item Purchased', 'Season', 'Category'])
    .size()
    .reset_index(name='Count')
)

# Horizontal bars, one per item, coloured by category and ordered from most
# to least purchased. Count is summed in the encoding so that the "All" view
# draws a single bar per item with the combined total, rather than one
# stacked segment per season whose tooltip reports only that season's count.
chart = alt.Chart(item_counts).mark_bar().encode(
    y=alt.Y('Item Purchased:N', title='Item Purchased', sort='-x'),  # descending by the summed count
    x=alt.X('sum(Count):Q', title='Count of Purchases'),
    color='Category:N',
    tooltip=['Item Purchased', alt.Tooltip('sum(Count):Q', title='Count'), 'Category']
).properties(
    width=600,
    height=400,
    title='Items Purchased By Season'
).add_params(
    season_selection
).transform_filter(
    season_selection
)

# Show the chart
chart