In [1]:
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display, clear_output
import datetime as dt
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore',category=FutureWarning)
%matplotlib inline
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever name this
# installation provides instead of hard-coding one that may raise.
_dark_style = next(
    (name for name in ('seaborn-v0_8-dark', 'seaborn-dark') if name in plt.style.available),
    'default',
)
plt.style.use(_dark_style)

# Load the purchase records; 'train.csv' is expected in the working directory.
df = pd.read_csv('train.csv')
# Replace the coded values with readable labels used by the dashboard below.
df['Gender'] = df['Gender'].replace({'F': 'Female', 'M': 'Male'})
df['Marital_Status'] = df['Marital_Status'].replace({0: 'Single', 1: 'Married'})

In [8]:
def get_dashboard_info(product_id):
    """Compute summary metrics for one product from the module-level ``df``.

    Parameters
    ----------
    product_id
        Value matched against ``df['Product_ID']``.

    Returns
    -------
    dict
        ``num_users``            - number of distinct ``User_ID`` values
        ``total_purchase``       - sum of ``Purchase``
        ``avg_purchase_per_user``- ``total_purchase / num_users`` (0.0 when
                                   there are no users)
        ``gender_counts``        - distinct users per ``Gender``
        ``age_gender_counts``    - distinct users per (``Age``, ``Gender``)
        ``male_data`` / ``female_data`` - rows of ``age_gender_counts`` for
                                   each gender
        ``purchase_by_gender``   - sum of ``Purchase`` per ``Gender``
        ``purchase_ratio``       - ``purchase_by_gender / total_purchase``
        ``sizes``                - ``purchase_by_gender`` as a plain array
    """
    df_product = df[df['Product_ID'] == product_id]
    # NOTE(review): clears the current pyplot figure as a side effect; kept so
    # existing callers see unchanged behaviour, but it looks unnecessary for a
    # function that only computes numbers - confirm and consider removing.
    plt.clf()

    num_users = df_product['User_ID'].nunique()
    total_purchase = df_product["Purchase"].sum()
    # This is a mean per distinct user (the value was previously called
    # "median"); guard against an ID that matches no rows.
    avg_purchase_per_user = total_purchase / num_users if num_users else 0.0

    gender_counts = df_product.groupby('Gender')['User_ID'].nunique()
    age_gender_counts = df_product.groupby(['Age', 'Gender'])['User_ID'].nunique().reset_index()
    male_data = age_gender_counts[age_gender_counts['Gender'] == 'Male']
    female_data = age_gender_counts[age_gender_counts['Gender'] == 'Female']

    purchase_by_gender = df_product.groupby('Gender')['Purchase'].sum()
    purchase_ratio = purchase_by_gender / total_purchase
    sizes = purchase_by_gender.values

    # Hand the results back instead of discarding them.
    return {
        'num_users': num_users,
        'total_purchase': total_purchase,
        'avg_purchase_per_user': avg_purchase_per_user,
        'gender_counts': gender_counts,
        'age_gender_counts': age_gender_counts,
        'male_data': male_data,
        'female_data': female_data,
        'purchase_by_gender': purchase_by_gender,
        'purchase_ratio': purchase_ratio,
        'sizes': sizes,
    }
    
# ----------------------------------------------------------------------
# Shared state for the product dashboard (read by update_output).
# Distinct customers per product, indexed by Product_ID.
unique_counts = df['User_ID'].groupby(df['Product_ID']).nunique()
# Dropdown offering every product ID in ascending order.
product_dropdown = widgets.Dropdown(
    description='Product ID:',
    options=sorted(df['Product_ID'].unique()),
)
# Output area that the dashboard is rendered into.
output = widgets.Output()
# ----------------------------------------------------------------------
def update_output(change):
    """Redraw the product dashboard inside ``output`` for a new selection.

    Parameters
    ----------
    change
        Change notification from the dropdown; only ``change.new`` (the newly
        selected product ID) is read.

    The dashboard consists of a few HTML summary lines followed by one figure
    with three panels: distinct customers per age group and gender, the
    gender split of customers, and the gender split of revenue with the
    average revenue per customer of each gender in the centre.

    Reads the module-level ``df``, ``unique_counts`` and ``output``.
    """
    # One colour per gender, looked up by name so a product bought by a
    # single gender is still drawn in that gender's colour.
    gender_colors = {'Female': 'orange', 'Male': 'cornflowerblue'}
    wedge_style = {'edgecolor': 'black', 'linewidth': 1}

    with output:
        clear_output()
        product_id = change.new
        df_product = df[df['Product_ID'] == product_id]
        if df_product.empty:
            # Nothing to summarise; avoids IndexError / division by zero below.
            display(widgets.HTML(f"Không có dữ liệu cho sản phẩm {product_id}"))
            return

        # ---- Text summary ------------------------------------------------
        product_category_list = [
            f"Product Category 1: {df_product['Product_Category_1'].iloc[0]}"
        ]
        for level in (2, 3):
            category = df_product[f'Product_Category_{level}'].iloc[0]
            if pd.isna(category):
                product_category_list.append(f'Product Category {level}: N/A')
            else:
                product_category_list.append(f"Product Category {level}: {int(category)}")

        # NOTE(review): this is the number of distinct customers, while the
        # label below calls it invoices ("hóa đơn") - confirm the wording.
        count = unique_counts.get(product_id, 0)
        married_count = int((df_product['Marital_Status'] == 'Married').sum())
        single_count = int((df_product['Marital_Status'] == 'Single').sum())
        total_count = married_count + single_count
        married_percent = married_count / total_count * 100 if total_count else 0.0
        single_percent = single_count / total_count * 100 if total_count else 0.0
        revenue = df_product['Purchase'].sum()

        display(widgets.HTML(f"ID Sản Phẩm: {product_id}"))
        display(widgets.HTML('<br>'.join(product_category_list)))
        display(widgets.HTML(f"{count} hóa đơn mua sản phẩm {product_id}"))
        display(widgets.HTML(f"Tổng doanh thu: {revenue:,.0f} INR"))
        display(widgets.HTML(f"Khách hàng đã kết hôn: {married_count} ({married_percent:.2f}%)"))
        display(widgets.HTML(f"Khách hàng còn độc thân: {single_count} ({single_percent:.2f}%)"))

        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 5))

        # ---- Panel 1: distinct customers per age group and gender --------
        age_gender_counts = df_product.groupby(['Age', 'Gender'])['User_ID'].nunique().reset_index()
        # Plot on explicit numeric positions: a string-category axis orders
        # labels by first appearance, which would push an age group that only
        # has female buyers to the far right.
        age_groups = sorted(age_gender_counts['Age'].unique())
        age_position = {age: pos for pos, age in enumerate(age_groups)}
        bar_specs = (
            # gender, legend label, bar width (sign picks the side), text align
            ('Male', 'Nam', -0.4, 'right'),
            ('Female', 'Nữ', 0.4, 'left'),
        )
        for gender, label, width, text_align in bar_specs:
            gender_rows = age_gender_counts[age_gender_counts['Gender'] == gender]
            if gender_rows.empty:
                continue
            positions = gender_rows['Age'].map(age_position)
            ax1.bar(positions, gender_rows['User_ID'], color=gender_colors[gender],
                    alpha=1, label=label, align='edge', width=width)
            for position, users in zip(positions, gender_rows['User_ID']):
                ax1.text(position, users, str(users), ha=text_align, va='bottom')
        ax1.set_xticks(range(len(age_groups)))
        ax1.set_xticklabels(age_groups)
        ax1.set_xlabel('Nhóm tuổi', fontsize=20)
        ax1.set_ylabel('Khách hàng', fontsize=20)
        ax1.legend()

        # ---- Panel 2: gender split of customers --------------------------
        gender_counts = df_product.groupby('Gender')['User_ID'].nunique()
        ax2.pie(gender_counts, autopct='%1.1f%%',
                colors=[gender_colors.get(g, 'lightgray') for g in gender_counts.index],
                wedgeprops=wedge_style)
        ax2.set_title('Tỉ lệ giới tính', fontweight='bold')

        # ---- Panel 3: gender split of revenue (donut) --------------------
        purchase_by_gender = df_product.groupby('Gender')['Purchase'].sum()
        ax3.pie(purchase_by_gender.values,
                colors=[gender_colors.get(g, 'lightgray') for g in purchase_by_gender.index],
                autopct='%1.1f%%', startangle=90, pctdistance=0.85, labeldistance=1.1,
                wedgeprops=wedge_style)
        # White disc over the middle of the pie turns it into a donut.
        centre_circle = plt.Circle((0, 0), 0.68, color='black', fc='white', linewidth=1.25)
        ax3.add_artist(centre_circle)

        # Average revenue per customer of each gender *for this product*.
        # A gender with no buyers is reported as 0 instead of raising KeyError.
        avg_by_gender = (
            purchase_by_gender.div(gender_counts)
            .reindex(['Male', 'Female'])
            .fillna(0)
        )
        revenue_text = 'Doanh thu trung bình theo giới tính:\nNam: {:,.0f} INR\nNữ: {:,.0f} INR'.format(
            avg_by_gender['Male'], avg_by_gender['Female'])
        ax3.text(0, 0, s=revenue_text, ha='center', va='center', fontsize=9.5)
        ax3.set_title('Tỉ lệ doanh thu theo giới tính khách hàng', fontweight='bold')

        display(fig)
        # Close the figure so repeated selections do not accumulate open figures.
        plt.close(fig)
# ----------------------------------------------------------------------
# Re-render the dashboard whenever the dropdown's value changes, then show
# the dropdown stacked above the output area.
product_dropdown.observe(update_output, names='value')
display(widgets.VBox(children=[product_dropdown, output]))

VBox(children=(Dropdown(description='Product ID:', options=('P00000142', 'P00000242', 'P00000342', 'P00000442'…