# 📊 Wishlist Dataset - EDA with Saved Graphs

In [2]:

# 📊 EDA: Wishlist Dataset

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Папка для збереження графіків
output_dir = "analytics/reports"
os.makedirs(output_dir, exist_ok=True)

# Завантаження даних
df = pd.read_csv('../data/wishlist_sample.csv')

# Перші 5 рядків
print(df.head())

# Типи даних та пропущені значення
print(df.info())
print(df.isnull().sum())

# Основна статистика по числових колонках
print(df.describe())

# Унікальні значення
print("🎯 Unique values per column:")
print(df.nunique())

print("\n🎁 Gifts:")
print(df['gift'].value_counts())

print("\n📅 Groups:")
print(df['group'].value_counts())

# Розподіл подій
plt.figure(figsize=(6,4))
sns.countplot(data=df, x='group')
plt.title('🎈 Distribution by Event Group')
plt.savefig(os.path.join(output_dir, "event_group_distribution.png"))
plt.close()

# Розподіл подарунків (Top 10)
top_gifts = df['gift'].value_counts().head(10).index
plt.figure(figsize=(8,5))
sns.countplot(data=df[df['gift'].isin(top_gifts)], y='gift', order=top_gifts)
plt.title('🎁 Top 10 Gifts')
plt.tight_layout()
plt.savefig(os.path.join(output_dir, "top_10_gifts.png"))
plt.close()

# Якщо є gender, age, industry
if 'gender' in df.columns:
    plt.figure()
    sns.countplot(data=df, x='gender')
    plt.title('🧍 Distribution by Gender')
    plt.savefig(os.path.join(output_dir, "gender_distribution.png"))
    plt.close()

if 'age' in df.columns:
    plt.figure()
    sns.histplot(data=df, x='age', bins=20, kde=True)
    plt.title('🎂 Age Distribution')
    plt.savefig(os.path.join(output_dir, "age_distribution.png"))
    plt.close()

if 'industry' in df.columns:
    plt.figure(figsize=(8,5))
    sns.countplot(data=df, y='industry', order=df['industry'].value_counts().index)
    plt.title('🏢 Industries')
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "industry_distribution.png"))
    plt.close()


   user_id       user_name     group          gift  \
0        1    Justin Cooke  Birthday    Board Game   
1        2    Tina Mcbride   Wedding  Indoor Plant   
2        3  Diana Cummings   Wedding      Cookbook   
3        4   Claudia Brown  Birthday  Indoor Plant   
4        5   William White  Birthday  Indoor Plant   

                                        link  gender    industry  age  
0    https://fakeshop.com/product/board_game  Female  Healthcare   21  
1  https://fakeshop.com/product/indoor_plant    Male         Art   38  
2      https://fakeshop.com/product/cookbook    Male   Education   57  
3  https://fakeshop.com/product/indoor_plant  Female      Retail   60  
4  https://fakeshop.com/product/indoor_plant    Male  Healthcare   56  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   user_id    100 non-null    int64 
 1   user_name  100 non

  plt.savefig(os.path.join(output_dir, "event_group_distribution.png"))
  plt.tight_layout()
  plt.savefig(os.path.join(output_dir, "top_10_gifts.png"))
  plt.savefig(os.path.join(output_dir, "gender_distribution.png"))
  plt.savefig(os.path.join(output_dir, "age_distribution.png"))
  plt.tight_layout()
  plt.savefig(os.path.join(output_dir, "industry_distribution.png"))
