In [None]:
# Dimensions of the dataset as a (rows, columns) tuple
data.shape

### Check Data types

In [None]:
# Print a concise summary of the DataFrame: column names, non-null counts and dtypes
data.info()

### Mean (Summary Statistics)

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max);
# by default pandas reports these for the numeric columns only
data.describe()

### Median

In [None]:
# Columns that hold numeric measurements
num_col = [
    'Age',
    'Quantity',
    'Price per Unit',
    'Total Amount',
]

# Column-wise median over the numeric subset of the data
numeric_subset = data[num_col]
median = numeric_subset.median()

print("Median Of All Numeric Columns:\n", median)

### Mode 

In [None]:
# DataFrame.mode() returns one row per tied modal value; keep only the
# first row so that each column is represented by a single mode
mode_table = data.mode()
mode = mode_table.iloc[0]
mode

### Check Null Values

In [None]:
# Count of missing values in each column
data.isnull().sum()

### Time Series Analysis - Monthly Sales Trend

In [None]:
# Parse the 'Date' column into datetime values
parsed_dates = pd.to_datetime(data['Date'])
data['Date'] = parsed_dates

# Use the parsed dates as the row index. This modifies `data` in place, so
# re-running this cell fails because 'Date' is no longer a regular column.
data.set_index(keys='Date', inplace=True)

In [None]:
# Month-end timestamps spanning the reporting window, so that months with
# no transactions still show up (as zero) in the monthly series
all_months = pd.date_range(start="2023-01-01", end="2024-01-31", freq='M')

# Total sales per calendar month, aligned to the full month range
monthly_totals = data['Total Amount'].resample('M').sum()
mon_sale = monthly_totals.reindex(all_months, fill_value=0)

# Line chart of the monthly totals
plt.figure(figsize=(10, 6))
plt.plot(mon_sale, marker='o', label='Monthly Sales')
plt.title('Monthly Sales Trends', color='green')
plt.xlabel('Date', color='red')
plt.ylabel('Sales', color='red')

# One x-axis tick per month, labelled as YYYY-MM
month_labels = [stamp.strftime('%Y-%m') for stamp in all_months]
plt.xticks(all_months, labels=month_labels, rotation=45)
plt.legend()
plt.grid(True)
plt.show()

### Time Series Analysis - Monthly Sales With 3 Month Average Trend

In [None]:
# Rolling mean over a 3-month window; the first two entries are NaN
# because the window is not yet full
rolling_window = mon_sale.rolling(window=3)
Three_month_avg = rolling_window.mean()

# Overlay the smoothed series on the raw monthly totals
plt.figure(figsize=(10, 6))
plt.plot(mon_sale, label='Monthly Sales')
plt.plot(Three_month_avg, label='3-Month Average Sales', color='orange')
plt.title('Monthly Sales with 3-Months Average', color='green')
plt.xlabel('Date', color='red')
plt.ylabel('Sales', color='red')

# One x-axis tick per month, labelled as YYYY-MM
tick_dates = pd.date_range(start='2023-01-01', end='2024-01-31', freq='M')
tick_labels = [stamp.strftime('%Y-%m') for stamp in tick_dates]
plt.xticks(tick_dates, labels=tick_labels, rotation=40)
plt.legend()
plt.grid(True)
plt.show()

### Customer & Product Analysis

#### Customer Analysis

In [None]:
# Number of rows recorded for each gender value
gender_column = data['Gender']
gender_dist = gender_column.value_counts()

# Bar chart of the counts
gender_dist.plot.bar()
plt.title('Gender Distribution', color='green')
plt.xticks(rotation=10)
plt.show()

In [None]:
# Histogram of customer ages (pandas' default binning)
ages = data['Age']
ages.plot.hist()
plt.title('Age Distribution', color='green')
plt.show()

In [None]:
# Bucket customers into labelled age bands. pd.cut builds right-closed
# intervals, so without include_lowest=True the first band would be (18, 25]
# and customers aged exactly 18 would get NaN and silently drop out of the
# totals below. include_lowest=True closes the first band on the left.
age_bins = [18, 25, 35, 45, 55, 65, 100]
age_labels = ['18-25', '26-35', '36-45', '46-55', '56-65', '65+']
data['Age Group'] = pd.cut(
    data['Age'],
    bins=age_bins,
    labels=age_labels,
    include_lowest=True,
)

# Total sales per age band. 'Age Group' is categorical; observed=False keeps
# every band on the chart (even empty ones) and makes the choice explicit
# rather than relying on the pandas default.
age_sales = data.groupby('Age Group', observed=False)['Total Amount'].sum()
age_sales.plot(kind='bar')
plt.title('Sales by Age Group', color='green')
plt.xticks(rotation=10)
plt.show()

#### Product Analysis

In [None]:
# Total sales amount for each product category
sales_by_category = data.groupby('Product Category')['Total Amount']
pop_product = sales_by_category.sum()

# Bar chart of the category totals
pop_product.plot.bar()
plt.title('Popular Products Total Sales', color='green')
plt.xticks(rotation=0)
plt.show()

In [None]:
# Total number of units purchased in each product category
quantity_by_category = data.groupby('Product Category')['Quantity']
quan_per_prod = quantity_by_category.sum()

# Bar chart of the category quantities
quan_per_prod.plot.bar()
plt.title('Quantity Purchased per Product', color='green')
plt.xticks(rotation=0)
plt.show()

In [None]:
# Count purchases for every (product category, gender) pair and pivot gender
# into columns: rows are product categories, columns are genders.
# fill_value=0 shows a missing combination as zero purchases instead of NaN.
gender_per_prod = (
    data.groupby(['Product Category', 'Gender'])
    .size()
    .unstack(fill_value=0)
)

# Grouped bar chart: one cluster per product category (x-axis), one bar per
# gender (legend). The axis label and legend title name those dimensions.
gender_per_prod.plot(kind='bar', figsize=(10, 5))
plt.xticks(rotation=0)
plt.title('Product Category Counts by Gender', color='green')
plt.xlabel("Product Category", color='red')
plt.ylabel("Number of Purchases", color='red')
plt.legend(title="Gender")
plt.show()

### Heatmap

In [None]:
# Restrict the data to the numeric feature columns
feature_columns = ['Age', 'Quantity', 'Price per Unit', 'Total Amount']
numerical_data = data[feature_columns]

# Pairwise correlation matrix (pandas default method)
corr = numerical_data.corr()

# Annotated heatmap of the correlation matrix
plt.figure(figsize=(10, 5))
sns.heatmap(
    corr,
    annot=True,
    cmap='coolwarm',
    linewidths=0.1,
)
plt.title("Correlation Heatmap of Numerical Features", color='green')
plt.show()