In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    # Build the full path of every file in this folder, then print one per line.
    full_paths = (os.path.join(folder, file_name) for file_name in file_names)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Import the plotting libraries
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the shopping-trends CSV into `data`, the DataFrame every later cell works on.
shopping_trends_csv = '../input/customer-shopping-trends-dataset/shopping_trends.csv'
data = pd.read_csv(shopping_trends_csv)

In [None]:
# Preview the first five rows to understand the structure of the data.
# A bare last expression lets the notebook render the frame as a rich table
# instead of the wrapped plain text that print() produces.
data.head()

In [None]:
# Sales by season: sum 'Purchase Amount (USD)' within each 'Season' and turn the
# grouped result back into a flat two-column DataFrame that seaborn can plot.
seasonal_sales = (
    data
    .groupby('Season')['Purchase Amount (USD)']
    .sum()
    .reset_index()
)

In [None]:
# Bar chart of the total purchase amount per season.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=seasonal_sales, x='Season', y='Purchase Amount (USD)', ax=ax)
ax.set_title('Seasonal Sales Trends')
ax.set_xlabel('Season')
ax.set_ylabel('Total Purchase Amount (USD)')
plt.show()

In [None]:
# Sales by product category: total purchase amount for each 'Category'.
category_sales = (
    data
    .groupby('Category')['Purchase Amount (USD)']
    .sum()
    .reset_index()
)

# Bar chart of the category totals; x labels are tilted so they do not collide.
plt.figure(figsize=(10, 6))
ax = sns.barplot(data=category_sales, x='Category', y='Purchase Amount (USD)')
ax.set(
    title='Category-wise Sales Trends',
    xlabel='Category',
    ylabel='Total Purchase Amount (USD)',
)
ax.tick_params(axis='x', rotation=45)
plt.show()

In [None]:
# Sales by location: total purchase amount for each 'Location'.
location_sales = (
    data
    .groupby('Location')['Purchase Amount (USD)']
    .sum()
    .reset_index()
)

# Wide figure with vertical tick labels so the location names on the x axis stay readable.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=location_sales, x='Location', y='Purchase Amount (USD)', ax=ax)
ax.set_title('Location-wise Sales Trends')
ax.set_xlabel('Location')
ax.set_ylabel('Total Purchase Amount (USD)')
ax.tick_params(axis='x', rotation=90)
plt.show()

In [None]:
# Sales by gender: total purchase amount for each 'Gender'.
gender_sales = (
    data
    .groupby('Gender')['Purchase Amount (USD)']
    .sum()
    .reset_index()
)

# Bar chart of the per-gender totals.
plt.figure(figsize=(8, 6))
ax = sns.barplot(data=gender_sales, x='Gender', y='Purchase Amount (USD)')
ax.set(
    title='Gender-wise Sales Trends',
    xlabel='Gender',
    ylabel='Total Purchase Amount (USD)',
)
plt.show()


In [None]:
# Analyze sales trends by age group
# Bin edges are left-closed (right=False) so each label matches its interval:
# '<20' = [0, 20), '20-30' = [20, 30), ..., '60+' = [60, inf).
# With pandas' default right-closed bins a 20-year-old would be labelled '<20'
# and a 60-year-old '50-60', contradicting the labels.
age_bins = [0, 20, 30, 40, 50, 60, float('inf')]  # Define age bins (open-ended top bin)
age_labels = ['<20', '20-30', '30-40', '40-50', '50-60', '60+']  # Labels for age groups
data['Age Group'] = pd.cut(data['Age'], bins=age_bins, labels=age_labels, right=False)

# Group the data by age group and calculate the total purchase amount.
# 'Age Group' is categorical: observed=False is passed explicitly so empty groups
# still appear (as 0) and pandas does not warn about the changing default.
age_group_sales = (
    data
    .groupby('Age Group', observed=False)['Purchase Amount (USD)']
    .sum()
    .reset_index()
)

# Create a bar plot to visualize age group-wise sales (bars kept in age order)
plt.figure(figsize=(8, 6))
sns.barplot(x='Age Group', y='Purchase Amount (USD)', data=age_group_sales, order=age_labels)
plt.title('Age Group-wise Sales Trends')
plt.xlabel('Age Group')
plt.ylabel('Total Purchase Amount (USD)')
plt.show()


In [None]:
# Sales by payment method: total purchase amount for each 'Payment Method'.
payment_method_sales = (
    data
    .groupby('Payment Method')['Purchase Amount (USD)']
    .sum()
    .reset_index()
)

# Bar chart of the per-method totals with tilted x labels.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=payment_method_sales, x='Payment Method', y='Purchase Amount (USD)', ax=ax)
ax.set_title('Payment Method-wise Sales Trends')
ax.set_xlabel('Payment Method')
ax.set_ylabel('Total Purchase Amount (USD)')
ax.tick_params(axis='x', rotation=45)
plt.show()


In [None]:
# Histogram (9 bins) of the review ratings with a KDE curve overlaid.
review_ratings = data['Review Rating']
plt.figure(figsize=(8, 6))
ax = sns.histplot(review_ratings, bins=9, kde=True)
ax.set(
    title='Distribution of Review Ratings',
    xlabel='Review Rating',
    ylabel='Frequency',
)
plt.show()

In [None]:
# Scatter plot: one point per row, review rating on x against purchase amount on y.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=data, x='Review Rating', y='Purchase Amount (USD)', ax=ax)
ax.set_title('Relationship between Review Ratings and Purchase Amount')
ax.set_xlabel('Review Rating')
ax.set_ylabel('Purchase Amount (USD)')
plt.show()

In [None]:

# Count how many rows fall under each 'Frequency of Purchases' value
# (value_counts returns them ordered from most to least common).
frequency_counts = data['Frequency of Purchases'].value_counts()

# Bar chart of those counts: category names on x, row counts on y.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=frequency_counts.index, y=frequency_counts.values, ax=ax)
ax.set_title('Frequency of Purchases')
ax.set_xlabel('Frequency')
ax.set_ylabel('Count')
ax.tick_params(axis='x', rotation=45)
plt.show()

Forecasting Part

In [None]:
import pandas as pd


# Store "Season" with pandas' category dtype instead of plain strings
data['Season'] = data['Season'].astype('category')



In [None]:
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error

# Seasonal-naive forecast of purchase amounts built on an STL decomposition.
# The target column is 'Purchase Amount (USD)', the name used everywhere else in
# this notebook; 'Purchase_Amount_USD' does not exist and raised a KeyError.
purchase_amounts = data['Purchase Amount (USD)'].reset_index(drop=True)

# Split the data into training and testing sets (first 80% / last 20%, in row order)
train_size = int(len(purchase_amounts) * 0.8)
train, test = purchase_amounts.iloc[:train_size], purchase_amounts.iloc[train_size:]

# The series has a plain integer index, so STL cannot infer the cycle length and
# `period` has to be given explicitly.
# NOTE(review): 4 assumes consecutive rows cycle through the four seasons; the rows
# may not be time-ordered at all -- confirm, or aggregate to a real time index first.
seasonal_period = 4

# Fit an STL model to capture seasonality
stl = sm.tsa.STL(train, period=seasonal_period, seasonal=13)  # Adjust the seasonal parameter as needed
stl_fit = stl.fit()

# Make predictions on the testing set using only information from the training set:
# hold the last fitted trend level constant and repeat the last fitted seasonal cycle.
# (The earlier `test - seasonal` used the test values themselves and subtracted
# Series with disjoint index labels, which yields all-NaN "predictions".)
seasonal_component = stl_fit.seasonal
last_cycle = np.asarray(seasonal_component)[-seasonal_period:]
n_cycles = -(-len(test) // seasonal_period)  # ceiling division
seasonal_forecast = np.tile(last_cycle, n_cycles)[:len(test)]
last_trend_level = np.asarray(stl_fit.trend)[-1]
predictions = pd.Series(last_trend_level + seasonal_forecast, index=test.index)

# Evaluate the model
mse = mean_squared_error(test, predictions)
print(f'Mean Squared Error: {mse}')
