In [2]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import re

# Load the raw CSV from the Data/ directory into a DataFrame
asmr_df = pd.read_csv('Data/dac_maven.csv')


In [4]:
# Cache the raw frame as a pickle inside Data/. The following cell reads
# 'Data/maven_toys.pkl', so the file has to be written to that same path;
# writing to the working directory left the reader without a file on a fresh run.
asmr_df.to_pickle('Data/maven_toys.pkl')

In [7]:
# Read the pickled DataFrame back from Data/maven_toys.pkl
maven_df = pd.read_pickle('Data/maven_toys.pkl')

In [8]:
# Preview the first rows of the reloaded frame
maven_df.head()

Unnamed: 0,Store_Location,Store_Name,Product_Category,Product_Price,Product_Cost,Date,Units,Sale_Payment_Type,Product_Name,Customer_Satisfaction,Store_City,Sale_ID,Store_ID,Product_ID,Store_Open_Date,Customer_Reference
0,Airport,Maven Toys Ciudad de Mexico 2,Toys,$39.99,$34.99,2017-05-31,1,cash,Lego Bricks,9.0,Cuidad de Mexico,161215,31,18,2012-05-04,Social Media Ad
1,Airport,Maven Toys Ciudad de Mexico 2,Electronics,$25.99,$20.99,2018-06-04,1,cash,Toy Robot,7.0,Cuidad de Mexico,656852,31,34,2012-05-04,Word of Mouth
2,Airport,Maven Toys Ciudad de Mexico 2,Games,$10.99,$5.99,2017-06-05,1,credit,Glass Marbles,10.0,Cuidad de Mexico,168348,31,14,2012-05-04,Social Media Ad
3,Airport,Maven Toys Ciudad de Mexico 2,Electronics,$20.99,$14.99,2018-03-09,1,cash,Gamer Headphones,9.0,Cuidad de Mexico,514679,31,13,2012-05-04,Word of Mouth
4,Airport,Maven Toys Ciudad de Mexico 2,Games,$6.99,$3.99,2018-02-22,1,credit,Deck Of Cards,8.0,Cuidad de Mexico,494770,31,8,2012-05-04,Social Media Ad


In [None]:
# Import necessary libraries
import pandas as pd
import gdown

# Google Drive source file and the local file name it is saved under
file_id = '1aCAQpYi8yunjHlQtqPjodSwfxDGiJ9cL'
url = f'https://drive.google.com/uc?id={file_id}'
csv_path = 'maven_toys_transactions.csv'

# Fetch the CSV from Drive (quiet mode off)
gdown.download(url, csv_path, quiet=False)

# Read the downloaded CSV into a DataFrame
data = pd.read_csv(csv_path)

# Preview the first rows
data.head()

In [None]:
# Check the shape of the DataFrame as (rows, columns)
data.shape

In [None]:
# Summarise each column's dtype and non-null count to spot missing values
data.info()

In [None]:
# Fill missing values in 'Customer_Satisfaction' with the column median.
# The result is assigned back to the column rather than calling
# fillna(..., inplace=True) on the selected Series: that form is chained
# assignment, which pandas 2.x warns about and which no longer updates `data`
# once Copy-on-Write is enabled.
satisfaction_median = data['Customer_Satisfaction'].median()
data['Customer_Satisfaction'] = data['Customer_Satisfaction'].fillna(satisfaction_median)

# Check if there are any missing values left
data.isnull().sum()

In [None]:
# Parse both date columns into datetime64 values
for date_col in ('Date', 'Store_Open_Date'):
    data[date_col] = pd.to_datetime(data[date_col])

# Confirm the resulting dtypes
data.dtypes

In [None]:
# Total units per product, then keep the ten largest totals
units_by_product = data.groupby('Product_Name')['Units'].sum()
top_products = units_by_product.sort_values(ascending=False).head(10)
top_products

In [None]:
# Rank product categories by total units sold, largest first
units_by_category = data.groupby('Product_Category')['Units'].sum()
top_categories = units_by_category.sort_values(ascending=False)
top_categories

In [None]:
# Total units sold per 'Store_Location' value, largest first.
# NOTE(review): this groups by 'Store_Location', not by city, so on its own it
# does not separate Guadalajara from other cities.
units_by_location = data.groupby('Store_Location')['Units'].sum()
area_sales = units_by_location.sort_values(ascending=False)
area_sales

In [None]:
# List the distinct values in the 'Store_Name' column
data['Store_Name'].unique()

In [None]:
# Extract the city name from the 'Store_Name' column.
# Store names have the form '<word> <word> <city> <store number>', e.g.
# 'Maven Toys Ciudad de Mexico 2', and the city itself can span several words.
# Taking only the third space-separated token would collapse such a name to
# 'Ciudad', so capture everything after the first two words and drop the
# optional trailing store number instead. Single-word cities ('Guadalajara')
# come out exactly as before.
data['City'] = data['Store_Name'].str.extract(
    r'^\S+\s+\S+\s+(.+?)(?:\s+\d+)?$', expand=False
)

# Compare the sales of the stores in Guadalajara to the sales of stores in other cities
city_sales = data.groupby('City')['Units'].sum().sort_values(ascending=False)
city_sales

In [None]:
# Restrict to Guadalajara rows, then rank its stores by total units sold
is_guadalajara = data['City'].eq('Guadalajara')
guadalajara_stores = data[is_guadalajara]
guadalajara_sales = (
    guadalajara_stores.groupby('Store_Name')['Units']
    .sum()
    .sort_values(ascending=False)
)
guadalajara_sales

In [None]:
# Mean of the customer satisfaction scores across all rows
satisfaction_scores = data['Customer_Satisfaction']
avg_satisfaction = satisfaction_scores.mean()
avg_satisfaction

In [None]:
# Import necessary libraries
import matplotlib.pyplot as plt

# Remove the dollar sign from the 'Product_Price' column and convert it to float.
# regex=False makes '$' a literal character. Interpreted as a regex it is the
# end-of-string anchor, so on pandas versions where str.replace defaults to
# regex=True the sign was never removed and astype(float) raised.
# The dtype guard keeps the cell re-runnable: after the first run the column is
# already numeric and no longer exposes the .str accessor.
if not pd.api.types.is_numeric_dtype(data['Product_Price']):
    data['Product_Price'] = (
        data['Product_Price'].str.replace('$', '', regex=False).astype(float)
    )

# Create a scatter plot of 'Product_Price' and 'Customer_Satisfaction'
plt.figure(figsize=(10, 6))
plt.scatter(data['Product_Price'], data['Customer_Satisfaction'], alpha=0.5)
plt.title('Customer Satisfaction vs Product Price')
plt.xlabel('Product Price')
plt.ylabel('Customer Satisfaction')
plt.show()

In [None]:
# Mean satisfaction score per payment type, highest first
mean_by_payment = data.groupby('Sale_Payment_Type')['Customer_Satisfaction'].mean()
payment_satisfaction = mean_by_payment.sort_values(ascending=False)
payment_satisfaction

In [None]:
# Bucket every row into the calendar month of its 'Date'
data['YearMonth'] = data['Date'].dt.to_period('M')

# Units sold per month
monthly_sales = data.groupby('YearMonth')['Units'].sum()

# Line chart of the monthly unit totals
fig, ax = plt.subplots(figsize=(10, 6))
monthly_sales.plot(kind='line', ax=ax)
ax.set_title('Total Monthly Sales')
ax.set_xlabel('Month')
ax.set_ylabel('Total Sales')
plt.show()

In [None]:
# Units per (month, category), pivoted so each category becomes a column
category_sales = (
    data.groupby(['YearMonth', 'Product_Category'])['Units']
    .sum()
    .unstack()
)

# One line per product category on a shared axis
fig, ax = plt.subplots(figsize=(10, 6))
category_sales.plot(kind='line', ax=ax)
ax.set_title('Monthly Sales by Product Category')
ax.set_xlabel('Month')
ax.set_ylabel('Total Sales')
plt.show()

In [None]:
# Units per (month, city), pivoted so each city becomes a column
units_by_month_city = data.groupby(['YearMonth', 'City'])['Units'].sum()
city_sales = units_by_month_city.unstack()

# One line per city on a shared axis
fig, ax = plt.subplots(figsize=(10, 6))
city_sales.plot(kind='line', ax=ax)
ax.set_title('Monthly Sales by City')
ax.set_xlabel('Month')
ax.set_ylabel('Total Sales')
plt.show()

In [None]:
# Scatter of units per row (x) against the satisfaction score (y)
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(data['Units'], data['Customer_Satisfaction'], alpha=0.5)
ax.set_title('Customer Satisfaction vs Units Sold')
ax.set_xlabel('Units Sold')
ax.set_ylabel('Customer Satisfaction')
plt.show()