### 1. Prepare Data for Analysis

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
from sklearn.preprocessing import StandardScaler
import plotly.express as px

In [None]:
# Load the raw sales records; the path is relative to the notebook's working directory.
sales = pd.read_csv('Sales.csv')
# Render the table for a first visual check of the columns and values.
display(sales)

In [None]:
# Column names, dtypes and non-null counts: a quick check for missing values.
sales.info()

In [None]:
# Distinct values present in the State column.
sales['State'].unique()

In [None]:
# Distinct values present in the Group column.
sales['Group'].unique()

In [None]:
# Distinct values present in the Time column.
sales['Time'].unique()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
sales.describe()

In [None]:
# Pairwise correlation matrix; numeric_only=True leaves the non-numeric columns out.
sales.corr(numeric_only=True)

### 2. Normalize the Data for Analysis

In [None]:
# First attempt: scale Unit and Sales with scikit-learn's StandardScaler.
# The author noted this did not behave as expected -- presumably because
# StandardScaler standardizes each column (zero mean, unit variance) instead
# of mapping it onto [0, 1]. The result is only printed in this cell; it is
# not written back onto `sales`.
scaler = StandardScaler()
sales_dataonly = sales[['Unit', 'Sales']]
normalize_data= scaler.fit_transform(sales_dataonly)
print(normalize_data)

In [None]:
# Alternative approach: min-max scale the data onto the [0, 1] range,
#     scaled = (x - min) / (max - min)
# and keep the results as new columns next to the raw values.
# NOTE: a constant column (max == min) makes the denominator zero, so its
# scaled values would not be meaningful.
sales['ScaledUnits'] = (sales['Unit'] - sales['Unit'].min()) / (sales['Unit'].max() - sales['Unit'].min())
sales['ScaledSales'] = (sales['Sales'] - sales['Sales'].min()) / (sales['Sales'].max() - sales['Sales'].min())
display(sales[['Unit', 'ScaledUnits', 'Sales', 'ScaledSales']])
# Sanity check: the scaled columns should span exactly 0 to 1.
# The f-strings use double quotes on the outside so the single-quoted column
# keys inside the braces also parse on Python versions before 3.12.
print(f"The max Scaled Unit is {sales['ScaledUnits'].max()}; the min Scaled Unit is {sales['ScaledUnits'].min()}.")
print(f"The max Scaled Sales is {sales['ScaledSales'].max()}; the min Scaled Sales is {sales['ScaledSales'].min()}.")

### 3. Visualize Overall Trends

In [None]:
# Parse the Date strings (day-abbreviated month-two digit year) into real
# timestamps, overwriting the column in place.
sales['Date'] = pd.to_datetime(arg=sales['Date'], format='%d-%b-%y')
# Date-indexed copy of the table, used by the time-based resampling below.
dated_sales = sales.set_index(keys='Date')
dated_sales

In [None]:
# Sales summed over consecutive 7-day bins, drawn as a line chart.
weekly_sales = dated_sales['Sales'].resample('7D').sum()
fig1 = px.line(data_frame=weekly_sales, title='3-Month Timeline of Sales')
fig1.show()

# Same 7-day binning for the number of units sold.
weekly_units = dated_sales['Unit'].resample('7D').sum()
fig2 = px.line(data_frame=weekly_units, title='3-Month Timeline of Units Sold')
fig2.show()

### 4. Analyze Monthly Data

In [None]:
# Month-end ('ME') totals of the Sales column.
monthly_sales = dated_sales['Sales'].resample('ME').sum()
display(monthly_sales)

# Month-end ('ME') totals of the Unit column.
monthly_units = dated_sales['Unit'].resample('ME').sum()
display(monthly_units)

In [None]:
# Bar chart of the monthly Sales totals computed above.
fig3 = px.bar(data_frame=monthly_sales, title='Monthly Totals for Sales')
fig3.show()

# Bar chart of the monthly Unit totals computed above.
fig4 = px.bar(data_frame=monthly_units, title='Monthly Totals for Units Sold')
fig4.show()

### 5. Describe Data

In [None]:
# Summary statistics for the whole period; by this point `sales` also carries
# the ScaledUnits / ScaledSales columns and a parsed Date column.
sales.describe()

In [None]:
# Keep only the rows whose Date index falls in month 10, then summarize them.
sales_oct = dated_sales.loc[dated_sales.index.month == 10]
sales_oct.describe()

In [None]:
# Keep only the rows whose Date index falls in month 11, then summarize them.
sales_nov = dated_sales.loc[dated_sales.index.month == 11]
sales_nov.describe()

In [None]:
# Keep only the rows whose Date index falls in month 12, then summarize them.
sales_dec = dated_sales.loc[dated_sales.index.month == 12]
sales_dec.describe()

### 6. Analyze Unit Data

In [None]:
# Add the calendar month number of each row as a column so it can serve as a
# plotting / grouping key in the cells that follow.
dated_sales['Month'] = dated_sales.index.month

# One box per month for the Unit values.
sns.boxplot(
    data=dated_sales,
    x='Month',
    y='Unit',
    hue='Month',
    palette='colorblind',
).set_title('Distribution of Units by Month')
plt.show()

# One box per month for the Sales values.
sns.boxplot(
    data=dated_sales,
    x='Month',
    y='Sales',
    hue='Month',
    palette='Set2',
).set_title('Distribution of Sales by Month')
plt.show()

### 7. Explore Monthly Plots and Analysis

In [None]:
# Total Sales per calendar month (relies on the Month column added earlier).
dated_sales.groupby('Month')['Sales'].sum()

In [None]:
# Total Unit count per calendar month (relies on the Month column added earlier).
dated_sales.groupby('Month')['Unit'].sum()

In [None]:
# Per-month Sales totals as a flat two-column frame (Month, Sales),
# then one bar per month.
grouped_sales = dated_sales.groupby('Month', as_index=False)['Sales'].sum()
sns.barplot(data=grouped_sales, x='Month', y='Sales', hue='Month', palette='Set2')
plt.show()

In [None]:
# Per-month Unit totals as a flat two-column frame (Month, Unit),
# then one bar per month.
grouped_units = dated_sales.groupby('Month', as_index=False)['Unit'].sum()
sns.barplot(data=grouped_units, x='Month', y='Unit', hue='Month', palette='colorblind')
plt.show()

In [None]:
# Sales against month number, drawn from the row-level data.
# NOTE(review): with no estimator given, lineplot aggregates the rows at each
# month with its default (the mean) -- confirm that is what "Consolidated"
# is meant to show.
sns.lineplot(
    data=dated_sales,
    x='Month',
    y='Sales',
).set_title('Consolidated Sales Over 3-Month Span')
plt.show()

### 8. Analyze Statewise Sales in the United States

In [None]:
# Unit and Sales totals per State, with State kept as an ordinary column.
state_sales = dated_sales.groupby('State', as_index=False)[['Unit', 'Sales']].sum()
display(state_sales)

In [None]:
# One bar per state: total Sales.
sns.barplot(
    data=state_sales,
    x='State',
    y='Sales',
    hue='State',
    palette='Set2',
).set_title('Total Sales by State')
plt.show()

# One bar per state: total units sold.
sns.barplot(
    data=state_sales,
    x='State',
    y='Unit',
    hue='State',
    palette='colorblind',
).set_title('Total Units Sold by State')
plt.show()

### 9. Conduct Groupwise Analysis

In [None]:
# Unit and Sales totals per Group, with Group kept as an ordinary column.
group_sales = dated_sales.groupby('Group', as_index=False)[['Unit', 'Sales']].sum()
display(group_sales)

In [None]:
# Sales over time with one line per Group; 'Date' is taken from the index of
# dated_sales, and errorbar=None suppresses the uncertainty band.
sns.lineplot(
    data=dated_sales,
    x='Date',
    y='Sales',
    hue='Group',
    errorbar=None,
).set_title('Sales by Group')
plt.show()

In [None]:
# One bar per group: total Sales.
sns.barplot(
    data=group_sales,
    x='Group',
    y='Sales',
    hue='Group',
    palette='Set2',
).set_title('Total Sales by Group')
plt.show()

# One bar per group: total units sold.
sns.barplot(
    data=group_sales,
    x='Group',
    y='Unit',
    hue='Group',
    palette='colorblind',
).set_title('Total Units Sold by Group')
plt.show()

### 10. Explore Timewise Analysis

In [None]:
# Move the Date index back into an ordinary column so it can be passed as x.
dated_sales2 = dated_sales.reset_index()

# One line per time of day. lineplot aggregates rows sharing the same date
# with the mean by default; estimator='sum' makes each point the total for
# that date, which is what the "Total ..." titles promise.
sns.lineplot(x='Date', y='Sales', hue='Time', palette='Set2', data=dated_sales2,
             estimator='sum', errorbar=None)
plt.title('Total Sales by Time of Day')
plt.show()
sns.lineplot(x='Date', y='Unit', hue='Time', palette='colorblind', data=dated_sales2,
             estimator='sum', errorbar=None)
plt.title('Total Units Sold by Time of Day')
plt.show()

In [None]:
# Unit and Sales totals per Time value, with Time kept as an ordinary column.
sales_time = dated_sales.groupby('Time', as_index=False)[['Unit', 'Sales']].sum()
display(sales_time)

In [None]:
# One bar per time of day: total units sold.
sns.barplot(
    data=sales_time,
    x='Time',
    y='Unit',
    hue='Time',
    palette='colorblind',
)
plt.show()

# One bar per time of day: total Sales.
sns.barplot(
    data=sales_time,
    x='Time',
    y='Sales',
    hue='Time',
    palette='Set2',
)
plt.show()