In [1]:
import numpy as np 
import pandas as pd 
import plotly.express as px
from scipy import stats as st  
import os
# Print the full path of every file found anywhere under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast/items.csv
/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast/resturants.csv
/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast/sales_train.csv
/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast/cam/2020_01_04.jpg
/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast/cam/2019_07_15.jpg
/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast/cam/2021_09_13.jpg
/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast/cam/2021_06_15.jpg
/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast/cam/2019_05_31.jpg
/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast/cam/2021_12_17.jpg
/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast/cam/2020_04_07.

In [2]:
# Load the tabular data.
# All three CSV files sit in the same dataset folder, so the folder is spelled
# out once; change DATA_DIR to run the notebook against a copy stored elsewhere.
DATA_DIR = "/kaggle/input/demand-forecasting-with-tabular-textual-images/beach_demand_forecast"
df_sales = pd.read_csv(f"{DATA_DIR}/sales_train.csv")
df_items = pd.read_csv(f"{DATA_DIR}/items.csv")
# "resturants" is the spelling used by the file on disk; do not "correct" it.
df_resturants = pd.read_csv(f"{DATA_DIR}/resturants.csv")

In [3]:
# Convert the date column to datetime format.
# errors='coerce' turns anything unparseable into NaT instead of raising, so
# count those values and report them rather than losing rows silently.
parsed_dates = pd.to_datetime(df_sales['date'], errors='coerce')
n_unparseable = int((parsed_dates.isna() & df_sales['date'].notna()).sum())
if n_unparseable:
    print(f"Warning: {n_unparseable} date value(s) could not be parsed and were set to NaT.")
df_sales['date'] = parsed_dates

In [4]:
# Add weekday and week columns.
df_sales['weekday'] = df_sales['date'].dt.day_name()
# Weeks are numbered from the earliest date in the data (that date is week 0),
# not from the calendar week.
start_date = df_sales['date'].min()
days_since_start = (df_sales['date'] - start_date).dt.days
# Floor division stays in whole days (no float round-trip) and gives the same
# week numbers as truncation because the offsets are never negative. Rows whose
# date is NaT end up with a missing week instead of failing on an integer cast.
df_sales['week'] = days_since_start // 7

In [5]:
# Sum item_count per date and draw the daily totals as a line chart.
daily_totals = df_sales.groupby('date')['item_count'].sum()
df_plot = daily_totals.reset_index()
fig = px.line(
    df_plot,
    x='date',
    y='item_count',
    title='Total Sales by Date',
)
fig.show()

In [6]:
# Calculate and plot total sales by weekday.
day_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
df_plot = (
    df_sales.groupby('weekday')['item_count'].sum()
    # reindex puts the bars in calendar order (groupby sorts alphabetically) and,
    # unlike .loc[day_order], does not raise KeyError when a weekday never occurs
    # in the data: such a day is shown with a total of 0.
    .reindex(day_order, fill_value=0)
    .rename_axis('weekday')
    .reset_index()
)
fig = px.bar(df_plot, x='weekday', y='item_count', title='Total Sales by Day of Week')
fig.show()

In [7]:
# Restrict the sales to one calendar year and plot that year's daily totals as bars.
SINGLE_YEAR = 2020
in_single_year = df_sales['date'].dt.year == SINGLE_YEAR
df_sales_single_year = df_sales.loc[in_single_year]
df_plot = df_sales_single_year.groupby('date', as_index=False)['item_count'].sum()
fig = px.bar(
    df_plot,
    x='date',
    y='item_count',
    facet_col_spacing=0,
    title=f'Total Sales by Day {SINGLE_YEAR}',
)
# Remove the gaps between bars so consecutive days touch.
fig.update_layout(bargap=0.0, bargroupgap=0.0)
fig.show()

In [8]:
# Smooth the totals currently held in df_plot with a 7-row rolling mean and plot it.
# The window counts rows, not calendar days; the first 6 rows have no full window
# and therefore get NaN.
seven_row_window = df_plot['item_count'].rolling(7)
df_plot['moving_average'] = seven_row_window.mean()
fig = px.line(
    df_plot,
    x='date',
    y='moving_average',
    title='Moving Average of Sales',
)
fig.show()

In [9]:
# Label each row of the plotting frame with the name of its weekday.
df_plot = df_plot.assign(weekday=df_plot['date'].dt.day_name())

In [10]:
# Compare the totals on Mondays against those on Tuesdays with a two-sample t-test.
# weekday_means holds the mean total per weekday for reference; the test itself
# works on the raw Monday and Tuesday rows of df_plot.
weekday_means = df_plot.groupby('weekday')['item_count'].mean()
monday_sales = df_plot.loc[df_plot['weekday'] == 'Monday', 'item_count']
tuesday_sales = df_plot.loc[df_plot['weekday'] == 'Tuesday', 'item_count']
# NOTE(review): ttest_ind assumes equal variances unless equal_var=False is
# passed; confirm that assumption is acceptable for these two samples.
ttest_results = st.ttest_ind(monday_sales, tuesday_sales)
print(ttest_results)

TtestResult(statistic=-2.370576301525798, pvalue=0.019639492670345725, df=102.0)
