In [None]:
%pip install numpy matplotlib scipy pandas seaborn scikit-learn statsmodels feature-engine

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from darts import TimeSeries

In [None]:
# Load the train, meal_info and fulfilment_center_info CSV files from ./data,
# one DataFrame per file, in that order.
train_df, meal_info_df, fc_info_df = map(
    pd.read_csv,
    (
        './data/train.csv',
        './data/meal_info.csv',
        './data/fulfilment_center_info.csv',
    ),
)

In [None]:
# Report the .shape of the two lookup tables, one line each.
meal_shape_line = f'MEAL SHAPE={meal_info_df.shape}'
fc_shape_line = f'FC SHAPE={fc_info_df.shape}'
print(meal_shape_line, fc_shape_line, sep='\n')

In [None]:
# Count how many rows share each (week, center_id, meal_id) key.
key_columns = ['week', 'center_id', 'meal_id']
rows_per_key = train_df.groupby(key_columns).size()
grouped_counts = rows_per_key.reset_index(name='count')

# True when at least one key appears on more than one row.
has_duplicate_keys = (grouped_counts['count'] > 1).any()
print(has_duplicate_keys)

print(grouped_counts)

In [None]:
# Distinct weeks observed for every (center_id, meal_id) pair.
unq_weeks = train_df.groupby(['center_id', 'meal_id'])['week'].unique()

# For each pair, collect the weeks between its first and last observed week
# that have no rows. Pairs without any such weeks are left out of the dict.
gaps_by_group = {}
for pair, observed_weeks in unq_weeks.items():
    observed = set(observed_weeks)
    first_week, last_week = min(observed_weeks), max(observed_weeks)
    missing = [week for week in range(first_week, last_week + 1) if week not in observed]
    if missing:
        gaps_by_group[pair] = missing

# One summary line per pair that has at least one gap.
for (center_id, meal_id), gaps in gaps_by_group.items():
    print(f"Center ID: {center_id}, Meal ID: {meal_id}, Number of gaps: {len(gaps)}")

In [None]:
# Columns whose pairwise correlations are shown in the heatmap.
heatmap_columns = ['num_orders', 'base_price', 'checkout_price', 'emailer_for_promotion', 'homepage_featured']
selected_df = train_df[heatmap_columns]
correlations = selected_df.corr()

# Square figure; the colour scale is fixed to [-1, 1] and centred on 0.
fig, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(correlations, annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0, ax=ax)
ax.set_title('Correlation Heatmap')

fig.tight_layout()
plt.show()

In [None]:
# Attach the meal_info columns to every training row (left join on meal_id).
train_with_meal = train_df.merge(meal_info_df, on='meal_id', how='left')

# Total num_orders per cuisine, largest total first.
orders_by_cuisine = train_with_meal.groupby('cuisine').agg({'num_orders': 'sum'})
cuisine_orders = orders_by_cuisine.reset_index().sort_values('num_orders', ascending=False)

# Bar chart of the per-cuisine totals.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=cuisine_orders, x='cuisine', y='num_orders', ax=ax)
ax.set_title('Number of Orders by Cuisine')
ax.set_xlabel('Cuisine')
ax.set_ylabel('Number of Orders')
fig.tight_layout()
plt.show()

In [None]:
# Scatter of num_orders against checkout_price, one point per training row.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(train_df['checkout_price'], train_df['num_orders'], alpha=0.5)
ax.set_xlabel('Checkout Price')
ax.set_ylabel('Number of Orders')
ax.set_title('Scatter Plot: Number of Orders vs Checkout Price')
ax.grid(True)

In [None]:
# Total num_orders per week across all rows of train_df.
orders_per_week = train_df.groupby('week')['num_orders'].sum()
weekly_orders = orders_per_week.reset_index()

# Line plot of the weekly totals with a marker on every week.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(weekly_orders['week'], weekly_orders['num_orders'], marker='o')

ax.set_title('Number of Orders per Week')
ax.set_xlabel('Week')
ax.set_ylabel('Number of Orders')
ax.grid(True, linestyle='--', alpha=0.7)

# Tilt the x tick labels so they do not overlap.
plt.xticks(rotation=45)

fig.tight_layout()
plt.show()


In [None]:
# Pick one (meal_id, center_id) pair to inspect as a single time series.
# A fixed random_state makes the pick repeatable, so the cells that analyse
# `ts` show the same series after every Restart & Run All.
SAMPLE_SEED = 42
random_combination = (
    train_df[['meal_id', 'center_id']]
    .drop_duplicates()
    .sample(n=1, random_state=SAMPLE_SEED)
    .values[0]
)
random_meal_id, random_center_id = random_combination

# Keep only the rows of that pair, ordered by week.
pair_mask = (train_df['meal_id'] == random_meal_id) & (train_df['center_id'] == random_center_id)
filtered_df = train_df[pair_mask].sort_values('week')

# Build a darts series indexed by the integer week column (step 1); missing
# weeks are requested to be inserted and filled with 0 orders.
ts = TimeSeries.from_dataframe(filtered_df, 'week', 'num_orders', freq=1, fill_missing_dates=True, fillna_value=0)

In [None]:

from darts import TimeSeries
from darts.utils.statistics import check_seasonality, extract_trend_and_seasonality
from darts.utils.utils import ModelMode, TrendMode

# The series has one step per week, so a period of 4 steps is a 4-week
# (roughly monthly) cycle rather than a "weekly" seasonality.
SEASONAL_PERIOD = 4

# check_seasonality returns a (is_seasonal, period) tuple. The original
# variable name is kept in case a later cell reads it.
weekly_seasonality = check_seasonality(ts, m=SEASONAL_PERIOD)
is_seasonal, detected_period = weekly_seasonality
print(f"Seasonality with a {SEASONAL_PERIOD}-week period: {is_seasonal}")

# Additive MSTL decomposition using the same period that was tested above.
trend, seasonality = extract_trend_and_seasonality(ts, SEASONAL_PERIOD, model=ModelMode.ADDITIVE, method='MSTL')

# Overlay the raw series and its two extracted components.
ts.plot(label='Data')
trend.plot(label='Trend')
seasonality.plot(label='Seasonality')


In [None]:
from darts.utils.statistics import stationarity_tests

# Groups with fewer rows than this are skipped (same cut-off as "3 or less").
MIN_OBSERVATIONS = 4

# One group per (meal_id, center_id) pair found in train_df.
grouped = train_df.groupby(['meal_id', 'center_id'])

stationary_count = 0
total_count = 0

for (meal_id, center_id), group in grouped:
    # Check the length first so no TimeSeries is built for groups that are skipped.
    if len(group) < MIN_OBSERVATIONS:
        continue

    # A loop-local name is used on purpose: rebinding `ts` here would replace
    # the sampled series that the seasonality cell works on.
    group_ts = TimeSeries.from_dataframe(group, 'week', 'num_orders', freq=1, fill_missing_dates=True, fillna_value=0)

    # stationarity_tests gives a single truthy/falsy verdict per series
    # (per the darts docs it combines the ADF and KPSS tests).
    if stationarity_tests(group_ts):
        stationary_count += 1
    total_count += 1

# Share of tested series judged stationary; guarded so an empty selection
# does not raise ZeroDivisionError.
if total_count:
    percentage_stationary = (stationary_count / total_count) * 100
    print(f'Percentage of stationary time series: {percentage_stationary:.2f}%')
else:
    percentage_stationary = float('nan')
    print('No time series had enough observations to be tested for stationarity.')
