# Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
# Default Plotly renderer used by every fig.show() call in this notebook.
pio.renderers.default = "notebook_connected"
import warnings 
warnings.filterwarnings('ignore')


from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR  
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor

In [None]:
# Load the retail inventory CSV from the Kaggle input directory.
df = pd.read_csv(r'/kaggle/input/retail-store-inventory-forecasting-dataset/retail_store_inventory.csv')

In [None]:
# Preview the first rows.
df.head()

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Number of missing values per column.
df.isna().sum()

In [None]:
# Number of rows that duplicate an earlier row.
df.duplicated().sum()

In [None]:
# Remove the date and identifier columns in place; they are not used below.
unused_columns = ['Date', 'Store ID', 'Product ID']
df.drop(columns=unused_columns, inplace=True)

In [None]:
# Shorter column names; the remaining cells refer to these.
short_names = {
    'Inventory Level': 'Inventory',
    'Units Sold': 'Sales',
    'Units Ordered': 'Orders',
    'Demand Forecast': 'Demand',
    'Weather Condition': 'Weather',
    'Holiday/Promotion': 'Promotion',
    'Competitor Pricing': 'Competitor Price',
}
df = df.rename(columns=short_names)

In [None]:
# One horizontal box plot per numeric column, titled with the column name.
numarical_columns = [
    'Inventory', 'Sales', 'Orders',
    'Price', 'Discount', 'Competitor Price',
]

for col in numarical_columns:
    # update_layout returns the same figure, so the calls can be chained.
    fig = px.box(df, x=col).update_layout(title_text=col, width=800, height=400)
    fig.show()

In [None]:
# Row counts per Category as a two-column frame ('Category', 'count').
# Both column names are set explicitly: a bare reset_index() only produces
# a 'count' column on pandas >= 2.0 (older versions yield 'index'/'Category').
CategoryCounts = (
    df['Category']
    .value_counts()
    .rename_axis('Category')
    .reset_index(name='count')
)
CategoryCounts

In [None]:
# Bar chart of the per-category row counts.
fig = px.bar(CategoryCounts,
             x='Category',
             y='count',
             # The bold tag is closed with "</b>" (it was a second "<b>").
             title='<b>Distribution of Category by counts</b>',
             text_auto=True,
             width=1000,
             color_discrete_sequence=['#17becf']
             )
fig.show()

In [None]:
# Row counts per Region as a two-column frame ('Region', 'count').
# Both column names are set explicitly: a bare reset_index() only produces
# a 'count' column on pandas >= 2.0 (older versions yield 'index'/'Region').
RegionCounts = (
    df['Region']
    .value_counts()
    .rename_axis('Region')
    .reset_index(name='count')
)
RegionCounts

In [None]:
# Bar chart of the per-region row counts.
fig = px.bar(RegionCounts,
             x='Region',
             y='count',
             # The bold tag is closed with "</b>" (it was a second "<b>").
             title='<b>Distribution of Region by counts</b>',
             text_auto=True,
             width=1000,
             color_discrete_sequence=['#bcbd22']
             )
fig.show()

In [None]:
# Summary statistics of the Inventory column; the bare name on the last
# line renders the result as the cell output.
InventoryDes = df['Inventory'].describe()
InventoryDes

In [None]:
# Histogram of Inventory (nbins=15) with the bar counts printed on the bars.
fig = px.histogram(df,
                   x='Inventory',
                   nbins=15,
                   # The bold tag is closed with "</b>" (it was a second "<b>").
                   title='<b>Inventory Distribution</b>',
                   text_auto=True,
                   width=1000,
                   color_discrete_sequence=['brown']
                   )

fig.show()

In [None]:
# Summary statistics of the Sales column; the bare name on the last line
# renders the result as the cell output.
SalesDes = df['Sales'].describe()
SalesDes

In [None]:
# Histogram of Sales (nbins=20) with the bar counts printed on the bars.
fig = px.histogram(df,
                   x='Sales',
                   nbins=20,
                   # The bold tag is closed with "</b>" (it was a second "<b>").
                   title='<b>Sales Distribution</b>',
                   text_auto=True,
                   width=1000,
                   color_discrete_sequence=['grey']
                   )

fig.show()

In [None]:
# Summary statistics of the Orders column; the bare name on the last line
# renders the result as the cell output.
OrdersDes = df['Orders'].describe()
OrdersDes

In [None]:
# Histogram of Orders (nbins=15) with the bar counts printed on the bars.
fig = px.histogram(df,
                   x='Orders',
                   nbins=15,
                   # The bold tag is closed with "</b>" (it was a second "<b>").
                   title='<b>Orders Distribution</b>',
                   text_auto=True,
                   width=1000,
                   color_discrete_sequence=['pink']
                   )

fig.show()

In [None]:
# Summary statistics of the Demand column (the later modelling target);
# the bare name on the last line renders the result as the cell output.
DemandDes = df['Demand'].describe()
DemandDes

In [None]:
# Histogram of Demand (nbins=20) with the bar counts printed on the bars.
fig = px.histogram(df,
                   x='Demand',
                   nbins=20,
                   # The bold tag is closed with "</b>" (it was a second "<b>").
                   title='<b>Demand Distribution</b>',
                   text_auto=True,
                   width=1000,
                   color_discrete_sequence=['purple']
                   )

fig.show()

In [None]:
# Summary statistics of the Price column; the bare name on the last line
# renders the result as the cell output.
PriceDes = df['Price'].describe()
PriceDes

In [None]:
# Histogram of Price (nbins=15) with the bar counts printed on the bars.
fig = px.histogram(df,
                   x='Price',
                   nbins=15,
                   # The bold tag is closed with "</b>" (it was a second "<b>").
                   title='<b>Price Distribution</b>',
                   text_auto=True,
                   width=1000,
                   color_discrete_sequence=['blue']
                   )

fig.show()

In [None]:
# Row counts per Discount value as a two-column frame ('Discount', 'count').
# Both column names are set explicitly: a bare reset_index() only produces
# a 'count' column on pandas >= 2.0 (older versions yield 'index'/'Discount').
DiscountCounts = (
    df['Discount']
    .value_counts()
    .rename_axis('Discount')
    .reset_index(name='count')
)
DiscountCounts

In [None]:
# Pie chart of the share of rows at each Discount value.
fig = px.pie(DiscountCounts,
             names='Discount',
             values='count',
             # The bold tag is closed with "</b>" (it was a second "<b>").
             title='<b>Discount Distribution</b>',
             width=1000,
             color_discrete_sequence=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
             )

fig.show()

In [None]:
# Row counts per Weather value as a two-column frame ('Weather', 'count').
# Both column names are set explicitly: a bare reset_index() only produces
# a 'count' column on pandas >= 2.0 (older versions yield 'index'/'Weather').
WeatherCounts = (
    df['Weather']
    .value_counts()
    .rename_axis('Weather')
    .reset_index(name='count')
)
WeatherCounts

In [None]:
# Bar chart of the per-weather row counts.
fig = px.bar(WeatherCounts,
             x='Weather',
             y='count',
             # The bold tag is closed with "</b>" (it was a second "<b>").
             title='<b>Distribution of Weather by counts</b>',
             text_auto=True,
             width=1000,
             color_discrete_sequence=['#9467bd']
             )
fig.show()

In [None]:
# Row counts per Promotion value as a two-column frame ('Promotion', 'count').
# Both column names are set explicitly: a bare reset_index() only produces
# a 'count' column on pandas >= 2.0 (older versions yield 'index'/'Promotion').
PromotionCounts = (
    df['Promotion']
    .value_counts()
    .rename_axis('Promotion')
    .reset_index(name='count')
)
PromotionCounts

In [None]:
# Pie chart of the share of rows at each Promotion value.
fig = px.pie(PromotionCounts,
             names='Promotion',
             values='count',
             # The bold tag is closed with "</b>" (it was a second "<b>").
             title='<b>Promotion Distribution</b>',
             width=1000,
             color_discrete_sequence=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
             )

fig.show()

In [None]:
# Summary statistics of the Competitor Price column; the bare name on the
# last line renders the result as the cell output.
CompetitorPriceDes = df['Competitor Price'].describe()
CompetitorPriceDes

In [None]:
# Histogram of Competitor Price (nbins=10) with the bar counts printed on the bars.
fig = px.histogram(df,
                   x='Competitor Price',
                   nbins=10,
                   # The bold tag is closed with "</b>" (it was a second "<b>").
                   title='<b>Competitor Price Distribution</b>',
                   text_auto=True,
                   width=1000,
                   color_discrete_sequence=['brown']
                   )

fig.show()

In [None]:
# Row counts per Seasonality value as a two-column frame ('Seasonality', 'count').
# Both column names are set explicitly: a bare reset_index() only produces
# a 'count' column on pandas >= 2.0 (older versions yield 'index'/'Seasonality').
SeasonalityCounts = (
    df['Seasonality']
    .value_counts()
    .rename_axis('Seasonality')
    .reset_index(name='count')
)
SeasonalityCounts

In [None]:
# Bar chart of the per-season row counts.
fig = px.bar(SeasonalityCounts,
             x='Seasonality',
             y='count',
             # The bold tag is closed with "</b>" (it was a second "<b>").
             title='<b>Distribution of Seasonality by counts</b>',
             text_auto=True,
             width=1000,
             color_discrete_sequence=['#e377c2']
             )
fig.show()

In [None]:
# Scatter every numeric column (y axis) against Demand (x axis), each with
# an OLS trendline drawn by Plotly.
numarical_columns = [
    'Inventory', 'Sales', 'Orders',
    'Price', 'Discount', 'Competitor Price',
]
for col in numarical_columns:
    scatter_title = f'Relationship between {col} and Demand '
    fig = px.scatter(df, x='Demand', y=col, trendline="ols", title=scatter_title)
    fig.show()

In [None]:
df.info()

In [None]:
# Pairwise correlations between the numeric columns (Promotion and the
# Demand target included), drawn as an annotated heatmap.
numarical_columns = [
    'Inventory', 'Sales', 'Orders', 'Price',
    'Discount', 'Competitor Price', 'Promotion', 'Demand',
]
corr_matrix = df.loc[:, numarical_columns].corr()

plt.figure(figsize=(10, 8))
sns.heatmap(data=corr_matrix, annot=True)
plt.show()

In [None]:
# Demand is the regression target; every other remaining column is a feature.
y = df['Demand']
x = df.drop(columns='Demand')

In [None]:

from sklearn.preprocessing import LabelEncoder

# Replace each listed column with integer codes. The single encoder is
# re-fitted per column, so afterwards it holds only the last column's classes.
label_encoder = LabelEncoder()
for column in ('Category', 'Region', 'Weather', 'Seasonality', 'Promotion'):
    x[column] = label_encoder.fit_transform(x[column])



In [None]:
x.head()

In [None]:
scaler = StandardScaler()
x = scaler.fit_transform(x)
x

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
x_train.shape

In [None]:
y_train.value_counts(normalize=True)*100

In [None]:
from tabulate import tabulate

def evaluate_model(model, x_train, y_train, x_test, y_test):
    """Print a grid table of regression metrics for an already-fitted model.

    Args:
        model: Fitted estimator exposing ``score(X, y)`` and ``predict(X)``.
        x_train: Training features, used only for the training score.
        y_train: Training targets.
        x_test: Held-out features passed to ``model.predict``.
        y_test: Held-out targets compared against the predictions.

    Returns:
        None. The table is written to stdout.
    """
    # Calculate metrics
    train_r2 = model.score(x_train, y_train)
    y_pred = model.predict(x_test)
    test_r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    # Derive RMSE from MSE rather than passing ``squared=False``: that keyword
    # was deprecated in scikit-learn 1.4 and removed in 1.6, where it raises
    # TypeError. For a single target column the value is the same.
    rmse = mse ** 0.5
    mae = mean_absolute_error(y_test, y_pred)

    # Prepare the table data
    metrics = [
        ["Training R² Score", f"{train_r2:.4f}"],
        ["Testing R² Score", f"{test_r2:.4f}"],
        ["Mean Squared Error (MSE)", f"{mse:.2f}"],
        ["Root Mean Squared Error (RMSE)", f"{rmse:.2f}"],
        ["Mean Absolute Error (MAE)", f"{mae:.2f}"],
    ]

    # Print the table
    print(tabulate(metrics, headers=["Metric", "Value"], tablefmt="grid"))

In [None]:
# Linear regression with default settings, fitted on the training split.
# fit() is left as the last expression so the notebook displays the estimator.
lr = LinearRegression()

lr.fit(x_train, y_train)

In [None]:
# Evaluate the model
evaluate_model(lr, x_train, y_train, x_test, y_test)

In [None]:
# Support vector regressor with default hyperparameters, fitted on the
# training split.
svr = SVR()

svr.fit(x_train, y_train)

In [None]:
evaluate_model(svr, x_train, y_train, x_test, y_test)

In [None]:
# Decision tree regressor fitted on the training split. The seed is fixed so
# the fitted tree and its metrics are reproducible between runs; 42 matches
# the random_state used for the train/test split.
DT = DecisionTreeRegressor(random_state=42)

DT.fit(x_train, y_train)

In [None]:
evaluate_model(DT, x_train, y_train, x_test, y_test)

In [None]:
# k-nearest-neighbours regressor with default hyperparameters, fitted on the
# training split.
KNR = KNeighborsRegressor()

KNR.fit(x_train,y_train)

In [None]:
evaluate_model(KNR, x_train, y_train, x_test, y_test)