In [None]:
import pandas as pd
import plotly.express as px
import numpy as np
from utils import plot_best_worst_malls_mix, get_best_worst_malls

# Load the v1 extracts. Paths are relative to the notebook's working directory.
mall = pd.read_csv('../data/v1/mall.csv')
cost = pd.read_csv('../data/v1/cost.csv')
store = pd.read_csv('../data/v1/store.csv')
store_to_external_mall = pd.read_csv('../data/v1/referenciel_traffic.csv')

# --- Malls: index by mall_id and flag third-party malls -------------------
mall = mall.set_index('mall_id')
# na=False: a missing mall_country counts as "not third party". Without it,
# str.contains returns NaN for those rows and the int cast below raises.
mall['is_third_party'] = (
    mall['mall_country'].str.contains('Third Parties', na=False).astype(int)
)

# --- Costs: total of every rolling-12-month column ------------------------
cost = cost.fillna(0)
# The rent incentive enters the total with its sign flipped.
cost['rent_incentive_r12m'] = cost['rent_incentive_r12m'] * -1
r12m_columns = [col for col in cost.columns if col.endswith('r12m')]
cost["total_cost_r12m"] = np.sum(
    np.array([cost[col].values for col in r12m_columns]), axis=0
)

# --- Stores: attach mall attributes and the per-store total cost ----------
store = store.drop(columns=['store_name'])
store = store.join(mall, on='mall_id')
# Left merge: stores without a cost row keep a NaN total_cost_r12m.
# NOTE(review): assumes at most one cost row per store_id; duplicates in
# cost.csv would duplicate store rows here -- confirm against the data.
store = pd.merge(store, cost[["store_id", "total_cost_r12m"]], on='store_id', how='left')
store = store.dropna(subset=['store_id', 'store_sales_r12m', 'store_rent_r12m'])

# --- Store -> external mall reference table -------------------------------
# Drop incomplete rows first so the integer casts cannot hit NaN.
store_to_external_mall = (
    store_to_external_mall.dropna()
    .astype({'store_id': int, 'block_id': int, 'external_mall_id': int})
    .set_index('store_id')
)

# --- Mall -> external mall mapping, derived through the stores ------------
store_index = store[["store_id", "mall_id"]]
mall_to_external_mall = (
    store_index.join(store_to_external_mall, on='store_id', how='left')[['mall_id', 'external_mall_id']]
    .dropna()
    .drop_duplicates()
)


## France

In [None]:
# Independent copy of the stores whose mall_country is exactly 'France'.
is_france = store['mall_country'] == 'France'
store_france = store.loc[is_france].copy()

In [None]:
# Roll French stores up to mall level: sales, rent and cost are summed per
# mall; total_mall_area is taken from the first store row of each mall.
mall_france = (
    store_france.groupby("mall_id")
    .agg(
        store_sales_r12m=("store_sales_r12m", "sum"),
        store_rent_r12m=("store_rent_r12m", "sum"),
        total_cost_r12m=("total_cost_r12m", "sum"),
        total_mall_area=("total_mall_area", "first"),
    )
    .reset_index()
)

# Per-square-metre densities.
for density_col, amount_col in (
    ("sales_per_sqm", "store_sales_r12m"),
    ("rent_per_sqm", "store_rent_r12m"),
):
    mall_france[density_col] = mall_france[amount_col] / mall_france["total_mall_area"]

# Highest sales density first.
mall_france = mall_france.sort_values(by="sales_per_sqm", ascending=False)

# Revenue scenarios per sqm: (share of sales + rent - costs) / mall area,
# for a sales share of 1%, 2%, 5% and 10%.
for scenario_col, sales_share in (
    ("rev_1pc", 0.01),
    ("rev_2pc", 0.02),
    ("rev_5pc", 0.05),
    ("rev_10pc", 0.10),
):
    mall_france[scenario_col] = (
        mall_france["store_sales_r12m"] * sales_share
        + mall_france["store_rent_r12m"]
        - mall_france["total_cost_r12m"]
    ) / mall_france["total_mall_area"]

In [None]:
# Rank the French malls once per metric; every call yields a (best, worst) pair.
ranking_metrics = ('sales_per_sqm', 'rev_1pc', 'rev_2pc', 'rev_5pc', 'rev_10pc')
best_worst_by_metric = {
    metric_name: get_best_worst_malls(mall_france, metric_name)
    for metric_name in ranking_metrics
}

# Expose each pair under the names the plotting cells below rely on.
best_malls_sales, worst_malls_sales = best_worst_by_metric['sales_per_sqm']
best_malls_rev_1pc, worst_malls_rev_1pc = best_worst_by_metric['rev_1pc']
best_malls_rev_2pc, worst_malls_rev_2pc = best_worst_by_metric['rev_2pc']
best_malls_rev_5pc, worst_malls_rev_5pc = best_worst_by_metric['rev_5pc']
best_malls_rev_10pc, worst_malls_rev_10pc = best_worst_by_metric['rev_10pc']

In [None]:
# Best vs worst malls when ranked on sales per sqm.
fig = plot_best_worst_malls_mix(
    store_france,
    best_malls_sales,
    worst_malls_sales,
    'sales_per_sqm',
    'Best vs Worst Malls by Sales per sqm',
)

# Fixed canvas size; the call is the cell's last expression, so the figure renders.
fig.update_layout(height=600, width=800)

In [None]:
# Best vs worst malls for the "rent + 1% of sales - costs" scenario.
plot_best_worst_malls_mix(
    store_france,
    best_malls_rev_1pc,
    worst_malls_rev_1pc,
    'rev_1pc',
    'Best vs Worst Malls, Rent + 1% Sales - Costs',
)

In [None]:
# Best vs worst malls for the "rent + 2% of sales - costs" scenario.
fig = plot_best_worst_malls_mix(
    store_france, best_malls_rev_2pc, worst_malls_rev_2pc,
    "rev_2pc", "Best vs Worst Malls, Rent + 2% Sales - Costs",
)

# Fix the canvas size, then export the figure as a standalone HTML file.
fig.update_layout(height=600, width=800)
fig.write_html("../assets/best_worst_malls.html")

In [None]:
# Best vs worst malls for the rev_5pc scenario, on a wider canvas.
fig = plot_best_worst_malls_mix(
    store_france,
    best_malls_rev_5pc,
    worst_malls_rev_5pc,
    'rev_5pc',
    'Tenant mix in best, worst malls',
)
fig.update_layout(height=600, width=1000)
# Larger x-axis tick labels; last expression, so the figure renders.
fig.update_xaxes(tickfont={'size': 16})

In [None]:
# Attach the mall attributes to the worst rev_5pc malls for inspection.
pd.merge(
    left=worst_malls_rev_5pc,
    right=mall.reset_index(),
    how='left',
    on='mall_id',
)

In [None]:
# Display the "best" half of the rev_5pc ranking returned by get_best_worst_malls.
best_malls_rev_5pc

In [None]:
# Row(s) of the French mall table for mall_id 23.
mall_23 = mall_france.loc[mall_france['mall_id'] == 23]

# Stack mall 23 on top of the best rev_5pc malls, keep only the financial
# columns, then attach the mall attributes (left join on mall_id) and
# renumber the rows from zero.
financial_columns = ['mall_id', 'store_sales_r12m', 'store_rent_r12m', 'total_cost_r12m']
comparison_df = (
    pd.concat([mall_23, best_malls_rev_5pc])[financial_columns]
    .merge(mall.reset_index(), on='mall_id', how='left')
    .reset_index(drop=True)
)

comparison_df

In [None]:
# Bar chart of sales per sqm for mall 23 versus the best malls.
highlight_mall_id = 23
comparison_df['sales_per_sqm'] = comparison_df['store_sales_r12m'] / comparison_df['total_mall_area']

# mall_id is passed through hover_data rather than `color`: colouring by a
# numeric column makes Plotly Express use a continuous colour axis (so a
# color_discrete_map is ignored) and ties the hover text to marker.color,
# which would show the hex colour code once the bars are recoloured below.
fig = px.bar(
    comparison_df,
    x='mall_name',
    y='sales_per_sqm',
    hover_data=['mall_id'],
    title='Sales per sqm Comparison',
    labels={'sales_per_sqm': 'Sales per sqm', 'mall_name': 'Mall Name', 'mall_id': 'Mall ID'},
)

# One colour per bar, in row order: the highlighted mall in red, the rest in blue.
fig.update_traces(
    marker_color=[
        '#EF553B' if mall_id == highlight_mall_id else '#636EFA'
        for mall_id in comparison_df['mall_id']
    ]
)

# Update layout for better readability
fig.update_layout(
    xaxis_title='Mall Name',
    yaxis_title='Sales per sqm',
    xaxis_tickangle=-45,
    width=800,
    height=600
)

# Show the plot
fig.show()


In [None]:
# Bar chart of rent per sqm for mall 23 versus the best malls.
highlight_mall_id = 23
comparison_df['rent_per_sqm'] = comparison_df['store_rent_r12m'] / comparison_df['total_mall_area']

# mall_id is passed through hover_data rather than `color`: colouring by a
# numeric column makes Plotly Express use a continuous colour axis (so a
# color_discrete_map is ignored) and ties the hover text to marker.color,
# which would show the hex colour code once the bars are recoloured below.
fig = px.bar(
    comparison_df,
    x='mall_name',
    y='rent_per_sqm',
    hover_data=['mall_id'],
    title='Rent per sqm Comparison',
    labels={'rent_per_sqm': 'Rent per sqm', 'mall_name': 'Mall Name', 'mall_id': 'Mall ID'},
)

# One colour per bar, in row order: the highlighted mall in red, the rest in blue.
fig.update_traces(
    marker_color=[
        '#EF553B' if mall_id == highlight_mall_id else '#636EFA'
        for mall_id in comparison_df['mall_id']
    ]
)

# Update layout for better readability
fig.update_layout(
    xaxis_title='Mall Name',
    yaxis_title='Rent per sqm',
    xaxis_tickangle=-45,
    width=800,
    height=600
)

# Show the plot
fig.show()

In [None]:
# Bar chart of total cost per sqm for mall 23 versus the best malls.
highlight_mall_id = 23
comparison_df['total_cost_per_sqm'] = comparison_df['total_cost_r12m'] / comparison_df['total_mall_area']

# mall_id is passed through hover_data rather than `color`: colouring by a
# numeric column makes Plotly Express use a continuous colour axis (so a
# color_discrete_map is ignored) and ties the hover text to marker.color,
# which would show the hex colour code once the bars are recoloured below.
fig = px.bar(
    comparison_df,
    x='mall_name',
    y='total_cost_per_sqm',
    hover_data=['mall_id'],
    title='Total Cost per sqm Comparison',
    labels={'total_cost_per_sqm': 'Total Cost per sqm', 'mall_name': 'Mall Name', 'mall_id': 'Mall ID'},
)

# One colour per bar, in row order: the highlighted mall in red, the rest in blue.
fig.update_traces(
    marker_color=[
        '#EF553B' if mall_id == highlight_mall_id else '#636EFA'
        for mall_id in comparison_df['mall_id']
    ]
)

# Update layout for better readability
fig.update_layout(
    xaxis_title='Mall Name',
    yaxis_title='Total Cost per sqm',
    xaxis_tickangle=-45,
    width=800,
    height=600
)

# Show the plot
fig.show()

In [None]:
# Best vs worst malls for the "rent + 10% of sales - costs" scenario.
plot_best_worst_malls_mix(
    store_france,
    best_malls_rev_10pc,
    worst_malls_rev_10pc,
    'rev_10pc',
    'Best vs Worst Malls, Rent + 10% Sales - Costs',
)