In [41]:
import pandas as pd
import numpy as np
import io
import requests
import plotly.express as px
import plotly.graph_objects as go
# Google Drive id of the file to download.
GOOGLE_FILE_ID = "1CJGNXI3yp0l1rpzERVyKCU1K55DzfqIS"
# Download URL for that file: endpoint first, then the query string carrying the id.
URL = (
    "https://drive.usercontent.google.com/download"
    f"?id={GOOGLE_FILE_ID}&export=download&confirm=t"
)

In [4]:
def get_stores_data() -> pd.DataFrame:
    """Fetch the CSV at ``URL`` and parse it into a DataFrame.

    Every call issues a new HTTP GET (60-second timeout); nothing is cached
    inside this function, so callers should keep the returned frame around
    instead of calling it repeatedly.

    Returns:
        The downloaded CSV parsed with ``pd.read_csv`` defaults.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(URL, timeout=60)
    response.raise_for_status()
    # Wrap the raw bytes in a file-like object so pandas can read them directly.
    csv_buffer = io.BytesIO(response.content)
    return pd.read_csv(csv_buffer)

In [5]:
# Load the dataset into `df`; this call performs the HTTP download.
df = get_stores_data()

In [39]:
# Preview the first five rows of the name, rating and review-count columns.
sample = df.loc[:, ['name_ar', 'rating', 'total_reviews']].head(5)
sample

Unnamed: 0,name_ar,rating,total_reviews
0,صبا العود,5.0,9
1,نشتري,2.5,12
2,مناقصات,5.0,1
3,قلوسي,3.79,19
4,رؤى الطبيعة,4.5,18


In [45]:
# Largest value in the total_reviews column.
df['total_reviews'].max()

4584

In [64]:
# NOTE(review): `fig3` is not assigned in any cell visible here, and this cell sits
# above the definition of create_distribution_histogram. Presumably fig3 was built
# with that function in an earlier session - TODO confirm, and assign it in a cell
# that runs before this one so "Restart & Run All" does not raise NameError.
fig3

In [62]:
# analysis.py  (append)

def _histogram_bins(values: pd.Series,
                    bin_width: float | int) -> tuple[list[str], list[int]]:
    """Bucket ``values`` into left-closed bins of ``bin_width`` and label each bin.

    Bins start at the smallest value and there are exactly as many as needed
    for the largest value to fall inside the last one.

    Returns ``(labels, counts)``, both ordered from the lowest bin to the
    highest; two empty lists when ``values`` is empty.
    """
    if values.empty:
        return [], []

    lowest, highest = values.min(), values.max()
    # +1 so the maximum lands strictly inside the last [left, right) bin; the
    # tiny epsilon absorbs float error when the span is a multiple of the width.
    n_bins = int(np.floor((highest - lowest) / bin_width + 1e-9)) + 1
    edges = lowest + bin_width * np.arange(n_bins + 1)
    if edges[-1] <= highest:
        # Float rounding left the maximum on the last edge: add one more bin.
        edges = np.append(edges, edges[-1] + bin_width)

    counts = (
        pd.cut(values, bins=edges, right=False)
        .value_counts()
        .sort_index()
    )

    whole_numbers = (
        float(bin_width).is_integer() and bool((values % 1 == 0).all())
    )
    if whole_numbers:
        # Integer data: show the inclusive range, e.g. [0, 50) -> "0–49".
        labels = [f"{int(iv.left)}–{int(iv.right) - 1}" for iv in counts.index]
    else:
        # Fractional data: show the real edges, e.g. [0.25, 0.5) -> "0.25–0.5".
        # Truncating these to int would make neighbouring bins share a label.
        labels = [f"{float(iv.left):.10g}–{float(iv.right):.10g}"
                  for iv in counts.index]
    return labels, counts.tolist()


def create_distribution_histogram(
        df: pd.DataFrame,
        *,
        column: str = "rating",          # 'rating' | 'total_reviews'
        bin_width: float | int = 0.25,   # 0.25 for rating, 50 for reviews, etc.
        x_title: str | None = None,
        y_title: str = "عدد المتاجر"
) -> go.Figure:
    """
    Histogram of any numeric column with the unified Marouf theme.
    ----------------------------------------------------------------
    df         : DataFrame holding the column to plot
    column     : DataFrame column to histogram
    bin_width  : Bucket size (0.25 for rating, 50/100/500 for reviews …);
                 must be a positive, finite number
    x_title    : X-axis label (auto-generated from ``column`` if None)
    y_title    : Y-axis label and trace name

    Returns a ``go.Figure`` with one bar per bin. Bins are left-closed and
    start at the column minimum. Labels are inclusive integer ranges
    ("0–49") when both the data and ``bin_width`` are whole numbers, and
    "left–right" edges ("0.25–0.5") otherwise, so every bar gets its own
    category. Missing values are ignored; a column with no values yields a
    figure with no bars.

    Raises ``ValueError`` when ``bin_width`` is not positive and finite.
    """
    if not np.isfinite(bin_width) or bin_width <= 0:
        raise ValueError(
            f"bin_width must be a positive finite number, got {bin_width!r}"
        )

    # --- 1.  auto axis label -------------------------------------------------
    if x_title is None:
        x_title = "التقييم" if column == "rating" else "عدد التقييمات"

    # --- 2.  build bins -------------------------------------------------------
    x_labels, bar_heights = _histogram_bins(df[column].dropna(), bin_width)

    # --- 3.  bar trace (theme colours) ---------------------------------------
    fig = go.Figure(
        go.Bar(
            x=x_labels,
            y=bar_heights,
            name=y_title,
            marker_color="#2C7D8B",           # primary accent
            width=0.90,
            hovertemplate=f"{x_title}: %{{x}}<br>%{{y}} متجر<extra></extra>",
        )
    )

    # --- 4.  unified layout ---------------------------------------------------
    fig.update_layout(
        title=dict(
            text=f"توزيع المتاجر حسب {x_title}",
            font=dict(size=16, family="Noto Sans Arabic"),
        ),
        xaxis_title=x_title,
        yaxis_title=y_title,
        margin=dict(l=10, r=10, t=50, b=50),
        height=400,
        font=dict(family="Noto Sans Arabic"),
        hoverlabel=dict(
            bgcolor="#C9D2BA",
            font_size=12,
            font_family="Noto Sans Arabic",
            font_color="#202020",
            align="right",
        ),
        bargap=0.05,
    )
    return fig

In [18]:
# Sanity check: show any rows whose rating is negative.
df.loc[df['rating'] < 0]

Unnamed: 0.1,Unnamed: 0,name_ar,rating,total_reviews,business_type_ar,business_type_en,business_sub_type_ar,business_sub_type_en,other_type_name,description,main_activity
