In [None]:
#best practices example
# --- 1. Import libraries ---
import duckdb
import pandas as pd
import os
import matplotlib.pyplot as plt
import plotly.express as px

# --- 2. Load the cleaned tips dataset ---
# DuckDB reads the processed CSV through read_csv_auto (the path is relative
# to the notebook's working directory); .df() then hands the query result
# back as a pandas DataFrame bound to `df`.
tips_relation = duckdb.query("""
    SELECT * FROM read_csv_auto('../data/processed/tips_cleaned.csv')
""")
df = tips_relation.df()


In [None]:
# Summarise the frame: column names, dtypes, non-null counts and memory usage.
# The call parentheses matter: a bare `df.info` only echoes the bound method
# object instead of printing the summary.
df.info()

In [None]:
# Histogram of the tip_pct column, split into 20 bins.
tip_pct_values = df["tip_pct"]
tip_pct_values.hist(bins=20)

In [None]:
 # Quick-look box plot of tip_pct, one box per gender value.
 df.boxplot(
     by="gender",
     column="tip_pct",
 )

In [None]:
import os

import matplotlib.pyplot as plt

# Static (matplotlib) box plot of tip_pct for each gender value.
ax = df.boxplot(column='tip_pct', by='gender', figsize=(6, 4))

# Title and axis labels so the saved figure can stand on its own
plt.title("Tip Percentage by Gender")
plt.suptitle("")  # Removes the default pandas subtitle
plt.xlabel("Gender")
plt.ylabel("Tip %")

# savefig does not create missing folders, so make sure the target directory
# exists first; otherwise this cell fails on a fresh checkout.
os.makedirs("../reports/figures", exist_ok=True)

# Save as JPG (must happen before plt.show(), which finishes the figure)
plt.savefig("../reports/figures/tip_pct_by_gender.jpg", format='jpg', dpi=300, bbox_inches='tight')

# Show plot in notebook
plt.show()


In [None]:
# Static scatter of tip amount against total bill; alpha softens overlapping points.
df.plot(
    kind="scatter",
    x="bill_total_usd",
    y="tip_usd",
    alpha=0.7,
)

In [None]:
# Interactive version of the bill-vs-tip scatter, with points coloured by gender.
px.scatter(
    df,
    x="bill_total_usd",
    y="tip_usd",
    color="gender",
)

In [None]:
import plotly.express as px
import pandas as pd
import os

# This cell reuses the df already in memory; to reload it from disk instead:
# df = pd.read_csv("../data/processed/tips_cleaned.csv")

# Interactive box plot of tip_pct per gender (hover for values, zoom/pan).
# points="outliers" draws only the outlier markers; "all" would draw every point.
fig = px.box(df, x="gender", y="tip_pct", points="outliers",
             title="Tip Percentage by Gender (Interactive)")

# Readable axis titles, a white theme, and percent-formatted y ticks
layout_options = dict(
    xaxis_title="Gender",
    yaxis_title="Tip %",
    template="plotly_white",
)
fig.update_layout(**layout_options)
fig.update_yaxes(tickformat=".0%")

fig.show()


In [None]:
import kaleido

# Report the Chrome path returned by kaleido
# (NOTE(review): presumably the browser used for static image export; confirm).
chrome_path = kaleido.get_chrome_sync()
print("Chrome path:", chrome_path)

# Make sure the output folder is there before anything is written into it
os.makedirs("../reports/figures", exist_ok=True)

# 1) Interactive HTML export (best for sharing)
fig.write_html("../reports/figures/tip_pct_by_gender_interactive.html")

# 2) Static image export; needs kaleido installed once:
#   pip install -U kaleido
fig.write_image(
    "../reports/figures/tip_pct_by_gender.png",
    scale=2,
)
# JPG alternative:
# fig.write_image("../reports/figures/tip_pct_by_gender.jpg", scale=2)


In [None]:
import plotly.express as px

# trendline="ols" depends on statsmodels (pip install statsmodels)
# Bill-vs-tip scatter coloured by gender, with an OLS trendline; shown as the cell output.
px.scatter(
    df,
    x="bill_total_usd",
    y="tip_usd",
    color="gender",
    trendline="ols",
)

In [None]:
import plotly.express as px

# trendline="ols" depends on statsmodels (pip install statsmodels)
# Same OLS scatter as the displayed version, but bound to `fig` so later cells
# can show and export it.
fig = px.scatter(
    df,
    x="bill_total_usd",
    y="tip_usd",
    color="gender",
    trendline="ols",
)

In [None]:
# Render whatever figure is currently bound to `fig` (the OLS bill-vs-tip
# scatter when the notebook is run top to bottom).
fig.show()

In [None]:

# System packages that were installed on the host (kept here for reference only;
# NOTE(review): presumably required by the headless Chrome used for image export, confirm):
# sudo apt install -y libasound2t64
# sudo apt install -y \
#   libnss3 libatk-bridge2.0-0 libcups2 libxcomposite1 libxdamage1 \
#   libxfixes3 libxrandr2 libgbm1 libxkbcommon0 libpango-1.0-0 libcairo2

# Save the current `fig` as a PNG at 2x scale.
fig.write_image("../reports/figures/bill_total_usd_tip_gender.png", scale=2)

In [None]:
 # Tip percentage per day, with separate boxes for each is_smoker value.
 px.box(
     df,
     x="day",
     y="tip_pct",
     color="is_smoker",
 )

In [None]:
# Distinct values of the day column, in order of first appearance.
df["day"].unique()

In [None]:
# Small multiples: one scatter panel per day, coloured by gender, each with an OLS trendline
facet_fig = px.scatter(
    df,
    x="bill_total_usd",
    y="tip_pct",
    color="gender",
    facet_col="day",
    trendline="ols",
    title="Tip % vs Bill by Day",
)
# update_yaxes returns the figure, so leaving it as the last expression renders the chart
facet_fig.update_yaxes(tickformat=".0%")


In [None]:
import plotly.express as px

# Report figure 1: tip amount against bill size, coloured by gender, with an
# OLS trendline per gender (trendline="ols" needs statsmodels installed).
fig = px.scatter(
    df,
    x="bill_total_usd",
    y="tip_usd",
    color="gender",
    trendline="ols",
    labels={
        "bill_total_usd": "Total Bill ($)",
        "tip_usd": "Tip Amount ($)",
        "gender": "Customer Gender"
    },
    title="Relationship Between Bill Size and Tip Amount by Gender"
)

# Clean up look: white theme, styled title, and both axes anchored at zero
fig.update_layout(
    template="plotly_white",
    title_font=dict(size=18, family="Arial", color="#222"),
    xaxis=dict(range=[0, df["bill_total_usd"].max() + 5]),
    yaxis=dict(range=[0, df["tip_usd"].max() * 1.1]),  # adds a bit of headroom
)

fig.show()


# Save as JPG. The file name must match the path that the report markdown and
# the IPython Image cell further down read back; the previous title-cased name
# with spaces left those references pointing at a file that was never written.
fig.write_image("../reports/figures/relationship_bill_vs_tip_by_gender.jpg", scale=2)



In [None]:
# Report figure 2: tip as a share of the bill against bill size, coloured by
# gender, with an OLS trendline per gender (trendline="ols" needs statsmodels).
fig = px.scatter(
    df,
    x="bill_total_usd",
    y="tip_pct",
    color="gender",
    trendline="ols",
    labels={
        "bill_total_usd": "Total Bill ($)",
        "tip_pct": "Tip Percentage of Bill",
        "gender": "Customer Gender"
    },
    title="Tip Percentage vs. Bill Amount by Gender"
)

fig.update_layout(
    template="plotly_white",
    yaxis=dict(
        range=[0, 0.35],       # from 0% to 35%; points above 0.35 fall outside the view
        tickformat=".0%"       # format y-axis as percent
    ),
    title_font=dict(size=18, family="Arial", color="#222")
)
fig.show()


# Save as JPG. The file name must match the image path embedded in the report
# markdown below (tip_percentage_vs_bill_amount_by_gender.jpg); the previous
# title-cased name with spaces left that embed broken.
fig.write_image("../reports/figures/tip_percentage_vs_bill_amount_by_gender.jpg", scale=2)


In [None]:
# Box plot of tip_pct for each day, with one box per gender within a day
day_gender_box = px.box(
    df,
    x="day",
    y="tip_pct",
    color="gender",
    title="Tip % by Day and Gender",
)
# Percent tick labels; the returned figure is the cell's displayed output
day_gender_box.update_yaxes(tickformat=".0%")


## Visual Insights: Tipping Behavior by Gender

### 1. Relationship Between Bill Size and Tip Amount (in Dollars)

**Figure 1:** Relationship Between Bill Size and Tip Amount by Gender  
![Figure](../reports/figures/relationship_bill_vs_tip_by_gender.jpg)

- **Description:** This scatter plot shows how the *total bill amount* relates to the *tip amount (in USD)* for male and female customers.  
- **Trend:** Both trendlines slope upward — indicating that as the bill grows, the *total dollar amount tipped* increases.  
- **Interpretation:**  
  People tend to tip more dollars on higher bills.  
  The male (red) trendline is slightly steeper, suggesting that male customers may tip marginally higher absolute amounts on large bills.

---

### 2. Tip Percentage vs. Bill Amount

**Figure 2:** Tip Percentage vs. Bill Amount by Gender  

![Figure](../reports/figures/tip_percentage_vs_bill_amount_by_gender.jpg)

- **Description:** This chart normalizes tipping behavior by expressing the tip as a *percentage of the bill*.  
  The trendlines slope slightly downward for both genders.  
- **Trend:** As the total bill rises, the tip *percentage* declines — indicating that generosity, when measured relatively, *diminishes* on larger checks.  
- **Interpretation:**  
  This aligns with common behavioral economics findings: people often tip a fixed "mental dollar amount" rather than a strict percentage, leading to lower percentages on expensive meals.  
  Gender differences remain minimal — both groups follow a nearly parallel decline in relative generosity.

---

### 3. Summary Insight

Together, these two visualizations reveal an important duality:

- **In absolute terms**, higher bills produce higher tips (more dollars).  
- **In proportional terms**, generosity decreases slightly with bill size (smaller %).  

This dual pattern demonstrates how consumer behavior can appear both *increasing* and *decreasing* depending on the measurement frame — a key reminder for analysts to explore multiple scales before interpreting social data.


In [None]:
import os

# Path sanity check: print the working directory, then whether the figures
# folder resolves from one level up and from the current directory.
print("CWD:", os.getcwd())
for label, candidate in (
    (".. reports/figures:", "../reports/figures"),
    ("reports/figures:", "reports/figures"),
):
    print(label, os.path.exists(candidate))


In [None]:
from IPython.display import Image, display

# Load the saved bill-vs-tip JPG from disk and render it inline.
saved_figure = Image(filename="../reports/figures/relationship_bill_vs_tip_by_gender.jpg")
display(saved_figure)


![Figure](../reports/figures/relationship_bill_vs_tip_by_gender.jpg)


In [None]:
# Print the column names in sorted order.
column_names = df.columns.tolist()
column_names.sort()
print(column_names)

In [None]:
# TODO: Part 2 — Interactive Plotly Views (30–45 min)