In [1]:
import pandas as pd

# Load the synthetic food-demand dataset (path is relative to the notebook's
# working directory) and preview the first rows.
df = pd.read_csv(filepath_or_buffer="data/synthetic-food-demand.csv")
df.head(n=5)

Unnamed: 0,product_code,product_category,product_subcategory,location_code,scaled_price,promotion_email,promotion_homepage,timestamp,unit_sales
0,1062,Beverages,Fruit Juice Mango,101,0.87913,0,0,2018-01-01,636.0
1,1062,Beverages,Fruit Juice Mango,101,0.994517,0,0,2018-01-08,123.0
2,1062,Beverages,Fruit Juice Mango,101,1.005513,0,0,2018-01-15,391.0
3,1062,Beverages,Fruit Juice Mango,101,1.0,0,0,2018-01-22,339.0
4,1062,Beverages,Fruit Juice Mango,101,0.883309,0,0,2018-01-29,661.0


In [2]:
"""
Cell generated by Data Wrangler.
"""
def clean_data(df):
    """Return a cleaned version of ``df``.

    Two steps are applied:
      1. Missing values in the 'unit_sales' column are replaced with 0.
      2. The text in 'product_category' and 'product_subcategory' is
         converted to lowercase.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the 'unit_sales', 'product_category' and
        'product_subcategory' columns.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``fillna`` with the two text columns lowercased.
    """
    # fillna without inplace hands back a new frame, so the assignments below
    # are made on that result rather than on the caller's object.
    cleaned = df.fillna({'unit_sales': 0})
    for text_col in ('product_category', 'product_subcategory'):
        cleaned[text_col] = cleaned[text_col].str.lower()
    return cleaned

# Clean a deep copy of the raw frame so `df` itself is left as loaded, then
# preview the first rows of the result.
df_clean = clean_data(df.copy(deep=True))
df_clean.head(n=5)

Unnamed: 0,product_code,product_category,product_subcategory,location_code,scaled_price,promotion_email,promotion_homepage,timestamp,unit_sales
0,1062,beverages,fruit juice mango,101,0.87913,0,0,2018-01-01,636.0
1,1062,beverages,fruit juice mango,101,0.994517,0,0,2018-01-08,123.0
2,1062,beverages,fruit juice mango,101,1.005513,0,0,2018-01-15,391.0
3,1062,beverages,fruit juice mango,101,1.0,0,0,2018-01-22,339.0
4,1062,beverages,fruit juice mango,101,0.883309,0,0,2018-01-29,661.0


In [5]:
!pip install "altair[all]"

Collecting altair[all]
  Downloading altair-5.4.0-py3-none-any.whl.metadata (9.4 kB)
Collecting narwhals>=1.1.0 (from altair[all])
  Downloading narwhals-1.4.1-py3-none-any.whl.metadata (5.5 kB)
Collecting altair-tiles>=0.3.0 (from altair[all])
  Downloading altair_tiles-0.3.0-py3-none-any.whl.metadata (2.7 kB)
Collecting anywidget>=0.9.0 (from altair[all])
  Downloading anywidget-0.9.13-py3-none-any.whl.metadata (7.2 kB)
Collecting vega-datasets>=0.9.0 (from altair[all])
  Downloading vega_datasets-0.9.0-py3-none-any.whl.metadata (5.5 kB)
Collecting vegafusion>=1.6.6 (from vegafusion[embed]>=1.6.6; extra == "all"->altair[all])
  Downloading vegafusion-1.6.9-py3-none-any.whl.metadata (1.3 kB)
Collecting vl-convert-python>=1.6.0 (from altair[all])
  Downloading vl_convert_python-1.6.0-cp37-abi3-macosx_11_0_arm64.whl.metadata (5.2 kB)
Collecting mercantile (from altair-tiles>=0.3.0->altair[all])
  Downloading mercantile-1.2.1-py3-none-any.whl.metadata (4.8 kB)
Collecting xyzservices (fro

In [3]:
import altair as alt

# Turn off Altair's row-count guard so charts can be built directly from the
# full df_clean frame instead of a down-sampled copy.
alt.data_transformers.disable_max_rows()
# Disabled alternative, kept for reference: chart a reproducible 5000-row
# sample and parse the timestamp column to datetime up front.
# sampled_data = df_clean.sample(n=5000, random_state=1)
# sampled_data['timestamp'] = pd.to_datetime(sampled_data['timestamp'])


DataTransformerRegistry.enable('default')

In [4]:
# Total unit sales per timestamp.
# df_clean carries product_code and location_code columns, so each timestamp
# presumably appears on many rows. A line mark over the raw rows would join
# all of them and draw a vertical zigzag at every date; aggregating with
# sum() yields a single point per timestamp and a readable trend line.
# NOTE(review): use 'mean(unit_sales):Q' instead if the average per row is
# the intended metric.
line_chart = alt.Chart(df_clean).mark_line().encode(
    x=alt.X('timestamp:T', title='Timestamp'),
    y=alt.Y('sum(unit_sales):Q', title='Total unit sales'),
).properties(
    title='Unit Sales Over Time'
)
line_chart.show()

In [5]:
# Scatter of unit sales against scaled price: one circle per row of df_clean,
# coloured by product subcategory.
scatter_plot = (
    alt.Chart(df_clean)
    .mark_circle(size=60)
    .encode(
        alt.X('scaled_price:Q'),
        alt.Y('unit_sales:Q'),
        alt.Color('product_subcategory:N'),
    )
    .properties(title='Unit Sales vs Scaled Price')
)

scatter_plot.show()

In [13]:
# Heatmap of unit sales by product code over time, drawn from a 5000-row
# random sample of df_clean (random_state is fixed so the sample is repeatable).
data = df_clean.sample(n=5000, random_state=1)
heatmap_colored = (
    alt.Chart(data)
    .mark_rect()
    .encode(
        alt.X('timestamp:T'),
        alt.Y('product_code:N'),
        alt.Color('unit_sales:Q', scale=alt.Scale(scheme='viridis')),
    )
    .properties(title='Unit Sales Heatmap with Enhanced Colors')
)

heatmap_colored.show()