## Python绘制树状图

In [1]:
import pandas as pd
import json
from pyecharts import options as opts
from pyecharts.charts import Tree

### 1. 读取广告转化数据集

In [2]:
# Data source: https://www.kaggle.com/loveall/clicks-conversion-tracking/data
# Data set: Ad Conversions Data (one row per ad).
df = pd.read_csv(filepath_or_buffer="./datas/KAG_conversion_data.csv")

In [3]:
# Preview the first three rows of the raw data.
df.head(n=3)

Unnamed: 0,ad_id,xyz_campaign_id,fb_campaign_id,age,gender,interest,Impressions,Clicks,Spent,Total_Conversion,Approved_Conversion
0,708746,916,103916,30-34,M,15,7350,1,1.43,2,1
1,708749,916,103917,30-34,M,16,17861,2,1.82,2,0
2,708771,916,103920,30-34,M,20,693,0,0.0,1,0


In [4]:
# Data preparation.
# Constant label shared by every row; it is used as the top grouping level.
df["total"] = "全量"
# Tag the values of each level with a short prefix (c_ = campaign, g_ = gender,
# a_ = age) so that node names from different levels cannot be confused.
# The prefix is only added when it is missing, which keeps this cell safe to
# re-run: a second execution no longer produces values such as "c_c_916".
for column, prefix in (("xyz_campaign_id", "c_"), ("gender", "g_"), ("age", "a_")):
    as_text = df[column].astype(str)
    df[column] = as_text.where(as_text.str.startswith(prefix), prefix + as_text)

In [5]:
# Preview the first three rows after the prefixes were added.
df.head(n=3)

Unnamed: 0,ad_id,xyz_campaign_id,fb_campaign_id,age,gender,interest,Impressions,Clicks,Spent,Total_Conversion,Approved_Conversion,total
0,708746,c_916,103916,a_30-34,g_M,15,7350,1,1.43,2,1,全量
1,708749,c_916,103917,a_30-34,g_M,16,17861,2,1.82,2,0,全量
2,708771,c_916,103920,a_30-34,g_M,20,693,0,0.0,1,0,全量


### 2. 准备数据：全量 > 活动 > 性别 > 年龄 四级树

In [6]:
# Aggregate: count the rows in every (total, campaign, gender, age) combination.
level_columns = ["total", "xyz_campaign_id", "gender", "age"]
df_agg = df.groupby(level_columns).size().reset_index(name="size")
df_agg.head(n=3)

Unnamed: 0,total,xyz_campaign_id,gender,age,size
0,全量,c_1178,g_F,a_30-34,89
1,全量,c_1178,g_F,a_35-39,57
2,全量,c_1178,g_F,a_40-44,60


In [7]:
def build_tree(agg, root_name="全量"):
    """Turn the aggregated counts into a nested ``name``/``children`` dict.

    The result has one root node, one child per campaign, one grandchild per
    gender within that campaign, and one leaf per age bucket. Leaves carry a
    ``value`` (the row count) instead of ``children``.

    Parameters
    ----------
    agg : pandas.DataFrame
        Frame with the columns ``xyz_campaign_id``, ``gender``, ``age`` and
        ``size``.
    root_name : str
        Name of the root node.

    Returns
    -------
    dict
        The nested tree, serialisable with ``json.dumps``.
    """
    root = {"name": root_name, "children": []}
    # sort=False keeps groups in order of first appearance, i.e. the same order
    # that iterating over .unique() would give.
    for campaign, campaign_rows in agg.groupby("xyz_campaign_id", sort=False):
        campaign_node = {"name": campaign, "children": []}
        for gender, gender_rows in campaign_rows.groupby("gender", sort=False):
            # Leaf nodes hold a value rather than children. int() guarantees a
            # plain Python integer, which json.dumps can always serialise.
            leaves = [
                {"name": age, "value": int(size)}
                for age, size in zip(gender_rows["age"], gender_rows["size"])
            ]
            campaign_node["children"].append({"name": gender, "children": leaves})
        root["children"].append(campaign_node)
    return root


data = build_tree(df_agg)

print(json.dumps(data))

{"name": "\u5168\u91cf", "children": [{"name": "c_1178", "children": [{"name": "g_F", "children": [{"name": "a_30-34", "value": 89}, {"name": "a_35-39", "value": 57}, {"name": "a_40-44", "value": 60}, {"name": "a_45-49", "value": 70}]}, {"name": "g_M", "children": [{"name": "a_30-34", "value": 112}, {"name": "a_35-39", "value": 90}, {"name": "a_40-44", "value": 69}, {"name": "a_45-49", "value": 78}]}]}, {"name": "c_916", "children": [{"name": "g_F", "children": [{"name": "a_30-34", "value": 11}, {"name": "a_35-39", "value": 3}, {"name": "a_40-44", "value": 1}, {"name": "a_45-49", "value": 4}]}, {"name": "g_M", "children": [{"name": "a_30-34", "value": 18}, {"name": "a_35-39", "value": 9}, {"name": "a_40-44", "value": 5}, {"name": "a_45-49", "value": 3}]}]}, {"name": "c_936", "children": [{"name": "g_F", "children": [{"name": "a_30-34", "value": 97}, {"name": "a_35-39", "value": 49}, {"name": "a_40-44", "value": 46}, {"name": "a_45-49", "value": 64}]}, {"name": "g_M", "children": [{"nam

In [10]:
# Label placement for every node of the tree.
node_labels = opts.LabelOpts(
    position="top",
    horizontal_align="right",
    vertical_align="middle",
)
# Centered chart title.
chart_title = opts.TitleOpts(title="广告流量图", pos_left="center")

tree = Tree()
# orient accepts 'LR', 'RL', 'TB' or 'BT'.
# layout accepts "orthogonal" or "radial"; passing layout="radial" would draw
# a circular, radiating tree instead.
tree.add("", [data], orient="LR", label_opts=node_labels)
tree.set_global_opts(title_opts=chart_title)
tree.render_notebook()