In [20]:
# 主要ライブラリのimport
import pandas as pd
import numpy as np

#ファイル出力時にパスを指定する際によく使う
import os
import pathlib

# ailab_tools → ailabで作っている各種ツール群
# impala、GSheet、GMail等の操作が簡単にできる
# ドキュメントは下記
# https://s3-ap-northeast-1.amazonaws.com/ailab-smn-valis/ailab-tools/docs/html/index.html

from ailab_tools.smn import ImpalaResource
from ailab_tools.utils import GMail, GSheet

#グラフをnotebook上で描画するためのおまじない
%matplotlib inline

In [None]:
# Impala connection settings (hc5); unpacked into ImpalaResource(**ircfg) by the query cells.
ircfg = dict(
    hosts=['172.16.60.117'],
    port=21050,
    user='vmspool',
    request_pool='adhoc_dm01_pool',
    httpfs_host='172.16.60.42',
    httpfs_port=14000,
)

# SSP共通案件に限定してSSP比較

In [22]:
# Extraction period: first through last day of the previous month (inclusive).
# Both dates are literals, so they have to be edited by hand for each run.
start_at, end_at = "2025-05-01", "2025-05-31"

## A.0 共通案件の抽出

In [23]:
# Exclusion list: advertisers whose CTV1 exceeds 1000 for some
# advertiser x SSP x OS (is_app) combination.
_EXCLUDED_ADVERTISER_IDS = (23626, 24377, 8139, 24580, 15701, 23336, 16763, 8426, 5930)
# Comma-separated form, ready to be interpolated into a SQL `not in (...)` list.
exclude_ads_id = ",".join(str(ads_id) for ads_id in _EXCLUDED_ADVERTISER_IDS)

In [24]:
# Fully qualified (schema.table) name of the intermediate table used by the queries below.
_TMP_SCHEMA = "dm_tmp"
_TMP_TABLE = "monthly_scheduled_batch_3"
created_table_name = f"{_TMP_SCHEMA}.{_TMP_TABLE}"

### 中間テーブル作成

In [None]:
# Build the intermediate table: impressions within [start_at, end_at] summed per
# SSP, advertiser, OS class (SP/PC), is_app flag and creative class
# (native/video/display). Advertisers listed in exclude_ads_id and rows with
# child_ssp_id <= 0 are left out.
# NOTE(review): this is a plain `create table`, so re-running the cell presumably
# fails while the table still exists — confirm where/if it gets dropped.
query = (
f"""
create table {created_table_name} as 
select 
    ads.child_ssp_id,
    s.name ssp,
    ads.advertiser_id,
    advertiser_name,
    case
        when user_agent_os_family in('iOS','Android') then 'SP'
        else 'PC'
    end os,
    case
        when is_app is null then 0
        else is_app
    end is_app,
    case
        when creative_type in(5,6) then 'native'
        when creative_type in(10) then 'video'
        else 'display'
    end creative,
    sum(imp) imp
from dm.domain_ads ads
    inner join dm.hierarchies hie on ads.target_id = hie.target_id
    inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
    inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
    inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
    left join console.ssp s on ads.child_ssp_id=s.id
where
    concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
    and ads.advertiser_id not in({exclude_ads_id})
    and child_ssp_id>0
group by 1,2,3,4,5,6,7
"""
)
# Execute the statement; the returned frame holds the statement summary
# (e.g. the inserted row count), not the table contents.
with ImpalaResource(**ircfg) as ir:
    table = ir.sql_to_pandas(query)
    
# Show the summary
table

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

Unnamed: 0,summary
0,Inserted 37538 row(s)


In [100]:
# Target segment for the next query: PC / non-app / display.
# NOTE: binding `os` here shadows the `os` module imported at the top of the notebook.
os, is_app, creative = 'PC', '0', 'display'

In [101]:
# Impression totals per advertiser x SSP for the segment selected by the
# `os`, `is_app` and `creative` variables, read from the intermediate table.
# The aggregate has no alias, so the result column is named "sum(imp)";
# the threshold cell that follows relies on that name.
query = (
f"""
select
    tmp.advertiser_id,
    tmp.advertiser_name,
    tmp.child_ssp_id,
    tmp.ssp,
    SUM(imp)
from
    {created_table_name} tmp
WHERE
    tmp.os IN ('{os}')
    AND tmp.is_app IN ({is_app})
    AND tmp.creative IN ('{creative}')
GROUP BY
    1,2,3,4
ORDER BY
    1,2,3,4
;
"""
)
# Run the query and load the result; this overwrites any previous `df`.
with ImpalaResource(**ircfg) as ir:
    df = ir.sql_to_pandas(query)
    
# Display the result
df

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

Unnamed: 0,advertiser_id,advertiser_name,child_ssp_id,ssp,sum(imp)
0,46,富士通株式会社,1,Google,1499122
1,46,富士通株式会社,3,OpenX,60364
2,46,富士通株式会社,10,PubMatic,32331
3,46,富士通株式会社,11,Microsoft SSP MSN（旧Xandr）,2169846
4,46,富士通株式会社,13,COMPASS,41046
...,...,...,...,...,...
9153,25676,(S)株式会社バイン(バインテック),28,Geniee,112
9154,25676,(S)株式会社バイン(バインテック),31,Logicad for Publishers,7685
9155,25676,(S)株式会社バイン(バインテック),32,GMOSSP,204
9156,25676,(S)株式会社バイン(バインテック),41,LFP プラス,2921


In [102]:
# Pick the advertisers used for the PC / display comparison.
#
# For each sum(imp) threshold (strictest first), keep the advertiser x SSP rows
# above the threshold and select the advertisers present on at least
# `min_ssp_coverage` of the SSPs that remain. Stop at the first threshold that
# yields `min_advertiser_count` advertisers; if none does, fall back to the
# threshold that produced the most advertisers.

# Candidate thresholds for sum(imp), tried in this order
thresholds = [10000, 5000, 3000, 1000]
# Share of the remaining SSPs an advertiser has to be present on
min_ssp_coverage = 0.8
# Number of advertisers that makes a threshold acceptable right away
min_advertiser_count = 20

# Initial values
result = []
total_unique_ssp = 0
target_advertiser_id_list = []
advertiser_dict = {}
is_break = False

for threshold in thresholds:
    # Keep only the advertiser x SSP rows whose impression total exceeds the threshold
    filtered_threshold_df = df[df['sum(imp)'] > threshold]

    # 1. Distinct SSPs that remain, and how many of them an advertiser must cover
    total_unique_ssp = filtered_threshold_df['ssp'].nunique()
    required_ssp_count = int(min_ssp_coverage * total_unique_ssp)

    # 2. Per advertiser: distinct SSP count and the list of child_ssp_ids
    grouped_df = filtered_threshold_df.groupby('advertiser_id').agg(
        num_unique_ssp=('ssp', 'nunique'),
        child_ssp_ids=('child_ssp_id', lambda x: ','.join(map(str, x)))
    ).reset_index()

    # 3. Advertisers whose SSP count reaches the required coverage
    result_df = grouped_df[grouped_df['num_unique_ssp'] >= required_ssp_count]

    # Remember the advertiser ids obtained for this threshold
    result = result_df.to_dict(orient='records')
    target_advertiser_id_list = [row['advertiser_id'] for row in result]
    advertiser_dict[threshold] = target_advertiser_id_list

    # Report the threshold, the number of advertisers and the required SSP count
    print(f"upper {threshold}: {len(target_advertiser_id_list)} advertiser_id_num")
    print(f"threshold: upper {required_ssp_count}\n")

    # Enough advertisers: stop here
    if len(target_advertiser_id_list) >= min_advertiser_count:
        is_break = True
        break

# No threshold produced enough advertisers: use the threshold with the most ids.
# The decision looks at every threshold, so an empty result for the last one no
# longer discards non-empty results of earlier thresholds. Ties keep the
# strictest threshold because max() returns the first maximum.
if not is_break:
    best_threshold, best_id_list = max(advertiser_dict.items(), key=lambda item: len(item[1]))
    if best_id_list:
        threshold, target_advertiser_id_list = best_threshold, best_id_list

# Comma-separated ids for the SQL `in (...)` list; empty string when nothing qualified
pc_display_advertiser_id = ",".join(map(str, target_advertiser_id_list))
if not pc_display_advertiser_id:
    print("No pc_display_advertiser_id.")

# Print the results
print(f"Final threshold value for sum(imp): {threshold}")
print("\nList of pc_display_advertiser_id:", pc_display_advertiser_id)

upper 10000: 55 ids
threshold: upper 12

Final threshold value for sum(imp): 10000

List of pc_display_advertiser_id: 46,867,1373,3066,4264,7151,7294,7694,7783,7983,9131,9700,10919,11772,14508,18600,18937,19174,19242,22532,22625,22672,22693,22747,22808,22903,23059,23278,23406,23704,23711,23786,24027,24126,24217,24355,24624,24632,24652,24866,24969,24991,25228,25281,25287,25321,25334,25342,25346,25427,25434,25438,25450,25469,25506


In [105]:
# Target segment for the next query: PC / non-app / native.
# NOTE: binding `os` here shadows the `os` module imported at the top of the notebook.
os, is_app, creative = 'PC', '0', 'native'

In [106]:
# Impression totals per advertiser x SSP for the segment selected by the
# `os`, `is_app` and `creative` variables, read from the intermediate table.
# The aggregate has no alias, so the result column is named "sum(imp)";
# the threshold cell that follows relies on that name.
query = (
f"""
select
    tmp.advertiser_id,
    tmp.advertiser_name,
    tmp.child_ssp_id,
    tmp.ssp,
    SUM(imp)
from
    {created_table_name} tmp
WHERE
    tmp.os IN ('{os}')
    AND tmp.is_app IN ({is_app})
    AND tmp.creative IN ('{creative}')
GROUP BY
    1,2,3,4
ORDER BY
    1,2,3,4
;
"""
)
# Run the query and load the result; this overwrites any previous `df`.
with ImpalaResource(**ircfg) as ir:
    df = ir.sql_to_pandas(query)
    
# Display the result
df

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

Unnamed: 0,advertiser_id,advertiser_name,child_ssp_id,ssp,sum(imp)
0,46,富士通株式会社,1,Google,54101
1,46,富士通株式会社,11,Microsoft SSP MSN（旧Xandr）,3177
2,46,富士通株式会社,13,COMPASS,1157
3,46,富士通株式会社,14,AdStir,2
4,46,富士通株式会社,17,AdGeneration,50
...,...,...,...,...,...
2005,25621,(S)埼玉県浦和競馬組合,32,GMOSSP,703
2006,25621,(S)埼玉県浦和競馬組合,34,【取引停止】popIn,6399
2007,25626,(S)弁護士法人永和総合法律事務所（Logicad）,30,Outbrain,0
2008,25626,(S)弁護士法人永和総合法律事務所（Logicad）,44,Allox,1


In [107]:
# Pick the advertisers used for the PC / native comparison.
#
# For each sum(imp) threshold (strictest first), keep the advertiser x SSP rows
# above the threshold and select the advertisers present on at least
# `min_ssp_coverage` of the SSPs that remain. Stop at the first threshold that
# yields `min_advertiser_count` advertisers; if none does, fall back to the
# threshold that produced the most advertisers.

# Candidate thresholds for sum(imp), tried in this order
thresholds = [10000, 5000, 3000, 1000]
# Share of the remaining SSPs an advertiser has to be present on
min_ssp_coverage = 0.8
# Number of advertisers that makes a threshold acceptable right away
min_advertiser_count = 20

# Initial values
result = []
total_unique_ssp = 0
target_advertiser_id_list = []
advertiser_dict = {}
is_break = False

for threshold in thresholds:
    # Keep only the advertiser x SSP rows whose impression total exceeds the threshold
    filtered_threshold_df = df[df['sum(imp)'] > threshold]

    # 1. Distinct SSPs that remain, and how many of them an advertiser must cover
    total_unique_ssp = filtered_threshold_df['ssp'].nunique()
    required_ssp_count = int(min_ssp_coverage * total_unique_ssp)

    # 2. Per advertiser: distinct SSP count and the list of child_ssp_ids
    grouped_df = filtered_threshold_df.groupby('advertiser_id').agg(
        num_unique_ssp=('ssp', 'nunique'),
        child_ssp_ids=('child_ssp_id', lambda x: ','.join(map(str, x)))
    ).reset_index()

    # 3. Advertisers whose SSP count reaches the required coverage
    result_df = grouped_df[grouped_df['num_unique_ssp'] >= required_ssp_count]

    # Remember the advertiser ids obtained for this threshold
    result = result_df.to_dict(orient='records')
    target_advertiser_id_list = [row['advertiser_id'] for row in result]
    advertiser_dict[threshold] = target_advertiser_id_list

    # Report the threshold, the number of advertisers and the required SSP count
    print(f"upper {threshold}: {len(target_advertiser_id_list)} advertiser_id_num")
    print(f"threshold: upper {required_ssp_count}\n")

    # Enough advertisers: stop here
    if len(target_advertiser_id_list) >= min_advertiser_count:
        is_break = True
        break

# No threshold produced enough advertisers: use the threshold with the most ids.
# The decision looks at every threshold, so an empty result for the last one no
# longer discards non-empty results of earlier thresholds. Ties keep the
# strictest threshold because max() returns the first maximum.
if not is_break:
    best_threshold, best_id_list = max(advertiser_dict.items(), key=lambda item: len(item[1]))
    if best_id_list:
        threshold, target_advertiser_id_list = best_threshold, best_id_list

# Comma-separated ids for the SQL `in (...)` list; empty string when nothing qualified
pc_native_advertiser_id = ",".join(map(str, target_advertiser_id_list))
if not pc_native_advertiser_id:
    print("No pc_native_advertiser_id.")

# Print the results
print(f"Final threshold value for sum(imp): {threshold}")
print("\nList of pc_native_advertiser_id:", pc_native_advertiser_id)

upper 10000: 18 ids
threshold: upper 4

upper 5000: 14 ids
threshold: upper 5

upper 3000: 22 ids
threshold: upper 5

Final threshold value for sum(imp): 3000

List of pc_native_advertiser_id: 46,3066,5900,7294,11092,16006,18600,19363,19552,19640,20870,22233,22532,22672,22747,23278,23786,24632,24730,24866,25213,25434


In [108]:
# Target segment for the next query: SP / non-app (web) / display.
# NOTE: binding `os` here shadows the `os` module imported at the top of the notebook.
os, is_app, creative = 'SP', '0', 'display'

In [109]:
# Impression totals per advertiser x SSP for the segment selected by the
# `os`, `is_app` and `creative` variables, read from the intermediate table.
# The aggregate has no alias, so the result column is named "sum(imp)";
# the threshold cell that follows relies on that name.
query = (
f"""
select
    tmp.advertiser_id,
    tmp.advertiser_name,
    tmp.child_ssp_id,
    tmp.ssp,
    SUM(imp)
from
    {created_table_name} tmp
WHERE
    tmp.os IN ('{os}')
    AND tmp.is_app IN ({is_app})
    AND tmp.creative IN ('{creative}')
GROUP BY
    1,2,3,4
ORDER BY
    1,2,3,4
;
"""
)
# Run the query and load the result; this overwrites any previous `df`.
with ImpalaResource(**ircfg) as ir:
    df = ir.sql_to_pandas(query)
    
# Display the result
df

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

Unnamed: 0,advertiser_id,advertiser_name,child_ssp_id,ssp,sum(imp)
0,46,富士通株式会社,1,Google,1
1,46,富士通株式会社,17,AdGeneration,1
2,53,スカパーJSAT株式会社,1,Google,856731
3,53,スカパーJSAT株式会社,3,OpenX,148621
4,53,スカパーJSAT株式会社,10,PubMatic,343595
...,...,...,...,...,...
14066,25676,(S)株式会社バイン(バインテック),31,Logicad for Publishers,10750
14067,25676,(S)株式会社バイン(バインテック),32,GMOSSP,4609
14068,25676,(S)株式会社バイン(バインテック),41,LFP プラス,5370
14069,25676,(S)株式会社バイン(バインテック),42,AJA,4164


In [110]:
# Pick the advertisers used for the SP web / display comparison.
#
# For each sum(imp) threshold (strictest first), keep the advertiser x SSP rows
# above the threshold and select the advertisers present on at least
# `min_ssp_coverage` of the SSPs that remain. Stop at the first threshold that
# yields `min_advertiser_count` advertisers; if none does, fall back to the
# threshold that produced the most advertisers.

# Candidate thresholds for sum(imp), tried in this order
thresholds = [10000, 5000, 3000, 1000]
# Share of the remaining SSPs an advertiser has to be present on
min_ssp_coverage = 0.8
# Number of advertisers that makes a threshold acceptable right away
min_advertiser_count = 20

# Initial values
result = []
total_unique_ssp = 0
target_advertiser_id_list = []
advertiser_dict = {}
is_break = False

for threshold in thresholds:
    # Keep only the advertiser x SSP rows whose impression total exceeds the threshold
    filtered_threshold_df = df[df['sum(imp)'] > threshold]

    # 1. Distinct SSPs that remain, and how many of them an advertiser must cover
    total_unique_ssp = filtered_threshold_df['ssp'].nunique()
    required_ssp_count = int(min_ssp_coverage * total_unique_ssp)

    # 2. Per advertiser: distinct SSP count and the list of child_ssp_ids
    grouped_df = filtered_threshold_df.groupby('advertiser_id').agg(
        num_unique_ssp=('ssp', 'nunique'),
        child_ssp_ids=('child_ssp_id', lambda x: ','.join(map(str, x)))
    ).reset_index()

    # 3. Advertisers whose SSP count reaches the required coverage
    result_df = grouped_df[grouped_df['num_unique_ssp'] >= required_ssp_count]

    # Remember the advertiser ids obtained for this threshold
    result = result_df.to_dict(orient='records')
    target_advertiser_id_list = [row['advertiser_id'] for row in result]
    advertiser_dict[threshold] = target_advertiser_id_list

    # Report the threshold, the number of advertisers and the required SSP count
    print(f"upper {threshold}: {len(target_advertiser_id_list)} advertiser_id_num")
    print(f"threshold: upper {required_ssp_count}\n")

    # Enough advertisers: stop here
    if len(target_advertiser_id_list) >= min_advertiser_count:
        is_break = True
        break

# No threshold produced enough advertisers: use the threshold with the most ids.
# The decision looks at every threshold, so an empty result for the last one no
# longer discards non-empty results of earlier thresholds. Ties keep the
# strictest threshold because max() returns the first maximum.
if not is_break:
    best_threshold, best_id_list = max(advertiser_dict.items(), key=lambda item: len(item[1]))
    if best_id_list:
        threshold, target_advertiser_id_list = best_threshold, best_id_list

# Comma-separated ids for the SQL `in (...)` list; empty string when nothing qualified
spweb_display_advertiser_id = ",".join(map(str, target_advertiser_id_list))
if not spweb_display_advertiser_id:
    print("No spweb_display_advertiser_id.")

# Print the results
print(f"Final threshold value for sum(imp): {threshold}")
print("\nList of spweb_display_advertiser_id:", spweb_display_advertiser_id)

upper 10000: 247 ids
threshold: upper 14

Final threshold value for sum(imp): 10000

List of spweb_display_advertiser_id: 53,1021,1782,2663,2763,3066,3629,4264,4576,4996,5351,5374,7078,7151,7294,7783,7821,7983,8178,8595,9324,9662,9676,9700,10156,10215,10467,10632,10847,10919,12345,12597,12766,13104,13195,13471,13837,14398,14422,15372,15838,15941,16483,16737,16756,16843,17243,17278,17424,17579,17746,17800,17832,17989,18033,18110,18792,18880,18937,18958,18989,19033,19052,19171,19174,19397,19489,19532,19539,19544,19551,19552,19574,19679,19906,19951,20746,20832,20870,20904,20909,20968,20970,20974,21495,21588,21736,21765,21912,21964,21971,21985,22069,22187,22191,22273,22315,22316,22394,22397,22466,22524,22532,22558,22631,22672,22693,22718,22747,22955,22964,23028,23038,23059,23155,23180,23191,23194,23202,23253,23275,23278,23282,23315,23356,23364,23372,23401,23406,23414,23415,23419,23435,23437,23450,23454,23628,23698,23704,23711,23749,23786,23800,23805,23843,23844,23878,23903,23960,23980,2402

In [111]:
# Target segment for the next query: SP / non-app (web) / native.
# NOTE: binding `os` here shadows the `os` module imported at the top of the notebook.
os, is_app, creative = 'SP', '0', 'native'

In [112]:
# Impression totals per advertiser x SSP for the segment selected by the
# `os`, `is_app` and `creative` variables, read from the intermediate table.
# The aggregate has no alias, so the result column is named "sum(imp)";
# the threshold cell that follows relies on that name.
query = (
f"""
select
    tmp.advertiser_id,
    tmp.advertiser_name,
    tmp.child_ssp_id,
    tmp.ssp,
    SUM(imp)
from
    {created_table_name} tmp
WHERE
    tmp.os IN ('{os}')
    AND tmp.is_app IN ({is_app})
    AND tmp.creative IN ('{creative}')
GROUP BY
    1,2,3,4
ORDER BY
    1,2,3,4
;
"""
)
# Run the query and load the result; this overwrites any previous `df`.
with ImpalaResource(**ircfg) as ir:
    df = ir.sql_to_pandas(query)
    
# Display the result
df

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

Unnamed: 0,advertiser_id,advertiser_name,child_ssp_id,ssp,sum(imp)
0,46,富士通株式会社,11,Microsoft SSP MSN（旧Xandr）,0
1,146,株式会社エデュケーショナルネットワーク,1,Google,6288
2,146,株式会社エデュケーショナルネットワーク,11,Microsoft SSP MSN（旧Xandr）,319
3,146,株式会社エデュケーショナルネットワーク,13,COMPASS,383
4,146,株式会社エデュケーショナルネットワーク,14,AdStir,43
...,...,...,...,...,...
4262,25657,(S)Rapport(武田光一の天上宝命術),13,COMPASS,5595
4263,25657,(S)Rapport(武田光一の天上宝命術),14,AdStir,72
4264,25657,(S)Rapport(武田光一の天上宝命術),20,fluct,19842
4265,25657,(S)Rapport(武田光一の天上宝命術),27,ProFit-X,574


In [113]:
# Pick the advertisers used for the SP web / native comparison.
#
# For each sum(imp) threshold (strictest first), keep the advertiser x SSP rows
# above the threshold and select the advertisers present on at least
# `min_ssp_coverage` of the SSPs that remain. Stop at the first threshold that
# yields `min_advertiser_count` advertisers; if none does, fall back to the
# threshold that produced the most advertisers.

# Candidate thresholds for sum(imp), tried in this order
thresholds = [10000, 5000, 3000, 1000]
# Share of the remaining SSPs an advertiser has to be present on
min_ssp_coverage = 0.8
# Number of advertisers that makes a threshold acceptable right away
min_advertiser_count = 20

# Initial values
result = []
total_unique_ssp = 0
target_advertiser_id_list = []
advertiser_dict = {}
is_break = False

for threshold in thresholds:
    # Keep only the advertiser x SSP rows whose impression total exceeds the threshold
    filtered_threshold_df = df[df['sum(imp)'] > threshold]

    # 1. Distinct SSPs that remain, and how many of them an advertiser must cover
    total_unique_ssp = filtered_threshold_df['ssp'].nunique()
    required_ssp_count = int(min_ssp_coverage * total_unique_ssp)

    # 2. Per advertiser: distinct SSP count and the list of child_ssp_ids
    grouped_df = filtered_threshold_df.groupby('advertiser_id').agg(
        num_unique_ssp=('ssp', 'nunique'),
        child_ssp_ids=('child_ssp_id', lambda x: ','.join(map(str, x)))
    ).reset_index()

    # 3. Advertisers whose SSP count reaches the required coverage
    result_df = grouped_df[grouped_df['num_unique_ssp'] >= required_ssp_count]

    # Remember the advertiser ids obtained for this threshold
    result = result_df.to_dict(orient='records')
    target_advertiser_id_list = [row['advertiser_id'] for row in result]
    advertiser_dict[threshold] = target_advertiser_id_list

    # Report the threshold, the number of advertisers and the required SSP count
    print(f"upper {threshold}: {len(target_advertiser_id_list)} advertiser_id_num")
    print(f"threshold: upper {required_ssp_count}\n")

    # Enough advertisers: stop here
    if len(target_advertiser_id_list) >= min_advertiser_count:
        is_break = True
        break

# No threshold produced enough advertisers: use the threshold with the most ids.
# The decision looks at every threshold, so an empty result for the last one no
# longer discards non-empty results of earlier thresholds. Ties keep the
# strictest threshold because max() returns the first maximum.
if not is_break:
    best_threshold, best_id_list = max(advertiser_dict.items(), key=lambda item: len(item[1]))
    if best_id_list:
        threshold, target_advertiser_id_list = best_threshold, best_id_list

# Comma-separated ids for the SQL `in (...)` list; empty string when nothing qualified
spweb_native_advertiser_id = ",".join(map(str, target_advertiser_id_list))
if not spweb_native_advertiser_id:
    print("No spweb_native_advertiser_id.")

# Print the results
print(f"Final threshold value for sum(imp): {threshold}")
print("\nList of spweb_native_advertiser_id:", spweb_native_advertiser_id)

upper 10000: 17 ids
threshold: upper 9

upper 5000: 34 ids
threshold: upper 9

Final threshold value for sum(imp): 5000

List of spweb_native_advertiser_id: 3066,4264,5900,13471,15838,17579,17646,18294,18861,19481,19552,19906,20870,20909,22233,22524,22532,22558,22672,23028,23191,23315,23437,23843,23878,23903,24499,24627,24632,24911,25200,25256,25259,25386


In [114]:
# Target segment for the next query: SP / app / display.
# NOTE: binding `os` here shadows the `os` module imported at the top of the notebook.
os, is_app, creative = 'SP', '1', 'display'

In [115]:
# Impression totals per advertiser x SSP for the segment selected by the
# `os`, `is_app` and `creative` variables, read from the intermediate table.
# The aggregate has no alias, so the result column is named "sum(imp)";
# the threshold cell that follows relies on that name.
query = (
f"""
select
    tmp.advertiser_id,
    tmp.advertiser_name,
    tmp.child_ssp_id,
    tmp.ssp,
    SUM(imp)
from
    {created_table_name} tmp
WHERE
    tmp.os IN ('{os}')
    AND tmp.is_app IN ({is_app})
    AND tmp.creative IN ('{creative}')
GROUP BY
    1,2,3,4
ORDER BY
    1,2,3,4
;
"""
)
# Run the query and load the result; this overwrites any previous `df`.
with ImpalaResource(**ircfg) as ir:
    df = ir.sql_to_pandas(query)
    
# Display the result
df

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

Unnamed: 0,advertiser_id,advertiser_name,child_ssp_id,ssp,sum(imp)
0,53,スカパーJSAT株式会社,1,Google,155970
1,53,スカパーJSAT株式会社,3,OpenX,48483
2,53,スカパーJSAT株式会社,14,AdStir,474
3,53,スカパーJSAT株式会社,16,Magnite,197499
4,53,スカパーJSAT株式会社,17,AdGeneration,94109
...,...,...,...,...,...
4826,25657,(S)Rapport(武田光一の天上宝命術),14,AdStir,12
4827,25657,(S)Rapport(武田光一の天上宝命術),16,Magnite,15281
4828,25657,(S)Rapport(武田光一の天上宝命術),20,fluct,2923
4829,25657,(S)Rapport(武田光一の天上宝命術),27,ProFit-X,214


In [117]:
# Pick the advertisers used for the SP app / display comparison.
#
# For each sum(imp) threshold (strictest first), keep the advertiser x SSP rows
# above the threshold and select the advertisers present on at least
# `min_ssp_coverage` of the SSPs that remain. Stop at the first threshold that
# yields `min_advertiser_count` advertisers; if none does, fall back to the
# threshold that produced the most advertisers.

# Candidate thresholds for sum(imp), tried in this order
thresholds = [10000, 5000, 3000, 1000]
# Share of the remaining SSPs an advertiser has to be present on
min_ssp_coverage = 0.8
# Number of advertisers that makes a threshold acceptable right away
min_advertiser_count = 20

# Initial values
result = []
total_unique_ssp = 0
target_advertiser_id_list = []
advertiser_dict = {}
is_break = False

for threshold in thresholds:
    # Keep only the advertiser x SSP rows whose impression total exceeds the threshold
    filtered_threshold_df = df[df['sum(imp)'] > threshold]

    # 1. Distinct SSPs that remain, and how many of them an advertiser must cover
    total_unique_ssp = filtered_threshold_df['ssp'].nunique()
    required_ssp_count = int(min_ssp_coverage * total_unique_ssp)

    # 2. Per advertiser: distinct SSP count and the list of child_ssp_ids
    grouped_df = filtered_threshold_df.groupby('advertiser_id').agg(
        num_unique_ssp=('ssp', 'nunique'),
        child_ssp_ids=('child_ssp_id', lambda x: ','.join(map(str, x)))
    ).reset_index()

    # 3. Advertisers whose SSP count reaches the required coverage
    result_df = grouped_df[grouped_df['num_unique_ssp'] >= required_ssp_count]

    # Remember the advertiser ids obtained for this threshold
    result = result_df.to_dict(orient='records')
    target_advertiser_id_list = [row['advertiser_id'] for row in result]
    advertiser_dict[threshold] = target_advertiser_id_list

    # Report the threshold, the number of advertisers and the required SSP count
    print(f"upper {threshold}: {len(target_advertiser_id_list)} advertiser_id_num")
    print(f"threshold: upper {required_ssp_count}\n")

    # Enough advertisers: stop here
    if len(target_advertiser_id_list) >= min_advertiser_count:
        is_break = True
        break

# No threshold produced enough advertisers: use the threshold with the most ids.
# The decision looks at every threshold, so an empty result for the last one no
# longer discards non-empty results of earlier thresholds. Ties keep the
# strictest threshold because max() returns the first maximum.
if not is_break:
    best_threshold, best_id_list = max(advertiser_dict.items(), key=lambda item: len(item[1]))
    if best_id_list:
        threshold, target_advertiser_id_list = best_threshold, best_id_list

# Comma-separated ids for the SQL `in (...)` list; empty string when nothing qualified
spapp_display_advertiser_id = ",".join(map(str, target_advertiser_id_list))
if not spapp_display_advertiser_id:
    print("No spapp_display_advertiser_id.")

# Print the results
print(f"Final threshold value for sum(imp): {threshold}")
print("\nList of spapp_display_advertiser_id:", spapp_display_advertiser_id)

upper 10000: 43 ids
threshold: upper 6

Final threshold value for sum(imp): 10000

List of spapp_display_advertiser_id: 53,3066,4576,6862,9700,10847,13471,15372,15838,15941,18110,18792,18937,19052,19174,19574,20870,20909,20974,21495,22187,22394,22532,22558,22693,22964,23182,23194,23202,23278,23704,24126,24245,24500,24585,24768,25200,25213,25259,25300,25321,25422,25438


In [118]:
# Target segment for the next query: SP / app / native.
# NOTE: binding `os` here shadows the `os` module imported at the top of the notebook.
os, is_app, creative = 'SP', '1', 'native'

In [119]:
# Impression totals per advertiser x SSP for the segment selected by the
# `os`, `is_app` and `creative` variables, read from the intermediate table.
# The aggregate has no alias, so the result column is named "sum(imp)";
# the threshold cell that follows relies on that name.
query = (
f"""
select
    tmp.advertiser_id,
    tmp.advertiser_name,
    tmp.child_ssp_id,
    tmp.ssp,
    SUM(imp)
from
    {created_table_name} tmp
WHERE
    tmp.os IN ('{os}')
    AND tmp.is_app IN ({is_app})
    AND tmp.creative IN ('{creative}')
GROUP BY
    1,2,3,4
ORDER BY
    1,2,3,4
;
"""
)
# Run the query and load the result; this overwrites any previous `df`.
with ImpalaResource(**ircfg) as ir:
    df = ir.sql_to_pandas(query)
    
# Display the result
df

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

Unnamed: 0,advertiser_id,advertiser_name,child_ssp_id,ssp,sum(imp)
0,146,株式会社エデュケーショナルネットワーク,1,Google,747
1,146,株式会社エデュケーショナルネットワーク,17,AdGeneration,168
2,146,株式会社エデュケーショナルネットワーク,39,PubNative,1042
3,146,株式会社エデュケーショナルネットワーク,44,Allox,38
4,1738,(S)アサヒカルピスウェルネス株式会社,1,Google,57
...,...,...,...,...,...
1667,25657,(S)Rapport(武田光一の天上宝命術),1,Google,3841
1668,25657,(S)Rapport(武田光一の天上宝命術),20,fluct,15
1669,25657,(S)Rapport(武田光一の天上宝命術),27,ProFit-X,4
1670,25657,(S)Rapport(武田光一の天上宝命術),39,PubNative,17511


In [120]:
# Pick the advertisers used for the SP app / native comparison.
#
# For each sum(imp) threshold (strictest first), keep the advertiser x SSP rows
# above the threshold and select the advertisers present on at least
# `min_ssp_coverage` of the SSPs that remain. Stop at the first threshold that
# yields `min_advertiser_count` advertisers; if none does, fall back to the
# threshold that produced the most advertisers.

# Candidate thresholds for sum(imp), tried in this order
thresholds = [10000, 5000, 3000, 1000]
# Share of the remaining SSPs an advertiser has to be present on
min_ssp_coverage = 0.8
# Number of advertisers that makes a threshold acceptable right away
min_advertiser_count = 20

# Initial values
result = []
total_unique_ssp = 0
target_advertiser_id_list = []
advertiser_dict = {}
is_break = False

for threshold in thresholds:
    # Keep only the advertiser x SSP rows whose impression total exceeds the threshold
    filtered_threshold_df = df[df['sum(imp)'] > threshold]

    # 1. Distinct SSPs that remain, and how many of them an advertiser must cover
    total_unique_ssp = filtered_threshold_df['ssp'].nunique()
    required_ssp_count = int(min_ssp_coverage * total_unique_ssp)

    # 2. Per advertiser: distinct SSP count and the list of child_ssp_ids
    grouped_df = filtered_threshold_df.groupby('advertiser_id').agg(
        num_unique_ssp=('ssp', 'nunique'),
        child_ssp_ids=('child_ssp_id', lambda x: ','.join(map(str, x)))
    ).reset_index()

    # 3. Advertisers whose SSP count reaches the required coverage
    result_df = grouped_df[grouped_df['num_unique_ssp'] >= required_ssp_count]

    # Remember the advertiser ids obtained for this threshold
    result = result_df.to_dict(orient='records')
    target_advertiser_id_list = [row['advertiser_id'] for row in result]
    advertiser_dict[threshold] = target_advertiser_id_list

    # Report the threshold, the number of advertisers and the required SSP count
    print(f"upper {threshold}: {len(target_advertiser_id_list)} advertiser_id_num")
    print(f"threshold: upper {required_ssp_count}\n")

    # Enough advertisers: stop here
    if len(target_advertiser_id_list) >= min_advertiser_count:
        is_break = True
        break

# No threshold produced enough advertisers: use the threshold with the most ids.
# The decision looks at every threshold, so an empty result for the last one no
# longer discards non-empty results of earlier thresholds. Ties keep the
# strictest threshold because max() returns the first maximum.
if not is_break:
    best_threshold, best_id_list = max(advertiser_dict.items(), key=lambda item: len(item[1]))
    if best_id_list:
        threshold, target_advertiser_id_list = best_threshold, best_id_list

# Comma-separated ids for the SQL `in (...)` list; empty string when nothing qualified
spapp_native_advertiser_id = ",".join(map(str, target_advertiser_id_list))
if not spapp_native_advertiser_id:
    print("No spapp_native_advertiser_id.")

# Print the results
print(f"Final threshold value for sum(imp): {threshold}")
print("\nList of spapp_native_advertiser_id:", spapp_native_advertiser_id)

upper 10000: 5 ids
threshold: upper 4

upper 5000: 7 ids
threshold: upper 4

upper 3000: 12 ids
threshold: upper 4

upper 1000: 4 ids
threshold: upper 5

Final threshold value for sum(imp): 3000

List of spapp_native_advertiser_id: 4264,19551,22394,22532,22558,22693,23191,23878,24499,24627,24911,25438


## A1.SSP全体比較

In [121]:
# pc_display
# Monthly per-SSP totals for the PC / non-app / display segment. Three sources
# are stacked with `union all` and summed per SSP:
#   1. dm.domain_req - request-side counters (no advertiser filter in this branch)
#   2. dm.domain_res - res / bidden counts for the selected advertisers
#   3. dm.domain_ads - imp, click, ctv and spend figures for the selected advertisers
# Spend columns of the third branch are divided by 1,000,000 in the query.

# Fail fast: the selection cell leaves an empty string when no advertiser
# qualified, which would render `advertiser_id in ()` and is not valid SQL.
if not pc_display_advertiser_id:
    raise ValueError("pc_display_advertiser_id is empty; cannot build the pc_display query")

query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
	sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
	sum(logicad_request_id_1_sum_native_plcmtcnt_synced) logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(bidden) bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
sum(logicad_request_id_1_sum_native_plcmtcnt_synced)logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_req
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family not in('iOS','Android')
and is_app=false
and native_request=false and video_request=false
group by 1,2,3,4,5

union all

select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res ads
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family not in('iOS','Android')
and is_app=false
and creative_type not in(5,6,10)
and ads.advertiser_id in ({pc_display_advertiser_id})
group by 1,2,3,4,5

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family not in('iOS','Android')
and is_app=false
and creative_type not in(5,6,10)
and ads.advertiser_id in ({pc_display_advertiser_id})
group by 1,2,3,4,5) a
left join console.ssp s on s.id=a.child_ssp_id
group by  1,2,3,4,5,6
"""
)
# Run the query and keep the result for the PC / display comparison
with ImpalaResource(**ircfg) as ir:
    data_1_pc_display = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [122]:
# pc_native
# Funnel metrics for the PC / non-app / native segment, one row per
# (ym, os, is_app, creative, child_ssp_id, ssp).
# The inner derived table stacks three aggregates with UNION ALL; each branch
# fills only its own metrics and emits literal 0 for the rest, so the outer
# SELECT can simply sum() every column:
#   1. dm.domain_req -> synced request counts. This branch has no advertiser
#      filter, unlike the two branches below.
#   2. dm.domain_res -> res, and bidden (res counted only where bidden=true),
#      limited to the ids interpolated from pc_native_advertiser_id.
#   3. dm.domain_ads -> imp / click / ctv1 / ctv2 plus original_win, net_sales
#      and gross_sales (each divided by 1,000,000); same advertiser filter.
# Segment filters: user_agent_os_family not in ('iOS','Android'), is_app=false,
# native_request=true and video_request=false (branch 1) /
# creative_type in (5,6) (branches 2 and 3).
# console.ssp is LEFT JOINed only to attach the SSP name.
# NOTE(review): pc_native_advertiser_id is defined outside this cell; it is
# presumably a comma-separated id list because it is pasted into "in (...)" — confirm.
# NOTE(review): branches 1 and 2 derive `creative` from native_request /
# video_request while branch 3 derives it from creative_type, and branch 2
# filters on creative_type. Confirm the two always agree, otherwise rows can
# land under a different creative label.
# NOTE(review): "not in ('iOS','Android')" also drops rows whose
# user_agent_os_family is NULL, although the CASE would label them 'PC' — confirm intended.
query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
	sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
	sum(logicad_request_id_1_sum_native_plcmtcnt_synced) logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(bidden)bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
sum(logicad_request_id_1_sum_native_plcmtcnt_synced)logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_req
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family not in('iOS','Android')
and is_app=false
and native_request=true and video_request=false
group by 1,2,3,4,5

union all

select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res ads
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family not in('iOS','Android')
and is_app=false
and creative_type in(5,6)
and ads.advertiser_id in ({pc_native_advertiser_id})
group by 1,2,3,4,5

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family not in('iOS','Android')
and is_app=false
and creative_type in(5,6)
and ads.advertiser_id in ({pc_native_advertiser_id})
group by 1,2,3,4,5) a
left join console.ssp s on s.id=a.child_ssp_id
group by  1,2,3,4,5,6
"""
)
# Run the query through a context-managed ImpalaResource and keep the result
# returned by sql_to_pandas for the later concatenation into data_a_1.
with ImpalaResource(**ircfg) as ir:
    data_1_pc_native = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [123]:
# spweb_display
# Funnel metrics for the SP-web (iOS/Android, non-app) / display segment, one
# row per (ym, os, is_app, creative, child_ssp_id, ssp).
# The inner derived table stacks three aggregates with UNION ALL; each branch
# fills only its own metrics and emits literal 0 for the rest, so the outer
# SELECT can simply sum() every column:
#   1. dm.domain_req -> synced request counts. This branch has no advertiser
#      filter, unlike the two branches below.
#   2. dm.domain_res -> res, and bidden (res counted only where bidden=true),
#      limited to the ids interpolated from spweb_display_advertiser_id.
#   3. dm.domain_ads -> imp / click / ctv1 / ctv2 plus original_win, net_sales
#      and gross_sales (each divided by 1,000,000); same advertiser filter.
# Segment filters: user_agent_os_family in ('iOS','Android'), is_app=false,
# native_request=false and video_request=false (branch 1) /
# creative_type not in (5,6,10) (branches 2 and 3).
# console.ssp is LEFT JOINed only to attach the SSP name.
# NOTE(review): spweb_display_advertiser_id is defined outside this cell; it is
# presumably a comma-separated id list because it is pasted into "in (...)" — confirm.
# NOTE(review): branches 1 and 2 derive `creative` from native_request /
# video_request while branch 3 derives it from creative_type, and branch 2
# filters on creative_type. Confirm the two always agree, otherwise rows can
# land under a different creative label.
query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
	sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
	sum(logicad_request_id_1_sum_native_plcmtcnt_synced) logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(bidden)bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
sum(logicad_request_id_1_sum_native_plcmtcnt_synced)logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_req
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=false
and native_request=false and video_request=false
group by 1,2,3,4,5

union all

select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res ads
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=false
and creative_type not in(5,6,10)
and ads.advertiser_id in ({spweb_display_advertiser_id})
group by 1,2,3,4,5

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=false
and creative_type not in(5,6,10)
and ads.advertiser_id in ({spweb_display_advertiser_id})
group by 1,2,3,4,5) a
left join console.ssp s on s.id=a.child_ssp_id
group by  1,2,3,4,5,6
"""
)
# Run the query through a context-managed ImpalaResource and keep the result
# returned by sql_to_pandas for the later concatenation into data_a_1.
with ImpalaResource(**ircfg) as ir:
    data_1_spweb_display = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [124]:
# spweb_native
# Funnel metrics for the SP-web (iOS/Android, non-app) / native segment, one
# row per (ym, os, is_app, creative, child_ssp_id, ssp).
# The inner derived table stacks three aggregates with UNION ALL; each branch
# fills only its own metrics and emits literal 0 for the rest, so the outer
# SELECT can simply sum() every column:
#   1. dm.domain_req -> synced request counts. This branch has no advertiser
#      filter, unlike the two branches below.
#   2. dm.domain_res -> res, and bidden (res counted only where bidden=true),
#      limited to the ids interpolated from spweb_native_advertiser_id.
#   3. dm.domain_ads -> imp / click / ctv1 / ctv2 plus original_win, net_sales
#      and gross_sales (each divided by 1,000,000); same advertiser filter.
# Segment filters: user_agent_os_family in ('iOS','Android'), is_app=false,
# native_request=true and video_request=false (branch 1) /
# creative_type in (5,6) (branches 2 and 3).
# console.ssp is LEFT JOINed only to attach the SSP name.
# NOTE(review): spweb_native_advertiser_id is defined outside this cell; it is
# presumably a comma-separated id list because it is pasted into "in (...)" — confirm.
# NOTE(review): branches 1 and 2 derive `creative` from native_request /
# video_request while branch 3 derives it from creative_type, and branch 2
# filters on creative_type. Confirm the two always agree, otherwise rows can
# land under a different creative label.
query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
	sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
	sum(logicad_request_id_1_sum_native_plcmtcnt_synced) logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(bidden)bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
sum(logicad_request_id_1_sum_native_plcmtcnt_synced)logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_req
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=false
and native_request=true and video_request=false
group by 1,2,3,4,5

union all

select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res ads
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=false
and creative_type in(5,6)
and ads.advertiser_id in ({spweb_native_advertiser_id})
group by 1,2,3,4,5

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=false
and creative_type in(5,6)
and ads.advertiser_id in ({spweb_native_advertiser_id})
group by 1,2,3,4,5) a
left join console.ssp s on s.id=a.child_ssp_id
group by  1,2,3,4,5,6
"""
)
# Run the query through a context-managed ImpalaResource and keep the result
# returned by sql_to_pandas for the later concatenation into data_a_1.
with ImpalaResource(**ircfg) as ir:
    data_1_spweb_native = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [125]:
# spapp_display
# Funnel metrics for the SP-app (iOS/Android, is_app=true) / display segment,
# one row per (ym, os, is_app, creative, child_ssp_id, ssp).
# The inner derived table stacks three aggregates with UNION ALL; each branch
# fills only its own metrics and emits literal 0 for the rest, so the outer
# SELECT can simply sum() every column:
#   1. dm.domain_req -> synced request counts. This branch has no advertiser
#      filter, unlike the two branches below.
#   2. dm.domain_res -> res, and bidden (res counted only where bidden=true),
#      limited to the ids interpolated from spapp_display_advertiser_id.
#   3. dm.domain_ads -> imp / click / ctv1 / ctv2 plus original_win, net_sales
#      and gross_sales (each divided by 1,000,000); same advertiser filter.
# Segment filters: user_agent_os_family in ('iOS','Android'), is_app=true,
# native_request=false and video_request=false (branch 1) /
# creative_type not in (5,6,10) (branches 2 and 3).
# console.ssp is LEFT JOINed only to attach the SSP name.
# NOTE(review): spapp_display_advertiser_id is defined outside this cell; it is
# presumably a comma-separated id list because it is pasted into "in (...)" — confirm.
# NOTE(review): branches 1 and 2 derive `creative` from native_request /
# video_request while branch 3 derives it from creative_type, and branch 2
# filters on creative_type. Confirm the two always agree, otherwise rows can
# land under a different creative label.
query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
	sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
	sum(logicad_request_id_1_sum_native_plcmtcnt_synced) logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(bidden)bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
sum(logicad_request_id_1_sum_native_plcmtcnt_synced)logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_req
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=true
and native_request=false and video_request=false
group by 1,2,3,4,5

union all

select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res ads
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=true
and creative_type not in(5,6,10)
and ads.advertiser_id in ({spapp_display_advertiser_id})
group by 1,2,3,4,5

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=true
and creative_type not in(5,6,10)
and ads.advertiser_id in ({spapp_display_advertiser_id})
group by 1,2,3,4,5) a
left join console.ssp s on s.id=a.child_ssp_id
group by  1,2,3,4,5,6
"""
)
# Run the query through a context-managed ImpalaResource and keep the result
# returned by sql_to_pandas for the later concatenation into data_a_1.
with ImpalaResource(**ircfg) as ir:
    data_1_spapp_display = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [126]:
# spapp_native
# Funnel metrics for the SP-app (iOS/Android, is_app=true) / native segment,
# one row per (ym, os, is_app, creative, child_ssp_id, ssp).
# The inner derived table stacks three aggregates with UNION ALL; each branch
# fills only its own metrics and emits literal 0 for the rest, so the outer
# SELECT can simply sum() every column:
#   1. dm.domain_req -> synced request counts. This branch has no advertiser
#      filter, unlike the two branches below.
#   2. dm.domain_res -> res, and bidden (res counted only where bidden=true),
#      limited to the ids interpolated from spapp_native_advertiser_id.
#   3. dm.domain_ads -> imp / click / ctv1 / ctv2 plus original_win, net_sales
#      and gross_sales (each divided by 1,000,000); same advertiser filter.
# Segment filters: user_agent_os_family in ('iOS','Android'), is_app=true,
# native_request=true and video_request=false (branch 1) /
# creative_type in (5,6) (branches 2 and 3).
# console.ssp is LEFT JOINed only to attach the SSP name.
# NOTE(review): spapp_native_advertiser_id is defined outside this cell; it is
# presumably a comma-separated id list because it is pasted into "in (...)" — confirm.
# NOTE(review): branches 1 and 2 derive `creative` from native_request /
# video_request while branch 3 derives it from creative_type, and branch 2
# filters on creative_type. Confirm the two always agree, otherwise rows can
# land under a different creative label.
query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
	sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
	sum(logicad_request_id_1_sum_native_plcmtcnt_synced) logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(bidden)bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
sum(logicad_request_id_1_count_synced) logicad_request_id_1_count_synced,
sum(logicad_request_id_1_sum_native_plcmtcnt_synced)logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_req
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=true
and native_request=true and video_request=false
group by 1,2,3,4,5

union all

select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res ads
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=true
and creative_type in(5,6)
and ads.advertiser_id in ({spapp_native_advertiser_id})
group by 1,2,3,4,5

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
	0 logicad_request_id_1_count_synced,
	0 logicad_request_id_1_sum_native_plcmtcnt_synced,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=true
and creative_type in(5,6)
and ads.advertiser_id in ({spapp_native_advertiser_id})
group by 1,2,3,4,5) a
left join console.ssp s on s.id=a.child_ssp_id
group by  1,2,3,4,5,6
"""
)
# Run the query through a context-managed ImpalaResource and keep the result
# returned by sql_to_pandas for the later concatenation into data_a_1.
with ImpalaResource(**ircfg) as ir:
    data_1_spapp_native = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [130]:
# Stack the six per-segment result frames (PC / SP-web / SP-app, each for
# display and native) row-wise into a single table. The original row labels
# of each frame are kept as-is.
data_a_1 = pd.concat(
    [
        data_1_pc_display,
        data_1_pc_native,
        data_1_spweb_display,
        data_1_spweb_native,
        data_1_spapp_display,
        data_1_spapp_native,
    ]
)

In [131]:
# Path definition.
# `__file__` does not exist inside a notebook kernel, and the previous
# pathlib.Path("__file__") resolved a *literal string*, which only happened to
# yield the current working directory. Use the working directory explicitly.
parent_path = pathlib.Path.cwd().resolve()
file_path = parent_path / "data" / f"SSP_limited_adv__{start_at}_{end_at}_a_1.tsv"

# Create the output directory on first run; to_csv does not create missing
# parent directories and would otherwise raise.
file_path.parent.mkdir(parents=True, exist_ok=True)

# Write the combined table as tab-separated text without the DataFrame index.
data_a_1.to_csv(file_path, index=False, sep="\t")

## A4.商材、産業カテゴリ別SSP比較

In [132]:
# pc_display
# Breakdown by advertiser, product category and industry for the
# PC / non-app / display segment. One row per (ym, os, is_app, creative,
# child_ssp_id, ssp, advertiser_id, advertiser_name, category, sangyo_name,
# sangyo_concrete_name) — the eleven positions in the final GROUP BY.
# The inner derived table stacks two aggregates keyed additionally by
# target_id with UNION ALL (there is no dm.domain_req branch in this query):
#   1. dm.domain_res -> res, and bidden (res counted only where bidden=true)
#   2. dm.domain_ads -> imp / click / ctv1 / ctv2 plus original_win, net_sales
#      and gross_sales (each divided by 1,000,000)
# Both branches filter on user_agent_os_family not in ('iOS','Android'),
# is_app=false, creative_type not in (5,6,10) and the ids interpolated from
# pc_display_advertiser_id.
# The outer query INNER JOINs dm.product_target_master and dm.hierarchies on
# target_id (targets missing from either table are dropped) and LEFT JOINs
# console.ssp (SSP name) and dm.sangyo_id (via product_ctgr_id).
# The `category` CASE is evaluated top-down, so private_brand_id = 107,
# ad_type = 5 and oem_id = 300 take precedence over the summary_name buckets.
# NOTE(review): pc_display_advertiser_id is defined outside this cell; it is
# presumably a comma-separated id list because it is pasted into "in (...)" — confirm.
# NOTE(review): the dm.domain_res branch aliases its table as `a`, the same
# alias used for the outer derived table; the scopes differ, but it is easy
# to misread (the pc_native variant of this query uses `ads` there).
# NOTE(review): the domain_res branch derives `creative` from native_request /
# video_request while filtering on creative_type — confirm the two agree.
query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
advertiser_id,
advertiser_name,
case
when hie.private_brand_id = 107 then "7:TVBridge"
when hie.ad_type =5 then '6:DOOH'
when hie.oem_id=300 then '5:CPX'
when summary_name = "RTG" then "1:RTG"
when summary_name = "DyC" then "2:DyC"
when summary_name = "類似" then "3:類似拡張"
when summary_name in ("ATA","カスタム(ブロード)","動画","カスタムATA") then "4:オーディエンス(動画含)"
else "9:その他" end as category,
sangyo_name,
sangyo_concrete_name,
	sum(res) res,
	sum(bidden)bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(
select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
    a.target_id,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res a
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family not in('iOS','Android')
and is_app=false
and creative_type not in(5,6,10)
and a.advertiser_id in ({pc_display_advertiser_id})
group by 1,2,3,4,5,6

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
ads.target_id,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family not in('iOS','Android')
and is_app=false
and creative_type not in(5,6,10)
and ads.advertiser_id in ({pc_display_advertiser_id})
group by 1,2,3,4,5,6) a
left join console.ssp s on s.id=a.child_ssp_id
inner join dm.product_target_master c on c.target_id=a.target_id
inner join dm.hierarchies hie on a.target_id = hie.target_id
left join dm.sangyo_id san on hie.product_ctgr_id = san.product_ctgr_id
group by  1,2,3,4,5,6,7,8,9,10,11
"""
)
# Run the query through a context-managed ImpalaResource and keep the result
# returned by sql_to_pandas.
with ImpalaResource(**ircfg) as ir:
    data_4_pc_display = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [133]:
# pc_native
# Breakdown by advertiser, product category and industry for the
# PC / non-app / native segment. One row per (ym, os, is_app, creative,
# child_ssp_id, ssp, advertiser_id, advertiser_name, category, sangyo_name,
# sangyo_concrete_name) — the eleven positions in the final GROUP BY.
# The inner derived table stacks two aggregates keyed additionally by
# target_id with UNION ALL (there is no dm.domain_req branch in this query):
#   1. dm.domain_res -> res, and bidden (res counted only where bidden=true)
#   2. dm.domain_ads -> imp / click / ctv1 / ctv2 plus original_win, net_sales
#      and gross_sales (each divided by 1,000,000)
# Both branches filter on user_agent_os_family not in ('iOS','Android'),
# is_app=false, creative_type in (5,6) and the ids interpolated from
# pc_native_advertiser_id.
# The outer query INNER JOINs dm.product_target_master and dm.hierarchies on
# target_id (targets missing from either table are dropped) and LEFT JOINs
# console.ssp (SSP name) and dm.sangyo_id (via product_ctgr_id).
# The `category` CASE is evaluated top-down, so private_brand_id = 107,
# ad_type = 5 and oem_id = 300 take precedence over the summary_name buckets.
# NOTE(review): pc_native_advertiser_id is defined outside this cell; it is
# presumably a comma-separated id list because it is pasted into "in (...)" — confirm.
# NOTE(review): the domain_res branch derives `creative` from native_request /
# video_request while filtering on creative_type — confirm the two agree.
query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
advertiser_id,
advertiser_name,
case
when hie.private_brand_id = 107 then "7:TVBridge"
when hie.ad_type =5 then '6:DOOH'
when hie.oem_id=300 then '5:CPX'
when summary_name = "RTG" then "1:RTG"
when summary_name = "DyC" then "2:DyC"
when summary_name = "類似" then "3:類似拡張"
when summary_name in ("ATA","カスタム(ブロード)","動画","カスタムATA") then "4:オーディエンス(動画含)"
else "9:その他" end as category,
sangyo_name,
sangyo_concrete_name,
	sum(res) res,
	sum(bidden)bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(
select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
    ads.target_id,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res ads
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family not in('iOS','Android')
and is_app=false
and creative_type in(5,6)
and ads.advertiser_id in ({pc_native_advertiser_id})
group by 1,2,3,4,5,6

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
ads.target_id,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family not in('iOS','Android')
and is_app=false
and creative_type in(5,6)
and ads.advertiser_id in ({pc_native_advertiser_id})
group by 1,2,3,4,5,6) a
left join console.ssp s on s.id=a.child_ssp_id
inner join dm.product_target_master c on c.target_id=a.target_id
inner join dm.hierarchies hie on a.target_id = hie.target_id
left join dm.sangyo_id san on hie.product_ctgr_id = san.product_ctgr_id
group by  1,2,3,4,5,6,7,8,9,10,11
"""
)
# Run the query through a context-managed ImpalaResource and keep the result
# returned by sql_to_pandas.
with ImpalaResource(**ircfg) as ir:
    data_4_pc_native = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [134]:
# spweb_display segment.
# Segment filters (identical in both halves of the union): user_agent_os_family in ('iOS','Android'),
# is_app=false, creative_type not in (5,6,10), advertisers limited to spweb_display_advertiser_id,
# and dates between start_at and end_at (compared as 'year-month-day' strings).
# Inner subquery `a` stacks two sources, each grouped by (ym, os, is_app, creative, child_ssp_id, target_id):
#   - dm.domain_res: res and bidden counts; the imp/click/ctv/win/sales columns are filled with 0
#   - dm.domain_ads: imp/click/ctv1/ctv2 plus win/spend sums divided by 1,000,000; res/bidden are filled with 0
#     NOTE(review): the /1000000 presumably converts micro-units into currency units — confirm.
# The outer query attaches the SSP name (console.ssp), the category label derived from
# dm.hierarchies / summary_name, and sangyo_name / sangyo_concrete_name (dm.sangyo_id),
# then sums every metric over the 11 leading select columns.
# NOTE(review): advertiser_id, advertiser_name and summary_name are unqualified in the outer select;
# presumably they come from dm.product_target_master — confirm.
query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
advertiser_id,
advertiser_name,
case
when hie.private_brand_id = 107 then "7:TVBridge"
when hie.ad_type =5 then '6:DOOH'
when hie.oem_id=300 then '5:CPX'
when summary_name = "RTG" then "1:RTG"
when summary_name = "DyC" then "2:DyC"
when summary_name = "類似" then "3:類似拡張"
when summary_name in ("ATA","カスタム(ブロード)","動画","カスタムATA") then "4:オーディエンス(動画含)"
else "9:その他" end as category,
sangyo_name,
sangyo_concrete_name,
	sum(res) res,
	sum(bidden)bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(
select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
    ads.target_id,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res ads
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=false
and creative_type not in(5,6,10)
and ads.advertiser_id in ({spweb_display_advertiser_id})
group by 1,2,3,4,5,6

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
ads.target_id,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=false
and creative_type not in(5,6,10)
and ads.advertiser_id in ({spweb_display_advertiser_id})
group by 1,2,3,4,5,6) a
left join console.ssp s on s.id=a.child_ssp_id
inner join dm.product_target_master c on c.target_id=a.target_id
inner join dm.hierarchies hie on a.target_id = hie.target_id
left join dm.sangyo_id san on hie.product_ctgr_id = san.product_ctgr_id
group by  1,2,3,4,5,6,7,8,9,10,11
"""
)
# Run the query on Impala with the shared connection settings (ircfg);
# the result is kept under its own name because it is later combined with the other segments via pd.concat.
with ImpalaResource(**ircfg) as ir:
    data_4_spweb_display = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [135]:
# spweb_native segment.
# Segment filters (identical in both halves of the union): user_agent_os_family in ('iOS','Android'),
# is_app=false, creative_type in (5,6), advertisers limited to spweb_native_advertiser_id,
# and dates between start_at and end_at (compared as 'year-month-day' strings).
# Inner subquery `a` stacks two sources, each grouped by (ym, os, is_app, creative, child_ssp_id, target_id):
#   - dm.domain_res: res and bidden counts; the imp/click/ctv/win/sales columns are filled with 0
#   - dm.domain_ads: imp/click/ctv1/ctv2 plus win/spend sums divided by 1,000,000; res/bidden are filled with 0
#     NOTE(review): the /1000000 presumably converts micro-units into currency units — confirm.
# The outer query attaches the SSP name (console.ssp), the category label derived from
# dm.hierarchies / summary_name, and sangyo_name / sangyo_concrete_name (dm.sangyo_id),
# then sums every metric over the 11 leading select columns.
# NOTE(review): the dm.domain_res half labels `creative` from native_request / video_request while
# filtering on creative_type; the dm.domain_ads half labels it from creative_type — confirm both agree.
query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
advertiser_id,
advertiser_name,
case
when hie.private_brand_id = 107 then "7:TVBridge"
when hie.ad_type =5 then '6:DOOH'
when hie.oem_id=300 then '5:CPX'
when summary_name = "RTG" then "1:RTG"
when summary_name = "DyC" then "2:DyC"
when summary_name = "類似" then "3:類似拡張"
when summary_name in ("ATA","カスタム(ブロード)","動画","カスタムATA") then "4:オーディエンス(動画含)"
else "9:その他" end as category,
sangyo_name,
sangyo_concrete_name,
	sum(res) res,
	sum(bidden)bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(
select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
    ads.target_id,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res ads
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=false
and creative_type in(5,6)
and ads.advertiser_id in ({spweb_native_advertiser_id})
group by 1,2,3,4,5,6

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
ads.target_id,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=false
and creative_type in(5,6)
and ads.advertiser_id in ({spweb_native_advertiser_id})
group by 1,2,3,4,5,6) a
left join console.ssp s on s.id=a.child_ssp_id
inner join dm.product_target_master c on c.target_id=a.target_id
inner join dm.hierarchies hie on a.target_id = hie.target_id
left join dm.sangyo_id san on hie.product_ctgr_id = san.product_ctgr_id
group by  1,2,3,4,5,6,7,8,9,10,11
"""
)
# Run the query on Impala with the shared connection settings (ircfg);
# the result is kept under its own name because it is later combined with the other segments via pd.concat.
with ImpalaResource(**ircfg) as ir:
    data_4_spweb_native = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [136]:
# spapp_display segment.
# Segment filters (identical in both halves of the union): user_agent_os_family in ('iOS','Android'),
# is_app=true, creative_type not in (5,6,10), advertisers limited to spapp_display_advertiser_id,
# and dates between start_at and end_at (compared as 'year-month-day' strings).
# Inner subquery `a` stacks two sources, each grouped by (ym, os, is_app, creative, child_ssp_id, target_id):
#   - dm.domain_res: res and bidden counts; the imp/click/ctv/win/sales columns are filled with 0
#   - dm.domain_ads: imp/click/ctv1/ctv2 plus win/spend sums divided by 1,000,000; res/bidden are filled with 0
#     NOTE(review): the /1000000 presumably converts micro-units into currency units — confirm.
# The outer query attaches the SSP name (console.ssp), the category label derived from
# dm.hierarchies / summary_name, and sangyo_name / sangyo_concrete_name (dm.sangyo_id),
# then sums every metric over the 11 leading select columns.
# NOTE(review): the dm.domain_ads half inner-joins the campaign margin tables on a date range, so ads rows
# without a matching margin period are dropped; the dm.domain_res half has no such join.
query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
advertiser_id,
advertiser_name,
case
when hie.private_brand_id = 107 then "7:TVBridge"
when hie.ad_type =5 then '6:DOOH'
when hie.oem_id=300 then '5:CPX'
when summary_name = "RTG" then "1:RTG"
when summary_name = "DyC" then "2:DyC"
when summary_name = "類似" then "3:類似拡張"
when summary_name in ("ATA","カスタム(ブロード)","動画","カスタムATA") then "4:オーディエンス(動画含)"
else "9:その他" end as category,
sangyo_name,
sangyo_concrete_name,
	sum(res) res,
	sum(bidden)bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(
select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
    ads.target_id,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res ads
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=true
and creative_type not in(5,6,10)
and ads.advertiser_id in ({spapp_display_advertiser_id})
group by 1,2,3,4,5,6

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
ads.target_id,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=true
and creative_type not in(5,6,10)
and ads.advertiser_id in ({spapp_display_advertiser_id})
group by 1,2,3,4,5,6) a
left join console.ssp s on s.id=a.child_ssp_id
inner join dm.product_target_master c on c.target_id=a.target_id
inner join dm.hierarchies hie on a.target_id = hie.target_id
left join dm.sangyo_id san on hie.product_ctgr_id = san.product_ctgr_id
group by  1,2,3,4,5,6,7,8,9,10,11
"""
)
# Run the query on Impala with the shared connection settings (ircfg);
# the result is kept under its own name because it is later combined with the other segments via pd.concat.
with ImpalaResource(**ircfg) as ir:
    data_4_spapp_display = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [137]:
# spapp_native
query = (
f"""
select 
ym,
    os,
    is_app,
    creative,
	child_ssp_id,
	s.name ssp,
advertiser_id,
advertiser_name,
case
when hie.private_brand_id = 107 then "7:TVBridge"
when hie.ad_type =5 then '6:DOOH'
when hie.oem_id=300 then '5:CPX'
when summary_name = "RTG" then "1:RTG"
when summary_name = "DyC" then "2:DyC"
when summary_name = "類似" then "3:類似拡張"
when summary_name in ("ATA","カスタム(ブロード)","動画","カスタムATA") then "4:オーディエンス(動画含)"
else "9:その他" end as category,
sangyo_name,
sangyo_concrete_name,
	sum(res) res,
	sum(bidden)bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win) original_win,
	sum(net_sales) net_sales,
	sum(gross_sales) gross_sales
from
(
select
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when native_request=true then 'native' when video_request=true then 'video' else 'display' end creative,
	child_ssp_id,
    ads.target_id,
	sum(res) res,
	sum(case when bidden=true then res else 0 end) bidden,
	0 imp,
	0 click,
	0 ctv1,
	0 ctv2,
	0 original_win,
    0 net_sales,
    0 gross_sales
from dm.domain_res ads
inner join dm.hierarchies hie using(target_id)
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=true
and creative_type in(5,6)
and ads.advertiser_id in ({spapp_native_advertiser_id})
group by 1,2,3,4,5,6

union all

select 
concat_ws('-', year, month) ym,
    case when user_agent_os_family in('iOS','Android') then 'SP' else 'PC' end os,
    case when is_app is null then 0 else is_app end is_app,
    case when creative_type in(5,6) then 'native' when creative_type in(10) then 'video' else 'display' end creative,
	child_ssp_id,
ads.target_id,
	0 res,
	0 bidden,
	sum(imp) imp,
	sum(click)click,
	sum(ctv1) ctv1,
	sum(ctv2)ctv2,
	sum(original_win_price)/1000000 original_win,
    sum(net_spend)/1000000 net_sales,
    sum(gross_spend)/1000000 gross_sales
from dm.domain_ads ads
inner join dm.hierarchies hie on ads.target_id = hie.target_id
inner join agency_console.campaign cp on ads.strategy_id = cp.real_strategy_id
inner join agency_console.campaign_agency_margin cam on cp.campaign_id = cam.campaign_id and concat_ws('-', year, month, day) between cam.start_date and cam.end_date
inner join agency_console.campaign_smn_margin csm on cp.campaign_id = csm.campaign_id and concat_ws('-', year, month, day) between csm.start_date and csm.end_date
where concat_ws('-', year, month, day) between '{start_at}' and '{end_at}'
and user_agent_os_family in('iOS','Android')
and is_app=true
and creative_type in(5,6)
and ads.advertiser_id in ({spapp_native_advertiser_id})
group by 1,2,3,4,5,6) a
left join console.ssp s on s.id=a.child_ssp_id
inner join dm.product_target_master c on c.target_id=a.target_id
inner join dm.hierarchies hie on a.target_id = hie.target_id
left join dm.sangyo_id san on hie.product_ctgr_id = san.product_ctgr_id
group by  1,2,3,4,5,6,7,8,9,10,11
"""
)
with ImpalaResource(**ircfg) as ir:
    data_4_spapp_native = ir.sql_to_pandas(query)

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

In [138]:
# Stack the six per-segment result frames (PC / SP-web / SP-app x display / native)
# row-wise into one frame; the original row indexes are kept as-is.
data_a_4 = pd.concat(
    [
        data_4_pc_display,
        data_4_pc_native,
        data_4_spweb_display,
        data_4_spweb_native,
        data_4_spapp_display,
        data_4_spapp_native,
    ]
)

In [139]:
# Output path: <working directory>/data/SSP_limited_adv__<start_at>_<end_at>_a_4_adv.tsv
# A notebook has no __file__, so anchor on the working directory explicitly. The previous
# pathlib.Path("__file__") was a string literal that only resolved to this same directory by accident.
parent_path = pathlib.Path.cwd()
file_path = parent_path / "data" / f"SSP_limited_adv__{start_at}_{end_at}_a_4_adv.tsv"

# Make sure data/ exists so the export does not fail on a fresh checkout.
file_path.parent.mkdir(parents=True, exist_ok=True)

# Tab-separated export without the DataFrame index (the index is not meaningful after concatenation).
data_a_4.to_csv(file_path, index=False, sep="\t")

## A5.作成した中間テーブルの削除

In [140]:
# Clean up: drop the intermediate table that this notebook created earlier (created_table_name).
# IF EXISTS keeps the cell re-runnable: a plain DROP TABLE raises once the table is already gone
# (for example after a partial re-run), which would abort the notebook at its final step.
query = (
f"""
drop table if exists {created_table_name}
"""
)
with ImpalaResource(**ircfg) as ir:
    table = ir.sql_to_pandas(query)

# Display the one-row status summary returned for the DDL statement.
table

INFO:ailab_tools.smn.impala_client:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_client:hosts : ['172.16.60.117']
INFO:ailab_tools.smn.impala_client:impala_host : 172.16.60.117
INFO:ailab_tools.smn.impala_client:impala_port : 21050
INFO:ailab_tools.smn.impala_client:impala_user : vmspool
INFO:ailab_tools.smn.impala_client:impala_password : 
INFO:ailab_tools.smn.impala_client:request_pool : adhoc_dm01_pool
INFO:ailab_tools.smn.impala_client:use_ssl : False
INFO:ailab_tools.smn.impala_client:configuration : {'REQUEST_POOL': 'adhoc_dm01_pool'}
INFO:ailab_tools.smn.impala_resource:loaded configuration file: /data1/anaconda3/lib/python3.8/site-packages/ailab_tools/config/ailab_tools_config.py
INFO:ailab_tools.smn.impala_resource:httpfs_host: 172.16.60.42
INFO:ailab_tools.smn.impala_resource:httpfs_port: 14000
INFO:impyla_service:Impala open connection OK. hostname = [172.16.60.117]
INFO:impyla_ser

Unnamed: 0,summary
0,Table has been dropped.
