# 简易瀑布流模拟器

### 设置

In [11]:
import numpy as np
import random
import pandas

In [12]:
# Configuration knobs of the simulation:
#   - time needed to return the match/no-match log message, ---change it in AdPlacement
#   - loading duration of each ad type,                     ---change it in AdID
#   - waiting time of each preload attempt,                 ---wait_time
#   - number of preload attempts per display chance,        ---preload
#   - number of display chances                             ---chance
# NOTE(review): "AdPlacement" and "AdID" are not defined in the visible source;
# the durations are actually read from AdTypeList / Answer_duration below -- confirm.

# Possible loading durations per ad type: a list, or range(start, stop, step);
# the original note says this has to be changed in AdPlacement.
# ID.generate_duration() picks one entry at random for the ad's type.
AdTypeList = {'tt_Native_Video': [0.001, 0.002, 0.003]}

# preload: how many waterfall requests are attempted per chance;
# wait_time is indexed by the attempt number, so it needs at least `preload` entries.
# Answer_duration: added to a group's elapsed time once per request round.
# NOTE(review): all durations are presumably in seconds -- confirm.
preload = 2
chance = 500
wait_time = [0.008, 0.01]
Answer_duration = 0.0005

In [13]:
class ID:
    """One ad placement ID together with its running request statistics."""

    def __init__(self, ad_id, floor, match_rate, adtype):
        """
        Create one ad placement ID.

        :param ad_id: identifier of the ad placement
        :param floor: floor price, or the estimated price when run without a floor
        :param match_rate: predicted match rate, used as the probability that a request matches
        :param adtype: key into AdTypeList, e.g. 'tt_Native_Video' or 'gdt_Native_Image';
                       note that video and image of each network have different return durations
        """
        self.ad_id = ad_id
        self.floor = floor
        self.match_rate = match_rate
        self.adtype = adtype
        # Counters updated by initiate_request() / end_request().
        self.data = {'Request': 0,
                     'Matched': 0,
                     'Impression': 0}

    def generate_duration(self):
        """
        Pick a random return duration for this ad's type.

        :return: one entry of AdTypeList[self.adtype]
        :raises ValueError: if the ad type is not configured in AdTypeList
        """
        dur_list = AdTypeList.get(self.adtype)
        if dur_list is None:
            # Fail with a readable message instead of random.choice(None)'s TypeError.
            raise ValueError('unknown ad type: {!r}'.format(self.adtype))
        return random.choice(dur_list)

    def initiate_request(self):
        """
        Simulate one request to this ad and update the counters.

        :return: boolean of matched or not (True for matched, False for no match);
                 ad returning duration
        """
        self.data['Request'] += 1
        # Without a size argument choice() returns a scalar, so callers get a
        # plain bool rather than a length-1 array.
        drawn = np.random.choice([1, 0], p=[self.match_rate, 1 - self.match_rate])
        matched = bool(drawn)

        if matched:
            self.data['Matched'] += 1

        return matched, self.generate_duration()

    def end_request(self, showed):
        """
        Record the outcome of a matched request.

        :param showed: truthy when the matched ad was actually displayed
        """
        if showed:
            self.data['Impression'] += 1

In [14]:
def request_group(g):
    """
    Request the ads of one waterfall group, ``parallel_count`` ads per round.

    The ads in ``id_list`` are split into consecutive, non-overlapping batches
    of ``parallel_count`` ads (the last batch may be shorter). Every round adds
    ``Answer_duration`` to the elapsed time and requests all ads of the batch.
    If at least one ad matched, the one with the shortest return duration wins
    (on a tie the later ad in the list wins) and the function returns.

    :param g: group dict with the keys 'parallel_count' and 'id_list'
    :return: (matched ad, elapsed duration including the ad's return duration),
             or (None, elapsed duration) when no ad in the group matched
    :raises ValueError: if 'parallel_count' is not a whole number >= 1
    """
    parallel = g.get('parallel_count')
    id_list = g.get('id_list') or []

    try:
        batch_size = int(parallel)
    except (TypeError, ValueError):
        raise ValueError('parallel_count_error')
    if batch_size != parallel or batch_size < 1:
        raise ValueError('parallel_count_error')

    group_duration = 0
    # Step through disjoint batches so that no ad is requested twice and a
    # trailing, shorter batch is still requested.
    for start in range(0, len(id_list), batch_size):
        group_duration += Answer_duration
        batch = id_list[start:start + batch_size]

        # Request every ad of the batch before inspecting results: each ad's
        # counters must be updated even if a sibling matched.
        answers = [(ad, ad.initiate_request()) for ad in batch]

        winner, winner_dur = None, None
        for ad, (matched, dur) in answers:
            # "<=" keeps the tie-break of the two-ad case: later ad wins.
            if matched and (winner is None or dur <= winner_dur):
                winner, winner_dur = ad, dur

        if winner is not None:
            return winner, group_duration + winner_dur

    return None, group_duration

In [15]:
class Waterfall:
    """
    An ordered list of ad groups that are requested one after another.

    Each group is a dict ``{'group_id', 'parallel_count', 'id_list'}`` where
    ``id_list`` holds the ID objects of that group.
    """

    def __init__(self):
        # State lives on the instance: a class-level list would be shared by
        # every Waterfall, so a second instance would inherit the first one's groups.
        self.group_list = []
        self.current_matched_ad = None

    def insert_group(self, parallel_count=1):
        """
        Append an empty group at the end of the waterfall.

        :param parallel_count: how many ads of the group are requested per round
        """
        group_id = len(self.group_list)
        group_cur = {'group_id': group_id, 'parallel_count': parallel_count, 'id_list': []}
        self.group_list.append(group_cur)

    def insert_id(self, group_id, ad_id, floor, match_rate, adtype):
        """
        Create an ID and append it to the group at index ``group_id``.

        :param group_id: index of an already inserted group
        :param ad_id: identifier of the ad placement
        :param floor: floor price / estimated price of the ad
        :param match_rate: predicted match rate of the ad
        :param adtype: ad type key, see AdTypeList
        """
        id_cur = ID(ad_id, floor, match_rate, adtype)
        self.group_list[group_id]['id_list'].append(id_cur)

    def request_waterfall(self):
        """
        Request the groups in order until one of them returns a matched ad.

        The matched ad is remembered so that return_impression() can report
        whether it was shown.

        :return: (matched ad, accumulated duration), or (None, accumulated
                 duration) when no group matched
        """
        duration = 0
        for g in self.group_list:

            matched_ad, dur = request_group(g)
            duration += dur
            if matched_ad is not None:
                self.current_matched_ad = matched_ad
                return matched_ad, duration

        return None, duration

    def return_impression(self, success):
        """
        Report the outcome of the last matched ad and clear it.

        Must only be called after request_waterfall() returned a matched ad.

        :param success: truthy when the matched ad was displayed
        """
        self.current_matched_ad.end_request(success)
        self.current_matched_ad = None

    def _iter_ads(self):
        """Yield (group_id, parallel_count, ad) for every non-None ad, in waterfall order."""
        for g in self.group_list or []:
            group_ID = g.get('group_id')
            parallel_count = g.get('parallel_count')
            for ad in g.get('id_list'):
                if ad is not None:
                    yield group_ID, parallel_count, ad

    def generate_structure(self):
        """
        Describe the configured waterfall, one row per ad.

        :return: pandas.DataFrame with the columns group_ID, parallel_count,
                 ad_ID, type, eCPM and o_match_rate
        """
        COL = ['group_ID', 'parallel_count', 'ad_ID', 'type', 'eCPM', 'o_match_rate']
        waterfall_structure = pandas.DataFrame(columns=COL)

        for group_ID, parallel_count, ad in self._iter_ads():
            waterfall_structure.loc[len(waterfall_structure)] = [group_ID, parallel_count, ad.ad_id,
                                                                 ad.adtype, ad.floor, ad.match_rate]

        return waterfall_structure

    def generate_data(self):
        """
        Report the simulated statistics, one row per ad.

        On top of the structure columns, each row carries the request, matched
        and impression counters and the derived values
        match_rate = matched / request, show_rate = impression / matched,
        fill_rate = impression / request (each 0 when its denominator is 0)
        and revenue = eCPM * impression / 1000.

        :return: pandas.DataFrame with one row per ad
        """
        COL = ['group_ID', 'parallel_count', 'ad_ID', 'type', 'eCPM', 'o_match_rate', 'request', 'matched',
               'impression', 'match_rate', 'show_rate', 'fill_rate', 'revenue']
        waterfall_data = pandas.DataFrame(columns=COL)

        for group_ID, parallel_count, ad in self._iter_ads():
            eCPM = ad.floor

            data = ad.data
            request = data.get('Request')
            matched = data.get('Matched')
            impression = data.get('Impression')

            # Guard every ratio against a zero denominator.
            match_rate = matched / request if request > 0 else 0
            fill_rate = impression / request if request > 0 else 0
            show_rate = impression / matched if matched > 0 else 0

            revenue = (eCPM * impression) / 1000

            waterfall_data.loc[len(waterfall_data)] = [group_ID, parallel_count, ad.ad_id, ad.adtype, eCPM,
                                                       ad.match_rate, request, matched, impression, match_rate,
                                                       show_rate, fill_rate, revenue]

        return waterfall_data

In [16]:
# Build the waterfall under test: three groups, each requesting one ad at a time.
waterfall = Waterfall()

for _group_no in range(3):
    waterfall.insert_group(1)

# One entry per ad, in insertion order: (group_id, ad_id, floor, match_rate, adtype).
# NOTE(review): ad_id 'a' is used for four different entries -- confirm this is intended.
_ad_specs = (
    (0, 'a', 10, 0.4, 'tt_Native_Video'),
    (1, 'b', 8, 0.3, 'tt_Native_Video'),
    (1, 'c', 7, 0.2, 'tt_Native_Video'),
    (2, 'a', 6, 0.5, 'tt_Native_Video'),
    (2, 'a', 5, 0.9, 'tt_Native_Video'),
    (2, 'a', 4, 0.9, 'tt_Native_Video'),
)
for _spec in _ad_specs:
    waterfall.insert_id(*_spec)

In [17]:
# Show the configured waterfall (one row per ad) before any request is simulated.
final_structure = waterfall.generate_structure()
print(final_structure)

  group_ID parallel_count ad_ID             type eCPM  o_match_rate
0        0              1     a  tt_Native_Video   10           0.4
1        1              1     b  tt_Native_Video    8           0.3
2        1              1     c  tt_Native_Video    7           0.2
3        2              1     a  tt_Native_Video    6           0.5
4        2              1     a  tt_Native_Video    5           0.9
5        2              1     a  tt_Native_Video    4           0.9


In [18]:
# Simulate every display chance: try up to `preload` waterfall requests and stop
# at the first one that returns a matched ad. The ad counts as shown only when it
# came back faster than the waiting time allowed for that preload attempt.
for c in range(chance):
    for i in range(preload):
        mached_ad, dur = waterfall.request_waterfall()
        success = mached_ad is not None and wait_time[i] > dur
        if mached_ad is None:
            # Nothing matched on this attempt; use the next preload attempt.
            continue
        waterfall.return_impression(success)
        break

In [19]:
# Collect the per-ad counters and derived rates accumulated by the simulation.
final_data = waterfall.generate_data()

print(final_data)

# Persist the report as an Excel workbook in the current working directory.
final_data.to_excel('final_data.xlsx')

  group_ID parallel_count ad_ID             type eCPM  o_match_rate request  \
0        0              1     a  tt_Native_Video   10           0.4     501   
1        1              1     b  tt_Native_Video    8           0.3     283   
2        1              1     c  tt_Native_Video    7           0.2     192   
3        2              1     a  tt_Native_Video    6           0.5     166   
4        2              1     a  tt_Native_Video    5           0.9      89   
5        2              1     a  tt_Native_Video    4           0.9       8   

  matched impression  match_rate  show_rate  fill_rate  revenue  
0     218        218    0.435130        1.0   0.435130    2.180  
1      91         91    0.321555        1.0   0.321555    0.728  
2      26         26    0.135417        1.0   0.135417    0.182  
3      77         77    0.463855        1.0   0.463855    0.462  
4      81         81    0.910112        1.0   0.910112    0.405  
5       7          7    0.875000        1.0   0.87