In [71]:
import utilities
import os
import json
import statistics
from prettytable import PrettyTable
from prettytable import ALL as ALL

In [4]:
DATA_DIR = '/Users/umariqbal/Documents/voice-assistant-central/data' 

CONTROL_PATH = os.path.join(DATA_DIR, 'control_persona/website_bids/')
TEST_PATH = os.path.join(DATA_DIR, 'healthfitness_persona/website_bids/')

In [57]:
PERSONA = 'Health-Fitness'

### Reading bids for personas

In [9]:
control_bid_files = utilities.get_files_in_a_directory(CONTROL_PATH)
test_bid_files = utilities.get_files_in_a_directory(TEST_PATH)

In [83]:
def get_websites_with_bids(bid_files):
    websites_with_bids = []
    for bid_file in bid_files:
        websites_with_bids.append(bid_file.rsplit('/', 1)[1].rsplit('_', 1)[1])

    return websites_with_bids


def return_bid_cpms(bid_files):
    bid_cpms = []
    for bid_file in bid_files:
        all_bids = utilities.read_file(bid_file)

        for bid in all_bids:
            bid_json = json.loads(bid)
            bid_cpms.append(bid_json['cpm'])
    
    return bid_cpms


def return_bid_cpms_for_websites(bid_files, websites):
    bid_cpms = []
    for bid_file in bid_files:

        bid_website = bid_file.rsplit('/', 1)[1].rsplit('_', 1)[1]
        if bid_website not in websites:
            continue
            
        all_bids = utilities.read_file(bid_file)

        for bid in all_bids:
            bid_json = json.loads(bid)
            bid_cpms.append(bid_json['cpm'])
    
    return bid_cpms


def return_median_cpms(bid_files):
    bid_cpms = []
    for bid_file in bid_files:
        all_bids = utilities.read_file(bid_file)
        
        current_bid_cpms = []
        for bid in all_bids:
            bid_json = json.loads(bid)
            current_bid_cpms.append(bid_json['cpm'])
            
        bid_cpms.append(statistics.median(current_bid_cpms))
    
    return bid_cpms


def return_median_cpms(bid_files, websites):
    bid_cpms = []
    for bid_file in bid_files:
        all_bids = utilities.read_file(bid_file)
        
        bid_website = bid_file.rsplit('/', 1)[1].rsplit('_', 1)[1]
        if bid_website not in websites:
            continue
            
        current_bid_cpms = []
        for bid in all_bids:
            bid_json = json.loads(bid)
            current_bid_cpms.append(bid_json['cpm'])
            
        bid_cpms.append(statistics.median(current_bid_cpms))
    
    return bid_cpms

In [80]:
def print_bid_statistics(control_cpms, test_cpms, persona, 
                         control_websites, test_websites, common_websites, count_limit = 20):
    control_median = statistics.median(control_cpms)
    test_median = statistics.median(test_cpms)
    
    control_mean = "{0:0.3f}".format(statistics.mean(control_cpms))
    test_mean = "{0:0.3f}".format(statistics.mean(test_cpms))
    
    control_min = min(control_cpms)
    control_max = "{0:0.3f}".format(max(control_cpms))
    
    test_min = min(test_cpms)
    test_max = "{0:0.3f}".format(max(test_cpms))
    
    control_total_bids = len(control_cpms)
    test_total_bids = len(test_cpms)
    
    control_total_websites = len(control_websites)
    test_total_websites = len(test_websites)
    
    common_websites_len = len(common_websites)
    
    table = PrettyTable(['Measure', 'Control', persona], hrules=ALL)
    
    table.add_row(['# of websites', control_total_websites, test_total_websites])
    table.add_row(['# of bids', control_total_bids, test_total_bids])
    table.add_row(['Common websites', common_websites_len, common_websites_len])
    table.add_row(['Normal/Forced', common_websites_len, common_websites_len])
    
    table.add_row(['Median', control_median, test_median])
    table.add_row(['Mean', control_mean, test_mean])
    table.add_row(['Min', control_min, test_min])
    table.add_row(['Max', control_max, test_max])
    
    print(table)

### Get websites with bids

In [None]:
control_websites = get_websites_with_bids(control_bid_files)
test_websites = get_websites_with_bids(test_bid_files)

common_websites = set(control_websites).intersection(set(test_websites))

### Consider all websites and all bids

In [55]:
control_cpms = return_bid_cpms(control_bid_files)
test_cpms = return_bid_cpms(test_bid_files)

In [74]:
print_bid_statistics(control_cpms, test_cpms, PERSONA, control_websites, test_websites, common_websites)

+-----------------+---------+----------------+
|     Measure     | Control | Health-Fitness |
+-----------------+---------+----------------+
|  # of websites  |    15   |       26       |
+-----------------+---------+----------------+
|    # of bids    |    29   |       59       |
+-----------------+---------+----------------+
| Common websites |    13   |       13       |
+-----------------+---------+----------------+
|      Median     |  0.102  |      0.08      |
+-----------------+---------+----------------+
|       Mean      |  0.284  |     0.270      |
+-----------------+---------+----------------+
|       Min       |    0    |       0        |
+-----------------+---------+----------------+
|       Max       |  1.337  |     4.560      |
+-----------------+---------+----------------+


### Consider bids on websites where bids returned for both personas

In [75]:
common_control_cpms = return_bid_cpms_for_websites(control_bid_files, common_websites)
common_test_cpms = return_bid_cpms_for_websites(test_bid_files, common_websites)

In [76]:
print_bid_statistics(common_control_cpms, common_test_cpms, PERSONA, control_websites, test_websites, common_websites)

+-----------------+---------+----------------+
|     Measure     | Control | Health-Fitness |
+-----------------+---------+----------------+
|  # of websites  |    15   |       26       |
+-----------------+---------+----------------+
|    # of bids    |    27   |       19       |
+-----------------+---------+----------------+
| Common websites |    13   |       13       |
+-----------------+---------+----------------+
|      Median     |  0.102  |     0.164      |
+-----------------+---------+----------------+
|       Mean      |  0.300  |     0.431      |
+-----------------+---------+----------------+
|       Min       |    0    |       0        |
+-----------------+---------+----------------+
|       Max       |  1.337  |     1.456      |
+-----------------+---------+----------------+


### Consider medain bids on websites 

In [81]:
median_control_cpms = return_median_cpms(control_bid_files)
median_test_cpms = return_median_cpms(test_bid_files)

In [82]:
print_bid_statistics(median_control_cpms, median_test_cpms, PERSONA, control_websites, test_websites, common_websites)

+-----------------+----------+----------------+
|     Measure     | Control  | Health-Fitness |
+-----------------+----------+----------------+
|  # of websites  |    15    |       26       |
+-----------------+----------+----------------+
|    # of bids    |    15    |       26       |
+-----------------+----------+----------------+
| Common websites |    13    |       13       |
+-----------------+----------+----------------+
|  Normal/Forced  |    13    |       13       |
+-----------------+----------+----------------+
|      Median     | 0.122709 |     0.081      |
+-----------------+----------+----------------+
|       Mean      |  0.324   |     0.402      |
+-----------------+----------+----------------+
|       Min       |   0.0    |       0        |
+-----------------+----------+----------------+
|       Max       |  1.337   |     4.560      |
+-----------------+----------+----------------+


### Consider medain bids on websites where bids returned for both personas

In [84]:
common_median_control_cpms = return_median_cpms(control_bid_files, common_websites)
common_median_test_cpms = return_median_cpms(test_bid_files, common_websites)

In [85]:
print_bid_statistics(common_median_control_cpms, common_median_test_cpms, PERSONA, 
                     control_websites, test_websites, common_websites)

+-----------------+---------+----------------+
|     Measure     | Control | Health-Fitness |
+-----------------+---------+----------------+
|  # of websites  |    15   |       26       |
+-----------------+---------+----------------+
|    # of bids    |    13   |       13       |
+-----------------+---------+----------------+
| Common websites |    13   |       13       |
+-----------------+---------+----------------+
|  Normal/Forced  |    13   |       13       |
+-----------------+---------+----------------+
|      Median     |  0.1527 |      0.1       |
+-----------------+---------+----------------+
|       Mean      |  0.364  |     0.385      |
+-----------------+---------+----------------+
|       Min       |   0.0   |       0        |
+-----------------+---------+----------------+
|       Max       |  1.337  |     1.456      |
+-----------------+---------+----------------+


In [78]:
common_websites

{'cnet.com',
 'cnn.com',
 'digg.com',
 'espn.com',
 'grid.id',
 'howstuffworks.com',
 'imgur.com',
 'newyorker.com',
 'nypost.com',
 'reverso.net',
 'speedtest.net',
 'theguardian.com',
 'timeanddate.com'}

In [None]:
for bid_file in test_bid_files:
    all_bids = utilities.read_file(bid_file)

    for bid in all_bids:
        bid_json = json.loads(bid)
        bid_cpms.append(bid_json['cpm'])