In [1]:
import numpy 
import matplotlib 
from matplotlib import pyplot
import seaborn as sns 
import pandas
%matplotlib inline



In [2]:
# Load the ad-call records from JSON into a DataFrame.
# The path is relative, so the notebook must be run from the directory containing `data/`.
ad_info = pandas.read_json("data/ad_calls.json")

In [3]:
# List the column labels of the loaded table.
ad_info.keys()

Index([u'ad_unit_id', u'cost_per_second', u'n_seconds', u'treatment_id'], dtype='object')

#### Glancing through the file, I see that treatment 1 always costs 0.32 c/s and treatment 0 costs 0.16 c/s. Next I reshape the table so I can get the average `n_seconds` for treatments 0 and 1 for each `ad_unit_id` (`pandas.pivot_table` aggregates the entries with the mean by default).

In [4]:
# Mean viewing time (n_seconds) per ad unit, one column per treatment.
# Rows are ad_unit_id, columns are treatment_id; 'mean' is pivot_table's
# default aggregation, spelled out here so the reader does not have to know it.
ad_mean_pivot = ad_info.pivot_table(
    values='n_seconds',
    index=['ad_unit_id'],
    columns=['treatment_id'],
    aggfunc='mean',
)

In [5]:
# Sanity check of the pivot table: recompute the mean n_seconds for
# ad unit 0 under treatment 1 directly from the raw records. The printed
# value should match the (ad_unit_id=0, treatment_id=1) cell of ad_mean_pivot.
test = ad_info.loc[
    (ad_info['ad_unit_id'] == 0) & (ad_info['treatment_id'] == 1),
    'n_seconds',
]
# Parenthesized print works on both Python 2 and Python 3.
print(numpy.mean(test))

7.63993288107


In [34]:
# Preview the first five rows of the per-treatment mean table.
ad_mean_pivot.head(n=5)

treatment_id,0,1
ad_unit_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2.17342,7.639933
1,1.857541,3.647597
2,1.935385,3.941385
3,1.960112,8.335809
4,1.942295,3.814124


In [37]:
# Per-ad mean durations for each treatment, sorted from longest to shortest.
# `sort_values` returns a new sorted Series, so no explicit copy is needed and
# `ad_mean_pivot` is left untouched. (The in-place `Series.sort` used previously
# has been removed from pandas.)
zs = ad_mean_pivot[0].sort_values(ascending=False)  # treatment 0 ("zeros")
# NOTE: `os` shadows the standard-library module name; it is kept because
# later cells refer to it by this name.
os = ad_mean_pivot[1].sort_values(ascending=False)  # treatment 1 ("ones")

In [41]:
# Show up to the first 200 entries of the sorted treatment-0 means.
zs.head(n=200)

ad_unit_id
198           2.275911
6             2.256219
31            2.253210
137           2.211310
8             2.205088
19            2.194322
116           2.188543
23            2.187440
0             2.173420
50            2.172567
138           2.171278
196           2.162835
108           2.162740
179           2.154072
152           2.148492
...
136           1.946941
61            1.946785
39            1.945064
96            1.944691
93            1.943890
4             1.942295
147           1.939751
150           1.939092
193           1.938938
139           1.937789
190           1.937271
64            1.937037
219           1.936628
84            1.935875
2             1.935385
Name: 0, Length: 200, dtype: float64

In [42]:
# Show up to the first 200 entries of the sorted treatment-1 means.
os.head(n=200)

ad_unit_id
211           8.963853
60            8.866322
22            8.808542
201           8.784608
11            8.706966
168           8.684148
139           8.651411
240           8.595362
98            8.535774
52            8.495709
229           8.484473
148           8.450355
184           8.435209
192           8.427792
29            8.423214
...
232           3.997956
125           3.995327
187           3.993126
210           3.992027
132           3.990839
73            3.990039
8             3.989529
254           3.981833
58            3.977036
69            3.972680
10            3.968735
56            3.961485
158           3.958636
18            3.956286
84            3.955478
Name: 1, Length: 200, dtype: float64

#### By eye, the greedy solution to this problem looks simple, because mode 1 pays more (0.32 compared to 0.16) and also makes users interact with the ads longer. Without doing any coding at all, I expect the solution to be: 1) for the first half of the ads, sorted by their mode 1 durations, always choose mode 1; 2) once you have used up all your mode 1 trials for the day, start choosing mode 0.

### To solve the generic problem (where perhaps I do not know that the time AND the price are always greater for mode 1, etc.), I will use a greedy strategy. In the first pass through the list I will greedily choose the option that optimizes the time until I run out of uses of that mode (each mode gets only 256,000/2 uses, based on the constraints). On the second pass I will force the algorithm to switch to the mode that makes the most money*time until the total price/duration > 0.2 (the second constraint) and the number of mode 1 uses equals the number of mode 0 uses. The final solution will give the optimal weights for mode 0 and mode 1 for all the ads.

In [43]:
# Allocation table keyed by the integers 0..255, each starting at [0, 0].
# Presumably one entry per ad_unit_id holding [mode-0 weight, mode-1 weight] --
# confirm against the allocation loop that fills it in.
# `range` is used instead of `xrange` so the cell runs on Python 2 and Python 3;
# the comprehension builds a fresh list per key, so entries are not aliased.
weight_dict = {key: [0, 0] for key in range(256)}

In [16]:
for ad in xrange(0,256):
    

{1: (), 2: (), 3: (), 4: (), 5: (), 6: (), 7: (), 8: (), 9: (), 10: (), 11: (), 12: (), 13: (), 14: (), 15: (), 16: (), 17: (), 18: (), 19: (), 20: (), 21: (), 22: (), 23: (), 24: (), 25: (), 26: (), 27: (), 28: (), 29: (), 30: (), 31: (), 32: (), 33: (), 34: (), 35: (), 36: (), 37: (), 38: (), 39: (), 40: (), 41: (), 42: (), 43: (), 44: (), 45: (), 46: (), 47: (), 48: (), 49: (), 50: (), 51: (), 52: (), 53: (), 54: (), 55: (), 56: (), 57: (), 58: (), 59: (), 60: (), 61: (), 62: (), 63: (), 64: (), 65: (), 66: (), 67: (), 68: (), 69: (), 70: (), 71: (), 72: (), 73: (), 74: (), 75: (), 76: (), 77: (), 78: (), 79: (), 80: (), 81: (), 82: (), 83: (), 84: (), 85: (), 86: (), 87: (), 88: (), 89: (), 90: (), 91: (), 92: (), 93: (), 94: (), 95: (), 96: (), 97: (), 98: (), 99: (), 100: (), 101: (), 102: (), 103: (), 104: (), 105: (), 106: (), 107: (), 108: (), 109: (), 110: (), 111: (), 112: (), 113: (), 114: (), 115: (), 116: (), 117: (), 118: (), 119: (), 120: (), 121: (), 122: (), 123: (), 