In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from pulp import *

In [2]:
# Load the family data; index_col=0 makes the first CSV column the row index
# (it shows up as family_id in the preview below).

df = pd.read_csv('../data/family_data.csv',index_col=0)

# Preview the first rows to see which columns are available
df.head()

Unnamed: 0_level_0,choice_0,choice_1,choice_2,choice_3,choice_4,choice_5,choice_6,choice_7,choice_8,choice_9,n_people
family_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,52,38,12,82,33,75,64,76,10,28,4
1,26,4,82,5,11,47,38,6,66,61,4
2,100,54,25,12,27,82,10,89,80,33,3
3,2,95,1,96,32,6,40,31,9,59,2
4,53,1,47,93,26,3,46,16,42,39,4


In [3]:
# I'm going to add a column representing an assigned day that falls outside all of the given choices.
# This will be choice_10, and its value will be 101.

In [4]:
# Summary statistics (count, mean, std, quartiles, min/max) for each column
df.describe()

Unnamed: 0,choice_0,choice_1,choice_2,choice_3,choice_4,choice_5,choice_6,choice_7,choice_8,choice_9,n_people
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,40.715,41.601,41.1942,41.595,41.9506,42.6352,41.965,42.7396,42.339,42.6082,4.2006
std,29.054208,29.518375,28.821878,29.094602,29.045049,28.98729,28.711089,28.959156,28.69832,28.116088,1.53272
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0
25%,16.0,16.0,17.0,17.0,17.0,18.0,17.0,18.0,17.0,19.0,3.0
50%,38.0,38.0,38.0,39.0,39.0,40.0,39.0,39.0,39.0,39.0,4.0
75%,62.0,66.0,63.0,66.0,66.0,66.0,66.0,67.0,66.0,66.0,5.0
max,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,8.0


The decision variable needs to be a boolean for each choice for each family. We will create a 0/1 integer variable for every (family, day) pair.

In [5]:
# `prob` holds the whole model (objective and constraints) under the name "Santa";
# LpMinimize declares it as a minimisation problem.
prob = LpProblem("Santa",LpMinimize)

In [6]:
# Index sets and decision variables for the model.

# Labels for the eleven preference slots: choice_0 .. choice_9 plus choice_10
choice = ['choice_{}'.format(slot) for slot in range(11)]

# Candidate days, numbered 1 through 100
day = list(range(1, 101))

# One entry per row of the family table (the frame's index values)
fam_id = df.index.tolist()

# selection[family][day] is an integer variable bounded to [0, 1]:
# the decision is which day each family gets assigned to.
selection = LpVariable.dicts("sel", (fam_id, day), lowBound=0, upBound=1, cat=LpInteger)

In [7]:
# adding in the constraints

# First constraint family: every family is assigned to exactly one day,
# i.e. its selection variables must add up to 1 across all days.
for f in fam_id:
    days_assigned = lpSum([selection[f][d] for d in day])
    prob += days_assigned == 1, ''

In [8]:
# The second set of constraints guarantees that the total number of visitors
# is between 125 and 300 for every single day leading up to Christmas.

# Family sizes do not depend on the day, so pull them out of the frame once
# instead of doing a pandas scalar lookup for every (family, day) term.
family_size = df['n_people'].to_dict()

for d in day:
    # Head-count terms for day d: a family contributes n_people when it is
    # assigned to d. The list is built once and summed separately for each
    # bound, so the two constraints never share an expression object.
    attendance_terms = [selection[f][d]*family_size[f] for f in fam_id]
    prob += lpSum(attendance_terms) >= 125, ''
    prob += lpSum(attendance_terms) <= 300, ''

Now we need to write down the objective function. How do we figure out how to flag for choice? Can we create a vector of choice for each family member?

Can we do some sort of mod of the day in the choice dictionary over the selected day?

I think we need to place a vector of length 11 in every single day for every single family. The location that is 1 will be the choice for that day. If the day is not one of the choices, the 11th element will be 1. This will then be used to multiply by the matrix for penalties.

Need to do some sort of lookup that maps each (family, day) pair to the index of the matching choice.

In [9]:
# choice_matrix[family][day] is a dict over the labels in `choice` holding a
# single 1: at the position of the family's preference that equals `day`, or
# at position 10 ('choice_10') when `day` is not among its listed preferences.
choice_matrix = {}

# The first ten columns of the frame are the preference columns; this slice is
# the same for every family, so compute it once.
preference_columns = df.columns.tolist()[:10]

for f in fam_id:

    # Progress indicator every 100 families
    if f%100==0:
        print('{}/{}'.format(f,len(df)))

    # A family's preferences do not depend on the day, so read them once per
    # family rather than once per (family, day) pair.
    choice_to_check = df.loc[f,preference_columns].values

    choice_matrix[f] = {}

    for d in day:

        if d in choice_to_check:
            # Position of the first preference equal to this day
            c_loc = np.where(choice_to_check==d)[0][0]
        else:
            # Day is not one of the family's listed preferences
            c_loc = 10

        choice_matrix[f][d] = {c:(0 if i!=c_loc else 1) for i,c in enumerate(choice)}

0/5000
100/5000
200/5000
300/5000
400/5000
500/5000
600/5000
700/5000
800/5000
900/5000
1000/5000
1100/5000
1200/5000
1300/5000
1400/5000
1500/5000
1600/5000
1700/5000
1800/5000
1900/5000
2000/5000
2100/5000
2200/5000
2300/5000
2400/5000
2500/5000
2600/5000
2700/5000
2800/5000
2900/5000
3000/5000
3100/5000
3200/5000
3300/5000
3400/5000
3500/5000
3600/5000
3700/5000
3800/5000
3900/5000
4000/5000
4100/5000
4200/5000
4300/5000
4400/5000
4500/5000
4600/5000
4700/5000
4800/5000
4900/5000


In [10]:
# Spot check: 52 is family 0's choice_0 in the data preview, so only
# 'choice_0' should be flagged with 1 here.
choice_matrix[0][52]

{'choice_0': 1,
 'choice_1': 0,
 'choice_2': 0,
 'choice_3': 0,
 'choice_4': 0,
 'choice_5': 0,
 'choice_6': 0,
 'choice_7': 0,
 'choice_8': 0,
 'choice_9': 0,
 'choice_10': 0}

In [11]:
# Fixed (per-family) gift card amount attached to each choice slot
choice_gc = {
    'choice_0': 0,
    'choice_1': 50,
    'choice_2': 50,
    'choice_3': 100,
    'choice_4': 200,
    'choice_5': 200,
    'choice_6': 300,
    'choice_7': 300,
    'choice_8': 400,
    'choice_9': 500,
    'choice_10': 500,
}

# Per-member monetary amount attached to each choice slot; it is multiplied by
# the family's n_people in the objective.
# NOTE(review): the 199 / 398 add-ons for the last two slots presumably come
# from the competition's cost table -- confirm against the problem statement.
choice_pm = {
    'choice_0': 0,
    'choice_1': 0,
    'choice_2': 9,
    'choice_3': 9,
    'choice_4': 9,
    'choice_5': 18,
    'choice_6': 18,
    'choice_7': 36,
    'choice_8': 36,
    'choice_9': 36 + 199,
    'choice_10': 36 + 398,
}


In [12]:

# Objective: total cost of the assignment. Putting family f on day d costs the
# gift card amount of the matching choice slot plus n_people times that slot's
# per-member amount.

# Family sizes are looked up once here rather than via pandas for every term.
family_size = df['n_people'].to_dict()

# choice_matrix[f][d] has a single 1 among its eleven entries, so only that
# entry can contribute a non-zero coefficient. Filtering on it keeps one term
# per (family, day) instead of eleven (ten of which were multiplied by 0),
# and the objective coefficients are unchanged.
prob += lpSum([selection[f][d]*(choice_gc[c]+family_size[f]*choice_pm[c])
               for c in choice for d in day for f in fam_id
               if choice_matrix[f][d][c]]), 'Objective Function'

In [None]:
# Run the solver on the model; no solver argument is passed, so PuLP chooses one.
prob.solve()

In [None]:
# Print the solve status: LpStatus maps the status code stored on the problem
# to its human-readable label.
print("Status:", LpStatus[prob.status])

In [None]:
# Evaluate the objective expression at the current variable values and show it
obj = value(prob.objective)
print(obj)

In [None]:
# Scratch: the first ten columns of family 0's row (its listed day choices) as an array
t = df.loc[0,df.columns.tolist()[:10]].values

In [None]:
# Scratch check of the choice-index lookup for a single value.
val = 28

# Position of the first entry of t equal to val, or 10 when val is absent
loc = np.where(t==val)[0][0] if val in t else 10

print(loc)

In [None]:
# Scratch: quick check of the modulo operator (the remainder of 501 divided by 50 is 1)
501%50

In [None]:
# Three small sample lists of consecutive integers for experimenting
x = list(range(0, 3))
y = list(range(3, 6))
z = list(range(6, 9))

In [None]:
# Scratch: every product a*b*c over the three lists; x varies slowest and z
# fastest, mirroring how a multi-`for` comprehension nests its loops.
l = [a*b*c for a in x for b in y for c in z]

In [None]:
# Number of products generated: one per (a, b, c) combination
len(l)