## Decision Analytics Final Project
### Question: What is the best combination of tourist attractions in NYC that a travel agency can put together in order to provide the best tour experience? 
#### Team Members: Megan, Jhanvi, Hannah, Kaia

## Preprocessing

In [None]:
# impor
import numpy as np
from gurobipy import *
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
survey_table = pd.read_csv("survey_final_table.csv")
nyc = pd.read_csv("nyc_attractions.csv")

In [3]:
# Load the survey results table
survey_table.head()

Unnamed: 0.1,Unnamed: 0,Group,Proba,Nature,Museum,Family,Activity,Walking,Theatre,Landmarks
0,0,3.0,0.25,3.8,2.0,3.4,4.4,3.0,2.4,2.8
1,1,2.0,0.2,1.0,3.75,3.0,1.25,2.25,3.5,3.0
2,2,1.0,0.3,4.333333,3.666667,3.166667,1.833333,4.166667,3.333333,1.333333
3,3,0.0,0.25,5.0,3.2,1.8,4.2,2.6,3.2,1.4


In [76]:
# Load the NYC tourist attraction category table
nyc.head()

Unnamed: 0,AttractionID,AttractionName,CostPerVisit,Nature,Museum,Family,Active,Walking,Theatre,Landmark
0,1,Central Park,0.0,3,2,3,3,3,1,3
1,2,The National 9/11 Memorial & Museum,33.0,1,3,2,1,2,1,2
2,3,The Metropolitan Museum of Art,30.0,1,3,2,1,2,1,2
3,4,Empire State Building,79.0,1,1,3,1,1,1,3
4,5,The High Line,0.0,3,1,3,3,3,1,3


In [82]:
# 
nyc_short = nyc.drop(labels=['AttractionID', 'AttractionName', 'CostPerVisit'],axis=1)
nyc_short.head()

Unnamed: 0,Nature,Museum,Family,Active,Walking,Theatre,Landmark
0,3,2,3,3,3,1,3
1,1,3,2,1,2,1,2
2,1,3,2,1,2,1,2
3,1,1,3,1,1,1,3
4,3,1,3,3,3,1,3


In [85]:
nyc_short = nyc_short.rename(columns={
    "Landmark": "Landmarks",
    "Active":"Activity"
})
nyc_short.head()

Unnamed: 0,Nature,Museum,Family,Activity,Walking,Theatre,Landmarks
0,3,2,3,3,3,1,3
1,1,3,2,1,2,1,2
2,1,3,2,1,2,1,2
3,1,1,3,1,1,1,3
4,3,1,3,3,3,1,3


In [78]:
# Clean up survey table
survey_table_short = survey_table.drop(labels=['Unnamed: 0', 'Group', 'Proba'], axis=1)
survey_table_short

Unnamed: 0,Nature,Museum,Family,Activity,Walking,Theatre,Landmarks
0,3.8,2.0,3.4,4.4,3.0,2.4,2.8
1,1.0,3.75,3.0,1.25,2.25,3.5,3.0
2,4.333333,3.666667,3.166667,1.833333,4.166667,3.333333,1.333333
3,5.0,3.2,1.8,4.2,2.6,3.2,1.4


In [79]:
attractions1 = nyc.to_dict()

#### Calculate the dot product of survey table responses and attraction relevance score so that we can get the happiness score of each attraction for each group

In [86]:
attraction_user_product = np.dot(nyc_short, survey_table_short.T)
attraction_user_product

array([[58.6       , 42.5       , 55.16666667, 54.6       ],
       [35.        , 33.5       , 37.83333333, 33.6       ],
       [35.        , 33.5       , 37.83333333, 33.6       ],
       [34.2       , 29.75      , 30.83333333, 27.8       ],
       [56.6       , 38.75      , 51.5       , 51.4       ],
       [34.2       , 29.75      , 30.83333333, 27.8       ],
       [38.        , 30.75      , 35.16666667, 32.8       ],
       [36.2       , 32.5       , 35.16666667, 31.8       ],
       [30.        , 27.75      , 31.66666667, 29.6       ],
       [30.8       , 26.75      , 27.66666667, 26.        ],
       [41.        , 33.        , 39.33333333, 35.4       ],
       [42.2       , 37.5       , 45.33333333, 40.4       ],
       [38.2       , 30.        , 38.        , 34.        ],
       [34.2       , 29.75      , 30.83333333, 27.8       ],
       [36.8       , 31.25      , 36.        , 31.2       ],
       [38.2       , 36.25      , 38.83333333, 35.        ],
       [37.2       , 32.

The above matrix demonstrate the composite "attraction score" of each tourist site for each tourist group archetype. For example, the first number 52 is the attraction score of central park for Group 0 (Adventurers). It is calculated by SUM(category i preferences score * this site's relevance to category i). 

## Optimization task
We tried two coding variations for our formulation of the problem:
* **Variation #1**: Hard-code At
This variation is closest to our formula where the agency will commit to a maximum total number of attractions then run the model for the best attractions to choose.

* **Variation #2**: Let Gurobi chooses an At
This variation is inspired by the L9-Aircraft.ipynb notebook. We add At as a decision variable and see if gurobi can choose the best At.

In [100]:
# Set up the Parameters
budget = 100  # Total budget constraint
demographic_weights = list(survey_table['Proba'])

demographic_weights

[0.25, 0.2, 0.3, 0.25]

In [88]:
nyc.head()

Unnamed: 0,AttractionID,AttractionName,CostPerVisit,Nature,Museum,Family,Active,Walking,Theatre,Landmark
0,1,Central Park,0.0,3,2,3,3,3,1,3
1,2,The National 9/11 Memorial & Museum,33.0,1,3,2,1,2,1,2
2,3,The Metropolitan Museum of Art,30.0,1,3,2,1,2,1,2
3,4,Empire State Building,79.0,1,1,3,1,1,1,3
4,5,The High Line,0.0,3,1,3,3,3,1,3


In [89]:
# function to filter the nyc attraction table by selected attraction
def filtered_table(attractionList):
    table = nyc[nyc['AttractionID'].isin(attractionList)]
    return table

### Variation #1

In [102]:
# Initialize Model
model = Model("NYC_Tour_Optimization")

In [103]:
## Decision variables
# Stage 1 decision varable: A (decide on a max number of attractions - in this case, 10)
At = 10
# Stage 2 decision variable: a (decide on which attraction to go to)
a = model.addVars(24, vtype=GRB.BINARY, name="a") 

In [104]:
# Objective Function
model.setObjective(quicksum(demographic_weights[j] * a[i] * attraction_user_product[i][j] for i in range(24) for j in range(3)), GRB.MAXIMIZE)

### Constraints ###

# Budget Constraint
model.addConstr(
    quicksum(a[i] * nyc["CostPerVisit"][i] for i in range(24)) <= budget, "Budget"
)

# Total no.of attractions Constraint 
model.addConstr(
    quicksum(a[i] for i in range(24)) <= At, "Duration"
)

# Optimize
model.optimize()

# Print Results
if model.status == GRB.OPTIMAL:
    selected_attractions = [i + 1 for i in range(24) if a[i].X > 0.5]
    print("Optimal Attractions:", selected_attractions)
    print("Maximum Happiness:", model.objVal)
else:
    print("No optimal solution found.")


Gurobi Optimizer version 12.0.0 build v12.0.0rc1 (win64 - Windows 11.0 (22631.2))

CPU model: 12th Gen Intel(R) Core(TM) i7-1250U, instruction set [SSE2|AVX|AVX2]
Thread count: 10 physical cores, 12 logical processors, using up to 12 threads

Optimize a model with 2 rows, 24 columns and 34 nonzeros
Model fingerprint: 0xe2907114
Variable types: 0 continuous, 24 integer (24 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+02]
  Objective range  [2e+01, 4e+01]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+01, 1e+02]
Found heuristic solution: objective 291.5500000
Presolve removed 0 rows and 9 columns
Presolve time: 0.00s
Presolved: 2 rows, 15 columns, 22 nonzeros
Found heuristic solution: objective 217.9000000
Variable types: 0 continuous, 15 integer (14 binary)
Found heuristic solution: objective 307.5000000

Root relaxation: objective 3.219500e+02, 3 iterations, 0.00 seconds (0.00 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Wo

In [105]:
# prints the results in ipython
variation1 = filtered_table(selected_attractions)

variation1

Unnamed: 0,AttractionID,AttractionName,CostPerVisit,Nature,Museum,Family,Active,Walking,Theatre,Landmark
0,1,Central Park,0.0,3,2,3,3,3,1,3
4,5,The High Line,0.0,3,1,3,3,3,1,3
10,11,Brooklyn Bridge,0.0,2,1,3,1,2,1,3
11,12,American Museum of Natural History,30.0,2,3,3,1,2,1,2
15,16,NY Public Library,0.0,1,2,2,1,2,2,3
17,18,MoMA,30.0,1,3,3,1,2,2,2
19,20,Bryant Park,0.0,2,1,3,3,3,1,3
21,22,Shopping on Fifth Avenue,0.0,1,1,3,1,3,1,3
22,23,Shopping at SoHo,0.0,1,1,3,1,3,1,3
23,24,Roosevelt Island,0.0,2,1,3,3,2,1,3


### Write some conclusions about the places chosen: cost per visit and distribution of attributes. It also did not pick any locations with cost > 30

### Variation 2

In [106]:
# Initialize Model
model = Model("NYC_Tour_Optimization")

In [107]:
## Decision variables
# Stage 1 decision varable: A (decide on a max number of attractions - in this case, 5)
At = model.addVar(vtype=GRB.INTEGER, name="At")
# Stage 2 decision variable: a (decide on which attraction to go to)
a = model.addVars(24, vtype=GRB.BINARY, name="a") 

In [108]:
# Objective Function
model.setObjective(quicksum(demographic_weights[j] * a[i] * attraction_user_product[i][j] for i in range(24) for j in range(3)), GRB.MAXIMIZE)

### Constraints ###

# Budget Constraint
model.addConstr(
    quicksum(a[i] * nyc["CostPerVisit"][i] for i in range(24)) <= budget, "Budget"
)

# Total no.of attractions Constraint 
model.addConstr(
    quicksum(a[i] for i in range(24)) <= At, "Duration"
)

# Optimize
model.optimize()

# Print Results
if model.status == GRB.OPTIMAL:
    selected_attractions = [i + 1 for i in range(24) if a[i].X > 0.5]
    print("Optimal Attractions:", selected_attractions)
    print("Maximum Happiness:", model.objVal)
else:
    print("No optimal solution found.")


Gurobi Optimizer version 12.0.0 build v12.0.0rc1 (win64 - Windows 11.0 (22631.2))

CPU model: 12th Gen Intel(R) Core(TM) i7-1250U, instruction set [SSE2|AVX|AVX2]
Thread count: 10 physical cores, 12 logical processors, using up to 12 threads

Optimize a model with 2 rows, 25 columns and 35 nonzeros
Model fingerprint: 0x932ffb8b
Variable types: 0 continuous, 25 integer (24 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+02]
  Objective range  [2e+01, 4e+01]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+02, 1e+02]
Found heuristic solution: objective -0.0000000
Found heuristic solution: objective 497.1000000
Presolve removed 2 rows and 25 columns
Presolve time: 0.00s
Presolve: All rows and columns removed

Explored 0 nodes (0 simplex iterations) in 0.02 seconds (0.00 work units)
Thread count was 1 (of 12 available processors)

Solution count 3: 502.5 497.1 -0 

Optimal solution found (tolerance 1.00e-04)
Best objective 5.025000000000e+02, best bound 5.025000000000

In [109]:
variation2 = filtered_table(selected_attractions)

variation2

Unnamed: 0,AttractionID,AttractionName,CostPerVisit,Nature,Museum,Family,Active,Walking,Theatre,Landmark
0,1,Central Park,0.0,3,2,3,3,3,1,3
4,5,The High Line,0.0,3,1,3,3,3,1,3
6,7,Statue of Liberty,0.0,2,1,3,1,1,1,3
7,8,Times Square,0.0,1,1,2,1,2,2,3
9,10,Grand Central Terminal,0.0,1,1,2,1,1,1,3
10,11,Brooklyn Bridge,0.0,2,1,3,1,2,1,3
11,12,American Museum of Natural History,30.0,2,3,3,1,2,1,2
12,13,Staten Island Ferry,0.0,2,1,3,1,2,1,2
14,15,Chelsea Market,0.0,1,1,2,1,3,1,3
15,16,NY Public Library,0.0,1,2,2,1,2,2,3


### Write something about the model just maximizes free locations ==> we feel like we need to limit At ==> we would need data on travel time and makes this more complicated