I want to efficiently go to a baseball game at every MLB stadium. The introduction of a multi-option schedule changes the problem from a plain traveling salesman problem to a kind of traveling salesman with time windows problem. Time is an important factor in this problem because the most efficient solution by distance may end up taking weeks due to the need to wait for a game to be played at a particular stadium.  Distance is also important because the most efficient in time might have you traveling back and forth across the country several times. This notebook sets up a number of parameters such as the start and end date of the tour, the teams to start or end with, assumptions about how many miles you are willing to travel each day, and how to weigh the total time vs distance traveled. 

The approach is to read in all the games matching the input parameters. A matrix is created that maps each game to the next reachable game at every other stadium. A MiniZinc model is then executed to find the optimal path of games that each have a different team with minimal cost w.r.t. the configured distance and time coefficient. 

Configuration

In [4]:
import ipywidgets as widgets
import datetime
import json

# Team records (names and ids) used to populate the selection widgets.
with open('data/teams.json') as file:
    teams = json.load(file)

now = datetime.datetime.now()

# The tour defaults to starting now and ending on November 1st of this year.
w_start_date = widgets.DatePicker(
    value=now,
    description='Start Date',
    disabled=False
)
w_end_date = widgets.DatePicker(
    value=datetime.datetime(now.year, 11, 1),
    description='End Date',
    disabled=False
)

# Every team except the one literally named 'None' is offered as a choice.
team_option_list = [
    (entry['teamName'], entry['id'])
    for entry in teams
    if entry['teamName'] != 'None'
]
team_option_value = [entry_id for _, entry_id in team_option_list]


def _team_selector(description):
    """Build a multi-select that lists every team with all of them selected."""
    return widgets.SelectMultiple(
        options=team_option_list,
        description=description,
        disabled=False,
        value=team_option_value
    )


w_select_teams = _team_selector('Teams')

w_select_start_teams = _team_selector('Start teams')

w_select_end_teams = _team_selector('End teams')

def update_start_end_options(*args):
    """Restrict the start/end team pickers to the teams chosen in 'Teams'.

    Any arguments passed by the caller are ignored. Both dependent pickers
    are rebuilt so that every currently selected team is offered and
    preselected.
    """
    chosen_ids = set(w_select_teams.value)
    # Walk ``teams`` (not the selection) so the options keep the file order.
    chosen = [entry for entry in teams if entry['id'] in chosen_ids]
    options = [(entry['teamName'], entry['id']) for entry in chosen]
    values = [entry['id'] for entry in chosen]
    for selector in (w_select_start_teams, w_select_end_teams):
        selector.options = options
        selector.value = values
# Rebuild the start/end pickers whenever the team selection changes.
w_select_teams.observe(update_start_end_options, 'value')

# Presentation options shared by every slider below.
_slider_options = dict(
    step=1,
    disabled=False,
    continuous_update=True,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)

# 0-100; cost() divides this by 100 and uses it as the weight of the elapsed
# minutes, with the remainder weighting the distance.
w_distance_time_coefficient = widgets.IntSlider(
    value=25,
    min=0,
    max=100,
    description='Distance/Time coefficient',
    **_slider_options
)

# The minimum is 1 rather than 0: the travel-day estimate divides by this
# value, so a zero speed can never be selected.
w_speed = widgets.IntSlider(
    value=70,
    min=1,
    max=100,
    description='Miles per hour',
    **_slider_options
)

# Same reasoning: zero travel hours per day would make every trip impossible
# and divide by zero in the travel-day estimate.
w_travel_hours_per_day = widgets.IntSlider(
    value=10,
    min=1,
    max=24,
    description='Maximum travel hours per day',
    **_slider_options
)

display(w_start_date, w_end_date, w_select_teams,
        w_select_start_teams, w_select_end_teams,
        w_distance_time_coefficient,  w_speed,  w_travel_hours_per_day)


DatePicker(value=datetime.datetime(2021, 3, 3, 12, 53, 7, 26387), description='Start Date')

DatePicker(value=datetime.datetime(2021, 11, 1, 0, 0), description='End Date')

SelectMultiple(description='Teams', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 1…

SelectMultiple(description='Start teams', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,…

SelectMultiple(description='End teams', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 1…

IntSlider(value=25, description='Distance/Time coefficient')

IntSlider(value=70, description='Miles per hour')

IntSlider(value=10, description='Maximum travel hours per day', max=24)

Append a fake team for the final game

In [5]:
def _as_datetime(value):
    """Return *value* as a ``datetime.datetime``.

    A plain ``datetime.date`` is promoted to midnight of that day; a value
    that already is a ``datetime.datetime`` is returned unchanged.
    """
    if isinstance(value, datetime.datetime):
        return value
    return datetime.datetime.combine(value, datetime.time.min)


# Game dates are built as datetime.datetime objects, so the tour bounds must
# be datetimes too: comparing or subtracting a datetime and a plain date
# raises TypeError. The pickers start out holding datetimes but hand back a
# plain date once the user picks a day.
start_date = _as_datetime(w_start_date.value)
end_date = _as_datetime(w_end_date.value)

distance_time_coefficient = w_distance_time_coefficient.value
speed = w_speed.value
max_travel_hours_per_day = w_travel_hours_per_day.value

# Lookup from the id used in the distance matrix file to the team record.
teams_by_distance_matrix_id = {
    team['distanceMatrixId']: team for team in teams
}

start_teams = list(w_select_start_teams.value)
end_teams = list(w_select_end_teams.value)
teams_to_attend = list(w_select_teams.value)
# Team id 31 is the fake team that the placeholder first and final games use.
teams_to_attend.append(31)


Load the distance matrix

In [6]:
import csv
import numpy as np

# Distances indexed as [from team id][to team id]; any pair that is missing
# from the CSV file stays at 0.
distance_matrix = np.zeros((32, 32), dtype=np.int64)

# newline='' is what the csv module asks for when it is given a file object.
with open('data/distance matrix.csv', newline='') as file:
    reader = csv.reader(file)
    next(reader, None)  # skip the header row (no-op for an empty file)
    for row in reader:
        # Columns 0 and 1 hold distance-matrix ids, column 4 the distance.
        from_team = int(row[0])
        to_team = int(row[1])
        distance = int(row[4])
        # Translate the file's ids into the team ids used everywhere else.
        from_team_id = teams_by_distance_matrix_id[from_team]['id']
        to_team_id = teams_by_distance_matrix_id[to_team]['id']
        distance_matrix[from_team_id][to_team_id] = distance

Load games from the statsapi.mlb.com web service.

In [7]:
import datetime
import json
import urllib.request

# Each game's id is also its index in ``games``. Index 0 is a placeholder
# first "game" for the fake team (31), dated at the start of the tour.
games = [{'date': start_date, 'id': 0, 'teamId': 31}]
gameId = 1

for team_id in teams_to_attend:
    mlb_team_id = teams[team_id]['mlbTeamId']
    url = ('https://statsapi.mlb.com/api/v1/schedule?lang=en&sportId=1&hydrate=venue&season={}&startDate={}&endDate={}&teamId={}&gameType=R&scheduleTypes=games'
           .format(start_date.year, start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"), mlb_team_id))
    # The context manager closes the HTTP response once the JSON is parsed.
    with urllib.request.urlopen(url) as response:
        schedule = json.load(response)
    for current in schedule['dates']:
        # Only the first game listed for a date is inspected.
        home_team_id = current['games'][0]['teams']['home']['team']['id']
        if home_team_id != mlb_team_id:
            continue  # skip dates on which this team is not the home team
        date_parts = current['date'].split('-')
        date_value = datetime.datetime(int(date_parts[0]), int(date_parts[1]), int(date_parts[2]))
        if start_date <= date_value <= end_date:
            games.append({'id': gameId, 'teamId': team_id, 'date': date_value})
            gameId = gameId + 1

# Close the list with a placeholder final "game" for the fake team, dated at
# the end of the tour.
final_game = gameId
games.append({'date': end_date, 'id': final_game, 'teamId': 31})

Initialize the next_games, weights, and home_teams lookup tables

In [8]:
# One row per game; the two matrices have one column per team id (0-31).
next_games = np.zeros((len(games), 32), dtype=np.int64)
weights = np.ones((len(games), 32), dtype=np.int64)
home_teams = np.zeros(len(games), dtype=np.int64)

# Starting point before the graph is computed: every game is attributed to
# team 31 and points at the final game for team columns 1-31 (column 0 is
# left at 0).
game_ids = [game['id'] for game in games]
home_teams[game_ids] = 31
next_games[game_ids, 1:32] = final_game


Calculate the next games graph and weights

In [9]:
import math

# A separate list holding the same game dicts, so it can be reordered
# without disturbing the id-indexed ``games`` list.
date_sorted_games = [games[entry['id']] for entry in games]


def dateAccessor(value):
    """Sort key: return the ``'date'`` entry of a game record."""
    game_date = value['date']
    return game_date


def cost(from_game_id, to_game_id, distance_time_coefficient):
    """Return the weighted cost of going from one game to another.

    The cost blends the minutes elapsed between the two games with the
    distance between their teams. ``distance_time_coefficient`` (a
    percentage) is the weight given to the minutes; the remainder is the
    weight given to the distance. No time is charged when the destination
    is the final game.
    """
    time_weight = distance_time_coefficient / 100
    origin = games[from_game_id]
    destination = games[to_game_id]
    travel_distance = distance_matrix[origin['teamId']][destination['teamId']]
    if destination['id'] == final_game:
        elapsed_minutes = 0
    else:
        elapsed = destination['date'] - origin['date']
        elapsed_minutes = elapsed.total_seconds() / 60
    return time_weight * elapsed_minutes + (1 - time_weight) * travel_distance

# Both values are divisors in the travel-day estimate below; fail early with
# a clear message instead of an obscure error from math.ceil.
if speed <= 0 or max_travel_hours_per_day <= 0:
    raise ValueError('Miles per hour and maximum travel hours per day must both be greater than zero')

date_sorted_games.sort(key=dateAccessor)
for i, game in enumerate(date_sorted_games):
    game_id = game['id']
    from_team = game['teamId']
    home_teams[game_id] = from_team
    # Teams (1-31) for which no reachable follow-up game has been found yet.
    pending_teams = set(range(1, 32))
    # Later games are visited in date order, so the first reachable game seen
    # for a team is the earliest one.
    for next_game in date_sorted_games[i + 1:]:
        if not pending_teams:
            break
        to_team = next_game['teamId']
        if to_team not in pending_teams:
            continue
        distance = distance_matrix[from_team][to_team]
        # Whole days of travel needed at the configured speed and hours/day.
        days = math.ceil(distance / speed / max_travel_hours_per_day)
        mindate = game['date'] + datetime.timedelta(days=days)
        if mindate <= next_game['date']:
            next_games[game_id][to_team] = next_game['id']
            weights[game_id][to_team] = cost(game_id, next_game['id'], distance_time_coefficient)
            pending_teams.discard(to_team)

# The fake team's column always leads to the final game.
next_games[:, 31] = final_game

A NumPy ndarray is not JSON serializable, so it is serialized to JSON with a custom converter and then parsed back in to force it into plain Python types.

In [10]:
def default_json(obj):
    """``json.dumps`` fallback that converts NumPy values to plain Python.

    Arrays become (nested) lists and NumPy scalars become the matching
    Python scalar.

    Raises:
        TypeError: if *obj* is neither a NumPy array nor a NumPy scalar.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        return obj.item()
    raise TypeError('Unknown type: {}'.format(type(obj)))

def jsonify(obj):
    """Round-trip *obj* through JSON so that only plain Python types remain.

    Values the encoder cannot handle itself are converted by
    ``default_json`` while serializing.
    """
    return json.loads(json.dumps(obj, default=default_json))

def sum_cost(game_ids, distance_time_coefficient):
    """Add up ``cost`` over every consecutive pair of games in *game_ids*."""
    return sum(
        cost(current_id, following_id, distance_time_coefficient)
        for current_id, following_id in zip(game_ids, game_ids[1:])
    )

def print_games(game_ids):
    """Print one line per leg of the path, then the overall totals.

    Each line shows the game id, its date, the home team, the configured
    cost of moving on to the next game, and that same step expressed as
    days (time-only cost) and as distance (distance-only cost).
    """
    for current_id, following_id in zip(game_ids, game_ids[1:]):
        stop = games[current_id]
        host_name = teams[home_teams[current_id]]['teamName']
        leg_cost = cost(current_id, following_id, distance_time_coefficient)
        # A coefficient of 100 yields pure minutes; 0 yields pure distance.
        leg_days = cost(current_id, following_id, 100) / 60 / 24
        leg_distance = cost(current_id, following_id, 0)
        print('{}: {} - {} ${} {} days {} miles'
              .format(stop['id'], stop['date'], host_name,
                      leg_cost, leg_days, leg_distance))
    # Elapsed time is measured up to the second-to-last entry, presumably
    # because the last entry is the placeholder final game.
    first_stop = games[game_ids[0]]
    last_stop = games[game_ids[len(game_ids) - 2]]
    total_seconds = (last_stop['date'] - first_stop['date']).total_seconds()
    total_days = total_seconds / 60 / 60 / 24
    total_distance = sum_cost(game_ids, 0)
    print('Total days:{} Total distance (miles):{}'
         .format(total_days, total_distance))
        
        

Write data file as json

In [11]:
import json

# Row 0 (the placeholder first game) and team column 0 are left out of every
# exported table, so GAMES runs from 1 to the final game id.
data = {"GAMES": list(range(1, len(games)))}
data["weights"] = jsonify(weights[1:, 1:])
data["next_games"] = jsonify(next_games[1:, 1:])
data["home_teams"] = jsonify(home_teams[1:])
data["teams_to_attend"] = teams_to_attend
data["start_teams"] = start_teams
data["end_teams"] = end_teams

with open('data.json', 'w') as file:
    json.dump(data, file)

Run the model. This can take several minutes depending on the number of games, the number of start teams and the number of end teams. 

In [12]:
import minizinc as mzn

model = mzn.Model('./BaseballModel.mzn')
    
# Find the MiniZinc solver configuration for Gecode
gecode = mzn.Solver.lookup("gecode")
# Create an Instance of the baseball schedule solver model for Gecode
instance = mzn.Instance(gecode, model)
instance.add_file('data.json')

result = instance.solutions(intermediate_solutions=True)
final=None
async for solution in result:
    if solution.status.has_solution() and solution.status is not mzn.result.Status.OPTIMAL_SOLUTION:
        print_games(solution['path'])
        final=solution
        print('---------------------')
    elif solution.status.has_solution() and solution.status is mzn.result.Status.OPTIMAL_SOLUTION:
        print('optimal solution found')
        final=solution
        print('---------------------')        
print('---------------------')     
print(final.status)
print(final.statistics)


82: 2021-04-09 00:00:00 - Arizona Diamondbacks $2433.0 3.0 days 1804.0 miles
247: 2021-04-12 00:00:00 - Chicago White Sox $2180.25 4.0 days 987.0 miles
169: 2021-04-16 00:00:00 - Boston Red Sox $2820.0 7.0 days 400.0 miles
8: 2021-04-23 00:00:00 - Baltimore Orioles $0.0 0.0 days 0.0 miles
Total days:14.0 Total distance (miles):3191.0
---------------------
83: 2021-04-10 00:00:00 - Arizona Diamondbacks $2433.0 3.0 days 1804.0 miles
248: 2021-04-13 00:00:00 - Chicago White Sox $1820.25 3.0 days 987.0 miles
169: 2021-04-16 00:00:00 - Boston Red Sox $2820.0 7.0 days 400.0 miles
8: 2021-04-23 00:00:00 - Baltimore Orioles $0.0 0.0 days 0.0 miles
Total days:13.0 Total distance (miles):3191.0
---------------------
84: 2021-04-11 00:00:00 - Arizona Diamondbacks $2433.0 3.0 days 1804.0 miles
249: 2021-04-14 00:00:00 - Chicago White Sox $1460.25 2.0 days 987.0 miles
169: 2021-04-16 00:00:00 - Boston Red Sox $2820.0 7.0 days 400.0 miles
8: 2021-04-23 00:00:00 - Baltimore Orioles $0.0 0.0 days 0.0 