# MODEL TRAINING

## Libraries, Constants, Functions

In [None]:
from libraries import *
from constants import *
from functions import *

# Google Maps API client built from GOOGLE_API_KEY (expected to come from the
# star imports above). NOTE(review): `gmaps` is not referenced again in the
# cells visible here; presumably the distance helpers use it - confirm.
gmaps = googlemaps.Client(key=GOOGLE_API_KEY)

## Calculating Cost Matrices

In [None]:
# Read the per-city input tables into three dictionaries keyed by city name:
# existing AEDs, the training "cards" table and the candidate locations.

aeds, cards, possible_locations = {}, {}, {}

for city in cities:
    # os.chdir changes the process-wide working directory, so the CSV names
    # below are resolved relative to it.
    # NOTE(review): if data_path is relative, the second chdir is resolved
    # against the directory entered by the first one - confirm it is absolute.
    os.chdir(data_path + clean_path)
    aeds[city] = pd.read_csv(f"{city}_aeds.csv")
    cards[city] = pd.read_csv(f"{city}_cards_train.csv")

    os.chdir(data_path + possible_locations_path)
    possible_locations[city] = pd.read_csv(f"{city}_possible_locations.csv")

In [None]:
# Build the per-city 'mandatory' flags: one boolean per facility, ordered as
# the existing AEDs first, followed by the candidate locations (the same order
# in which the two tables are concatenated for the cost matrix).

predefined_lists = {}

for city in cities:
    aeds_df = aeds[city]
    possible_locations_df = possible_locations[city]

    # An existing AED is mandatory when it is NOT flagged public; a missing
    # 'public' value is treated as 0 (not public), hence mandatory.
    # The flags are computed on a temporary Series instead of being written
    # back into aeds[city]: the in-place inversion made the result flip every
    # time this cell was re-run.
    aeds_mandatory = ~aeds_df['public'].fillna(0).astype(bool)

    # Candidate locations are never mandatory.
    candidates_mandatory = np.zeros(len(possible_locations_df), dtype=bool)

    # Flat boolean NumPy array, AEDs first.
    predefined_lists[city] = np.concatenate(
        [aeds_mandatory.to_numpy(dtype=bool), candidates_mandatory]
    )

In [None]:
# Reduce every per-city table to its two coordinate columns.

coordinate_columns = ['latitude', 'longitude']

for city in cities:
    aeds[city] = aeds[city][coordinate_columns]

    # Candidate locations only carry a 'geometry' column, so the coordinates
    # are derived from it first and everything else is dropped afterwards.
    # NOTE(review): assumes get_coordinates_from_geometry returns the pair in
    # (latitude, longitude) order - confirm against the helper.
    locations_df = possible_locations[city]
    coordinates = locations_df['geometry'].apply(
        lambda geometry: pd.Series(get_coordinates_from_geometry(geometry))
    )
    locations_df[coordinate_columns] = coordinates
    possible_locations[city] = locations_df[coordinate_columns]

    cards[city] = cards[city][coordinate_columns]

In [None]:
# Calculate the cost matrix of every city: rows are the 'cards' points,
# columns are the existing AEDs followed by the candidate locations.

cost_matrices = {}

for city in cities:
    print("Current city: " + city)
    combined_locations = pd.concat([aeds[city], possible_locations[city]], ignore_index=True)

    # Euclidean distance on the raw latitude/longitude values. It is only used
    # as a cheap pre-filter to pick, per row, the CLOSEST_AEDS nearest columns.
    distance_matrix = cdist(cards[city], combined_locations, metric='euclidean')
    flag_matrix = np.zeros_like(distance_matrix)

    for row, distances in enumerate(distance_matrix):
        nearest = np.argsort(distances)[:CLOSEST_AEDS]
        flag_matrix[row, nearest] = 1

    # Count the cells that were really flagged. This is smaller than
    # rows * CLOSEST_AEDS when a city has fewer than CLOSEST_AEDS locations.
    request_count = int(flag_matrix.sum())

    # Row labels are "latitude, longitude" strings, column labels are
    # (latitude, longitude) tuples.
    cards_str = cards[city].apply(lambda x: f"{x['latitude']}, {x['longitude']}", axis=1)
    flag_matrix = pd.DataFrame(flag_matrix, index=cards_str, columns=combined_locations.apply(tuple, axis=1))
    # Every unflagged cell gets the INF_LENGTH placeholder.
    flag_matrix.replace(0, INF_LENGTH, inplace=True)

    confirmation = input(f"This will initialize {request_count} API requests. Are you sure? (yes/no): ")
    # Accept "Yes", " yes " etc.; anything else declines.
    if confirmation.strip().lower() == "yes":
        # Replace all marked cells with real calculated distances
        cost_matrices[city] = replace_1_with_distances(flag_matrix)
    else:
        # The city gets no entry in cost_matrices when the user declines.
        print("OK. Will not proceed.\n")

## Max Coverage algorithm

In [None]:
# Solve the maximal-coverage location problem for every city and store, per
# facility, whether it was selected. The output CSVs are written into the
# optimal-indicators directory.
os.chdir(data_path + optimal_indicators_path)

for city in cities:
    # A city has no cost matrix when its calculation was declined or never
    # run; skip it instead of failing with a KeyError.
    if city not in cost_matrices:
        print("Skipping " + city + ": no cost matrix available.\n")
        continue

    print("Optimizing " + city + "...\n")

    cost_matrix = cost_matrices[city].to_numpy()

    # Every row gets weight 1, and as many facilities are placed as there are
    # existing AEDs in the city.
    mclp = MCLP.from_cost_matrix(cost_matrix = cost_matrix,
                                 predefined_facilities_arr = predefined_lists[city],
                                 weights = np.ones(cost_matrix.shape[0]),
                                 p_facilities = len(aeds[city]),
                                 service_radius = COVERAGE_RADIUS)

    mclp = mclp.solve(pulp.PULP_CBC_CMD(msg=False))

    # Solver values are floats, so an exact `== 1` test can miss a selected
    # facility (e.g. 0.9999999). Threshold at 0.5 instead and treat an
    # unassigned variable (None) as not selected.
    facility_status = [
        bool(variable.varValue is not None and variable.varValue > 0.5)
        for variable in mclp.fac_vars
    ]

    optimal_indicators = pd.DataFrame({'SelectionStatus': facility_status})
    optimal_indicators.to_csv(f'{city}_optimal_indicators.csv', index = False)