In [127]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from k_means_constrained import KMeansConstrained

# Load the stadium table from the working directory. The preview printed
# below shows the columns Team, League, Division, Lat and Long.
df = pd.read_csv('stadiums.csv')

# Print the first rows as a quick check that the file parsed as expected.
print(df.head())

def get_cartesian(lat, long):
    """Convert latitude/longitude in degrees to planar x, y coordinates.

    The point is placed on a sphere of radius EARTH_RADIUS and only its x and
    y components are returned; no z component is computed, so the result is
    the projection of the point onto the equatorial plane.

    Parameters
    ----------
    lat, long : scalar or array-like
        Latitude and longitude in degrees. Anything numpy can broadcast works.

    Returns
    -------
    tuple
        (x, y), in the same units as EARTH_RADIUS.
    """
    EARTH_RADIUS = 3958.8

    lat_rad = np.deg2rad(lat)
    long_rad = np.deg2rad(long)

    # Distance of the point from the polar axis; shared by both components.
    axis_distance = EARTH_RADIUS * np.cos(lat_rad)

    return axis_distance * np.cos(long_rad), axis_distance * np.sin(long_rad)

def dist(lat1, long1, lat2, long2):
    """Great-circle distance between two points given in degrees.

    Uses the spherical law of cosines on a sphere of radius EARTH_RADIUS
    (3958.8, i.e. the Earth's mean radius in miles), so the result is in the
    same units as that radius.

    Parameters
    ----------
    lat1, long1 : scalar or array-like
        Latitude and longitude of the first point(s), in degrees.
    lat2, long2 : scalar or array-like
        Latitude and longitude of the second point(s), in degrees.

    Returns
    -------
    numpy float or ndarray
        Distance(s) along the sphere; inputs are broadcast by numpy.
    """
    lat1, lat2 = np.deg2rad(lat1), np.deg2rad(lat2)
    long1, long2 = np.deg2rad(long1), np.deg2rad(long2)
    EARTH_RADIUS = 3958.8

    cos_angle = (np.sin(lat1) * np.sin(lat2)) + np.cos(lat1) * np.cos(lat2) * np.cos(long2 - long1)

    # Floating-point round-off can push the value a hair outside [-1, 1] for
    # identical (or antipodal) points, which would make arccos return NaN.
    cos_angle = np.clip(cos_angle, -1.0, 1.0)

    return EARTH_RADIUS * np.arccos(cos_angle)

                Team League Division        Lat        Long
0   Dallas Mavericks    NBA     West  32.790556  -96.810278
1      Orlando Magic    NBA     East  28.539167  -81.383611
2  San Antonio Spurs    NBA     West  29.426944  -98.437500
3     Denver Nuggets    NBA     West  39.748920 -105.008400
4      Brooklyn Nets    NBA     East  40.682661  -73.975225


In [141]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from k_means_constrained import KMeansConstrained #by Joshua Levy-Kramer

# Re-read the table so this cell does not depend on the `df` left behind by
# the earlier cell (it still needs get_cartesian from that cell).
df = pd.read_csv('stadiums.csv')

# Planar coordinates used as the clustering features.
df['X'], df['Y'] = get_cartesian(df['Lat'], df['Long'])

# Group by a scalar key: grouping by the one-element list ['League'] yields
# 1-tuple keys on pandas >= 2.0, which breaks the string concatenation with
# `name` in the cells that iterate over `groups`.
groups = df.groupby('League')

# League name -> array of realigned division labels, in the row order of that
# league's group.
new_divisions = {}
for name, group in groups:

    points = group[['X', 'Y']].to_numpy()

    #get number of teams in each division (Series method; the top-level
    #pd.value_counts is deprecated)
    counts = group['Division'].value_counts()

    #define and use clustering algorithm: as many clusters as the league has
    #divisions, with cluster sizes bounded by the smallest and largest
    #existing division
    clf = KMeansConstrained(n_clusters = len(counts),
                            size_min = int(counts.min()),
                            size_max = int(counts.max()),
                            n_init = 100,
                            random_state = 42)
    clf.fit_predict(points)

    #save the new divisions
    new_divisions[name] = clf.labels_

for name, group in groups:

    # A groupby over a one-element list yields 1-tuple keys on pandas >= 2.0;
    # unwrap so the title concatenation below receives a plain string.
    league = name[0] if isinstance(name, tuple) else name

    # Build a derived frame instead of assigning into the groupby slice, which
    # triggers SettingWithCopyWarning and never reaches `df` anyway.
    division_codes, _ = pd.factorize(group['Division'])
    plot_frame = group.assign(**{
        'Division': division_codes,                  # unique int per current division
        'Realigned Division': new_divisions[name],   # cluster label per team
    })

    #make plots for current divisions and realigned divisions
    for col in ['Realigned Division', 'Division']:
        fig = go.Figure(go.Scattergeo(
            lat = plot_frame['Lat'],
            lon = plot_frame['Long'],
            text = plot_frame['Team'],
            mode = 'markers',
            marker_color = plot_frame[col],
            marker_size = 10))

        fig.update_layout(title = league + ' ' + col + 's')
        fig.show()

# For every league, report the mean pairwise distance between teams that share
# a division, averaged over divisions, for the current and the realigned
# layout. NOTE(review): the printed label says "Interdivisional", but the
# quantity is measured within divisions; the label is left unchanged so the
# output text stays the same.
for name, group in groups:

    # A groupby over a one-element list yields 1-tuple keys on pandas >= 2.0;
    # unwrap so the string concatenation in the report lines keeps working.
    league = name[0] if isinstance(name, tuple) else name

    #replace divisions with unique ints (kept in a local array so the groupby
    #slice is never written to)
    division_codes, _ = pd.factorize(group['Division'])
    current_labels = np.asarray(division_codes)

    #realigned divisions, in the same row order as the group
    realigned_labels = np.asarray(new_divisions[name])

    lats = group['Lat'].to_numpy()
    longs = group['Long'].to_numpy()

    #every unordered pair of teams (i < j), each distance computed exactly once
    first, second = np.triu_indices(len(group), k=1)
    pair_dists = dist(lats[first], longs[first], lats[second], longs[second])

    league_avgs = []
    for labels in (current_labels, realigned_labels):

        same_division = labels[first] == labels[second]

        #get average for each (realigned) division
        division_avg = {}
        for division in np.unique(labels):
            in_division = same_division & (labels[first] == division)
            #a one-team division has no pairs; skip it rather than divide by zero
            if in_division.any():
                division_avg[division] = pair_dists[in_division].mean()

        #average across (realigned) divisions
        if division_avg:
            league_avg = sum(division_avg.values()) / len(division_avg)
            league_avgs.append(str(round(float(league_avg))))
        else:
            #no division has two or more teams, so there is nothing to average
            league_avgs.append('n/a')

    final_avg, realigned_final_avg = league_avgs

    print('Average ' + league + ' Interdivisional Distance: ' + final_avg)
    print('Average ' + league + ' Realigned Interdivisional Distance: ' + realigned_final_avg)

Average MLB Interdivisional Distance: 608
Average MLB Realigned Interdivisional Distance: 436
Average MLS Interdivisional Distance: 804
Average MLS Realigned Interdivisional Distance: 804
Average NBA Interdivisional Distance: 774
Average NBA Realigned Interdivisional Distance: 774
Average NFL Interdivisional Distance: 569
Average NFL Realigned Interdivisional Distance: 373
Average NHL Interdivisional Distance: 625
Average NHL Realigned Interdivisional Distance: 585
