-
Notifications
You must be signed in to change notification settings - Fork 1
/
matchup_handler.py
138 lines (122 loc) · 4.82 KB
/
matchup_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
from collections import deque
from datetime import datetime, timedelta
import json
import random
from collections import Counter
def main():
web = {}
with open("public/data/web.json", "r") as outfile:
web = json.load(outfile)
verify_matchups(web)
def verify_matchups(web):
with open("public/data/matchups.json", "r") as outfile:
matchups = json.load(outfile)
# padding for removed matchups that have already occurred
num_days = 78
seen = set()
for matchup in matchups:
num_days += 1
start, end = matchup
if start not in web or end not in web:
print("Not in web:", matchup)
elif not is_good_matchup(web, matchup):
print("Not good matchup:", matchup)
elif (start, end) in seen:
print("Duplicate matchup:", matchup)
seen.add((start, end))
start = datetime(2023, 11, 29)
start += timedelta(days=num_days-1)
print("Matchups completed until", start.strftime("%m/%d/%Y"))
def generate_matchups(m, amount, artists, with_replacement=False):
res = []
while len(res) < amount:
vals = random.choices(artists, k=2)
while not is_good_matchup(m, vals):
vals = random.choices(artists, k=2)
res.append((vals[0], vals[1]))
if not with_replacement:
artists.remove(vals[1])
return res
def is_good_matchup(m, matchup):
min_deg_of_separation, max_deg_of_separation = 3, 7
# Range of num paths for a good matchup at max_deg of sep > deg of sep > min deg of sep
min_allowed_num_paths, max_allowed_num_paths = 10, float('inf')
start, end = matchup
if start == end:
return False
# Ensure that destination isn't <= min degress of separation
if len(get_valid_paths(m, start, end, min_deg_of_separation)) != 0:
return False
valid_paths = get_valid_paths(m, start, end, max_deg_of_separation)
# Must be within allowed range of paths at
if not (max_allowed_num_paths >= len(valid_paths) >= min_allowed_num_paths):
return False
# Don't want any single artist to be in every valid path
new_valid_paths = valid_paths
# If only 1 first related artist, then ignore the first artist
if len(m[matchup[0]]['related']) == 1:
new_valid_paths = [path[1:] for path in valid_paths]
all_artists_in_paths = set([x for path in new_valid_paths for x in path])
for artist in all_artists_in_paths:
if len(list(filter(lambda x: artist in x[:-1], new_valid_paths))) == len(new_valid_paths):
return False
# If only 2 first artists, then must be at least 2 paths for each
if (len(set([x[0] for x in valid_paths])) == 2):
for artist in set([x[0] for x in valid_paths]):
if len(list(filter(lambda x: x[0] == artist, valid_paths))) < 2:
return False
# Target artist has at 30% of artists that related in both directions
if len(list(filter(lambda x: end in m[x]['related'], m[end]['related'])))/len(m[end]['related']) <= 0.4:
return False
return True
def get_valid_paths(graph, start, end, max_steps=float('inf')):
# BFS path finding within <= max_steps deg of sep
visited = set()
queue = deque()
queue.append((start, 0, []))
paths = []
while queue:
node, steps, path = queue.popleft()
visited.add(node)
if node == end and steps <= max_steps:
paths.append(path)
if steps <= max_steps:
for neighbor in graph.get(node, []).get('related', []):
if neighbor not in visited:
queue.append((neighbor, steps+1, path + [neighbor]))
return paths
def get_connected_nodes(web, start):
visited = set()
result = []
def dfs(node):
if node not in visited:
visited.add(node)
if node != start:
result.append(node)
for neighbor in web.get(node, []).get('related', []):
dfs(neighbor)
dfs(start)
return result
def get_min_path(web, start, end):
visited = set()
queue = deque()
queue.append((start, 0, []))
while queue:
node, steps, path = queue.popleft()
if node == end:
return path
if node not in visited:
visited.add(node)
for neighbor in web.get(node, []).get('related', []):
queue.append((neighbor, steps+1, path + [neighbor]))
return None
def get_distribution_of_degrees_of_separation(web):
degrees_of_separation = {}
for artist in web:
end_artists = get_connected_nodes(web, artist)
end_artist = random.sample(end_artists, k=1)
min_path = len(get_min_path(web, artist, end_artist[0]))
degrees_of_separation[artist] = min_path
return Counter(sorted(degrees_of_separation.values(), reverse=True))
if __name__ == '__main__':
main()