1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Sat Jun 22 01:06:38 2019
4
+
5
+ @author: Stefanos
6
+ """
7
+ import math
8
+ import pprint
9
+ import sys
10
+ import random
11
+
12
+ #function that gets as an argument the name of a file and
13
+ #creates a dictionary @dic that contains the names of the points as keys
14
+ #and the coordinates of the points as values of the keys
15
+ #coordinates will be tuples as (abscissa,ordinate)
16
+ #@filename must have the following format:
17
+ #s1 (5, 0)
18
+ #s2 (3, 7)
19
+ #s3 (10, 9)
20
+ #
21
+ #where s1, s2, s3 are the names of the points and the tuples are the coordinates
22
+ def open_textfile (filename ):
23
+ dic = {}
24
+ with open (filename ) as file_input :
25
+ for line in file_input :
26
+ g = line .split (' ' )
27
+ dic [g [0 ]]= (float (g [1 ][1 :- 1 ]),float (g [2 ][:- 2 ]))
28
+ return (dic )
29
+
30
+ #function that gets two points: @p1, @p2 that are tuples (x,y) where
31
+ #x is abscissa of the point and y is the ordinate
32
+ #returns the euclidean distance of those points.
33
+ #In detail, for the euclidean distance:
34
+ #firstly calculate the difference of the abscissa of the two points
35
+ #then the difference of the ordinate of the two points
36
+ #then calculate the square of each difference , add them together
37
+ #lastly take the square root of the sum
38
+ def euclidean_distance (p1 ,p2 ):
39
+ return math .sqrt ( pow (p1 [0 ]- p2 [0 ],2 ) + pow (p1 [1 ]- p2 [1 ],2 ))
40
+
41
+ #function that takes as argument two points @p1 @p2 that are tuples
42
+ #calculates and returns the midpoint of the line that starts from @p1 and
43
+ #ends in @p2
44
+ #abscissa of the midpoint is the sum of abscissas of the points divided by 2
45
+ #ordinate of the midpoint is the sum of ordinates of the points divided by 2
46
+ def midpoint_finder (p1 ,p2 ):
47
+ return ((p1 [0 ]+ p2 [0 ])/ 2 ,(p1 [1 ]+ p2 [1 ])/ 2 )
48
+
49
+ #function that gets as arguments the diameter and the center of a circle and
50
+ #calculates the 4 corners of the square of side @diameter circumscribing the
51
+ #circle . Returns a list containing 2 corners as tuples (x,y)
52
+ #(the left-low and the top-right corner)
53
+ def get_square_Q (diameter , center ):
54
+ right_high_corner = (center [0 ]+ diameter / 2 ,center [1 ]+ diameter / 2 )
55
+ #doing a diagramm it is easy to understand that: the
56
+ #abscissa of the right_high_corner is the same as the biggest abscissa of the
57
+ #points of the circle and the
58
+ #ordinate of the right_high_corner is the same as the biggest ordinate of the
59
+ #points of the circle
60
+ left_low_corner = (center [0 ]- diameter / 2 ,center [1 ]- diameter / 2 )
61
+ #accordingly we see for the left-low corner
62
+ return [left_low_corner , right_high_corner ]
63
+
64
+ #function that checks if a point (@point) is contained in a square (@square)
65
+ # @square is a list that contains two tuples, first tuple is the bottom left
66
+ #corner of the square and second tuple is the upper right corner of the square
67
+ #let's say @point is a tuple (x,y) and @square is [(x1,y1),(x2,y2)]
68
+ #then @point is inside the square only if:
69
+ # x>x1 and y>y1 and x<=x2 and y<=y2
70
+ #returns true is the point is contained in the square
71
+ #false otherwise
72
+ #I consider that if a point is right on the border it will be contained
73
+ #only if it is on the upper or right side of the square
74
+ #otherwise two points could end up in the two different cells, leading to
75
+ #incorrect results
76
+ def check_point_in_square (point , square ):
77
+ start_corner = square [0 ] #assigns bottom-left corner of square to @start_corner
78
+ end_corner = square [1 ] #assigns upper-right corner of square to @end_corner
79
+ if (point [0 ]> start_corner [0 ] and point [1 ] > start_corner [1 ] and point [0 ]<= end_corner [0 ] and point [1 ]<= end_corner [1 ]):
80
+ #checks the abscissa and ordinates of the points as explained in the description
81
+ return True
82
+ return False
83
+
84
+ #a function that gets as arguments a list of points (@lis) whom we want to find
85
+ #the minimum spanning tree, a dictionary @S that contains the names of the points
86
+ #as keys and the coordinates(tuple (abscissa,ordinate)) of the points as values
87
+ # This function finds and returns the minimum spanning tree of the points in
88
+ # @lis according to the Prim's algorithm for mst.
89
+ #It works by keeping a list of visited points and a dictionary that contains
90
+ #other dictionaries of the distances , and everytime we look for the minimum
91
+ #distance of a visited point to a not visited point
92
+ def prim_kmst (lis ,S ):
93
+ num_points = len (lis ) #number of points that the mst will have
94
+ distance = {}
95
+ for every_point in lis :
96
+ distance [every_point ]= {}
97
+ for second_point in lis :
98
+ if every_point != second_point :
99
+ distance [every_point ][second_point ]= euclidean_distance (S [every_point ],S [second_point ])
100
+ #dictionary that contains as keys all the points and for every key
101
+ #it has an inner dictionary that contains all the other points ( except for
102
+ #the point that is the outer key) and after contains the distance
103
+ # for example :
104
+ # {'s1': {'s2': 7.280109889280518,
105
+ # 's3': 10.295630140987,
106
+ # 's4': 5.0,
107
+ # 's5': 7.211102550927978} ....}
108
+ visited = [] #a list that contains the names of the points that have already been visited
109
+ solution = {} #dictionary that contains the solution as: keys are tuples of the
110
+ #points that are connected and value is the distance of those points (in the tuple)
111
+ # for example:
112
+ #{('s1', 's9'): 5.0, ('s1', 's7'): 6.0 ....}
113
+ len_mst = 0 #variable to know how many coonections we already have (number of edges)
114
+ node_now = lis [0 ] #assigns first point to visit the first item of the list
115
+ #it doesn't really matter which point we select to visit first
116
+ while len_mst != num_points - 1 :
117
+ #knowing that the kmst has one egge less than the nodes it connects
118
+ #we run the loop till len_mst=num_points-1
119
+ visited .append (node_now ) #node_now is considered visited
120
+ min_dis = sys .maxsize #assign min_dis with a very big number
121
+ for every_visited in visited :
122
+ #for every point we have already visited we look for the
123
+ #minimum distance from that point to any unvisited point
124
+ min_visited_distance = min (distance [every_visited ].values ())
125
+ #@min_visited_distance is the minimum distance of the specific @every_visited
126
+ #between all the other points
127
+ if min_visited_distance < min_dis :
128
+ point_to_visit = list (distance [every_visited ].keys ())[list (distance [every_visited ].values ()).index (min_visited_distance )]
129
+ #in order to find the point whom @every_visited is connected
130
+ #to with @min_visited_distance, we use this trick
131
+ #( We know the outer key of the dictionary @distance and the
132
+ #value (@min_visited_distance) but we are looking for the inner key)
133
+ node_now = every_visited
134
+ min_dis = min_visited_distance
135
+ for every_visited in visited :
136
+ #in order for all of this to work, we need to delete the points and distances
137
+ #that we can't use anymore because there is already a connection
138
+ #As a result, we delete the points and distances of all the visited
139
+ #points and the newpoint(@point_to_visit)
140
+ del distance [every_visited ][point_to_visit ]
141
+ del distance [point_to_visit ][every_visited ]
142
+ len_mst += 1 #number of edges is increased by one
143
+ solution [(node_now ,point_to_visit )]= min_dis
144
+ node_now = point_to_visit #node_now is the new point we visit
145
+ return solution
146
+
147
+ #MAIN
148
+ file = sys .argv [1 ]
149
+ k = int (sys .argv [2 ]) #minimum ammount of points that the tree is spanning
150
+ S = open_textfile (file )
151
+ #in order for the algorithm to work, k must have a square root that is an integer.
152
+ # In detail, for step (4) where we divide Q into k square cells,
153
+ # the square cells will have equal dimensions and area(emvadon)
154
+ #only if there are sqrt(k) rows and sqrt(k) columns inside the square Q .
155
+ # As a result sqrt(k) must be an integer otherwise the algorithm can't work
156
+ while k < len (S ) and not (math .sqrt (k )).is_integer ():
157
+ #The programm will return the mst for at least k points (sum of points is n)
158
+ #We continiously increase k till it reaches an integer that has
159
+ # a square root that is an integer or till k equals to n
160
+ k += 1
161
+ solution = {}#dictionary that contains solutions and
162
+ #is used to get every distinct pair of points
163
+ #When k is equal to n(sum of points), we only need to construct one minimum
164
+ #spanning tree for all the n points and the solution is the length of it
165
+ if k == len (S ):
166
+ solution = prim_kmst (list (S .keys ()),S )
167
+ len_tree = sum (solution .values ())
168
+ print ("For " ,k ," points the MST has length: " ,len_tree )
169
+ print ("The links that were used are: " )
170
+ pprint .pprint (solution )
171
+ else :
172
+ #k is less than n (sum of points) so we follow the steps from the paper
173
+ min_length = sys .maxsize
174
+ for p1 in S .keys ():
175
+ for p2 in S .keys ():
176
+ #for every distinct pair of points
177
+ #we achieve that by appending to the dictionary @solution
178
+ #the mst between the two points
179
+ #so we just check if the pair of points we examine, exists in the
180
+ #dic @solution and also if the the two points are not the same
181
+ if ((p1 ,p2 ) not in solution ) and p1 != p2 :
182
+ d = math .sqrt (3 ) * euclidean_distance (S [p1 ], S [p2 ])
183
+ #d is the diameter of the circle centered in the midpoint of
184
+ #the line segment of p1, p2
185
+ midpoint = midpoint_finder (S [p1 ],S [p2 ])
186
+ #calculate the midpoint from the function
187
+ sc = [] # subset of S contained in C
188
+ for point in S .keys ():
189
+ if (euclidean_distance (S [point ],midpoint )< d / 2 ):
190
+ #if distance between @midpoint and @point is less than
191
+ #diameter/2 (radius), then @point is contained in the circle
192
+ sc .append (point )
193
+ if len (sc )< k : #if sc contains fewer than k points
194
+ solution [(p2 ,p1 )]= None #enter in dictionary @solution
195
+ #so that we don't check the same pair of points again
196
+ continue #skip to the next iteration
197
+ Q = get_square_Q (d ,midpoint )
198
+ #Q be is the square of side d circumscribing C.
199
+ cells = {}
200
+ #initialize the list @cells which will contain lists of points
201
+ #which are placed in the specific cell
202
+ for i in range (int (math .sqrt (k ))):
203
+ for j in range (int (math .sqrt (k ))):
204
+ cells [(i ,j )]= []
205
+ sum_points_of_cell = [[0 for j in range (int (math .sqrt (k )))] for i in range (int (math .sqrt (k )))]
206
+ #a list that contains the sum of points inside every cell
207
+ #for instance the sum of cell (1,2) will be sum_points_of_cell[1][2]
208
+ for point in sc :
209
+ #for every point that is contained in the circle C
210
+ point_change = (S [point ][0 ]- Q [0 ][0 ],S [point ][1 ]- Q [0 ][1 ])
211
+ #we transform all points by considering that the bottom left
212
+ #corner of the square Q is the start of axis so that we can
213
+ #calculate in which cell every point is by using the
214
+ #operator //
215
+ x_cell = int (point_change [0 ] // (d / math .sqrt (k )))
216
+ y_cell = int (point_change [1 ]// (d / math .sqrt (k )))
217
+ #each square cell has a side d/sqrt(k)
218
+ #in order to know in which cell the point is, we divide by
219
+ # (d/sqrt(k)) and the integer of the result is the position of the cell
220
+ #we do the above for both the abscissa and the ordinate
221
+ sum_points_of_cell [x_cell ][y_cell ]+= 1
222
+ #increase the sum of points in the specific cell by one
223
+ cells [(x_cell ,y_cell )].append (point )
224
+ #append the point as value of the dictionary @cells with
225
+ # key the coordinates of the cell it is in the square Q
226
+ sort_list = []
227
+ #sorting the cells by finding the max in sum_points_of_cell
228
+ #Then finding the index of the max, for both the inner and outer list
229
+ #@sum_points_of_cell is a list that contains lists
230
+ #then we assign the sum_points_of_cell[x_max][y_max] with -1
231
+ #so that we don't encounter it again
232
+ for i in range (int (math .sqrt (k ))):
233
+ for j in range (int (math .sqrt (k ))):
234
+ max_value = max ([max (every_lis ) for every_lis in sum_points_of_cell ])
235
+ #finding the max_value of the whole list
236
+ for z in range (int (math .sqrt (k ))):
237
+ if max_value in sum_points_of_cell [z ]:
238
+ x_max = z
239
+ #finding the index of the list that max_value
240
+ #is contained (outer) that index is the
241
+ #abscissa of the cell that contains the most points
242
+ break
243
+ y_max = sum_points_of_cell [x_max ].index (max_value )
244
+ #finding the index of the inner list where max_value is found
245
+ #that index is the ordinate of the cell that contains the most points
246
+ sort_list .append ((x_max ,y_max ))
247
+ #append to sort_list
248
+ sum_points_of_cell [x_max ][y_max ]= - 1
249
+ #so that we don't encounter the same cell again
250
+ sum_selected = 0 #variable to know how many points are contained so far
251
+ counter = 0 #variable to know the index of @sort_list that we are at
252
+ point_selected = []# list that will contain all the k points
253
+ while sum_selected < k :
254
+ #as long as we haven't selected k points the loop is going
255
+ cell_now = sort_list [counter ]
256
+ #gets the cells from the ones that contain the most points to less
257
+ #as sort_list is sorted
258
+ for point in cells [cell_now ]:
259
+ #for every point that is contained in the specific cell (@cell_now)
260
+ sum_selected += 1
261
+ point_selected .append (point )
262
+ counter += 1
263
+ sum_of_lastcell = len (cells [cell_now ])#finds the sum of points added
264
+ #from the last chosen cell
265
+ #
266
+ #This loop randomly discards points from the last cell so that
267
+ #the sum of points selected (in list @point_selected) is
268
+ #equal to k
269
+ while len (point_selected )!= k :
270
+ #while we have selected more than k points
271
+ point_to_pop = random .randint (len (point_selected )- sum_of_lastcell ,len (point_selected )- 1 )
272
+ #gets a random number that corresponds to an index
273
+ #of a point that belonged to the last chosen cell
274
+ #that was used to collect the needed points
275
+ point_selected .pop (point_to_pop )
276
+ sum_of_lastcell -= 1
277
+ #reducing the variable @sum_of_cell in order not to
278
+ #discard any points that don't belong to the last chosen cell
279
+ #(We only want to discard points from the last chosen cell)
280
+ solution [(p2 ,p1 )]= prim_kmst (point_selected ,S )
281
+ #finding the mst for the k points in the list @point_selected
282
+ len_tree = sum (solution [(p2 ,p1 )].values ())
283
+ #length is the sum of all distances of the connections
284
+ if len_tree < min_length :
285
+ min_length = len_tree
286
+ min_len_pair = (p2 ,p1 )
287
+ print ("For " ,k ," points the MST has length: " ,min_length )
288
+ print ("The links that were used are: " )
289
+ pprint .pprint (solution [min_len_pair ])
0 commit comments