Skip to content

Commit 48426cb

Browse files
authored
Add files via upload
1 parent 08806bc commit 48426cb

File tree

5 files changed

+334
-0
lines changed

5 files changed

+334
-0
lines changed

bonus-assignment/kmst.py

Lines changed: 289 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,289 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Sat Jun 22 01:06:38 2019
4+
5+
@author: Stefanos
6+
"""
7+
import math
8+
import pprint
9+
import sys
10+
import random
11+
12+
#function that gets as an argument the name of a file and
13+
#creates a dictionary @dic that contains the names of the points as keys
14+
#and the coordinates of the points as values of the keys
15+
#coordinates will be tuples as (abscissa,ordinate)
16+
#@filename must have the following format:
17+
#s1 (5, 0)
18+
#s2 (3, 7)
19+
#s3 (10, 9)
20+
#
21+
#where s1, s2, s3 are the names of the points and the tuples are the coordinates
22+
def open_textfile(filename):
23+
dic={}
24+
with open(filename) as file_input:
25+
for line in file_input:
26+
g=line.split(' ')
27+
dic[g[0]]=(float(g[1][1:-1]),float(g[2][:-2]))
28+
return (dic)
29+
30+
#function that gets two points: @p1, @p2 that are tuples (x,y) where
31+
#x is abscissa of the point and y is the ordinate
32+
#returns the euclidean distance of those points.
33+
#In detail, for the euclidean distance:
34+
#firstly calculate the difference of the abscissa of the two points
35+
#then the difference of the ordinate of the two points
36+
#then calculate the square of each difference , add them together
37+
#lastly take the square root of the sum
38+
def euclidean_distance(p1,p2):
39+
return math.sqrt( pow(p1[0]-p2[0],2) + pow(p1[1]-p2[1],2))
40+
41+
#function that takes as argument two points @p1 @p2 that are tuples
42+
#calculates and returns the midpoint of the line that starts from @p1 and
43+
#ends in @p2
44+
#abscissa of the midpoint is the sum of abscissas of the points divided by 2
45+
#ordinate of the midpoint is the sum of ordinates of the points divided by 2
46+
def midpoint_finder(p1,p2):
47+
return ((p1[0]+p2[0])/2,(p1[1]+p2[1])/2)
48+
49+
#function that gets as arguments the diameter and the center of a circle and
50+
#calculates the 4 corners of the square of side @diameter circumscribing the
51+
#circle . Returns a list containing 2 corners as tuples (x,y)
52+
#(the left-low and the top-right corner)
53+
def get_square_Q(diameter, center):
54+
right_high_corner=(center[0]+diameter/2,center[1]+diameter/2)
55+
#doing a diagramm it is easy to understand that: the
56+
#abscissa of the right_high_corner is the same as the biggest abscissa of the
57+
#points of the circle and the
58+
#ordinate of the right_high_corner is the same as the biggest ordinate of the
59+
#points of the circle
60+
left_low_corner=(center[0]-diameter/2,center[1]-diameter/2)
61+
#accordingly we see for the left-low corner
62+
return [left_low_corner, right_high_corner]
63+
64+
#function that checks if a point (@point) is contained in a square (@square)
65+
# @square is a list that contains two tuples, first tuple is the bottom left
66+
#corner of the square and second tuple is the upper right corner of the square
67+
#let's say @point is a tuple (x,y) and @square is [(x1,y1),(x2,y2)]
68+
#then @point is inside the square only if:
69+
# x>x1 and y>y1 and x<=x2 and y<=y2
70+
#returns true is the point is contained in the square
71+
#false otherwise
72+
#I consider that if a point is right on the border it will be contained
73+
#only if it is on the upper or right side of the square
74+
#otherwise two points could end up in the two different cells, leading to
75+
#incorrect results
76+
def check_point_in_square(point, square):
77+
start_corner=square[0] #assigns bottom-left corner of square to @start_corner
78+
end_corner=square[1] #assigns upper-right corner of square to @end_corner
79+
if (point[0]>start_corner[0] and point[1] > start_corner[1] and point[0]<=end_corner[0] and point[1]<= end_corner[1]):
80+
#checks the abscissa and ordinates of the points as explained in the description
81+
return True
82+
return False
83+
84+
#a function that gets as arguments a list of points (@lis) whom we want to find
85+
#the minimum spanning tree, a dictionary @S that contains the names of the points
86+
#as keys and the coordinates(tuple (abscissa,ordinate)) of the points as values
87+
# This function finds and returns the minimum spanning tree of the points in
88+
# @lis according to the Prim's algorithm for mst.
89+
#It works by keeping a list of visited points and a dictionary that contains
90+
#other dictionaries of the distances , and everytime we look for the minimum
91+
#distance of a visited point to a not visited point
92+
def prim_kmst(lis,S):
93+
num_points=len(lis) #number of points that the mst will have
94+
distance={}
95+
for every_point in lis:
96+
distance[every_point]={}
97+
for second_point in lis:
98+
if every_point != second_point:
99+
distance[every_point][second_point]=euclidean_distance(S[every_point],S[second_point])
100+
#dictionary that contains as keys all the points and for every key
101+
#it has an inner dictionary that contains all the other points ( except for
102+
#the point that is the outer key) and after contains the distance
103+
# for example :
104+
# {'s1': {'s2': 7.280109889280518,
105+
# 's3': 10.295630140987,
106+
# 's4': 5.0,
107+
# 's5': 7.211102550927978} ....}
108+
visited=[] #a list that contains the names of the points that have already been visited
109+
solution={} #dictionary that contains the solution as: keys are tuples of the
110+
#points that are connected and value is the distance of those points (in the tuple)
111+
# for example:
112+
#{('s1', 's9'): 5.0, ('s1', 's7'): 6.0 ....}
113+
len_mst=0 #variable to know how many coonections we already have (number of edges)
114+
node_now=lis[0] #assigns first point to visit the first item of the list
115+
#it doesn't really matter which point we select to visit first
116+
while len_mst!= num_points-1 :
117+
#knowing that the kmst has one egge less than the nodes it connects
118+
#we run the loop till len_mst=num_points-1
119+
visited.append(node_now) #node_now is considered visited
120+
min_dis= sys.maxsize #assign min_dis with a very big number
121+
for every_visited in visited:
122+
#for every point we have already visited we look for the
123+
#minimum distance from that point to any unvisited point
124+
min_visited_distance=min(distance[every_visited].values())
125+
#@min_visited_distance is the minimum distance of the specific @every_visited
126+
#between all the other points
127+
if min_visited_distance < min_dis:
128+
point_to_visit=list(distance[every_visited].keys())[list(distance[every_visited].values()).index(min_visited_distance)]
129+
#in order to find the point whom @every_visited is connected
130+
#to with @min_visited_distance, we use this trick
131+
#( We know the outer key of the dictionary @distance and the
132+
#value (@min_visited_distance) but we are looking for the inner key)
133+
node_now=every_visited
134+
min_dis=min_visited_distance
135+
for every_visited in visited:
136+
#in order for all of this to work, we need to delete the points and distances
137+
#that we can't use anymore because there is already a connection
138+
#As a result, we delete the points and distances of all the visited
139+
#points and the newpoint(@point_to_visit)
140+
del distance[every_visited][point_to_visit]
141+
del distance [point_to_visit][every_visited]
142+
len_mst +=1 #number of edges is increased by one
143+
solution[(node_now,point_to_visit)]=min_dis
144+
node_now=point_to_visit #node_now is the new point we visit
145+
return solution
146+
147+
#MAIN
148+
file = sys.argv[1]
149+
k = int(sys.argv[2]) #minimum ammount of points that the tree is spanning
150+
S=open_textfile(file)
151+
#in order for the algorithm to work, k must have a square root that is an integer.
152+
# In detail, for step (4) where we divide Q into k square cells,
153+
# the square cells will have equal dimensions and area(emvadon)
154+
#only if there are sqrt(k) rows and sqrt(k) columns inside the square Q .
155+
# As a result sqrt(k) must be an integer otherwise the algorithm can't work
156+
while k< len(S) and not (math.sqrt(k)).is_integer():
157+
#The programm will return the mst for at least k points (sum of points is n)
158+
#We continiously increase k till it reaches an integer that has
159+
# a square root that is an integer or till k equals to n
160+
k+=1
161+
solution={}#dictionary that contains solutions and
162+
#is used to get every distinct pair of points
163+
#When k is equal to n(sum of points), we only need to construct one minimum
164+
#spanning tree for all the n points and the solution is the length of it
165+
if k==len(S):
166+
solution= prim_kmst(list(S.keys()),S)
167+
len_tree=sum(solution.values())
168+
print("For ",k," points the MST has length: ",len_tree)
169+
print("The links that were used are: ")
170+
pprint.pprint(solution)
171+
else:
172+
#k is less than n (sum of points) so we follow the steps from the paper
173+
min_length=sys.maxsize
174+
for p1 in S.keys():
175+
for p2 in S.keys():
176+
#for every distinct pair of points
177+
#we achieve that by appending to the dictionary @solution
178+
#the mst between the two points
179+
#so we just check if the pair of points we examine, exists in the
180+
#dic @solution and also if the the two points are not the same
181+
if ((p1,p2) not in solution) and p1!=p2:
182+
d=math.sqrt(3) * euclidean_distance(S[p1], S[p2])
183+
#d is the diameter of the circle centered in the midpoint of
184+
#the line segment of p1, p2
185+
midpoint= midpoint_finder(S[p1],S[p2])
186+
#calculate the midpoint from the function
187+
sc=[] # subset of S contained in C
188+
for point in S.keys():
189+
if (euclidean_distance(S[point],midpoint)<d/2):
190+
#if distance between @midpoint and @point is less than
191+
#diameter/2 (radius), then @point is contained in the circle
192+
sc.append(point)
193+
if len(sc)< k: #if sc contains fewer than k points
194+
solution[(p2,p1)]=None #enter in dictionary @solution
195+
#so that we don't check the same pair of points again
196+
continue #skip to the next iteration
197+
Q=get_square_Q(d,midpoint)
198+
#Q be is the square of side d circumscribing C.
199+
cells={}
200+
#initialize the list @cells which will contain lists of points
201+
#which are placed in the specific cell
202+
for i in range (int(math.sqrt(k))):
203+
for j in range (int(math.sqrt(k))):
204+
cells[(i,j)]=[]
205+
sum_points_of_cell=[[0 for j in range (int(math.sqrt(k)))] for i in range (int(math.sqrt(k)))]
206+
#a list that contains the sum of points inside every cell
207+
#for instance the sum of cell (1,2) will be sum_points_of_cell[1][2]
208+
for point in sc:
209+
#for every point that is contained in the circle C
210+
point_change=(S[point][0]-Q[0][0],S[point][1]-Q[0][1])
211+
#we transform all points by considering that the bottom left
212+
#corner of the square Q is the start of axis so that we can
213+
#calculate in which cell every point is by using the
214+
#operator //
215+
x_cell=int(point_change[0] // (d/math.sqrt(k)))
216+
y_cell=int(point_change[1]// (d/math.sqrt(k)))
217+
#each square cell has a side d/sqrt(k)
218+
#in order to know in which cell the point is, we divide by
219+
# (d/sqrt(k)) and the integer of the result is the position of the cell
220+
#we do the above for both the abscissa and the ordinate
221+
sum_points_of_cell[x_cell][y_cell]+=1
222+
#increase the sum of points in the specific cell by one
223+
cells[(x_cell,y_cell)].append(point)
224+
#append the point as value of the dictionary @cells with
225+
# key the coordinates of the cell it is in the square Q
226+
sort_list=[]
227+
#sorting the cells by finding the max in sum_points_of_cell
228+
#Then finding the index of the max, for both the inner and outer list
229+
#@sum_points_of_cell is a list that contains lists
230+
#then we assign the sum_points_of_cell[x_max][y_max] with -1
231+
#so that we don't encounter it again
232+
for i in range (int(math.sqrt(k))):
233+
for j in range (int(math.sqrt(k))):
234+
max_value=max([max(every_lis) for every_lis in sum_points_of_cell])
235+
#finding the max_value of the whole list
236+
for z in range (int(math.sqrt(k))):
237+
if max_value in sum_points_of_cell[z]:
238+
x_max=z
239+
#finding the index of the list that max_value
240+
#is contained (outer) that index is the
241+
#abscissa of the cell that contains the most points
242+
break
243+
y_max=sum_points_of_cell[x_max].index(max_value)
244+
#finding the index of the inner list where max_value is found
245+
#that index is the ordinate of the cell that contains the most points
246+
sort_list.append((x_max,y_max))
247+
#append to sort_list
248+
sum_points_of_cell[x_max][y_max]=-1
249+
#so that we don't encounter the same cell again
250+
sum_selected=0 #variable to know how many points are contained so far
251+
counter=0 #variable to know the index of @sort_list that we are at
252+
point_selected=[]# list that will contain all the k points
253+
while sum_selected<k:
254+
#as long as we haven't selected k points the loop is going
255+
cell_now=sort_list[counter]
256+
#gets the cells from the ones that contain the most points to less
257+
#as sort_list is sorted
258+
for point in cells[cell_now]:
259+
#for every point that is contained in the specific cell (@cell_now)
260+
sum_selected+=1
261+
point_selected.append(point)
262+
counter +=1
263+
sum_of_lastcell=len(cells[cell_now])#finds the sum of points added
264+
#from the last chosen cell
265+
#
266+
#This loop randomly discards points from the last cell so that
267+
#the sum of points selected (in list @point_selected) is
268+
#equal to k
269+
while len(point_selected)!=k:
270+
#while we have selected more than k points
271+
point_to_pop=random.randint(len(point_selected)-sum_of_lastcell,len(point_selected)-1)
272+
#gets a random number that corresponds to an index
273+
#of a point that belonged to the last chosen cell
274+
#that was used to collect the needed points
275+
point_selected.pop(point_to_pop)
276+
sum_of_lastcell-=1
277+
#reducing the variable @sum_of_cell in order not to
278+
#discard any points that don't belong to the last chosen cell
279+
#(We only want to discard points from the last chosen cell)
280+
solution[(p2,p1)]=prim_kmst(point_selected,S)
281+
#finding the mst for the k points in the list @point_selected
282+
len_tree=sum(solution[(p2,p1)].values())
283+
#length is the sum of all distances of the connections
284+
if len_tree < min_length:
285+
min_length=len_tree
286+
min_len_pair=(p2,p1)
287+
print("For ",k," points the MST has length: ",min_length)
288+
print("The links that were used are: ")
289+
pprint.pprint(solution[min_len_pair])

bonus-assignment/points.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
s1 (5, 0)
2+
s2 (3, 7)
3+
s3 (10, 9)
4+
s4 (0, 0)
5+
s5 (1, 6)

bonus-assignment/points1.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
s1 (5, 0)
2+
s2 (3, 7)
3+
s3 (10, 9)
4+
s4 (0, 0)
5+
s5 (1, 6)
6+
s6 (2, 4)
7+
s7 (1, 5)
8+
s8 (-10, 11)
9+
s9 (12, 0)
10+
s10 (1, 0)

bonus-assignment/points2.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
s1 (2, 1)
2+
s2 (9, 10)
3+
s3 (7, 0)
4+
s4 (3, 7)
5+
s5 (1, -2)
6+
s6 (4, -7)
7+
s7 (2, -10)
8+
s8 (-3, 0)
9+
s9 (-6, 2)
10+
s10 (-4, 9)
11+
s11 (-8, 12)
12+
s12 (-4, -3)
13+
s13 (-5, -10)
14+
s14 (-9, -2)
15+
s15 (0, 12)
16+
s16 (-1, -7)
17+
s17 (6, 10)
18+
s18 (9, -11)
19+
s19 (-8, -7)
20+
s20 (-7, 7)

bonus-assignment/points3.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
s1 (-5, 5)
2+
s2 (7, -7)
3+
s3 (1, 9)
4+
s4 (4, 0)
5+
s5 (-1, -6)
6+
s6 (2, -4)
7+
s7 (1, 5)
8+
s8 (-6, -8)
9+
s9 (-5, 0)
10+
s10 (500, 100)

0 commit comments

Comments
 (0)