# Параметри

In [64]:
# Input file with the source data (loaded further down with pd.read_csv, tab-separated)
MATCH_GRAPH_FILE = "data/mds_matches.csv"

# PROFILE0: the profile every row's `name` is linked to via `shared_dna`;
# it becomes the origin (0, 0) of the final coordinate system.
PROFILE0 = "Sergi Adamchuk"
# PROFILE1: second anchor; after the transform it lies on the positive x axis,
# SCALE units away from PROFILE0.
PROFILE1 = "Natalya Valentynivna Dymytryuk"

# Number of output dimensions passed to MDS (n_components).
# NOTE: the transform cell builds a 2x2 rotation matrix, so it only works for 2.
DIMENSIONS = 2

# Distance between PROFILE0 and PROFILE1 in the transformed coordinates.
SCALE = 100

# Підготовлені дані

In [17]:
# Nested lookup filled by add_match: matches[match1][match2] = cm
matches = {}

In [12]:
def add_match(match1, match2, cm):
    """Record `cm` for the ordered pair (match1, match2) in the global `matches`.

    The inner dict for `match1` is created on first use; a repeated pair
    overwrites the previously stored value.
    """
    matches.setdefault(match1, {})[match2] = cm

In [18]:
def parse_num(s):
    """Convert a cell value such as "3 532,1" to a float.

    Accepts strings that use whitespace as a thousands separator and a comma
    as the decimal mark, as well as values that are already numeric (pandas
    yields ints/floats instead of strings when a whole column is numeric).

    Parameters:
        s: a string or a number.

    Returns:
        The parsed value as a float.

    Raises:
        ValueError: if the cleaned text is not a valid number.
    """
    # str() lets numeric cells through; split()/join drops every kind of
    # whitespace, including the no-break spaces often used as thousands separators.
    compact = "".join(str(s).split())
    return float(compact.replace(",", "."))

In [20]:
import re

def clean_name(s):
    """Return `s` without a bracketed suffix that closes at the end of the string.

    The suffix is removed together with any whitespace directly before its
    opening '['. Strings that do not end with ']' come back unchanged.
    """
    bracket_suffix = re.compile(r'\s*\[.*\]$')
    return bracket_suffix.sub('', s)

# Зчитування вихідних даних

In [6]:
import pandas as pd
# The input is read as tab-separated text even though the file name ends in .csv.
matches_df = pd.read_csv(MATCH_GRAPH_FILE, sep='\t')
# Bare expression: display the loaded frame as the cell output.
matches_df

Unnamed: 0,name,shared_dna,match_shared,match_name
0,Levko Adamchuk [Левко Адамчук],"3 532,1",8833,Natalya Valentynivna Dymytryuk
1,Levko Adamchuk [Левко Адамчук],"3 532,1",4710,Mykola Martyniuk
2,Levko Adamchuk [Левко Адамчук],"3 532,1",292,Marie Lis
3,Levko Adamchuk [Левко Адамчук],"3 532,1",356,Anna stefanidis
4,Levko Adamchuk [Левко Адамчук],"3 532,1",240,Olya Shylipuk
...,...,...,...,...
585,Wojciech Duszkiewicz [Войцих Дузкиевихз],363,155,Dmitry Konstantinov
586,Wojciech Duszkiewicz [Войцих Дузкиевихз],363,142,Christine Fischer
587,Wojciech Duszkiewicz [Войцих Дузкиевихз],363,302,Svetlana Kuhn
588,Wojciech Duszkiewicz [Войцих Дузкиевихз],363,87,Matěj Jakimov


In [21]:
def handle_row(row):
    """Store both relations described by one row of the input table.

    Each row yields two entries in `matches`:
      * PROFILE0 -> cleaned `name`, valued by `shared_dna`;
      * cleaned `name` -> `match_name`, valued by `match_shared`.
    Returns None; the function is used only for its side effects.
    """
    person = clean_name(row['name'])

    cm_with_profile = parse_num(row["shared_dna"])
    add_match(PROFILE0, person, cm_with_profile)

    cm_with_other = parse_num(row["match_shared"])
    add_match(person, row["match_name"], cm_with_other)
    

# Run handle_row on every row for its side effect of filling `matches`;
# handle_row returns None, so the Series shown as output carries no data.
matches_df.apply(handle_row, axis=1)

0      None
1      None
2      None
3      None
4      None
       ... 
585    None
586    None
587    None
588    None
589    None
Length: 590, dtype: object

In [30]:
# Only first-level keys of `matches` become labels; a label's position in this
# list is reused as its row/column index in the distance matrix and in the
# coordinate arrays.
labels = list(matches)
matches_count = len(labels)

In [25]:
def shared_dna_direct(match1, match2):
    """Look up the value stored for the ordered pair (match1, match2).

    Returns matches[match1][match2] when both levels exist, otherwise None.
    The reverse direction is not consulted.
    """
    partners = matches.get(match1, {})
    return partners.get(match2)

def shared_dna(match1, match2):
    """Return the stored value for the pair, trying both directions.

    The (match1, match2) direction takes precedence; (match2, match1) is
    consulted only when the first lookup yields None. Returns None when
    neither direction is recorded.
    """
    forward = shared_dna_direct(match1, match2)
    if forward is not None:
        return forward
    return shared_dna_direct(match2, match1)

# Функція розрахунку відстані між збігами

In [56]:
import math

def distance(m1, m2, default_cm=5, total_cm=3500.0):
    """Turn the shared amount between two matches into a dissimilarity.

    The result is 1 + log2(total_cm / cm) / 2, so a pair sharing `total_cm`
    gets distance 1 and every 4x drop in the shared amount adds 1.

    Parameters:
        m1, m2: labels of the two matches (looked up via shared_dna).
        default_cm: value used when the pair has no recorded amount, or when
            the recorded amount is not positive.
        total_cm: amount that corresponds to distance 1.

    Returns:
        The distance as a float.
    """
    cm = shared_dna(m1, m2)
    # A non-positive amount cannot go through the logarithm (0 would raise
    # ZeroDivisionError); treat it like an unknown pair.
    if cm is None or cm <= 0:
        cm = default_cm
    return 1 + math.log2(total_cm / cm) / 2

# Матриця відстаней

In [57]:
import numpy as np

# Symmetric pairwise distance matrix indexed by position in `labels`;
# the diagonal is left at zero.
distances = np.zeros((matches_count, matches_count))

# Visit each unordered pair once (upper triangle) and mirror the value.
for row in range(matches_count):
    for col in range(row + 1, matches_count):
        distances[row, col] = distances[col, row] = distance(labels[row], labels[col])


# Розкидаємо збіги на системі координат

In [131]:
from sklearn.manifold import MDS

# Embed the labels into DIMENSIONS coordinates from the precomputed distance
# matrix; random_state is fixed so repeated runs give the same layout.
mds = MDS(
    n_components=DIMENSIONS,
    dissimilarity='precomputed',
    normalized_stress='auto',
    random_state=42,
)

# One row of coordinates per entry of `labels`, in the same order.
coords_raw = mds.fit_transform(distances)

# Трансформуємо точки

In [132]:
# Anchor points: PROFILE0 becomes the origin, PROFILE1 defines the +x axis.
p0, p1 = (coords_raw[labels.index(name)] for name in (PROFILE0, PROFILE1))

# Unit vector pointing from PROFILE0 to PROFILE1 in the raw layout.
diff = p1 - p0
norm = np.linalg.norm(diff)
direction = diff / norm
dx, dy = direction[0], direction[1]

# Right-multiplying a row vector by this 2x2 matrix maps `direction` onto
# (1, 0), so it is only valid for a two-dimensional layout.
rotation_matrix = np.array([
    [dx, -dy],
    [dy, dx],
])

# Uniform scaling that puts PROFILE1 exactly SCALE units from the origin.
scale = SCALE / norm
coords = scale * ((coords_raw - p0) @ rotation_matrix)

In [133]:
coords

array([[ 0.00000000e+00,  0.00000000e+00],
       [ 3.54145738e+01,  7.56577312e+00],
       [ 1.00000000e+02,  4.53225754e-16],
       [ 8.09462723e+01, -1.13839899e+02],
       [-1.36975293e+02, -4.35921385e+01],
       [-2.94528550e+01, -2.51396386e+02],
       [-1.74205094e+02,  1.84129629e+02],
       [ 1.87709152e+02, -6.20433981e+00],
       [-5.27851190e+01, -1.71364282e+02],
       [ 1.66947467e+01,  1.49660867e+02],
       [ 6.90793430e-01,  2.45965696e+02],
       [ 2.21772614e+02,  7.77952608e+01],
       [-2.49228010e+02, -7.70368634e+01],
       [ 2.25955450e+02, -8.53464861e+01],
       [ 1.01754513e+02,  2.16472900e+02],
       [-9.14509062e+01,  2.25499540e+02],
       [-1.83597665e+02, -1.39202892e+02],
       [-2.52783161e+02,  1.66524715e+01],
       [ 1.71776479e+02,  1.58949497e+02],
       [-2.28631094e+02,  1.01075991e+02],
       [ 1.64598375e+02, -1.81586366e+02],
       [ 7.11442168e+01, -2.33310155e+02],
       [-1.43753836e+02, -2.14955361e+02],
       [-1.

In [82]:
direction

array([-0.77179011, -0.63587737])

In [92]:
coords

array([[   0.        ,    0.        ],
       [  35.4145738 ,  -22.52171391],
       [ 100.        ,  -77.17901099],
       [  80.94627235, -134.86174794],
       [-136.97529275,   77.99692198],
       [ -29.45285495, -137.12584986],
       [-174.20509374,  251.53363228],
       [ 187.70915233, -148.81726658],
       [ -52.78511897,  -68.22763569],
       [  16.69474673,   82.28111738],
       [   0.69079343,  155.87087185],
       [ 221.77261389, -121.69366442],
       [-249.22801014,  143.36571545],
       [ 225.95544962, -228.66008019],
       [ 101.75451307,   59.11709111],
       [ -91.45090619,  213.97095895],
       [-183.59766491,   53.18289353],
       [-252.7831611 ,  205.68447341],
       [ 171.77647945,  -31.50300027],
       [-228.63109381,  240.72715209],
       [ 164.59837504, -242.50205837],
       [  71.14421679, -203.2650501 ],
       [-143.75383623,  -25.73745979],
       [-124.51563837,  157.39326985]])

In [86]:
diff

array([-1.30660634, -1.07651211])

In [128]:
# Scratch check of the rotation matrix on toy data.
# NOTE: this cell reassigns coords_raw, p1, norm, direction, dx, dy and
# rotation_matrix - the same global names the MDS and transform cells use.
coords_raw = np.array([[10, 20], [30, 40], [50, 60], [70, 80]])

p1 = np.array([30, 40])

norm = np.linalg.norm(p1)

print("norm", norm)

# Unit vector along p1 (the origin plays the role of the first anchor here).
direction = p1 / norm

dx, dy = direction[0], direction[1]
print(dx, dy)
rotation_matrix = np.array([[dx, -dy], [dy, dx]])

# np.dot(coords_raw, rotation_matrix)

# Expected: the row equal to p1 lands on (norm, 0) up to rounding error.
coords_raw @ rotation_matrix


norm 50.0
0.6 0.8


array([[ 2.2000000e+01,  4.0000000e+00],
       [ 5.0000000e+01, -8.8817842e-16],
       [ 7.8000000e+01, -4.0000000e+00],
       [ 1.0600000e+02, -8.0000000e+00]])

In [114]:
print(dx, dy)

0.6 0.8


In [127]:
import numpy as np

# NOTE: this cell reassigns coords_raw and rotation_matrix, names that the
# MDS and transform cells also use.
coords_raw = np.array([[10, 20], [30, 40], [50, 60], [70, 80]])

# Example rotation matrix for a 180-degree rotation (pi radians)
theta = np.pi  # 180°
cos_theta, sin_theta = np.cos(theta), np.sin(theta)
rotation_matrix = np.array([[cos_theta, -sin_theta],
                           [sin_theta,  cos_theta]])

# Multiply every row vector by rotation_matrix
transformed_coords = coords_raw @ rotation_matrix

print(transformed_coords)

[[-10. -20.]
 [-30. -40.]
 [-50. -60.]
 [-70. -80.]]
