In [1]:
# === Vedecká vizualizácia "medúzy" z tvojich dát ==========================
# - čitateľný, čistý look (bez kreatívnych "trails"), publikovateľná estetika
# - načíta parametre z Excelu → namapuje na vzhľad/pohyb → uloží MP4 + PNG

import os, re, math, time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import imageio
from matplotlib.colors import hsv_to_rgb

# -------- 1) PATH TO THE EXCEL WORKBOOK (adjust as needed) --------
EXCEL_PATH = "Base completa - Encuesta sobre TD e Igualdad de Género en la Industria Argentina.xlsx"  # ← leave as is when the workbook sits in the same folder

# -------- 2) EXCEL → PARAMETRE ---------------------------------------------
def yn_to_num(val):
    """Map a yes/no survey answer to 1.0 (yes), 0.0 (no) or NaN (undecided).

    Spanish, Portuguese and English spellings are recognised. Matching is
    done on whole words, so answers such as "Sin datos" or "Similar" are no
    longer mistaken for "sí"/"sim".

    Parameters
    ----------
    val : object
        Raw cell value; ``None`` and float NaN are treated as missing.

    Returns
    -------
    float
        1.0, 0.0, or ``np.nan`` when the answer cannot be classified.
    """
    if val is None or (isinstance(val, float) and math.isnan(val)):
        return np.nan
    s = str(val).strip().lower()
    yes = {'si', 'sí', 'sim', 'yes', 'y', 'true', 'verdadero', 'verdadeiro'}
    no = {'no', 'nao', 'não', 'false', 'falso'}
    if s in yes:
        return 1.0
    if s in no:
        return 0.0

    # Longer answers ("Sí, en varias áreas"): look at whole words only.
    words = re.findall(r"[^\W\d_]+", s)
    if not words:
        return np.nan
    if words[0] in ('si', 'sí', 'sim', 'yes') or 'sí' in words or 'sim' in words:
        return 1.0
    if 'não' in words or 'nao' in words:
        return 0.0
    # A leading "no" counts only when it stands on its own ("No.", "No, ninguna");
    # phrases such as "no sabe" stay undecided.
    if re.match(r"no\s*[.,;:!]", s):
        return 0.0
    return np.nan

def proportion_from_phrase(val):
    """Convert a free-text or numeric share answer to a proportion in [0, 1].

    Qualitative phrases (Spanish/Portuguese: "baja", "muy alta", "média", ...)
    are mapped to fixed values. Otherwise the first number in the text is
    used: it is read as a percentage when the text contains ``%`` or when the
    number is greater than 1, and as a plain proportion otherwise. Both ``.``
    and ``,`` are accepted as decimal separators.

    Parameters
    ----------
    val : object
        Raw cell value; ``None`` and float NaN are treated as missing.

    Returns
    -------
    float
        Proportion clipped to [0, 1], or ``np.nan`` when nothing matches.
    """
    if val is None or (isinstance(val, float) and math.isnan(val)):
        return np.nan
    s = str(val).strip().lower()
    # Order matters: the first cue contained in the text wins, so each
    # "muy ..."/"muito ..." phrase must precede the shorter cue it contains.
    cues = [
        ('ninguna', 0.0), ('nula', 0.0), ('muy baja', 0.15), ('baja', 0.3),
        ('media', 0.5), ('moderada', 0.5),
        ('muy alta', 0.95), ('muito alta', 0.95), ('alta', 0.8),
        ('nenhuma', 0.0), ('muito baixa', 0.15), ('baixa', 0.3), ('média', 0.5),
    ]
    for k, v in cues:
        if k in s:
            return v
    m = re.search(r'(\d+(?:[.,]\d+)?)', s)
    if m:
        x = float(m.group(1).replace(',', '.'))
        # A value above 1 cannot be a proportion, so treat it as a percentage
        # even when the "%" sign is missing.
        if '%' in s or x > 1.0:
            x /= 100.0
        return float(np.clip(x, 0.0, 1.0))
    return np.nan

def find_col(columns, patterns):
    """Return the first column whose lower-cased name contains any pattern.

    Columns are scanned in the given order; each name is converted with
    ``str(...).lower()`` and tested for substring containment against every
    entry of ``patterns`` (which are compared as given, without lower-casing).
    ``None`` is returned when no column matches.
    """
    hits = (
        candidate
        for candidate in columns
        if any(needle in str(candidate).lower() for needle in patterns)
    )
    return next(hits, None)

def extract_params_from_excel(path):
    """Derive the visualisation parameters from the first sheet of a workbook.

    The relevant survey columns are located by substring match on their
    headers (``find_col``), converted to numbers (``yn_to_num`` /
    ``proportion_from_phrase``) and combined into weighted means. A column
    that is missing or yields no usable value is replaced by a fixed default.

    Parameters
    ----------
    path : str or path-like
        Location of the Excel workbook; only its first sheet is read.

    Returns
    -------
    dict
        ``amplitude`` and ``frequency`` clipped to [0.1, 1.0], ``inclusion``
        clipped to [0.0, 1.0], and ``hue`` in [0, 1). The hue is derived from
        a SHA-256 digest of the sector column, so it is identical on every
        run for the same data (0.60 when no sector column is found).
    """
    import hashlib  # local import: only needed for the reproducible hue

    # Context manager so the workbook handle is released once the sheet is read.
    with pd.ExcelFile(path) as xl:
        df = xl.parse(xl.sheet_names[0]).copy()
    cols = list(df.columns)

    c_has_women     = find_col(cols, ['¿cuenta con mujeres dentro de su fuerza laboral','mujeres dentro de su fuerza'])
    c_prop_total    = find_col(cols, ['proporción de mujeres en el total del personal'])
    c_prop_lider    = find_col(cols, ['proporción de mujeres en roles de liderazgo','liderazgo','directoras','gerentes'])
    c_has_tech      = find_col(cols, ['¿su empresa tiene un área de tecnología?','área de tecnología'])
    c_prop_techteam = find_col(cols, ['proporción de mujeres en el equipo del área de tecnología'])
    c_sector        = find_col(cols, ['¿en qué sector industrial','rama de actividad'])

    def safe(c):
        # Column data, or an all-NaN stand-in when the header was not found.
        if c is None:
            return pd.Series(np.nan, index=df.index)
        return df[c]

    def fill(series, val):
        # Use the default only when the column carries no usable value at all.
        if series.notna().any():
            return series
        return pd.Series(val, index=df.index)

    s_has_women     = fill(safe(c_has_women).map(yn_to_num), 1.0)
    s_prop_total    = fill(safe(c_prop_total).map(proportion_from_phrase), 0.45)
    s_prop_lider    = fill(safe(c_prop_lider).map(proportion_from_phrase), 0.25)
    s_has_tech      = fill(safe(c_has_tech).map(yn_to_num), 0.6)
    s_prop_techteam = fill(safe(c_prop_techteam).map(proportion_from_phrase), 0.3)

    # Row-wise weighted sums; a row with any NaN term is NaN and is skipped
    # by nanmean.
    amplitude = float(np.nanmean(0.6*s_prop_total + 0.4*s_prop_techteam))
    frequency = float(np.nanmean(0.7*s_prop_lider + 0.3*s_has_tech))
    inclusion = float(np.nanmean(0.3*s_has_women + 0.2*s_has_tech + 0.25*s_prop_total + 0.25*s_prop_techteam))

    if c_sector is not None:
        # Built-in hash() of a str is salted per process, which made the hue
        # change between runs; a digest keeps it stable.
        sector_text = ' | '.join('NA' if pd.isna(v) else str(v) for v in df[c_sector])
        digest = hashlib.sha256(sector_text.encode('utf-8')).digest()
        hue = (int.from_bytes(digest[:8], 'big') % 360) / 360.0
    else:
        hue = 0.60

    def bounded(value, default, lower):
        # Fall back to the default when the mean is NaN, then clip to [lower, 1].
        return float(np.clip(default if math.isnan(value) else value, lower, 1.0))

    return dict(
        amplitude=bounded(amplitude, 0.5, 0.1),
        frequency=bounded(frequency, 0.4, 0.1),
        inclusion=bounded(inclusion, 0.5, 0.0),
        hue=hue,
    )

P = extract_params_from_excel(EXCEL_PATH)
print("Parametre z dát:", P)

# -------- 3) MAPPING DATA → APPEARANCE --------------------------------------
# More points give a smoother shape (scientific look = no "effects").

# Point count grows with inclusion: 6000 at 0.0 up to 30000 at 1.0.
N_POINTS = 6000 + int(round(P['inclusion'] * 24000))
# Marker size and opacity both scale linearly with amplitude.
SIZE = 1.0 + 2.5 * P['amplitude']
ALPHA = 0.45 + 0.35 * P['amplitude']
# 600 frames at 30 frames per second is a clip of about 20 seconds.
FPS = 30
FRAMES = 600
# Time increment per frame (the original note says it matches the MATLAB version).
DT = np.pi / 20

# Colours: low-saturation, full-brightness points on a dark background whose
# hue is shifted by 0.55 of the colour wheel.
FG = hsv_to_rgb([P['hue'], 0.22, 1.0])
BG = tuple(hsv_to_rgb([(P['hue'] + 0.55) % 1.0, 0.35, 0.10]))

# -------- 4) POINT COMPUTATION (same formula as the MATLAB version) ---------
# One index per point, 1..N_POINTS, plus the two coordinates derived from it.
i_vals = np.arange(1, N_POINTS + 1).astype(float)
x_vals = i_vals % 200.0
y_vals = i_vals / 43.0

def compute_points(t):
    """Return the (px, py) coordinate arrays of all points at time ``t``.

    Works on the module-level ``x_vals`` / ``y_vals`` arrays, so both results
    have one entry per point. The arithmetic is kept operation-for-operation
    (including the ``sqrt(...)**2`` round trip) so the output matches the
    original formula exactly.
    """
    wobble = 5.0 * np.cos(x_vals / 14.0) * np.cos(y_vals / 30.0)
    depth = y_vals / 8.0 - 13.0

    magnitude_sq = np.sqrt(wobble * wobble + depth * depth) ** 2
    dist = magnitude_sq / 59.0 + 4.0

    # Only these two terms depend on t.
    ripple = (4.0 / dist) * np.sin(dist * dist - 2.0 * t)
    angle = dist / 2.0 + depth / 99.0 - t / 18.0

    twist = 3.0 * np.sin(np.arctan2(wobble, depth) * depth)
    radius = 60.0 - twist + wobble * (3.0 + ripple)

    # Shift by 200 in both directions.
    px = radius * np.sin(angle) + 200.0
    py = (radius + dist * 9.0) * np.cos(angle) + 200.0
    return px, py

# -------- 5) RENDER → MP4 + PNG -------------------------------------------
# Square 800 x 800 pixel canvas (figure size in inches = pixels / dpi).
W = H = 800
dpi = 100
fig = plt.figure(figsize=(W / dpi, H / dpi), dpi=dpi)
ax = fig.gca()

# Same background colour for the figure patch and the axes.
fig.patch.set_facecolor(BG)
ax.set_facecolor(BG)

# Fixed 0..400 data window, equal aspect ratio, no axis decorations.
ax.set_xlim(0, 400)
ax.set_ylim(0, 400)
ax.set_aspect('equal')
ax.axis('off')

def fig_to_rgb():
    """Draw the module-level figure and return the frame as an RGB array.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape (height, width, 3): the canvas RGBA buffer
        with the alpha channel dropped. It is a view of the canvas buffer, so
        copy it if it has to outlive the next draw.
    """
    fig.canvas.draw()
    # Take the shape from the buffer itself rather than reshaping with
    # get_width_height(): that call reports the logical size, which differs
    # from the rendered pixel size when the device pixel ratio is not 1.
    rgba = np.asarray(fig.canvas.buffer_rgba())
    return rgba[..., :3]

# výstupy s časovou pečiatkou
# Timestamped output files, so repeated runs never overwrite each other.
ts = time.strftime("%Y%m%d-%H%M%S")
OUT_MP4 = os.path.abspath(f"meduza_scientific_{ts}.mp4")
OUT_PNG = os.path.abspath(f"meduza_scientific_{ts}.png")
print("Ukladám do:", OUT_MP4)

# Small legend for the corner of every frame; it does not change between
# frames, so it is built once instead of on every iteration.
txt = (f"amplitude={P['amplitude']:.2f} | frequency={P['frequency']:.2f} | "
       f"inclusion={P['inclusion']:.2f} | hue={P['hue']:.2f}\n"
       f"N={N_POINTS}  FPS={FPS}")

t = 0.0
first_frame_saved = False
try:
    # The context manager closes the writer even if a frame fails, so the
    # file handle and encoder are not left dangling.
    with imageio.get_writer(OUT_MP4, fps=FPS, codec="libx264", pixelformat="yuv420p") as writer:
        for f in range(FRAMES):
            t += DT
            # cla() resets the axes, so background, limits and aspect are
            # re-applied before drawing.
            ax.cla()
            ax.set_facecolor(BG)
            ax.set_xlim(0, 400); ax.set_ylim(0, 400); ax.set_aspect('equal'); ax.axis('off')

            px, py = compute_points(t)
            ax.scatter(px, py, s=SIZE, c=[FG], alpha=ALPHA, marker='o', linewidths=0)
            ax.text(8, 392, txt, ha='left', va='top', fontsize=8, color=(0.9,0.9,0.9), family='DejaVu Sans')

            frame = fig_to_rgb()
            writer.append_data(frame)

            # Save the first frame as a PNG (illustration for the document).
            if not first_frame_saved:
                fig.savefig(OUT_PNG, dpi=160, bbox_inches="tight", facecolor=fig.get_facecolor())
                first_frame_saved = True
finally:
    # Release the figure whether or not rendering succeeded.
    plt.close(fig)

print("✓ Hotovo →", OUT_MP4)
print("✓ Náhľad PNG →", OUT_PNG)


Parametre z dát: {'amplitude': 0.38999999999999996, 'frequency': 0.26499999999999996, 'inclusion': 0.24749999999999997, 'hue': 0.09166666666666666}
Ukladám do: C:\Users\jeney\anaconda_projects\db\meduza_scientific_20251031-191206.mp4
✓ Hotovo → C:\Users\jeney\anaconda_projects\db\meduza_scientific_20251031-191206.mp4
✓ Náhľad PNG → C:\Users\jeney\anaconda_projects\db\meduza_scientific_20251031-191206.png
