In [1]:
import pandas as pd
import plotly.express as px
import data_preprocessing.data_preprocess as dp
import numpy as np
from scipy.stats import gaussian_kde
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from scipy.spatial.distance import cdist
from scipy.stats import multivariate_normal
from scipy.spatial.distance import cdist

In [2]:
# Names of the policy dimensions available to this analysis (11 entries).
policy_vars = [
    "Military: Positive",
    "European Community/Union: Positive",
    "Freedom and Human Rights",
    "Democracy",
    "Political Corruption",
    "Environmental Protection",
    "Welfare State",
    "Right-left position",
    "Planned Economy",
    "Equality: Positive",
    "Opposition to Immigration",
]

In [3]:
# Load the party and voter frames for year=2021 on the
# "Opposition to Immigration" (x) and "Welfare State" (y) dimensions.
# NOTE(review): the output recorded under this cell ends in a KeyError for
# "Opposition to Immigration" in the voter data — confirm the cell runs on a
# fresh kernel before relying on anything below.
party_scaled, voter_scaled = dp.get_scaled_party_voter_data(x_var='Opposition to Immigration', y_var='Welfare State',year=2021)

  df = pd.read_csv(csv_path)
  df_filtered = df_filtered.apply(pd.to_numeric, errors="ignore")
  df_filtered['Year'] = (pd.to_datetime(df_filtered['Date'], dayfirst=True).dt.year).astype(str)


voters_2021.sav


KeyError: 'None of the columns for policy‐dimension "Opposition to Immigration" were found in your voter data'

In [None]:
# Display the party frame returned by dp.get_scaled_party_voter_data.
party_scaled

In [None]:
# Display the voter frame returned by dp.get_scaled_party_voter_data.
voter_scaled

In [None]:
import plotly.io as pio
# Render plotly figures through the iframe renderer.
pio.renderers.default = 'iframe'

# Scatter voters and parties together: voters share a single colour/size,
# parties are drawn larger and coloured by their "Party_Name".
# NOTE(review): x="Democracy" differs from the x_var ("Opposition to
# Immigration") the frames were loaded with, and the title says "Unscaled"
# although the inputs are the *_scaled frames — presumably left over from an
# earlier experiment; confirm these columns exist and the title is intended.
fig = px.scatter(
    pd.concat([
        voter_scaled.assign(Type="Voter", Size=5, Color="Voter"),
        party_scaled.assign(Type="Party", Size=15, Color=party_scaled["Party_Name"])
    ]),
    x="Democracy",
    y="Welfare State",
    color="Color",
    symbol="Type",
    size="Size",
    title="Unscaled Voter and Party Positions"
)
# No `text` is mapped above, so this only takes effect if traces carry text.
fig.update_traces(textposition="top center")
fig.show()

In [None]:
# Stack voters and parties into one frame (fresh 0..n-1 index); this frame is
# reused by the simulation overlay plot further down.
concatenated_df = pd.concat(
    [voter_scaled, party_scaled],
    ignore_index=True,
)

# One scatter of the scaled positions, coloured and shaped by "Label".
fig = px.scatter(
    data_frame=concatenated_df,
    x='Opposition to Immigration Scaled',
    y='Welfare State Scaled',
    color='Label',
    symbol='Label',
)
fig.update_traces(marker={"size": 10})
fig.update_layout(title='Scaled Voter and Party Positions')
fig.show()

In [None]:
from scipy.stats import gaussian_kde
import numpy as np

# Dimension names; the "<name> Scaled" columns of voter_scaled are used below.
x_var = "Opposition to Immigration"
y_var = "Welfare State"

# Scaled voter coordinates as 1-D arrays (also reused by later cells).
x = voter_scaled[f"{x_var} Scaled"].values
y = voter_scaled[f"{y_var} Scaled"].values

# gaussian_kde expects one row per dimension, so stack to shape (2, n_voters).
# `data` is later passed to run_simulation as the initial state.
data = np.vstack([x, y])

# Kernel density estimate of the voter distribution with Scott's bandwidth rule.
kde = gaussian_kde(data, bw_method='scott')

# Spot-check: estimated density at the point (5, 5).
density_at_5_5 = kde([5, 5])


In [None]:
def voter_density(x_input, y_input):
    """Evaluate the notebook-level ``kde`` at the given coordinates.

    ``x_input`` and ``y_input`` may be scalars or equally-shaped arrays (for
    example a meshgrid pair). The result has the shape of ``x_input``.
    """
    target_shape = np.shape(x_input)
    # gaussian_kde wants a (2, n_points) array, so flatten both inputs first.
    coords = np.vstack((np.ravel(x_input), np.ravel(y_input)))
    return kde(coords).reshape(target_shape)

In [None]:
# Evaluates the KDE at (5, 5); not the last expression of the cell, so the
# value is computed but neither displayed nor stored.
voter_density(5, 5)

# 100 x 100 evaluation grid over [0, 10] x [0, 10] and the KDE density on it.
# NOTE(review): `X` is rebound to the voter coordinate matrix in a later cell.
X, Y = np.meshgrid(np.linspace(0, 10, 100), np.linspace(0, 10, 100))
Z = voter_density(X, Y)

In [None]:
from sklearn.mixture import GaussianMixture
import numpy as np

# Voter coordinates as an (n_voters, 2) matrix: one row per voter.
X = voter_scaled[[f"{x_var} Scaled", f"{y_var} Scaled"]].values

# Three-component, full-covariance Gaussian mixture; random_state=0 fixes the
# initialisation so the fit is repeatable.
gmm = GaussianMixture(n_components=3, covariance_type='full', random_state=0)
gmm.fit(X)

In [None]:
from scipy.stats import multivariate_normal

def gmm_density(x_input, y_input):
    """Mixture density of the notebook-level ``gmm`` at the given coordinates.

    Sums ``weight * N(point; mean, cov)`` over the fitted components.
    ``x_input`` and ``y_input`` may be scalars or equally-shaped arrays; the
    result has the shape of ``x_input``.
    """
    # One (x, y) row per evaluation point.
    coords = np.column_stack((np.ravel(x_input), np.ravel(y_input)))

    total = np.zeros(len(coords))
    components = zip(gmm.weights_, gmm.means_, gmm.covariances_)
    for w, mu, sigma in components:
        total = total + w * multivariate_normal(mean=mu, cov=sigma).pdf(coords)

    return total.reshape(np.shape(x_input))

In [None]:
# 100 x 100 grid over [0, 10] x [0, 10]; Z now holds the GMM density on it
# (this rebinds the Z computed from the KDE in an earlier cell).
Xgrid, Ygrid = np.meshgrid(np.linspace(0, 10, 100), np.linspace(0, 10, 100))
Z = gmm_density(Xgrid, Ygrid)

In [None]:
# Inspect the fitted mixture: component weights, means and covariance matrices.
print("Weights:", gmm.weights_)
print("Means:\n", gmm.means_)
print("Covariances:\n", gmm.covariances_)

In [None]:
from scipy.stats import multivariate_normal
import numpy as np

def gmm_indefinite_integral(x, y):
    """Cumulative distribution of the notebook-level ``gmm`` at ``(x, y)``.

    Returns the weighted sum of each component's multivariate-normal CDF
    evaluated at the point ``(x, y)``.
    """
    upper = np.array([x, y])
    return sum(
        w * multivariate_normal.cdf(upper, mean=mu, cov=cov)
        for w, mu, cov in zip(gmm.weights_, gmm.means_, gmm.covariances_)
    )

In [None]:
from scipy.stats import multivariate_normal

def gmm_density_and_loggrad(x_input, y_input, gmm):
    """Gradient of the log-density of a 2-D Gaussian mixture.

    Parameters
    ----------
    x_input, y_input : scalar or array-like
        Coordinates of the evaluation points; both are flattened and paired
        element-wise. Integer inputs are accepted.
    gmm : object
        Fitted mixture exposing ``weights_``, ``means_`` and ``covariances_``
        (e.g. ``sklearn.mixture.GaussianMixture`` with full covariances).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_points, 2)`` holding
        ``grad p(x) / (p(x) + 1e-9)`` for every point. Despite the function
        name, only the log-gradient is returned, not the density itself.
    """
    x_flat = np.ravel(x_input)
    y_flat = np.ravel(y_input)
    points = np.column_stack([x_flat, y_flat])
    N = len(points)

    density_vals = np.zeros(N)
    # Always accumulate in float: zeros_like(points) would inherit an integer
    # dtype from integer inputs and make the in-place += below raise.
    grad = np.zeros(points.shape, dtype=float)

    for weight, mean, cov in zip(gmm.weights_, gmm.means_, gmm.covariances_):
        rv = multivariate_normal(mean=mean, cov=cov, allow_singular=True)
        # scipy squeezes the result to a scalar for a single point; restore
        # the 1-D shape so the [:, None] broadcast below is always valid.
        pdf_vals = np.atleast_1d(rv.pdf(points))
        diff = points - mean
        # Pseudo-inverse keeps this defined for (near-)singular covariances.
        inv_cov = np.linalg.pinv(cov)
        # Gradient of a Gaussian pdf: -pdf(x) * Sigma^{-1} (x - mu).
        grad_comp = -pdf_vals[:, None] * (diff @ inv_cov.T)

        density_vals += weight * pdf_vals
        grad += weight * grad_comp

    # eps guards against division by zero where the density underflows.
    eps = 1e-9
    grad_log_density = grad / (density_vals[:, None] + eps)

    return grad_log_density

In [None]:
def reflect(val, low, high):
    """Fold ``val`` back into ``[low, high]`` by mirror reflection at the bounds.

    Values outside the interval bounce off the nearer boundary as often as
    needed; the mapping is periodic with period ``2 * (high - low)``.
    Accepts scalars or arrays and returns a NumPy value of the same shape.
    """
    span = high - low
    period = 2 * span
    # Position within one full there-and-back cycle, measured from `low`.
    offset = (val - low) % period
    # The second half of the cycle is the return trip, so mirror it.
    folded = np.where(offset >= span, period - offset, offset)
    return low + folded

In [None]:
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.mixture import GaussianMixture

def run_simulation(data, T, sigma_noise, gmm_components, alpha, beta, gamma):
    """Iterate the voter-position dynamics for ``T`` steps.

    Each step refits a Gaussian mixture to the current positions and updates
    every point as

        alpha * (Gaussian-kernel weighted mean of all points)
        - beta * (gradient of the mixture log-density at the point)
        + gamma * (normal noise with std ``sigma_noise``)

    after which positions are reflected back into ``[-4, 4]``.

    Parameters
    ----------
    data : numpy.ndarray
        Initial positions, shape ``(D, N)`` (one row per dimension). The
        gradient helper is called with rows 0 and 1, so ``D`` must be 2.
    T : int
        Number of update steps; ``T == 0`` returns a copy of ``data``.
    sigma_noise : float
        Standard deviation of the per-step noise.
    gmm_components : int
        Number of mixture components fitted at every step.
    alpha, beta, gamma : float
        Weights of the attraction, density-gradient and noise terms.

    Returns
    -------
    numpy.ndarray
        Final positions, shape ``(D, N)``.

    Notes
    -----
    Uses the global ``np.random`` state and an unseeded ``GaussianMixture``,
    so results differ between runs unless the caller seeds NumPy.
    """
    low, high = -4, 4
    D, N = data.shape
    # Only the latest state is needed; keeping every step would cost O(T*N*D).
    X_t = data.copy()

    for _ in range(T):
        # Tiny jitter so duplicate points cannot make the GMM fit degenerate.
        X_t_noisy = X_t.T + np.random.normal(scale=1e-6, size=(N, D))

        gmm = GaussianMixture(n_components=gmm_components, covariance_type='full', reg_covar=1e-2)
        gmm.fit(X_t_noisy)

        # Row-normalised Gaussian kernel weights between all pairs of points.
        distances = cdist(X_t_noisy, X_t_noisy, metric='euclidean')
        W = np.exp(-distances ** 2)
        W /= W.sum(axis=1, keepdims=True)

        weighted_sum = W @ X_t_noisy
        F_x = gmm_density_and_loggrad(X_t[0, :], X_t[1, :], gmm)

        noise = np.random.normal(0, sigma_noise, size=(N, D))

        X_next = alpha * weighted_sum - beta * F_x + gamma * noise

        # Reflect at the boundaries BEFORE clipping. Clipping first (as the
        # previous version did) makes the reflection a no-op and piles every
        # out-of-range point onto the boundary.
        X_next = reflect(X_next, low, high)
        # Safety net only: reflect() already maps finite values into range.
        X_next = np.clip(X_next, low, high)

        X_t = X_next.T

    return X_t


In [None]:
import numpy as np
import plotly.express as px

def plot_with_simulation_separate(concatenated_df, simulation_points):
    """Overlay simulated points on the scaled voter/party scatter plot.

    Prints a few diagnostics (data ranges, simulation ranges, NaN/inf checks)
    and returns the figure without showing it.

    Parameters
    ----------
    concatenated_df : pandas.DataFrame
        Must contain the columns 'Opposition to Immigration Scaled',
        'Welfare State Scaled' and 'Label'.
    simulation_points : sequence
        ``simulation_points[0]`` holds the x coordinates and
        ``simulation_points[1]`` the y coordinates of the simulated points.

    Returns
    -------
    plotly figure
        Scatter of ``concatenated_df`` with the simulation drawn as a
        semi-transparent marker trace and axes padded by 10 %.
    """
    x_col = 'Opposition to Immigration Scaled'
    y_col = 'Welfare State Scaled'

    print("Data ranges and checks:")
    # Both min and max come from the x column; the max was previously read
    # from 'Democracy Scaled', which is a different (possibly absent) column.
    print("Opposition to Immigration Scaled min/max:", concatenated_df[x_col].min(), concatenated_df[x_col].max())
    print("Welfare State Scaled min/max:", concatenated_df[y_col].min(), concatenated_df[y_col].max())

    sim_x = np.array(simulation_points[0])
    sim_y = np.array(simulation_points[1])

    print("Simulation X min/max:", np.min(sim_x), np.max(sim_x))
    print("Simulation Y min/max:", np.min(sim_y), np.max(sim_y))

    print("Any NaNs or infs in simulation X?", np.isnan(sim_x).any(), np.isinf(sim_x).any())
    print("Any NaNs or infs in simulation Y?", np.isnan(sim_y).any(), np.isinf(sim_y).any())

    # Cap runaway values (including +/-inf) so they cannot blow up the axes.
    sim_x = np.clip(sim_x, -1e3, 1e3)
    sim_y = np.clip(sim_y, -1e3, 1e3)

    fig = px.scatter(
        concatenated_df,
        x=x_col,
        y=y_col,
        color='Label',
        symbol='Label'
    )

    fig.add_scatter(
        x=sim_x,
        y=sim_y,
        mode='markers',
        marker=dict(
            color='rgba(0,0,0,0.2)',
            size=4,
            symbol='circle'
        ),
        name='Simulation Points'
    )

    # Axis limits cover both the survey data and the simulated points.
    xmin = min(concatenated_df[x_col].min(), np.min(sim_x))
    xmax = max(concatenated_df[x_col].max(), np.max(sim_x))
    ymin = min(concatenated_df[y_col].min(), np.min(sim_y))
    ymax = max(concatenated_df[y_col].max(), np.max(sim_y))

    padding_x = (xmax - xmin) * 0.1
    padding_y = (ymax - ymin) * 0.1

    fig.update_layout(
        title='Scaled Positions with Simulation Overlay',
        xaxis=dict(range=[xmin - padding_x, xmax + padding_x]),
        yaxis=dict(range=[ymin - padding_y, ymax + padding_y]),
    )

    return fig


In [None]:
# Simulation settings shared by the cells below.
N = len(x)            # number of voters (length of the scaled x coordinates)
D = 2                 # number of policy dimensions
T = 500               # number of simulation steps
sigma_noise = 0.1     # std of the per-step noise
gmm_components = 3    # mixture components fitted at each step

In [None]:
import plotly.io as pio
# Render plotly figures through the iframe renderer.
pio.renderers.default = 'iframe'

# Run 10 steps from the observed voter positions and overlay the result
# on the scaled voter/party scatter.
sim = run_simulation(
    data,
    T=10,
    sigma_noise=sigma_noise,
    gmm_components=gmm_components,
    alpha=0.01,
    beta=1,
    gamma=0.1,
)
fig = plot_with_simulation_separate(concatenated_df, sim)
fig.show()

In [None]:
# Years to load; passed to the loader as strings.
years = ["2009", "2013", "2017", "2021"]
# Maps year -> voter coordinates of shape (2, n_voters).
voter_data_by_year = {}

# NOTE(review): this loop rebinds the notebook-level `party_scaled` and
# `voter_scaled`; after the cell they hold the last year's data, so earlier
# cells re-run afterwards will see different frames.
for year in years:
    party_scaled, voter_scaled = dp.get_scaled_party_voter_data(
        x_var='Opposition to Immigration',
        y_var='Welfare State',
        year=year
    )

    # Transpose to one row per dimension, the layout run_simulation unpacks
    # with `D, N = data.shape`.
    voter_coords = voter_scaled[['Opposition to Immigration Scaled', 'Welfare State Scaled']].to_numpy().T
    
    voter_data_by_year[year] = voter_coords

# Chronological list of coordinate arrays (string sort of the year keys).
sorted_years = sorted(voter_data_by_year.keys())
yearly_voter_data = [voter_data_by_year[year] for year in sorted_years]

In [None]:
from scipy.optimize import minimize


def objective(params, yearly_data, T_guess, sigma_noise, gmm_components):
    """Negative log-likelihood of observed year-to-year voter movement.

    For every consecutive pair of years the simulation is started from the
    earlier year's voters, a Gaussian mixture is fitted to the simulated end
    state, and the later year's real voters are scored under that mixture.

    Parameters
    ----------
    params : sequence of three floats
        ``(alpha, beta, gamma)`` forwarded to ``run_simulation``.
    yearly_data : sequence of numpy.ndarray
        Voter coordinates per year in chronological order, each ``(D, N)``.
    T_guess : int
        Number of simulation steps assumed between two observations.
    sigma_noise : float
        Noise standard deviation forwarded to ``run_simulation``.
    gmm_components : int
        Number of mixture components, used both inside the simulation and for
        the mixture fitted to its output.

    Returns
    -------
    float
        Sum over transitions of ``-gmm_sim.score(X_real.T)``, i.e. the negated
        average per-sample log-likelihood; ``0.0`` with fewer than two years.
        Lower is better.
    """
    alpha, beta, gamma = params
    total_divergence = 0.0

    # Walk over consecutive (start year, next year) pairs.
    for X_start, X_real in zip(yearly_data[:-1], yearly_data[1:]):
        X_sim = run_simulation(
            data=X_start,
            T=T_guess,
            sigma_noise=sigma_noise,
            gmm_components=gmm_components,
            alpha=alpha,
            beta=beta,
            gamma=gamma
        )

        # Only the mixture fitted to the simulated voters is needed. The
        # previous version also fitted one to the real data and never used
        # it, adding a wasted EM fit to every objective evaluation.
        gmm_sim = GaussianMixture(n_components=gmm_components, covariance_type='full', reg_covar=1e-2).fit(X_sim.T)

        total_divergence += -gmm_sim.score(X_real.T)

    return total_divergence


# Shape of the first year's data; D and N are not used again in this cell.
D, N = yearly_voter_data[0].shape
# Settings for the fit. These rebind `sigma_noise` and `gmm_components`
# from the earlier configuration cell (components: 3 -> 4).
T_guess = 50
sigma_noise = 0.1
gmm_components = 4

# Starting point for (alpha, beta, gamma).
initial_params = [1.0, 1.0, 0.1]

# NOTE(review): `objective` is stochastic (run_simulation draws from
# np.random and the GMM fits are unseeded), while L-BFGS-B without `jac`
# presumably relies on finite-difference gradients — the noise may dominate
# those differences. Consider seeding the simulation or a derivative-free
# method; confirm before trusting the fitted values.
result = minimize(
    objective,
    initial_params,
    args=(yearly_voter_data, T_guess, sigma_noise, gmm_components),
    method='L-BFGS-B',
    bounds=[(0.0, 10.0), (0.0, 10.0), (0.0, 2.0)]
)

# Report the optimiser's final parameter vector.
alpha_fit, beta_fit, gamma_fit = result.x
print("\n✅ Fitted Parameters:")
print(f"  Alpha: {alpha_fit:.4f}")
print(f"  Beta:  {beta_fit:.4f}")
print(f"  Gamma: {gamma_fit:.4f}")