In [None]:
# %pip install -r requirements.txt

In [None]:
import pandas as pd
import numpy as np

import utilities as ut

# Data Generating Process

In [None]:
# Seed NumPy's global random number generator once for the whole notebook.
seed = 42
np.random.seed(seed)

In [None]:
# Simulation settings. The descriptions of N, M and n follow the labels used
# in the setup summary printed by the next cell.
N = 100  # number of inputs
M = 1  # number of outputs
n = 50  # number of DMUs
alpha_1 = 0.25  # forwarded to ut.generate_data as alpha_1
gamma = 1  # forwarded to ut.generate_data as gamma
sigma_u = 0.5  # forwarded to ut.generate_data as sigma_u
epsilon = 0.5  # NOTE(review): printed in the summary but not passed to ut.generate_data in the visible code — confirm

In [None]:
# Echo every simulation setting so the run documents its own configuration.
print('INITIAL SETUP \n')
setup_summary = (
    ('Number of inputs', N),
    ('Number of outputs', M),
    ('Number of DMUs', n),
    ('Parameter alpha_1', alpha_1),
    ('Parameter gamma', gamma),
    ('Parameter sigma_u', sigma_u),
    ('Parameter epsilon', epsilon),
)
for label, value in setup_summary:
    print(f'{label}: {value}')

In [None]:
# Generate the inputs `x` and outputs `y` with the project's data-generating helper.
# NOTE(review): `epsilon` is defined with the other settings but is not forwarded
# here — confirm that this is intended.
dgp_settings = dict(n=n, N=N, M=M, alpha_1=alpha_1, gamma=gamma, sigma_u=sigma_u)
x, y = ut.generate_data(**dgp_settings, verbose=True)

## Export results

In [None]:
# Write the generated inputs to disk; the row index is kept as the first CSV column.
inputs_frame = pd.DataFrame(x)
inputs_frame.to_csv("mc_simulation/inputs.csv", index=True)

In [None]:
# Write the generated outputs to disk without a row-index column.
# NOTE(review): the inputs file is written with index=True while this one uses
# index=False — confirm the asymmetry is what downstream readers expect.
outputs_frame = pd.DataFrame(y)
outputs_frame.to_csv("mc_simulation/outputs.csv", index=False)

## Dimensionality reduction

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import umap
%matplotlib inline

In [None]:
# Notebook-wide seaborn look: white style, poster context, 14x10 default figure size.
figure_defaults = {'figure.figsize': (14, 10)}
sns.set(style='white', context='poster', rc=figure_defaults)

In [None]:
# Re-seed NumPy's global RNG with the notebook-wide `seed` before the UMAP fits.
# (`seed5` is not defined anywhere in the notebook and raised NameError.)
np.random.seed(seed)

In [None]:
# Fit UMAP with its default settings and embed the generated inputs `x`.
fit = umap.UMAP()
# `%time` is an IPython magic: it runs the statement and reports how long it took.
%time u = fit.fit_transform(x)

In [None]:
# Scatter the first two embedding coordinates; colour and marker size both follow y.
sc = plt.scatter(u[:, 0], u[:, 1], c=y, s=(10**3) * y)

# Build the legend entries from the scatter itself and anchor the legend
# below the axes, laid out in nine columns.
handles, labels = sc.legend_elements(alpha=1, num=20)
plt.legend(
    handles,
    labels,
    bbox_to_anchor=(0.5, -0.08),
    loc='upper center',
    ncol=9,
)
plt.title('UMAP embedding of input variables');

In [None]:
def draw_umap(n_neighbors=15, min_dist=0.1, n_components=2, metric='euclidean', title=''):
    """Fit UMAP on the notebook-level inputs ``x`` and scatter-plot the embedding.

    Points are coloured by ``y`` and sized by ``1000 * y``. Both ``x`` and ``y``
    are read from the notebook namespace; they are not parameters.

    Parameters
    ----------
    n_neighbors, min_dist, metric
        Forwarded unchanged to ``umap.UMAP``.
    n_components : int
        Embedding dimension, forwarded to ``umap.UMAP``. Only 1, 2 and 3 can be
        drawn: 1 plots the single coordinate against the row position, 2 is a
        flat scatter and 3 uses a 3D axes.
    title : str
        Title placed on the axes (font size 18).

    Raises
    ------
    ValueError
        If ``n_components`` is not 1, 2 or 3. The check runs before the fit so
        an undrawable request fails immediately instead of paying for the fit
        and then showing an empty figure.
    """
    if n_components not in (1, 2, 3):
        raise ValueError(
            'draw_umap can only plot 1, 2 or 3 components, got {!r}'.format(n_components)
        )

    reducer = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        metric=metric,
    )
    embedding = reducer.fit_transform(x)

    marker_sizes = (10**3) * y
    fig = plt.figure()
    if n_components == 1:
        ax = fig.add_subplot(111)
        # A 1-D embedding has no second coordinate, so spread points by row position.
        ax.scatter(embedding[:, 0], range(len(embedding)), c=y, s=marker_sizes)
    elif n_components == 2:
        ax = fig.add_subplot(111)
        ax.scatter(embedding[:, 0], embedding[:, 1], c=y, s=marker_sizes)
    else:
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(embedding[:, 0], embedding[:, 1], embedding[:, 2], c=y, s=marker_sizes)
    ax.set_title(title, fontsize=18)

In [None]:
# Draw the embedding for two neighbourhood sizes.
# The loop variable is deliberately not called `n`: that name holds the number
# of DMUs used by the data-generating cells and must not be overwritten here.
for n_neighbors in (2, 5):
    draw_umap(n_neighbors=n_neighbors, title='n_neighbors = {}'.format(n_neighbors))

In [None]:
# Draw the embedding for two min_dist settings.
for min_dist in (0.0, 0.8):
    draw_umap(min_dist=min_dist, title=f'min_dist = {min_dist}')

In [None]:
# Single-component embedding; draw_umap plots the coordinate against the row position.
draw_umap(n_components=1, title='n_components = 1')

In [None]:
# Three-component embedding; draw_umap renders it on a 3D axes.
draw_umap(n_components=3, title='n_components = 3')

In [None]:
# Export UMAP embeddings of the inputs at several target dimensionalities.
# Each embedding is written to mc_simulation/inputs_umap_<NN>_dims.csv, where
# <NN> is the zero-padded component count, with the row index kept as the first
# column. After the loop, `fit` and `u` hold the last (40-component) model and
# embedding.
for n_dims in (2, 5, 20, 30, 40):
    fit = umap.UMAP(n_components=n_dims)
    u = fit.fit_transform(x)
    pd.DataFrame(u).to_csv(f"mc_simulation/inputs_umap_{n_dims:02d}_dims.csv", index=True)