In [1]:
from services.data_service import load_site_data
import numpy as np

In [2]:
# Load sites' data
(
    zbar_2017,
    gamma_vals,
    z_2017,
    forestArea_2017_ha,
    theta_vals,
    gamma_coe,
    gamma_coe_sd,
    theta_coe,
    theta_coe_sd,
    gamma_vcov_array,
    theta_vcov_array,
    site_theta_2017_df,
    gamma_data,
) = load_site_data(
    10,
    norm_fac=1e11,
)

Data successfully loaded from '/Users/patriciohernandez/Projects/project-amazon/data/hmc/calibration_10SitesModel.csv'


In [8]:
site_theta_2017_df

Unnamed: 0,id,X1,historical_precip,historical_temp,I.historical_temp.2.,lat,I.lat.2.,cattleSlaughter_farmGatePrice_2017,distance,zbar_2017_muni
0,1,1.01511,1.319569,1.028325,1.030690,1.036532,1.064609,0.991444,2.144612,4.556219e+05
1,5,1.01511,1.319569,1.028325,1.030690,1.036532,1.064609,0.991444,2.144612,4.556219e+05
2,1,1.01511,1.407192,1.048532,1.071596,1.048171,1.088652,0.922197,1.599137,1.181881e+06
3,2,1.01511,1.407192,1.048532,1.071596,1.048171,1.088652,0.922197,1.599137,1.181881e+06
4,1,1.01511,1.513815,1.025520,1.025076,1.057716,1.108569,0.992270,1.757139,5.606640e+06
...,...,...,...,...,...,...,...,...,...,...
682,10,1.01511,0.763280,0.941764,0.864474,0.907094,0.815322,1.123952,0.860028,2.018173e+05
683,10,1.01511,0.796777,0.960331,0.898895,0.907236,0.815577,1.445564,0.347741,1.144672e+05
684,10,1.01511,0.812216,0.940058,0.861343,0.915442,0.830398,1.290441,0.601604,1.287097e+06
685,10,1.01511,0.737937,0.986873,0.949270,0.919497,0.837771,1.108330,1.110184,5.881073e+05


In [4]:
def gamma_fitted(gamma_coe, gamma_dataframe):
    # Copy df
    gamma_data = gamma_dataframe.copy()

    # Compute fitted values
    gamma_data["fitted_value"] = np.exp(
        (gamma_data.iloc[:, 1:6] * gamma_coe).sum(axis=1)
    )

    # Subset columns
    gamma_data = gamma_data[["id", "fitted_value"]]

    # Group by id and compute weighted mean for each group
    result = (
        gamma_data.groupby("id")["fitted_value"]
        .mean()
        .reset_index(name="gamma2017_Sites")
    )
    return result["gamma2017_Sites"].to_numpy()

In [5]:
def new_gamma_fitted(gamma_coe, gamma_dataframe):
    # Copy df
    gamma_data = gamma_dataframe.copy()

    # Compute fitted values
    fitted = np.exp((gamma_data.iloc[:, 1:6] * gamma_coe).sum(axis=1))

    # Averaging matrix
    G = np.array([(gamma_data["id"].to_numpy() == i).astype(int) for i in range(1, 11)])
    G = G / G.sum(axis=1, keepdims=True)
    print(G)
    return G @ fitted

In [6]:
gamma_fitted(gamma_coe, gamma_data)

array([844.7341586 , 574.09119512, 413.27406825, 319.21193319,
       760.31541328, 493.2070635 , 330.92917782, 268.47542448,
       392.09250508, 344.99956016])

In [7]:
new_gamma_fitted(gamma_coe, gamma_data)

[[0.08333333 0.         0.08333333 ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.02702703 0.02702703 0.02702703]]


array([844.7341586 , 574.09119512, 413.27406825, 319.21193319,
       760.31541328, 493.2070635 , 330.92917782, 268.47542448,
       392.09250508, 344.99956016])

In [9]:
def theta_fitted(theta_coe, theta_dataframe):
    # Copy df
    theta_data = theta_dataframe.copy()

    # Compute fitted values
    theta_data["fitted_value"] = np.exp(
        (theta_data.iloc[:, 1:9] * theta_coe).sum(axis=1)
    )

    # Subset and filter
    theta_data = theta_data[["id", "zbar_2017_muni", "fitted_value"]]
    theta_data = theta_data[theta_data["zbar_2017_muni"].notna()]

    # Weighted average function
    aux_price_2017 = 44.9736197781184

    def weighted_mean(group):
        return np.average(
            group["fitted_value"] / aux_price_2017, weights=group["zbar_2017_muni"]
        )

    # Take weighted average
    result = (
        theta_data.groupby("id")
        .apply(weighted_mean)
        .reset_index(name="theta2017_Sites")
    )

    # Return as numpy array
    return result["theta2017_Sites"].to_numpy()

In [44]:
def new_theta_fitted(theta_coe, theta_dataframe):
    # Copy df
    theta_data = theta_dataframe.copy()

    # Compute fitted values
    fitted = np.exp((theta_data.iloc[:, 1:9] * theta_coe).sum(axis=1))

    # Weighted average function
    aux_price_2017 = 44.9736197781184

    # Averaging matrix
    G = np.array([(theta_data["id"].to_numpy() == i).astype(int) for i in range(1, 11)])
    
    # Multily by weights
    #G = theta_data["zbar_2017_muni"].to_numpy() * G
    print(G.sum(axis=1, keepdims=True))
    
    
    G = G / G.sum(axis=1, keepdims=True)

    

    print(G.shape)
    return (G @ fitted) / aux_price_2017

In [23]:
theta_fitted(theta_coe=theta_coe, theta_dataframe=site_theta_2017_df).flatten()

array([0.66715948, 0.72877452, 0.65893955, 1.12851283, 1.57180292,
       1.53836421, 1.53313501, 1.87627255, 2.10084797, 2.65624448])

In [45]:
new_theta_fitted(theta_coe=theta_coe, theta_dataframe=site_theta_2017_df).flatten()

[[ 12]
 [ 45]
 [ 70]
 [117]
 [ 45]
 [ 86]
 [119]
 [106]
 [ 56]
 [ 31]]
(10, 687)


array([0.7026527 , 0.66498835, 0.61273959, 0.86658357, 1.43302788,
       1.48626726, 1.93168886, 1.71595256, 2.18693469, 2.59085416])