## Modeling Figure 2 Biomass as a Normal Distribution

In [1]:
# Enable IPython's autoreload extension in mode 2, so that edited project
# modules are re-imported before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import arviz as az

import pymc3 as pm

In [15]:
import math

In [44]:
from datetime import datetime

### Visually obtained values from graph

#### Graph of Biomass: Average relative effect across experiments (red symbol and error bars)

In [4]:
# Number of data sets behind the averaged biomass effect, as read from the figure.
# `bismass_number_of_data_sets` is the original, misspelled name; it is kept as an
# alias so cells that already use it keep working. New code should use the
# correctly spelled `biomass_number_of_data_sets`.
biomass_number_of_data_sets = bismass_number_of_data_sets = 12

In [22]:
# The sample size of each data set is not given, so 100 is an assumed value.
# The standard deviation derived further down scales with the square root of
# the total sample size, so it depends directly on this assumption.
sample_size_in_data_set = 100

In [23]:
# Total assumed number of observations:
# (observations per data set) x (number of data sets).
biomass_total_sample_size = sample_size_in_data_set * bismass_number_of_data_sets

In [9]:
# Only the median is available; as a first approximation it is reused as the
# mean of the normal distribution.
biomass_average_median = -0.3
biomass_average_mean = biomass_average_median

In [17]:
# Upper limit of the confidence interval (top of the error bar), read visually
# from the graph.
biomass_average_confidence_interval_upper_limit = 0.0

In [11]:
# Lower limit of the confidence interval (bottom of the error bar), read
# visually from the graph.
biomass_average_confidence_interval_lower_limit = -0.4

##### Standard deviation derived from the confidence interval, using the second equation from https://handbook-5-1.cochrane.org/chapter_7/7_7_3_2_obtaining_standard_deviations_from_standard_errors_and.htm

In [24]:
# Recover the standard deviation from the confidence interval of the mean:
#     SD = sqrt(N) * (upper limit - lower limit) / 3.92
# where N is the total (assumed) sample size.
# NOTE(review): the divisor 3.92 (= 2 * 1.96) presumes the error bars show a
# 95% confidence interval — confirm against the source figure.
biomass_average_standard_deviation = math.sqrt(biomass_total_sample_size) * (
    biomass_average_confidence_interval_upper_limit - biomass_average_confidence_interval_lower_limit) / 3.92

In [25]:
# Display the standard deviation computed with the inline formula.
biomass_average_standard_deviation

3.5347975664670974

In [48]:
from standard_deviation_from_confidence_interval import standard_deviation_from_confidence_interval

In [49]:
# Cross-check: obtain the standard deviation again through the project helper,
# passing (total sample size, upper limit, lower limit) positionally.
biomass_average_standard_deviation_2 = standard_deviation_from_confidence_interval(
    biomass_total_sample_size,
    biomass_average_confidence_interval_upper_limit,
    biomass_average_confidence_interval_lower_limit,
)

In [50]:
# Display the standard deviation returned by the helper function.
biomass_average_standard_deviation_2

3.5347975664670974

In [54]:
# The inline formula and the helper must agree. Compare with a tolerance rather
# than `==`: two floating-point evaluations of the same formula can differ in
# the last bits when the operations are ordered differently.
assert math.isclose(
    biomass_average_standard_deviation,
    biomass_average_standard_deviation_2,
    rel_tol=1e-9,
), (
    "standard deviation mismatch: "
    f"{biomass_average_standard_deviation} != {biomass_average_standard_deviation_2}"
)

##### Generate synthetic data

In [35]:
# Fixed seed so that re-running the notebook regenerates exactly the same
# synthetic data instead of a different random sample on every run.
BIOMASS_AVERAGE_RANDOM_SEED = 42

with pm.Model() as biomass_average_model:
    # Normal distribution centred on the approximated mean, with the standard
    # deviation derived from the confidence interval.
    biomass_average = pm.Normal(
        "biomass_average",
        mu=biomass_average_mean,
        sigma=biomass_average_standard_deviation,
    )

    # Draw one synthetic value per assumed observation. The result is a dict
    # keyed by variable name.
    biomass_average_prior = pm.sample_prior_predictive(
        samples=biomass_total_sample_size,
        random_seed=BIOMASS_AVERAGE_RANDOM_SEED,
    )

In [36]:
# Inspect the prior-predictive draws (a dict holding one array per variable name).
biomass_average_prior

{'biomass_average': array([-6.10875387, -1.71861535,  5.70923326, ...,  2.44792302,
        -0.51252211,  2.84488161])}

In [37]:
# Check that the number of draws equals biomass_total_sample_size.
biomass_average_prior['biomass_average'].shape

(1200,)

In [40]:
# Wrap the drawn values in a pandas Series so they can be exported as CSV.
biomass_average_prior_series = pd.Series(
    data=biomass_average_prior['biomass_average'],
)

In [45]:
# Timestamp (YYYYMMDDHHMMSS, local time) used to give each export a unique file name.
current_time_string = format(datetime.now(), "%Y%m%d%H%M%S")

In [47]:
# Write the synthetic values as a single bare column (no index, no header row)
# to a timestamped CSV file in the synthetic-data directory.
biomass_average_prior_series.to_csv(
    f'../../../data/synthetic/photosynthetic_acclimation_to_fluctuating_irradiance_in_plants/figure2_biomass_average_prior_{current_time_string}.csv',
    header=False,
    index=False,
)