In [1]:
%load_ext lab_black

In [2]:
import numpy as np
import pandas as pd

import gc
import timeit
import sys
import os

from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

In [3]:
from scipy.optimize import minimize

# Residual Sum of Squares calculations: synthetic population data against reported data for seen deer per hunting hour

In [4]:
# Restoring the reported data that was previously saved with the %store magic
# https://ipython.org/ipython-doc/rel-0.12/config/extensions/storemagic.html

%store -r data_Averoy
%store -r data_Tingvoll
%store -r data_Surnadal
%store -r data_Sunndal
%store -r data_Vestnes
%store -r data_Laerdal

In [5]:
def sum_squares_seen_deer(
    number_of_years,
    municipality_file,
    empirical_observations,
    compare,
):
    """
    Calculate, for every synthetic population trajectory, the scaling factor
    that minimises the residual sum of squares (RSS) against the reported
    data, together with that minimum RSS.

    Parameters
    ----------
    number_of_years : int
        Number of consecutive rows that form one trajectory in the pickled
        frame.
    municipality_file : str
        File name of the pickled data frame inside "synthetic_data/".
    empirical_observations : sequence of numbers
        Reported values, one per year; must contain number_of_years entries.
    compare : str
        "before_hunting" compares against column "tot_pop_bh",
        "after_hunting" against column "tot_pop_ah".

    Returns
    -------
    list
        One [scaling_factor, RSS_min] pair per trajectory, in the row order
        of the filtered frame. Trailing rows that do not fill a complete
        trajectory are ignored.

    Raises
    ------
    ValueError
        If compare is not one of the two supported values, or if the number
        of observations differs from number_of_years.
    """
    # Resolve the column first so a bad argument fails before any file I/O
    column_by_mode = {
        "before_hunting": "tot_pop_bh",
        "after_hunting": "tot_pop_ah",
    }
    if compare not in column_by_mode:
        raise ValueError(
            "compare must be 'before_hunting' or 'after_hunting', got "
            + repr(compare)
        )
    comp_choice = column_by_mode[compare]

    seen_deer_e = np.asarray(empirical_observations, dtype=float)
    if seen_deer_e.shape != (number_of_years,):
        raise ValueError(
            "empirical_observations must hold exactly number_of_years values"
        )

    # Loading the targeted municipality data frame created by the synthetic
    # population generator.
    # NOTE(review): pd.read_pickle must only be used on trusted files.
    save_dir = "synthetic_data/"
    data_frame = pd.read_pickle(save_dir + municipality_file)
    # Keeping only the rows where sh_threshold is below 0.01
    data_frame = data_frame.loc[data_frame["sh_threshold"] < 0.01]

    # One row per trajectory, one column per observation year
    n_trajectories = len(data_frame) // number_of_years
    seen_deer_p = (
        data_frame[comp_choice]
        .to_numpy(dtype=float)[: n_trajectories * number_of_years]
        .reshape(n_trajectories, number_of_years)
    )

    # Closed-form least-squares scaling factor: sum(p * e) / sum(p * p)
    scaling_factor = np.sum(seen_deer_p * seen_deer_e, axis=1) / np.sum(
        seen_deer_p * seen_deer_p, axis=1
    )
    seen_deer_p_scaled = scaling_factor[:, np.newaxis] * seen_deer_p

    # Minimum Residual Sum of Squares for each trajectory
    RSS_min = np.sum((seen_deer_p_scaled - seen_deer_e) ** 2, axis=1)

    return [list(pair) for pair in zip(scaling_factor.tolist(), RSS_min.tolist())]

In [6]:
def make_RSS_frame(municipality_file, RSS_list, obs_year=2021):
    """
    Build a frame with one row per synthetic trajectory, extended with its
    scaling factor and RSS value, sorted by ascending RSS.

    Parameters
    ----------
    municipality_file : str
        File name of the pickled data frame inside "synthetic_data/".
    RSS_list : list
        [scaling_factor, RSS] pairs, one per trajectory, in the same row
        order as the filtered frame.
    obs_year : int, optional
        Observation year whose row represents each trajectory
        (default 2021, the value that was previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        The obs_year rows without the per-class columns, plus the columns
        "scaling" and "RSS", sorted on "RSS" with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the number of obs_year rows differs from len(RSS_list); pd.concat
        would otherwise pad the shorter side with NaN without any warning.
    """
    # Loading the targeted municipality data frame created by the synthetic
    # population generator.
    # NOTE(review): pd.read_pickle must only be used on trusted files.
    save_dir = "synthetic_data/"
    data_frame = pd.read_pickle(save_dir + municipality_file)
    data_frame = data_frame.loc[data_frame["sh_threshold"] < 0.01]

    # Keep only the rows of the requested observation year
    df_filtered = data_frame[data_frame["obs_year"] == obs_year]

    if len(df_filtered) != len(RSS_list):
        raise ValueError(
            "Number of rows with obs_year == "
            + str(obs_year)
            + " ("
            + str(len(df_filtered))
            + ") does not match len(RSS_list) ("
            + str(len(RSS_list))
            + ")"
        )

    # Remove columns not used
    cols = [
        "f_calves",
        "y_hinds",
        "a_hinds",
        "m_calves",
        "y_stags",
        "a_stags",
        "ws_fc",
        "ws_yh",
        "ws_ah",
        "ws_mc",
        "ws_ys",
        "ws_as",
        "c_yh",
        "c_ah",
    ]
    df_filtered2 = df_filtered.drop(cols, axis=1)

    # Reset index so that pd.concat pairs the rows by position 0..n-1
    df_filtered2 = df_filtered2.reset_index(drop=True)

    # Add the two columns from RSS_list
    df_RSS = pd.concat(
        [df_filtered2, pd.DataFrame(RSS_list, columns=["scaling", "RSS"])], axis=1
    )

    # Sort the frame on the RSS value
    sorted_sum_squares_frame = df_RSS.sort_values(by=["RSS"]).reset_index(drop=True)

    return sorted_sum_squares_frame

In [7]:
def extract_top_hits(
    i,
    sorted_sum_squares_frame,
    filtering_strategy,
    frac_init_pop,
    number_of_top_hits,
):
    """
    Filtering the sum_squares frame based on an assumption about the
    size of the Dec 31 2020 population vs the Dec 31 2006 population,
    and delivering only the number_of_top_hits best fits.

    Parameters
    ----------
    i : int
        Index into frac_init_pop selecting the fraction to apply.
    sorted_sum_squares_frame : pandas.DataFrame
        Frame already sorted by ascending RSS; the "informed" strategy
        reads its "tot_pop_ah" and "init_pop" columns.
    filtering_strategy : str
        "uninformed" keeps every row; "informed" keeps only rows where
        tot_pop_ah > frac_init_pop[i] * init_pop.
    frac_init_pop : sequence of numbers
        Minimum tot_pop_ah / init_pop ratio, one entry per municipality.
    number_of_top_hits : int
        Number of leading rows to return.

    Returns
    -------
    pandas.DataFrame
        The first number_of_top_hits rows that pass the filter.

    Raises
    ------
    ValueError
        If filtering_strategy is neither "uninformed" nor "informed".
    """
    if filtering_strategy == "uninformed":
        sorted_sum_squares_frame_filtered = sorted_sum_squares_frame
    elif filtering_strategy == "informed":
        # Educated guess filtering - can play with these criteria
        population_large_enough = (
            sorted_sum_squares_frame["tot_pop_ah"]
            > frac_init_pop[i] * sorted_sum_squares_frame["init_pop"]
        )
        sorted_sum_squares_frame_filtered = sorted_sum_squares_frame[
            population_large_enough
        ].reset_index(drop=True)
    else:
        # Previously fell through and failed with an unbound local variable
        raise ValueError(
            "filtering_strategy must be 'uninformed' or 'informed', got "
            + repr(filtering_strategy)
        )

    return sorted_sum_squares_frame_filtered.iloc[:number_of_top_hits]

In [8]:
def run_the_show():
    """
    Run the complete RSS analysis for all six municipalities.

    For each municipality: compute the RSS list against the reported
    seen-deer data, build the sorted RSS frame, extract the top hits, pickle
    them to "synthetic_data/", and display the frame, the relative spread of
    its RSS values and its column means.
    """
    # Analysis settings
    compare = "before_hunting"
    filtering_strategy = "informed"
    number_of_top_hits = 20
    save_dir = "synthetic_data/"

    # Using all 6 municipalities
    municipalities = ["Averoy", "Tingvoll", "Surnadal", "Sunndal", "Vestnes", "Laerdal"]

    # Reported data records, in the same order as `municipalities`
    data_municipality = [
        data_Averoy,
        data_Tingvoll,
        data_Surnadal,
        data_Sunndal,
        data_Vestnes,
        data_Laerdal,
    ]

    # Pickled synthetic frames, one per municipality
    municipality_frame = [
        "df_original_sorted_" + name + ".pkl" for name in municipalities
    ]

    # Minimum tot_pop 2020/2006 ratio, one entry per municipality
    frac_init_pop = [0.5] * len(municipalities)

    for q, synthetic_file in enumerate(municipality_frame):
        # Each record holds 17 fields; only a few are needed here, the
        # underscore-prefixed ones are unpacked but not used.
        (
            municipality,
            _first_year,
            _last_year,
            number_of_years,
            _years,
            seen_deer_obs,
            _seen_deer_obs_outfield,
            seen_deer_obs_infield,
            _hinds_per_stag_obs,
            _total_harvest,
            _fraction_female_calves_harvested,
            _fraction_young_hinds_harvested,
            _fraction_adult_hinds_harvested,
            _fraction_male_calves_harvested,
            _fraction_young_stags_harvested,
            _fraction_adult_stags_harvested,
            _spring_counts,
        ) = data_municipality[q]

        # Surnadal is compared against its infield observations only
        observations = (
            seen_deer_obs_infield if municipality == "Surnadal" else seen_deer_obs
        )

        RSS_list = sum_squares_seen_deer(
            number_of_years,
            synthetic_file,
            observations,
            compare,
        )

        sorted_sum_squares_frame = make_RSS_frame(synthetic_file, RSS_list)

        top_hits_frame_filtered = extract_top_hits(
            q,
            sorted_sum_squares_frame,
            filtering_strategy,
            frac_init_pop,
            number_of_top_hits,
        )

        # Storing top hits frames
        output_path = (
            save_dir + "top_hits_" + municipality + "_sd_no_emigration" + ".pkl"
        )
        top_hits_frame_filtered.to_pickle(output_path)

        # Relative spread of the RSS values among the top hits, in percent
        rss_values = top_hits_frame_filtered["RSS"]
        rss_max = rss_values.max()
        rss_min = rss_values.min()
        rss_spread_percent = round(((rss_max - rss_min) / rss_min) * 100, 2)

        print("Municipality = ", municipality)
        display(top_hits_frame_filtered)
        print()
        print(
            "Percentage difference between min and max RSS value: ",
            rss_spread_percent,
        )
        print()
        print("Mean column values of top_hits_frame:")
        display(top_hits_frame_filtered.mean())
        print()
        print()

In [9]:
%%time
# Running the whole analysis through a single call to run_the_show()
# Wrapping the work in one function is done to avoid memory leaks
run_the_show()

Municipality =  Averoy


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,1250,1.3,0.0,143,48,495,143,200,217,2021,1415,1783,1350,0.6039,0.3748,0.0,0,0.000654,0.967004
1,1400,1.3,0.0,231,132,397,231,122,285,2021,1194,1501,1068,0.6065,0.3306,0.0,0,0.000688,0.976974
2,1250,1.5,0.0,187,83,441,187,70,280,2021,1242,1621,1188,0.7657,0.4896,0.0,0,0.000696,0.978248
3,1400,1.4,0.0,203,52,527,203,99,314,2021,1235,1551,1120,0.5537,0.2954,0.0,0,0.00066,0.980555
4,1300,1.5,0.0,162,152,432,162,116,273,2021,1034,1304,879,0.573,0.2669,0.0,0,0.00074,0.982288
5,1400,1.4,0.0,245,90,440,245,113,265,2021,1190,1470,1040,0.4981,0.2348,0.0,0,0.000677,0.982796
6,1300,1.6,0.0,136,107,524,136,157,237,2021,1074,1358,933,0.5859,0.2909,0.0,0,0.00074,0.984973
7,1250,1.4,0.0,125,58,525,125,91,325,2021,1290,1584,1151,0.4999,0.2553,0.0,0,0.000683,0.988435
8,1300,1.3,0.0,149,107,458,149,191,243,2021,1248,1579,1146,0.6117,0.3484,0.0,0,0.00069,0.988861
9,1350,1.3,0.0,202,133,400,202,172,238,2021,1139,1440,1008,0.5881,0.3007,0.0,0,0.000716,0.992644



Percentage difference between min and max RSS value:  5.24

Mean column values of top_hits_frame:


init_pop        1320.000000
init_hps           1.420000
sh_threshold       0.000000
i_fcalves        180.500000
i_yhinds         101.600000
i_ahinds         457.800000
i_mcalves        180.500000
i_ystags         131.300000
i_astags         265.700000
obs_year        2021.000000
spring_pop      1206.450000
tot_pop_bh      1523.700000
tot_pop_ah      1093.750000
hps_bh             0.602430
hps_ah             0.330990
mig%               0.000000
num_migs           0.000000
scaling            0.000696
RSS                0.994117
dtype: float64



Municipality =  Tingvoll


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,2750,1.3,0.0,426,96,975,426,346,478,2021,2694,3557,2862,0.8912,0.8009,0.0,0,0.000152,0.045376
1,2750,1.5,0.0,426,148,990,426,136,622,2021,2287,3004,2309,0.8568,0.7438,0.0,0,0.000162,0.045404
2,2800,1.3,0.0,448,215,860,448,198,629,2021,2394,3126,2431,0.8569,0.7499,0.0,0,0.000158,0.047001
3,3100,1.3,0.0,465,134,1091,465,415,528,2021,2656,3458,2763,0.7955,0.6954,0.0,0,0.000142,0.047143
4,2850,1.3,0.0,327,322,917,327,496,457,2021,2686,3518,2823,0.8481,0.7536,0.0,0,0.000149,0.047235
5,2800,1.3,0.0,434,218,873,434,419,420,2021,2323,3101,2406,0.9538,0.8539,0.0,0,0.000157,0.047442
6,2900,1.4,0.0,362,317,951,362,253,652,2021,2580,3320,2625,0.7346,0.6266,0.0,0,0.00015,0.047555
7,2850,1.4,0.0,498,183,896,498,154,617,2021,2472,3167,2472,0.7357,0.6223,0.0,0,0.000154,0.047565
8,2750,1.3,0.0,412,163,924,412,167,669,2021,2436,3176,2481,0.817,0.7084,0.0,0,0.000154,0.047735
9,2850,1.3,0.0,470,237,841,470,249,581,2021,2265,2952,2257,0.7912,0.6705,0.0,0,0.00016,0.047762



Percentage difference between min and max RSS value:  6.85

Mean column values of top_hits_frame:


init_pop        2770.000000
init_hps           1.380000
sh_threshold       0.000000
i_fcalves        409.150000
i_yhinds         205.850000
i_ahinds         922.800000
i_mcalves        409.150000
i_ystags         300.200000
i_astags         520.200000
obs_year        2021.000000
spring_pop      2460.500000
tot_pop_bh      3220.700000
tot_pop_ah      2525.700000
hps_bh             0.832300
hps_ah             0.726265
mig%               0.000000
num_migs           0.000000
scaling            0.000156
RSS                0.047595
dtype: float64



Municipality =  Surnadal


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,2000,1.5,0.0,310,91,736,310,132,419,2021,1576,2174,1684,1.3234,1.4942,0.0,0,0.000383,1.111984
1,2150,1.4,0.0,365,124,703,365,118,472,2021,1587,2250,1760,1.4895,1.7432,0.0,0,0.000378,1.138366
2,1950,1.4,0.0,312,131,642,312,298,254,2021,1416,1997,1507,1.4383,1.7045,0.0,0,0.000401,1.141695
3,1900,1.4,0.0,285,77,698,285,99,454,2021,1335,1896,1406,1.5447,1.912,0.0,0,0.000416,1.142278
4,1850,1.6,0.0,305,106,655,305,209,266,2021,1315,1838,1348,1.5087,1.8572,0.0,0,0.000419,1.145195
5,2150,1.5,0.0,376,192,645,376,145,413,2021,1355,1887,1397,1.2947,1.4896,0.0,0,0.000397,1.145356
6,1950,1.4,0.0,321,83,678,321,250,293,2021,1223,1728,1238,1.4576,1.8056,0.0,0,0.000415,1.146113
7,2050,1.3,0.0,276,135,710,276,234,416,2021,1305,1807,1317,1.2208,1.3833,0.0,0,0.000393,1.147463
8,1700,1.6,0.0,195,72,733,195,221,281,2021,1202,1736,1246,1.6902,2.268,0.0,0,0.000441,1.148009
9,1850,1.4,0.0,314,64,648,314,162,345,2021,1441,2044,1554,1.479,1.7635,0.0,0,0.000406,1.148112



Percentage difference between min and max RSS value:  3.44

Mean column values of top_hits_frame:


init_pop        1955.000000
init_hps           1.415000
sh_threshold       0.000000
i_fcalves        286.100000
i_yhinds         136.100000
i_ahinds         671.200000
i_mcalves        286.100000
i_ystags         214.650000
i_astags         358.350000
obs_year        2021.000000
spring_pop      1384.050000
tot_pop_bh      1941.000000
tot_pop_ah      1451.000000
hps_bh             1.411455
hps_ah             1.690230
mig%               0.000000
num_migs           0.000000
scaling            0.000403
RSS                1.145361
dtype: float64



Municipality =  Sunndal


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,1650,1.9,0.0,181,185,657,181,213,230,2021,1604,2237,1693,1.5447,1.6742,0.0,0,0.000183,0.044967
1,1950,1.3,0.0,195,132,749,195,230,447,2021,1829,2539,1995,1.32,1.3531,0.0,0,0.00016,0.045056
2,2150,1.3,0.0,268,173,738,268,294,406,2021,1883,2523,1979,1.091,1.0646,0.0,0,0.000153,0.045075
3,2400,1.3,0.0,396,90,817,396,125,573,2021,2166,2961,2417,1.2052,1.2058,0.0,0,0.000144,0.045794
4,2100,1.3,0.0,336,72,734,336,111,509,2021,1815,2496,1952,1.3161,1.3484,0.0,0,0.000159,0.045946
5,2150,1.3,0.0,301,87,787,301,296,376,2021,1825,2448,1904,1.0699,1.0375,0.0,0,0.000157,0.046131
6,1950,1.4,0.0,204,179,718,204,282,359,2021,1810,2515,1971,1.345,1.3863,0.0,0,0.000163,0.046187
7,1900,1.8,0.0,199,202,762,199,117,418,2021,1843,2546,2002,1.3007,1.3277,0.0,0,0.000161,0.046322
8,1550,1.9,0.0,155,73,739,155,111,316,2021,1509,2165,1621,1.8152,2.107,0.0,0,0.000186,0.046403
9,1950,1.3,0.0,195,114,767,195,189,488,2021,1791,2442,1898,1.2832,1.306,0.0,0,0.000163,0.046461



Percentage difference between min and max RSS value:  4.11

Mean column values of top_hits_frame:


init_pop        1992.500000
init_hps           1.490000
sh_threshold       0.000000
i_fcalves        279.650000
i_yhinds         130.750000
i_ahinds         717.750000
i_mcalves        279.650000
i_ystags         192.350000
i_astags         389.750000
obs_year        2021.000000
spring_pop      1811.650000
tot_pop_bh      2488.100000
tot_pop_ah      1944.100000
hps_bh             1.310455
hps_ah             1.351480
mig%               0.000000
num_migs           0.000000
scaling            0.000163
RSS                0.046241
dtype: float64



Municipality =  Vestnes


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,2350,1.3,0.0,387,213,676,387,136,547,2021,1925,2651,2033,1.1878,1.1689,0.0,0,0.000225,0.139335
1,2250,1.3,0.0,247,208,783,247,152,610,2021,2166,3013,2395,1.3507,1.3742,0.0,0,0.000222,0.139765
2,2200,1.4,0.0,275,221,741,275,247,440,2021,1934,2709,2091,1.4063,1.4509,0.0,0,0.000232,0.142468
3,2250,1.3,0.0,292,150,790,292,202,521,2021,2000,2800,2182,1.3708,1.4026,0.0,0,0.000222,0.142783
4,2200,1.5,0.0,352,143,753,352,119,478,2021,1997,2774,2156,1.2993,1.3105,0.0,0,0.000223,0.144472
5,2500,1.3,0.0,437,247,670,437,296,409,2021,2180,2990,2372,1.1249,1.0954,0.0,0,0.000212,0.14478
6,1900,1.9,0.0,256,190,717,256,229,248,2021,1664,2335,1717,1.4094,1.4651,0.0,0,0.000256,0.145024
7,2050,1.4,0.0,225,205,727,225,119,546,2021,1841,2555,1937,1.2865,1.295,0.0,0,0.000236,0.145586
8,2250,1.3,0.0,326,189,713,326,138,555,2021,1930,2632,2014,1.1402,1.1096,0.0,0,0.000228,0.147197
9,2500,1.3,0.0,387,204,770,387,239,510,2021,2134,2871,2253,1.0518,1.0069,0.0,0,0.00021,0.147208



Percentage difference between min and max RSS value:  7.21

Mean column values of top_hits_frame:


init_pop        2220.000000
init_hps           1.445000
sh_threshold       0.000000
i_fcalves        318.250000
i_yhinds         177.400000
i_ahinds         752.000000
i_mcalves        318.250000
i_ystags         202.800000
i_astags         448.600000
obs_year        2021.000000
spring_pop      1929.400000
tot_pop_bh      2669.650000
tot_pop_ah      2051.650000
hps_bh             1.280700
hps_ah             1.290225
mig%               0.000000
num_migs           0.000000
scaling            0.000230
RSS                0.146262
dtype: float64



Municipality =  Laerdal


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,1600,1.8,0.0,192,93,687,192,217,217,2021,1360,1832,1300,1.0368,0.9388,0.0,0,0.000116,0.019292
1,1650,1.4,0.0,222,133,569,222,230,271,2021,1518,2036,1504,0.9663,0.8691,0.0,0,0.000112,0.019308
2,1650,1.5,0.0,189,198,564,189,203,304,2021,1477,1952,1420,0.8813,0.768,0.0,0,0.000112,0.01946
3,1550,1.9,0.0,170,198,594,170,125,291,2021,1466,1955,1423,0.9637,0.8619,0.0,0,0.000112,0.019496
4,1600,1.3,0.0,160,86,636,160,122,434,2021,1380,1857,1325,0.9248,0.8084,0.0,0,0.000116,0.019498
5,1550,1.8,0.0,263,138,519,263,182,182,2021,1319,1809,1277,1.1023,1.0147,0.0,0,0.000121,0.019508
6,1700,1.6,0.0,238,203,549,238,122,348,2021,1386,1837,1305,0.8649,0.74,0.0,0,0.000114,0.019552
7,1650,1.4,0.0,181,105,645,181,225,311,2021,1494,1996,1464,0.8974,0.7878,0.0,0,0.000109,0.019586
8,1700,1.5,0.0,178,217,588,178,139,397,2021,1542,2056,1524,0.9333,0.8329,0.0,0,0.000108,0.019587
9,1650,2.0,0.0,231,182,609,231,71,324,2021,1506,1965,1433,0.8044,0.6838,0.0,0,0.000109,0.019608



Percentage difference between min and max RSS value:  2.34

Mean column values of top_hits_frame:


init_pop        1620.000000
init_hps           1.595000
sh_threshold       0.000000
i_fcalves        194.700000
i_yhinds         149.300000
i_ahinds         600.950000
i_mcalves        194.700000
i_ystags         168.050000
i_astags         309.900000
obs_year        2021.000000
spring_pop      1454.200000
tot_pop_bh      1951.850000
tot_pop_ah      1419.850000
hps_bh             0.975455
hps_ah             0.875155
mig%               0.000000
num_migs           0.000000
scaling            0.000113
RSS                0.019592
dtype: float64



CPU times: user 35min 1s, sys: 9min 42s, total: 44min 44s
Wall time: 44min 39s
