In [1]:
%load_ext lab_black

In [2]:
import numpy as np
import pandas as pd

import gc
import timeit
import sys
import os

from IPython.core.interactiveshell import InteractiveShell

# IPython display setting: with "all", the value of every top-level expression
# in a cell is displayed, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [3]:
from scipy.optimize import minimize

# Residual Sum of Squares calculations: synthetic data against data for seen deer per hunting hour

In [4]:
# Restoring the reported (empirical) data that was saved earlier with %store
# https://ipython.org/ipython-doc/rel-0.12/config/extensions/storemagic.html

%store -r data_Averoy
%store -r data_Tingvoll
%store -r data_Surnadal
%store -r data_Sunndal
%store -r data_Vestnes
%store -r data_Laerdal

In [5]:
def sum_squares_seen_deer(
    number_of_years,
    municipality_file,
    empirical_observations,
    compare,
):
    """
    Calculating the minimum residual sum of squares (RSS) between synthetic
    data and reported data.

    The synthetic data frame is read from "synthetic_data/" + municipality_file
    and is processed in consecutive blocks of number_of_years rows. For each
    block the predicted population sizes p are scaled by the factor
    k = sum(p * e) / sum(p * p), i.e. the k that minimises sum((k * p - e) ** 2)
    against the empirical observations e.

    Parameters
    ----------
    number_of_years : int
        Number of consecutive rows that make up one block of the synthetic frame.
    municipality_file : str
        File name of the pickled synthetic data frame inside "synthetic_data/".
    empirical_observations : array-like
        Reported values the predictions are compared with; must be
        broadcastable against a block of number_of_years predictions.
    compare : str
        "before_hunting" (uses column "tot_pop_bh") or
        "after_hunting" (uses column "tot_pop_ah").

    Returns
    -------
    list
        One [scaling_factor, RSS_min] pair per complete block, in block order.
        Trailing rows that do not fill a complete block are ignored.

    Raises
    ------
    ValueError
        If compare is not one of the two supported choices.
    """

    # Defining column to extract values from; fail early (before the costly
    # file read) on an unknown choice instead of hitting an unbound variable
    column_for_choice = {
        "before_hunting": "tot_pop_bh",
        "after_hunting": "tot_pop_ah",
    }
    if compare not in column_for_choice:
        raise ValueError(
            "compare must be 'before_hunting' or 'after_hunting', got "
            + repr(compare)
        )
    comp_choice = column_for_choice[compare]

    # Restoring the targeted municipality data frame created by the synthetic population generator
    save_dir = "synthetic_data/"
    data_frame = pd.read_pickle(save_dir + municipality_file)

    # Loop invariants: the empirical vector and the full column of predictions
    seen_deer_e = np.array(empirical_observations)
    predicted_all = data_frame[comp_choice].values

    RSS_list = []
    for i in range(0, int(len(data_frame) / number_of_years)):
        # Catch the predicted total population sizes for all observation years
        seen_deer_p = predicted_all[i * number_of_years : (i + 1) * number_of_years]

        # Find the scaling factor that minimises RSS and do the scaling
        # (an all-zero prediction block yields nan here, as 0 / 0)
        scaling_factor = np.sum(np.multiply(seen_deer_p, seen_deer_e)) / np.sum(
            np.multiply(seen_deer_p, seen_deer_p)
        )
        seen_deer_p_scaled = scaling_factor * seen_deer_p

        # Find minimum Residual Sum Square value
        RSS_min = np.sum((seen_deer_p_scaled - seen_deer_e) ** 2)

        RSS_list.append([scaling_factor, RSS_min])

    return RSS_list

In [6]:
def make_RSS_frame(municipality_file, RSS_list, obs_year=2021):
    """
    Combine the RSS results with the parameters of the synthetic populations
    and return them sorted by RSS (best fit first).

    The synthetic data frame is read from "synthetic_data/" + municipality_file,
    reduced to the rows of one observation year and stripped of the per-class
    columns. The "scaling" and "RSS" values are then attached row by row.

    Parameters
    ----------
    municipality_file : str
        File name of the pickled synthetic data frame inside "synthetic_data/".
    RSS_list : list
        [scaling_factor, RSS] pairs. The n-th pair is attached to the n-th
        row of the selected observation year, so the order must match.
    obs_year : int, optional
        Observation year whose rows are kept (default 2021).

    Returns
    -------
    pandas.DataFrame
        The retained columns plus "scaling" and "RSS", sorted ascending on
        "RSS" with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the number of RSS_list entries differs from the number of rows
        for obs_year; pd.concat would otherwise pad with NaN silently.
    """

    # Restoring the targeted municipality data frame created by the synthetic population generator
    save_dir = "synthetic_data/"
    data_frame = pd.read_pickle(save_dir + municipality_file)

    # Remove all rows except those where obs_year matches the requested year
    df_filtered = data_frame[data_frame["obs_year"] == obs_year]

    # Remove columns not used
    cols = [
        "f_calves",
        "y_hinds",
        "a_hinds",
        "m_calves",
        "y_stags",
        "a_stags",
        "ws_fc",
        "ws_yh",
        "ws_ah",
        "ws_mc",
        "ws_ys",
        "ws_as",
        "c_yh",
        "c_ah",
    ]
    df_filtered2 = df_filtered.drop(cols, axis=1)

    # Reset index, otherwise pd.concat would align on the original row labels
    df_filtered2 = df_filtered2.reset_index(drop=True)

    # Guard against silent misalignment between parameters and RSS values
    if len(RSS_list) != len(df_filtered2):
        raise ValueError(
            "RSS_list has "
            + str(len(RSS_list))
            + " entries but the data frame has "
            + str(len(df_filtered2))
            + " rows for obs_year "
            + str(obs_year)
        )

    # Add two columns from RSS_list
    df_RSS = pd.concat(
        [df_filtered2, pd.DataFrame(RSS_list, columns=["scaling", "RSS"])], axis=1
    )

    # Sort the frame on the RSS value
    sorted_sum_squares_frame = df_RSS.sort_values(by=["RSS"]).reset_index(drop=True)

    return sorted_sum_squares_frame

In [7]:
def extract_top_hits(
    i,
    sorted_sum_squares_frame,
    filtering_strategy,
    frac_init_pop,
    number_of_top_hits,
):
    """
    Optionally filter the sorted sum-of-squares frame and deliver only the
    number_of_top_hits first (best fitting) rows.

    Parameters
    ----------
    i : int
        Position of the current municipality in frac_init_pop.
    sorted_sum_squares_frame : pandas.DataFrame
        Frame already sorted on RSS; the "informed" strategy reads its
        "tot_pop_ah" and "init_pop" columns.
    filtering_strategy : str
        "uninformed": no filtering.
        "informed": keep only rows where
        tot_pop_ah > frac_init_pop[i] * init_pop. Per the original notes this
        expresses an assumption about the size of the Dec 31 2020 population
        relative to the Dec 31 2006 population.
    frac_init_pop : sequence of float
        Minimum tot_pop_ah / init_pop ratio, one entry per municipality.
    number_of_top_hits : int
        Number of leading rows to return.

    Returns
    -------
    pandas.DataFrame
        The first number_of_top_hits rows of the (possibly filtered) frame.

    Raises
    ------
    ValueError
        If filtering_strategy is neither "uninformed" nor "informed".
    """

    if filtering_strategy == "uninformed":
        sorted_sum_squares_frame_filtered = sorted_sum_squares_frame

    elif filtering_strategy == "informed":
        # Educated guess filtering - can play with these criteria
        minimum_population = frac_init_pop[i] * sorted_sum_squares_frame.init_pop
        sorted_sum_squares_frame_filtered = sorted_sum_squares_frame[
            sorted_sum_squares_frame.tot_pop_ah > minimum_population
        ].reset_index(drop=True)

    else:
        # Previously an unknown strategy surfaced as an unbound local variable
        raise ValueError(
            "filtering_strategy must be 'uninformed' or 'informed', got "
            + repr(filtering_strategy)
        )

    return sorted_sum_squares_frame_filtered[0:number_of_top_hits]

In [8]:
def run_the_show():
    """
    Rank the synthetic populations of each municipality against its reported
    seen-deer data, store the best fits and display a summary.

    For every municipality the RSS values are computed, merged with the
    simulation parameters, filtered down to the top hits, written to
    "synthetic_data/top_hits_<municipality>_seen_deer.pkl" and displayed
    together with the relative RSS spread and the column means.

    Reads the data_<municipality> variables from the notebook namespace
    (restored with %store -r) and relies on IPython's display().
    """
    # Analysis settings
    compare = "before_hunting"
    filtering_strategy = "informed"
    number_of_top_hits = 20
    save_dir = "synthetic_data/"

    # Using all 6 municipalities
    municipalities = ["Averoy", "Tingvoll", "Surnadal", "Sunndal", "Vestnes", "Laerdal"]
    frac_init_pop = [0.5] * len(municipalities)  # minimum tot_pop 2020/2006 ratio

    # Reported data and synthetic data file of each municipality, in the same order
    data_municipality = [
        data_Averoy,
        data_Tingvoll,
        data_Surnadal,
        data_Sunndal,
        data_Vestnes,
        data_Laerdal,
    ]
    municipality_frame = [
        "df_original_sorted_Averoy.pkl",
        "df_original_sorted_Tingvoll.pkl",
        "df_original_sorted_Surnadal.pkl",
        "df_original_sorted_Sunndal.pkl",
        "df_original_sorted_Vestnes.pkl",
        "df_original_sorted_Laerdal.pkl",
    ]

    for q, (reported, frame_file) in enumerate(
        zip(data_municipality, municipality_frame)
    ):
        # Each stored record holds exactly these 17 entries, in this order
        (
            municipality,
            first_year,
            last_year,
            number_of_years,
            years,
            seen_deer_obs,
            seen_deer_obs_outfield,
            seen_deer_obs_infield,
            hinds_per_stag_obs,
            total_harvest,
            fraction_female_calves_harvested,
            fraction_young_hinds_harvested,
            fraction_adult_hinds_harvested,
            fraction_male_calves_harvested,
            fraction_young_stags_harvested,
            fraction_adult_stags_harvested,
            spring_counts,
        ) = reported

        # Surnadal is compared against its infield observations only
        observations = (
            seen_deer_obs_infield if municipality == "Surnadal" else seen_deer_obs
        )
        RSS_list = sum_squares_seen_deer(
            number_of_years,
            frame_file,
            observations,
            compare,
        )

        sorted_sum_squares_frame = make_RSS_frame(frame_file, RSS_list)

        top_hits_frame_filtered = extract_top_hits(
            q,
            sorted_sum_squares_frame,
            filtering_strategy,
            frac_init_pop,
            number_of_top_hits,
        )

        # Storing top hits frames
        top_hits_frame_filtered.to_pickle(
            save_dir + "top_hits_" + municipality + "_seen_deer" + ".pkl"
        )

        # Relative spread of the RSS values among the top hits, in percent
        rss_max = top_hits_frame_filtered["RSS"].max()
        rss_min = top_hits_frame_filtered["RSS"].min()
        rss_spread_percent = round(((rss_max - rss_min) / rss_min) * 100, 2)

        print("Municipality = ", municipality)
        display(top_hits_frame_filtered)
        print()
        print(
            "Percentage difference between min and max RSS value: ",
            rss_spread_percent,
        )
        print()
        print("Mean column values of top_hits_frame:")
        display(top_hits_frame_filtered.mean())
        print()
        print()

In [9]:
%%time
# Run the whole analysis by calling run_the_show().
# Keeping the work inside a function is done to get rid of memory leaks.
run_the_show()

Municipality =  Averoy


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,1400,1.4,0.6,147,161,483,147,221,239,2021,1186,1597,1164,1.4739,1.1488,15.1156,54,0.00074,0.632886
1,1450,1.3,0.7,166,138,492,166,145,339,2021,1217,1563,1130,1.2831,0.9564,20.0556,84,0.000738,0.658284
2,1300,1.8,0.6,130,120,548,130,66,304,2021,1084,1441,1008,1.3521,0.9869,16.3239,54,0.000758,0.661097
3,1400,1.3,0.8,154,129,487,154,123,351,2021,1309,1662,1229,1.1868,0.8837,20.907,104,0.00071,0.671749
4,1150,2.0,0.5,120,96,508,120,127,175,2021,1108,1528,1095,1.6428,1.3009,13.2823,39,0.000774,0.673681
5,1600,1.4,0.5,264,143,481,264,89,357,2021,1259,1704,1271,1.6231,1.3226,18.512,65,0.000738,0.685546
6,1200,2.0,0.5,198,91,444,198,53,214,2021,980,1361,928,1.5938,1.1992,10.2493,25,0.000831,0.689359
7,1350,1.4,0.6,168,76,513,168,109,312,2021,950,1252,819,1.4294,0.9895,21.8832,68,0.000813,0.690976
8,1350,1.5,0.7,202,56,510,202,136,241,2021,1126,1450,1017,1.2601,0.9013,20.5847,81,0.000748,0.695316
9,1300,1.3,0.7,175,80,455,175,82,330,2021,1007,1317,884,1.2611,0.8551,18.8661,66,0.000787,0.697476



Percentage difference between min and max RSS value:  12.99

Mean column values of top_hits_frame:


init_pop        1332.500000
init_hps           1.595000
sh_threshold       0.585000
i_fcalves        180.550000
i_yhinds          98.950000
i_ahinds         490.750000
i_mcalves        180.550000
i_ystags         119.800000
i_astags         259.100000
obs_year        2021.000000
spring_pop      1106.950000
tot_pop_bh      1462.350000
tot_pop_ah      1029.350000
hps_bh             1.457130
hps_ah             1.091855
mig%              19.725580
num_migs          68.150000
scaling            0.000770
RSS                0.692259
dtype: float64



Municipality =  Tingvoll


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,2800,2.0,0.6,336,383,1035,336,127,581,2021,2420,3344,2649,1.5098,1.5122,7.2107,51,0.000156,0.032315
1,2650,1.5,0.5,304,110,1114,304,326,489,2021,2163,2943,2248,1.6168,1.652,15.1778,86,0.000172,0.035125
2,2500,1.9,0.5,325,206,1006,325,114,523,2021,2035,2793,2098,1.6609,1.7145,14.1455,77,0.000174,0.035431
3,2650,1.3,0.7,410,103,930,410,333,461,2021,2325,3168,2473,1.3256,1.2851,7.619,57,0.000161,0.03594
4,2600,1.4,0.7,312,161,991,312,428,395,2021,2429,3221,2526,1.3024,1.258,14.637,121,0.000161,0.036053
5,2850,1.3,0.5,313,138,1118,313,444,521,2021,2269,3132,2437,1.6644,1.7114,11.3558,67,0.000165,0.036058
6,2850,1.3,0.6,342,208,1016,342,320,621,2021,2133,2917,2222,1.4614,1.4506,8.961,56,0.000163,0.036199
7,2400,1.7,0.6,324,99,1003,324,207,441,2021,2159,2981,2286,1.5099,1.5126,5.2001,33,0.000168,0.036227
8,2850,1.3,0.5,285,128,1159,285,396,594,2021,2211,3036,2341,1.653,1.6982,11.979,70,0.000166,0.036303
9,2650,1.5,0.5,410,230,866,410,351,380,2021,2079,2892,2197,1.7221,1.7951,10.1646,55,0.000179,0.03647



Percentage difference between min and max RSS value:  14.9

Mean column values of top_hits_frame:


init_pop        2642.500000
init_hps           1.525000
sh_threshold       0.595000
i_fcalves        338.700000
i_yhinds         180.300000
i_ahinds         995.750000
i_mcalves        338.700000
i_ystags         318.550000
i_astags         467.950000
obs_year        2021.000000
spring_pop      2216.350000
tot_pop_bh      3032.750000
tot_pop_ah      2337.750000
hps_bh             1.507385
hps_ah             1.514915
mig%               9.427955
num_migs          60.600000
scaling            0.000167
RSS                0.036236
dtype: float64



Municipality =  Surnadal


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,1900,1.7,0.6,313,200,601,313,169,301,2021,1340,1926,1436,1.6385,2.0785,0.0,0,0.000418,1.095772
1,2100,1.5,0.7,325,147,721,325,150,428,2021,1515,2140,1650,1.4926,1.7655,0.0,0,0.000391,1.097538
2,1850,1.6,0.5,314,112,638,314,84,385,2021,1282,1848,1358,1.7903,2.4087,0.0,0,0.000437,1.09819
3,2050,1.3,0.7,338,69,706,338,322,274,2021,1454,2016,1526,1.3451,1.548,0.0,0,0.000405,1.100382
4,1800,2.0,0.5,270,92,747,270,92,327,2021,1373,1959,1469,1.9138,2.5858,0.0,0,0.000416,1.104775
5,2200,1.4,0.5,385,91,742,385,297,297,2021,1205,1717,1227,1.7353,2.3599,0.0,0,0.000428,1.105546
6,1900,1.9,0.5,313,191,642,313,149,289,2021,1221,1793,1303,1.9411,2.7983,0.0,0,0.00044,1.106126
7,1850,1.4,0.7,259,93,683,259,288,266,2021,1208,1734,1244,1.6376,2.1569,0.0,0,0.000442,1.106934
8,2000,1.3,0.8,290,136,666,290,333,284,2021,1253,1791,1301,1.6216,2.0952,0.0,0,0.000409,1.107827
9,2100,1.3,0.5,283,147,719,283,213,453,2021,1487,2132,1642,1.6867,2.0962,0.0,0,0.000403,1.108737



Percentage difference between min and max RSS value:  1.92

Mean column values of top_hits_frame:


init_pop        1957.500000
init_hps           1.515000
sh_threshold       0.575000
i_fcalves        301.050000
i_yhinds         134.250000
i_ahinds         675.850000
i_mcalves        301.050000
i_ystags         191.250000
i_astags         351.550000
obs_year        2021.000000
spring_pop      1332.600000
tot_pop_bh      1895.550000
tot_pop_ah      1405.550000
hps_bh             1.624530
hps_ah             2.092870
mig%               0.000000
num_migs           0.000000
scaling            0.000418
RSS                1.108568
dtype: float64



Municipality =  Sunndal


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,2050,1.7,0.5,276,226,716,276,99,454,2021,1678,2332,1788,1.6576,1.8342,7.5813,32,0.00017,0.041143
1,2050,1.6,0.5,328,154,703,328,117,418,2021,1732,2414,1870,1.7323,1.9352,5.757,25,0.000169,0.042104
2,2000,1.5,0.6,230,230,693,230,258,357,2021,1752,2417,1873,1.4595,1.5434,4.3334,21,0.000172,0.042309
3,2100,1.5,0.5,357,74,756,357,166,388,2021,1641,2335,1791,1.7168,1.9231,0.0,0,0.000172,0.042565
4,1850,1.8,0.6,249,173,694,249,241,241,2021,1597,2177,1633,1.4748,1.5781,8.6657,40,0.000176,0.042831
5,2050,1.7,0.6,287,232,697,287,240,306,2021,1960,2707,2163,1.4758,1.553,2.8403,15,0.000156,0.042952
6,1850,1.7,0.5,268,90,736,268,107,379,2021,1585,2248,1704,1.6932,1.8976,0.2085,0,0.000175,0.043181
7,1850,1.7,0.7,203,109,799,203,96,438,2021,1816,2502,1958,1.3498,1.3923,0.0,0,0.000159,0.043384
8,1950,1.6,0.6,302,215,612,302,144,372,2021,1690,2371,1827,1.4912,1.59,0.0,0,0.00017,0.043389
9,1900,1.8,0.5,256,115,775,256,118,376,2021,1549,2195,1651,1.66,1.8556,3.3893,13,0.000183,0.043491



Percentage difference between min and max RSS value:  6.85

Mean column values of top_hits_frame:


init_pop        2005.000000
init_hps           1.595000
sh_threshold       0.585000
i_fcalves        282.200000
i_yhinds         144.350000
i_ahinds         734.450000
i_mcalves        282.200000
i_ystags         171.300000
i_astags         388.000000
obs_year        2021.000000
spring_pop      1735.600000
tot_pop_bh      2408.100000
tot_pop_ah      1864.100000
hps_bh             1.534525
hps_ah             1.656110
mig%               3.617670
num_migs          16.950000
scaling            0.000168
RSS                0.043229
dtype: float64



Municipality =  Vestnes


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,2300,1.3,0.8,264,100,900,264,323,446,2021,2307,3170,2552,1.2417,1.2381,0.0,0,0.000206,0.127186
1,2200,1.5,0.5,286,234,742,286,234,416,2021,1697,2398,1780,1.6473,1.8063,0.0,0,0.000249,0.129611
2,2550,1.3,0.7,408,98,882,408,407,346,2021,1961,2714,2096,1.3104,1.3251,0.0,0,0.000222,0.131601
3,2000,1.7,0.5,250,103,840,250,211,344,2021,1566,2268,1650,1.7541,1.9951,0.0,0,0.000271,0.132408
4,2350,1.5,0.7,399,241,688,399,310,310,2021,2188,3053,2435,1.3648,1.3916,0.0,0,0.000218,0.132576
5,2050,1.9,0.5,307,112,827,307,227,267,2021,1872,2670,2052,1.7265,1.9004,0.0,0,0.00024,0.132641
6,2250,1.6,0.5,393,224,675,393,179,382,2021,1809,2571,1953,1.6295,1.768,2.0065,8,0.000246,0.133972
7,1900,1.6,0.7,190,112,823,190,245,339,2021,1916,2686,2068,1.4741,1.5423,0.0,0,0.000239,0.133987
8,2100,1.7,0.5,336,107,791,336,190,338,2021,1787,2580,1962,1.6988,1.8712,0.3827,1,0.000245,0.134147
9,2000,1.6,0.6,230,151,796,230,272,319,2021,1784,2548,1930,1.5922,1.7159,0.0,0,0.000243,0.134265



Percentage difference between min and max RSS value:  6.69

Mean column values of top_hits_frame:


init_pop        2187.500000
init_hps           1.540000
sh_threshold       0.600000
i_fcalves        309.200000
i_yhinds         145.400000
i_ahinds         798.250000
i_mcalves        309.200000
i_ystags         232.100000
i_astags         390.900000
obs_year        2021.000000
spring_pop      1875.900000
tot_pop_bh      2652.450000
tot_pop_ah      2034.450000
hps_bh             1.546390
hps_ah             1.653315
mig%               0.408940
num_migs           1.850000
scaling            0.000239
RSS                0.133579
dtype: float64



Municipality =  Laerdal


Unnamed: 0,init_pop,init_hps,sh_threshold,i_fcalves,i_yhinds,i_ahinds,i_mcalves,i_ystags,i_astags,obs_year,spring_pop,tot_pop_bh,tot_pop_ah,hps_bh,hps_ah,mig%,num_migs,scaling,RSS
0,1500,1.7,0.5,165,103,633,165,225,208,2021,1236,1697,1165,1.6987,1.8299,15.6815,53,0.000123,0.018216
1,1750,1.3,0.6,201,99,662,201,105,480,2021,1371,1891,1359,1.575,1.6332,6.4699,26,0.000116,0.018371
2,1650,1.4,0.6,181,97,653,181,246,289,2021,1290,1728,1196,1.4723,1.4946,11.8196,45,0.000115,0.018376
3,1850,1.4,0.7,249,189,598,249,225,337,2021,1510,2023,1491,1.335,1.3128,10.057,50,0.000108,0.018378
4,1650,1.3,0.5,165,74,671,165,309,264,2021,1475,2066,1534,1.6637,1.7496,9.0927,35,0.000117,0.018435
5,1650,1.7,0.5,198,86,702,198,213,250,2021,1380,1882,1350,1.6595,1.7535,13.4585,50,0.000116,0.018514
6,1550,1.7,0.5,170,114,647,170,205,241,2021,1206,1678,1146,1.6753,1.7959,10.5254,33,0.000124,0.018524
7,1700,1.6,0.5,170,100,736,170,251,272,2021,1273,1772,1240,1.6683,1.7773,12.2701,43,0.000119,0.018552
8,1450,2.0,0.5,188,164,550,188,157,200,2021,1471,2064,1532,1.6962,1.7958,10.7679,42,0.00012,0.018586
9,1500,2.0,0.5,202,146,584,202,160,204,2021,1278,1794,1262,1.6733,1.7818,7.3419,24,0.000118,0.018598



Percentage difference between min and max RSS value:  3.39

Mean column values of top_hits_frame:


init_pop        1597.500000
init_hps           1.625000
sh_threshold       0.530000
i_fcalves        184.250000
i_yhinds         125.300000
i_ahinds         629.050000
i_mcalves        184.250000
i_ystags         192.500000
i_astags         279.650000
obs_year        2021.000000
spring_pop      1324.400000
tot_pop_bh      1832.100000
tot_pop_ah      1300.100000
hps_bh             1.623885
hps_ah             1.710445
mig%              10.433030
num_migs          38.150000
scaling            0.000119
RSS                0.018604
dtype: float64



CPU times: user 3h 49min 9s, sys: 13min 42s, total: 4h 2min 51s
Wall time: 4h 30s
