In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import patches
from scipy import stats

from utils import get_dataframe

In [None]:
def _scatter(ax, sample, color, label):
    """Draw one series of (tourists, excursionists) points on ``ax``.

    Args:
        ax: Object exposing a matplotlib-style ``scatter`` method.
        sample: Iterable of 2-item pairs ``(tourists, excursionists)``.
            It is consumed in a single pass, so generators and other
            one-shot iterables are accepted as well as lists.
        color: Fill colour forwarded to ``ax.scatter``.
        label: Legend label forwarded to ``ax.scatter``.
    """
    tourists, excursionists = [], []
    # Single pass: walking ``sample`` once per axis would leave the second
    # axis empty whenever ``sample`` is a one-shot iterator.
    for t, e in sample:
        tourists.append(t)
        excursionists.append(e)
    ax.scatter(tourists, excursionists, color=color, edgecolors='black', label=label)

def show(sample, circles = None, label='presences', limit=10000):
    """Scatter-plot a presence sample, optionally overlaying red ellipses.

    Args:
        sample: Iterable of ``(tourists, excursionists)`` pairs to plot.
        circles: Optional iterable of 4-item sequences
            ``(x_center, x_radius, y_center, y_radius)``. Each one is drawn
            as an unfilled red ellipse centred at ``(x_center, y_center)``
            with width ``2 * x_radius`` and height ``2 * y_radius``.
        label: Legend label of the scattered series.
        limit: Upper bound applied to both axes; the lower bound is always 0.
            Defaults to 10000, the value that was previously hard-coded.
    """
    _, ax = plt.subplots()
    _scatter(ax, sample, 'gainsboro', label)
    if circles is not None:
        for c in circles:
            center = (c[0], c[2])
            # Ellipse expects the full width/height, hence twice the radii.
            ax.add_patch(patches.Ellipse(center, 2*c[1], 2*c[3], edgecolor='red', fill=False))
    ax.set_xlim(left=0, right=limit)
    ax.set_ylim(bottom=0, top=limit)
    ax.set_xlabel("Tourists")
    ax.set_ylabel("Excursionist")
    ax.legend()
    plt.show()

def compare(sample1, sample2, labels=('first series','second series'), limit=10000):
    """Scatter-plot two presence samples on the same axes for comparison.

    Args:
        sample1: Iterable of ``(tourists, excursionists)`` pairs, drawn in red.
        sample2: Iterable of ``(tourists, excursionists)`` pairs, drawn in yellow.
        labels: Two legend labels, for ``sample1`` and ``sample2`` respectively.
        limit: Upper bound applied to both axes; the lower bound is always 0.
            Defaults to 10000, the value that was previously hard-coded.
    """
    _, ax = plt.subplots()
    # Draw order matters for overlap: the second series is painted on top.
    for series, color, series_label in ((sample1, 'red', labels[0]),
                                        (sample2, 'yellow', labels[1])):
        _scatter(ax, series, color, series_label)
    ax.set_xlim(left=0, right=limit)
    ax.set_ylim(bottom=0, top=limit)
    ax.set_xlabel("Tourists")
    ax.set_ylabel("Excursionist")
    ax.legend()
    plt.show()

In [None]:
# Load the per-season statistics table through the project helper. Other code
# in this notebook reads its 'freq_rel', 'mean_tourists', 'std_tourists',
# 'mean_excursionists' and 'std_excursionists' columns, one row per season.
# NOTE(review): `local=True` presumably selects a locally stored copy of the
# dataset — confirm against utils.get_dataframe.
season_stats = get_dataframe('season_stats_ot', local=True)
# Print the loaded table for inspection.
print(season_stats)

In [None]:
def sample_presence(size=20, seed=None):
    """Draw random (tourists, excursionists) presence pairs from ``season_stats``.

    For every sample a season is picked with probability proportional to its
    ``freq_rel`` entry. Tourists and excursionists are then drawn independently
    from normal distributions with that season's mean and standard deviation,
    truncated below at 0 and above at 10 standard deviations over the mean.

    Args:
        size: Number of pairs to draw.
        seed: Optional integer seed. When given, the season choice and the
            truncated-normal draws use private generators seeded with it, so
            the same seed reproduces the same sample. When ``None`` (default)
            the global ``random`` state and SciPy's default random state are
            used, exactly as before this parameter existed.

    Returns:
        List of ``size`` tuples ``(tourists, excursionists)``.
    """
    upper_sigmas = 10  # upper truncation point, in standard deviations above the mean

    if seed is None:
        chooser, np_rng = random, None
    else:
        chooser, np_rng = random.Random(seed), np.random.default_rng(seed)

    seasons = chooser.choices(season_stats.index.to_list(),
                              weights=season_stats['freq_rel'].to_list(),
                              k=size)

    def draw(mean, std):
        # truncnorm bounds are expressed in standard deviations from ``loc``,
        # so a lower bound of -mean/std places the cut exactly at 0.
        return stats.truncnorm.rvs(-mean / std, upper_sigmas,
                                   loc=mean, scale=std, random_state=np_rng)

    samples = []
    for s in seasons:
        t = draw(season_stats.loc[s, 'mean_tourists'],
                 season_stats.loc[s, 'std_tourists'])
        e = draw(season_stats.loc[s, 'mean_excursionists'],
                 season_stats.loc[s, 'std_excursionists'])
        samples.append((t, e))

    return samples

# Plot 100 sampled presences together with one ellipse per season, described
# as (mean, std) of tourists followed by (mean, std) of excursionists.
show(
    sample_presence(100),
    [
        tuple(season[column] for column in ('mean_tourists', 'std_tourists',
                                            'mean_excursionists', 'std_excursionists'))
        for _, season in season_stats.iterrows()
    ],
)