# UBI Intervention

Here I propose a constant UBI of 5 Impact Hours (IH) for every person who qualifies for TEC tokens. This is a participation reward intended to incentivise the long-tail members and to dampen the random noise in the Impact Hours distribution.

Additionally, I propose a Gaussian UBI centered at the rank (index) at which the geometric mean of the Impact Hours of the top 50% of hatchers falls.

In [9]:
import pandas as pd
import param as pm
import panel as pn
import holoviews as hv
import scipy.stats as ss
from scipy.stats.mstats import gmean
import numpy as np
import hvplot.pandas
import warnings
warnings.filterwarnings('ignore')

In [10]:
# Load the raw praise records, then total the impact hours received by each
# person and rank the recipients from most to fewest hours.
data = pd.read_csv('outputs/praise_data.csv')
data = (
    data.groupby('To')[['IH per Praise']]
    .sum()
    .sort_values('IH per Praise', ascending=False)
    .reset_index()
    .rename(columns={'IH per Praise': 'Impact Hours'})
)

In [15]:
class DistributionInterventions(pm.Parameterized):
    """Parameterized model of a constant-UBI intervention on an Impact Hours table.

    The instance keeps the table it was given (``original_data``) and a working
    copy (``data``) whose ``'Impact Hours'`` column has the constant UBI applied.
    The remaining methods slice that working copy from the top and summarise or
    plot the slice. The table is expected to be ranked from highest to lowest
    Impact Hours, since "top" is taken positionally.
    """

    # Fraction of the table, counted from the first row, kept by filtered_data().
    top_percent_hatchers = pm.Number(0.5, bounds=(0,1), step=0.01)
    # Impact Hours added to every row while apply_constant_ubi is on.
    ubi = pm.Number(5, bounds=(0, 100), step=1)
    # Shape parameter of the reference Pareto curve drawn by filtered_pareto().
    pareto_beta = pm.Number(0.4, bounds=(0,1), step=0.01, precedence=-1)
    # Switch for the constant UBI.
    apply_constant_ubi = pm.Boolean(True)

    def __init__(self, data, **params):
        """Store ``data`` untouched, work on a copy, and apply the initial UBI."""
        super().__init__(**params)
        self.original_data = data
        self.data = data.copy()
        self.add_ubi()

    @pm.depends('ubi', 'apply_constant_ubi', watch=True)
    def add_ubi(self):
        """Recompute the working 'Impact Hours' column from the original hours.

        Runs again whenever ``ubi`` or ``apply_constant_ubi`` changes.
        """
        hours = self.original_data['Impact Hours']
        self.data['Impact Hours'] = hours + self.ubi if self.apply_constant_ubi else hours

    def filtered_data(self):
        """Return the top ``top_percent_hatchers`` share of rows with their share of hours.

        The slice is copied before the '% of distribution' column is added, so
        the working table is never written to through a slice.
        """
        cutoff = round(len(self.data)*self.top_percent_hatchers)
        data = self.data.iloc[:cutoff].copy()
        data['% of distribution'] = data['Impact Hours'] / data['Impact Hours'].sum()
        return data

    def total_impact_hours(self):
        """Return a panel layout with the filtered total, its share of all hours, and a summary."""
        filtered_hours = self.filtered_data()['Impact Hours']
        filtered_total = filtered_hours.sum()
        # The label says "Percent", so the ratio is scaled to the 0-100 range.
        percent_of_total = 100 * filtered_total / self.data['Impact Hours'].sum()
        return pn.Column(
            pn.Row(
                pn.Column("Filtered Impact Hours:", round(filtered_total, 2)),
                pn.Column("Percent of total Impact Hours:", round(percent_of_total, 2)),
            ),
            "Summary:",
            filtered_hours.describe(),
        )

    def percent_line(self):
        """Return a red vertical line at the position where the filtered slice ends."""
        # Colour is a plot option, so it is set through opts rather than the constructor.
        return hv.VLine(len(self.data)*self.top_percent_hatchers).opts(hv.opts.VLine(color='red'))

    def distribution(self):
        """Overlay the augmented hours (area), the working hours (line) and the cut-off line."""
        area = self.augmented_data().hvplot.area(y='Impact Hours', title='Impact Hours Distribution')
        line = self.data.hvplot.line(y='Impact Hours', title='Impact Hours Distribution')
        return (area * line * self.percent_line()).opts(shared_axes=False)

    def cum_dist(self, val):
        """Return the fraction of augmented rows whose Impact Hours are below ``val``."""
        # The mean of a boolean mask is the proportion of True entries.
        return (self.augmented_data()['Impact Hours'] < val).mean()

    def filtered_pareto(self):
        """Plot the empirical cumulative distribution next to a Pareto CDF."""
        pct_values = np.arange(self.filtered_data()['Impact Hours'].min(), self.augmented_data()['Impact Hours'].max())
        cum_dist_values = [self.cum_dist(p) for p in pct_values]

        # NOTE(review): the Pareto CDF is evaluated at 0, 1, 2, ... (positions),
        # not at the hour values in pct_values - confirm this is intended.
        pareto = ss.pareto(self.pareto_beta).cdf(np.arange(len(pct_values)))

        distributions = pd.DataFrame({
            'IH Cumulative Distribution': cum_dist_values,
            f'Pareto Distribution beta={self.pareto_beta}': pareto,
        })

        return distributions.hvplot.line().opts(hv.opts.VLine(color='red')).opts(shared_axes=False)

    def augmented_data(self):
        """Return the data after all interventions; here that is just the filtered slice."""
        return self.filtered_data()

    def resources_percentage(self, p):
        """Return the fraction (0-1) of hours held by rows strictly above the ``p``-th percentile."""
        hours = self.augmented_data()['Impact Hours']
        threshold = np.percentile(hours, p)
        return hours[hours > threshold].sum() / hours.sum()

    def view_resources_percentage(self):
        """Return a pane describing how much of the hours the top 50/20/10/5/1 percent hold."""
        parts = []
        for p in [50,80,90,95,99]:
            parts.append("The top {} percent of the population\n".format(100-p))
            # resources_percentage() is a fraction; the sentence reports a percentage.
            parts.append("received {:0.2f} percent of the Impact Hours \n \n".format(100 * self.resources_percentage(p)))

        return pn.Pane("".join(parts))

    def gini_coefficient(self):
        """Return the Gini coefficient of the augmented Impact Hours.

        Computed as the sum of absolute pairwise differences divided by
        ``2 * n**2 * mean``.
        """
        x = self.augmented_data()['Impact Hours'].values
        n = len(x)
        # Broadcasting builds every pairwise difference in one array operation.
        sum_abs_diffs = np.abs(x[:, None] - x).sum()
        return sum_abs_diffs / (2*n*n*np.mean(x))

    def view_data(self):
        """Return the first ten rows of the augmented data."""
        return self.augmented_data().head(10)
    
    

class GaussianIntervention(DistributionInterventions):
    """Adds a Gaussian-shaped UBI on top of the constant UBI of the parent class.

    The bump is a function of row position within the filtered slice. It peaks
    at ``gubi`` hours and is centred on the number of rows whose hours exceed
    the geometric mean of the filtered hours.
    """

    # Multiplies the squared distance from the centre, so it behaves as the
    # inverse of a standard deviation: larger values give a narrower bump.
    gubi_height = pm.Number(0.02, bounds=(0,0.05), step=0.01, doc="Inverse width (1 / standard deviation) of the Gaussian UBI")
    apply_constant_ubi = pm.Boolean(True)
    # Switch for the Gaussian UBI.
    apply_gaussian_ubi = pm.Boolean(True)
    ubi = pm.Number(15, bounds=(0, 100), step=1)
    # Peak height of the Gaussian UBI, in Impact Hours.
    gubi = pm.Number(35, bounds=(0, 100), step=1)

    def gaussian_function(self, x):
        """Return the Gaussian UBI at position(s) ``x``.

        Evaluates ``gubi * exp(-(x - centre)**2 / 2 * gubi_height**2)``; the
        grouping is written out because ``/ 2 * h**2`` multiplies by ``h**2``.
        """
        geometric_mean = gmean(self.filtered_data()['Impact Hours'])
        # Number of rows above the geometric mean, used as the bump's centre position.
        centre = len(self.data[self.data['Impact Hours'] > geometric_mean])
        return self.gubi * np.exp(-((x - centre)**2 / 2) * self.gubi_height**2)

    def intervention(self):
        """Return a frame with positions ``x`` and Gaussian UBI ``y``, one row per filtered row."""
        n = len(self.filtered_data())
        # NOTE(review): linspace(0, n, n) spaces points n/(n-1) apart, not at
        # integer positions; kept as-is so computed values do not change.
        xs = np.linspace(0, n, n)
        return pd.DataFrame({'x': xs, 'y': self.gaussian_function(xs)})

    def view_intervention(self):
        """Plot each row's share of the augmented distribution as an area with a line on top.

        Reads this instance's data, so the plot works for any instance rather
        than only for a notebook-level variable.
        """
        augmented = self.augmented_data()
        area = augmented.hvplot.area(title='Impact Hours Distribution', y='% of distribution')
        line = augmented.hvplot.line(line_width=10, title='Impact Hours Distribution', y='% of distribution')
        return area * line

    def augmented_data(self):
        """Return the filtered hours, plus the Gaussian UBI when enabled, with each row's share."""
        hours = self.filtered_data()['Impact Hours']
        if self.apply_gaussian_ubi:
            # Positional add: the intervention has one row per filtered row, so
            # the result does not depend on how the input table is indexed.
            hours = hours + self.intervention()['y'].to_numpy()
        data = hours.to_frame('Impact Hours')
        data['% of distribution'] = data['Impact Hours'] / data['Impact Hours'].sum()
        return data

    def ubi_info(self):
        """Return a one-row frame with the Gaussian, constant and total UBI hours handed out."""
        gubi = round(self.intervention()['y'].sum(),0) if self.apply_gaussian_ubi else 0
        ubi = round(len(self.filtered_data())*self.ubi,0) if self.apply_constant_ubi else 0
        return pd.DataFrame({
            'Gaussian UBI Hours': gubi,
            'Constant UBI Hours': ubi,
            'Total UBI Hours': ubi+gubi,
        },index=['value'])
        

In [16]:
# Build the intervention model from the aggregated Impact Hours table.
gaus = GaussianIntervention(data)

# Top row: parameter widgets beside the distribution plot.
controls_and_plot = pn.Row(gaus, gaus.view_intervention)
# Bottom row: data preview, Gini coefficient and UBI totals.
tables = pn.Row(
    gaus.view_data,
    pn.Column("GINI Coefficient of filtered data", gaus.gini_coefficient),
    gaus.ubi_info,
)
pn.Column(controls_and_plot, tables)

In [17]:
# Build the export table: original hours, augmented hours, and the share of
# the total that each of them represents.
df = gaus.data.copy()

# Evaluate the intervention once; every call recomputes the whole slice.
augmented = gaus.augmented_data()

# Every row starts from the hours in gaus.data. Rows covered by the
# intervention are then overwritten with their augmented value. Assigning by
# the augmented index avoids a label slice `.loc[:n]`, which includes its end
# point and so reaches one row past the slice.
df['Augmented Impact Hours'] = df['Impact Hours']
df.loc[augmented.index, 'Augmented Impact Hours'] = augmented['Impact Hours']

# Restore the un-augmented hours for the side-by-side comparison.
df['Impact Hours'] = gaus.original_data['Impact Hours']
df['% of distribution'] = df['Impact Hours'] / df['Impact Hours'].sum()
df['Augmented % of distribution'] = df['Augmented Impact Hours'] / df['Augmented Impact Hours'].sum()

df

Unnamed: 0,To,Impact Hours,Augmented Impact Hours,% of distribution,Augmented % of distribution
0,zeptimusQ,677.772308,701.893022,0.058546,0.032011
1,Santigs67,600.889988,625.314411,0.051905,0.028519
2,ygganderson,421.106805,445.841119,0.036375,0.020334
3,cranders71,413.032911,438.083247,0.035678,0.019980
4,sembrestels,371.431392,396.803818,0.032084,0.018097
...,...,...,...,...,...
410,banteg,0.000000,15.000000,0.000000,0.000684
411,jujusca,0.000000,15.000000,0.000000,0.000684
412,coopahtroopanew,0.000000,15.000000,0.000000,0.000684
413,guillaumepalayer,0.000000,15.000000,0.000000,0.000684


In [18]:
# Hours each person gains from the interventions (augmented minus original).
df['Augmentation'] = df['Augmented Impact Hours'].sub(df['Impact Hours'])

In [19]:
# Show the first 50 rows of the export table.
df.iloc[:50]

Unnamed: 0,To,Impact Hours,Augmented Impact Hours,% of distribution,Augmented % of distribution,Augmentation
0,zeptimusQ,677.772308,701.893022,0.058546,0.032011,24.120714
1,Santigs67,600.889988,625.314411,0.051905,0.028519,24.424424
2,ygganderson,421.106805,445.841119,0.036375,0.020334,24.734315
3,cranders71,413.032911,438.083247,0.035678,0.01998,25.050335
4,sembrestels,371.431392,396.803818,0.032084,0.018097,25.372425
5,solsista,304.235494,329.936009,0.02628,0.015047,25.700515
6,akrtws,288.212747,314.247273,0.024896,0.014332,26.034526
7,iviangita,261.809717,288.184084,0.022615,0.013143,26.374367
8,OOPS FIX ME,252.406538,279.126478,0.021803,0.01273,26.719941
9,mateodaza,234.681516,261.752653,0.020272,0.011938,27.071137


In [20]:
# Write the augmentation table to disk without the row index.
df.to_csv(path_or_buf="outputs/YGG-UBI-Augmentation.csv", index=False)