In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import analysis_functions as af

In [None]:
# Input dataset. The commented-out alternative is presumably the larger
# simulation run (a disabled plot further down is titled "big dataset").
filename="meritocracy_simulation.csv"
#filename="meritocracy_simulation_big.csv"
# Later cells read from `df` and add derived columns to it.
df=pd.read_csv(filename)

In [None]:
# Display the frame as loaded (pandas truncates long frames in its notebook repr).
df

# Additional Columns

In [None]:
# The return value is discarded, so this call matters only through its effect on `df`.
# NOTE(review): presumably adds the `static` column that a later cell uses as a
# filter, with 40 as the split point — confirm in analysis_functions.
af.split_dataset(df,40)

In [None]:
# Create the filter columns for the "personality" variable; later cells read
# `normal_filter`, `overachiever_filter` and `charismatic_idiot_filter`.
af.make_filters_from_nominal_variable(df, "personality")
# Complement of the "normal" filter: every row that is not flagged as normal.
df["not_normal_filter"] = ~df["normal_filter"]

In [None]:
# Difference between promotion time and entry time, stored as a new column.
df["time_in_company"] = df["promotion_time"].sub(df["entry_time"])

In [None]:
# Called only for its effect on `df` (the return value is discarded).
# NOTE(review): presumably adds a "time since last win" column — confirm the
# column name in analysis_functions.
af.create_time_since_last_win_col(df)

In [None]:
# Called only for its effect on `df` (the return value is discarded).
# NOTE(review): presumably adds the `single_point_per_group_filter` column that
# the group-transition cell further down indexes with — confirm.
af.create_single_point_per_group_filter(df)

In [None]:
# NOTE(review): presumably encodes each group's composition as one string in the
# `group_composition_n-o-c` column read by later cells; "n-o-c" presumably stands
# for normal-overachiever-charismatic idiot — confirm.
af.group_shares_to_unique_string(df) # "n-o-c"

In [None]:
# Same helper as used for "personality", now applied to the composition string.
# NOTE(review): presumably adds one `<value>_filter` column per distinct
# composition — confirm the naming in analysis_functions.
af.make_filters_from_nominal_variable(df,"group_composition_n-o-c")

In [None]:
# Group types for groups of size 5, together with one probability per type.
# NOTE(review): [0.5,0.25,0.25] are presumably the population shares in n-o-c
# order — confirm against the simulation settings.
all_grouptypes,initial_prob=af.initial_probabilities([0.5,0.25,0.25],group_size=5)
print(all_grouptypes)
# Last expression of the cell: shown as the cell output.
len(all_grouptypes)

In [None]:
# Reorder both arrays by ascending probability. The same permutation is applied
# to each so that every group type stays paired with its probability.
grouptype_ordering_index = np.argsort(initial_prob)
all_grouptypes = np.array(all_grouptypes)[grouptype_ordering_index]
initial_prob = initial_prob[grouptype_ordering_index]

In [None]:
# not important, just to notice that not all grouptypes occur in the dataset
# (compare this count with len(all_grouptypes)).
# np.unique returns the distinct values in sorted order.
unique_grouptypes=np.unique(df["group_composition_n-o-c"])
len(unique_grouptypes)

In [None]:
# Compare the group compositions observed at time step 0 with the probabilities
# expected from the population distribution.
group_comp_hist=af.group_composition_occurence_frequency(df,all_grouptypes,filter_bool=df["time_step"]==0)
# Normalize so the observed bars sum to 1 and share a scale with initial_prob.
group_comp_hist=np.array(group_comp_hist)/np.sum(group_comp_hist)

# One x position per group type; the two bar series are shifted by +/-0.2 so
# they sit side by side.
bar_positions=np.arange(len(initial_prob))
plt.bar(bar_positions+0.2,group_comp_hist,width=0.4,label="actually occurring distribution")
plt.bar(bar_positions-0.2,initial_prob,width=.4,label="probability due to population distr.")

plt.xticks(bar_positions,all_grouptypes,rotation=90)
plt.xlabel("group composition n-o-c")
plt.ylabel("probability")
plt.legend()
plt.show()

In [None]:
"""
group_comp_hist=af.group_composition_occurence_frequency(df,all_grouptypes,filter_bool=df["time_step"]==0)
group_comp_hist=np.array(group_comp_hist)/np.sum(group_comp_hist)

plt.bar(np.arange(len(initial_prob))+0.2,group_comp_hist,width=0.4,label="actually occuring distribution")
plt.bar(np.arange(len(initial_prob))-0.2,initial_prob,width=.4,label="probability due to population distr.")

plt.xticks(np.arange(len(initial_prob)),all_grouptypes,rotation=90)
plt.xlabel("goup composition n-o-c")
plt.ylabel("probability")
plt.legend()
plt.title("big dataset")
plt.show()
"""

In [None]:
# Compare the group compositions observed in the rows selected by the `static`
# column with the probabilities expected from the population distribution.
group_comp_hist=af.group_composition_occurence_frequency(df,all_grouptypes,filter_bool=df["static"])
# Normalize so the observed bars sum to 1 and share a scale with initial_prob.
group_comp_hist=np.array(group_comp_hist)/np.sum(group_comp_hist)

# One x position per group type; the two bar series are shifted by +/-0.2 so
# they sit side by side.
bar_positions=np.arange(len(initial_prob))
plt.bar(bar_positions+0.2,group_comp_hist,width=0.4,label="actually occurring distribution")
plt.bar(bar_positions-0.2,initial_prob,width=.4,label="probability due to population distr.")

plt.xticks(bar_positions,all_grouptypes,rotation=90)
plt.xlabel("group composition n-o-c")
plt.ylabel("probability")
plt.legend()
plt.show()

In [None]:
# indiv_groups: one sequence per group, covering its group_composition transitions.

# NOTE(review): the helper presumably maps each row's composition string to its
# position in all_grouptypes; group_codes is used later as the text labels.
unique_indices,group_codes=af.group_composition_code_to_index(df["group_composition_n-o-c"],all_grouptypes)

# For each group id 0..199, keep the indices of the rows that belong to that
# group and pass the single-point-per-group filter.
single_point=df["single_point_per_group_filter"]
indiv_groups=[
    unique_indices[single_point*(df["group_id"]==group_id)]
    for group_id in range(200)
]

In [None]:
# Quick shape check of the per-group sequences.
# NOTE(review): np.shape first converts the list to an array; if the sequences
# differ in length, recent NumPy versions raise ValueError here — confirm they
# are equal-length, or use len(indiv_groups) instead.
np.shape(indiv_groups)

In [None]:
# Called only for its effect on `df` (the return value is discarded).
# NOTE(review): presumably adds per-group statistics columns — confirm which
# ones in analysis_functions.
af.intra_group_stats(df)

In [None]:
# Called only for its effect on `df` (the return value is discarded).
# NOTE(review): presumably adds the change in group_score following a rehiring
# event — confirm the column name and definition.
af.group_score_change_after_rehiring(df)

In [None]:
# Called only for its effect on `df` (the return value is discarded).
# NOTE(review): presumably the per-member counterpart of the group_score change
# after rehiring — confirm the column name and definition.
af.performance_change_after_rehiring(df)

In [None]:
# Called only for its effect on `df` (the return value is discarded).
# NOTE(review): presumably marks where group_score rose through learning rather
# than rehiring — confirm the definition in analysis_functions.
af.create_group_score_increased_by_learning(df)

In [None]:
# Called only for its effect on `df` (the return value is discarded).
# NOTE(review): presumably marks where performance rose through learning —
# confirm the definition in analysis_functions.
af.create_performance_increased_by_learning(df)

In [None]:
# Called only for its effect on `df` (the return value is discarded).
# NOTE(review): presumably adds the personality of each group's newest member
# and of the last promoted member — confirm the column names.
af.newest_group_member_and_last_promoted_personality(df)

In [None]:
# List the column labels now on `df` (DataFrame.keys() returns the columns),
# to see what the preceding helper calls added.
df.keys()

In [None]:
# Round every float64 column to 4 decimals before the frame is written out;
# columns of any other dtype are left untouched.
for column_name in df.columns:
    column = df[column_name]
    if column.dtype != np.float64:
        continue
    df[column_name] = column.round(4)

In [None]:
# save as csv
# index=False leaves the row index out of the file; missing values are written
# as the literal string "NA".
df.to_csv("meritocracy_detailed.csv",index=False,na_rep="NA")

In [None]:
# save as h5
# Key under which the frame is stored inside the HDF5 file.
title = 'dataset'
# zlib at compression level 9; the with-block closes the store on exit.
with pd.HDFStore('meritocracy_detailed.h5', complevel=9, complib='zlib') as store:
    store[title] = df

In [None]:
# load h5-file
# Read back the file written by the "save as h5" cell to check the round trip.
# The original read 'test.h5', which this notebook never writes.
# mode='r' opens the store read-only: a missing file raises immediately instead
# of being created empty (the default mode 'a' creates it and then fails with
# KeyError on the lookup). Compression options only matter when writing.
title = 'dataset'
with pd.HDFStore('meritocracy_detailed.h5', mode='r') as store:
    data_retrieved = store[title]
data_retrieved

# Plots

In [None]:
# `group_score` over the whole dataset (no filter_bool passed).
# NOTE(review): judging by its name, the helper draws descriptive statistics
# together with the distribution — confirm in analysis_functions.
fig,ax=af.plot_descr_and_distr(df,"group_score",aspect=1.5)

In [None]:
# `performance` over the whole dataset (no filter_bool passed).
fig,ax=af.plot_descr_and_distr(df,"performance",aspect=1.5)

In [None]:
# `performance` with `overachiever_filter` passed as filter_bool; the figure
# title names the subgroup.
fig,ax=af.plot_descr_and_distr(df,"performance",filter_bool=df["overachiever_filter"],aspect=1.5)
fig.suptitle("overachiever")
plt.show()

In [None]:
# `performance` with `charismatic_idiot_filter` passed as filter_bool; the
# figure title names the subgroup.
fig,ax=af.plot_descr_and_distr(df,"performance",filter_bool=df["charismatic_idiot_filter"],aspect=1.5)
fig.suptitle("charismatic idiot")
plt.show()

In [None]:
# `performance` with `normal_filter` passed as filter_bool; the figure title
# names the subgroup.
fig,ax=af.plot_descr_and_distr(df,"performance",filter_bool=df["normal_filter"],aspect=1.5)
fig.suptitle("normal")
plt.show()

In [None]:
# `group_share_normal` over the whole dataset.
# NOTE(review): res=6 presumably matches the six possible share values in a
# group of 5 (0/5 .. 5/5) — confirm what `res` controls.
fig,ax=af.plot_descr_and_distr(df,"group_share_normal",res=6,aspect=12)

In [None]:
# `group_share_overachiever` over the whole dataset.
# NOTE(review): res=6 presumably matches the six possible share values in a
# group of 5 (0/5 .. 5/5) — confirm what `res` controls.
fig,ax=af.plot_descr_and_distr(df,"group_share_overachiever",res=6,aspect=12)

In [None]:
# `group_share_charismatic_idiot` over the whole dataset.
# NOTE(review): res=6 presumably matches the six possible share values in a
# group of 5 (0/5 .. 5/5) — confirm what `res` controls.
fig,ax=af.plot_descr_and_distr(df,"group_share_charismatic_idiot",res=6,aspect=12)

In [None]:
# `personality_effect` with `not_normal_filter` (the complement of
# `normal_filter`) passed as filter_bool.
fig,ax=af.plot_descr_and_distr(df,"personality_effect",filter_bool=df["not_normal_filter"],aspect=2)
fig.suptitle("not normal")
plt.show()

In [None]:
# `personality_effect` with `overachiever_filter` passed as filter_bool, using
# res=20.
fig,ax=af.plot_descr_and_distr(df,"personality_effect",res=20,aspect=2,filter_bool=df["overachiever_filter"])
fig.suptitle("overachiever")
plt.show()

In [None]:
# `personality_effect` with `charismatic_idiot_filter` passed as filter_bool,
# using res=20.
fig,ax=af.plot_descr_and_distr(df,"personality_effect",res=20,aspect=2,filter_bool=df["charismatic_idiot_filter"])
fig.suptitle("charismatic idiot")
plt.show()

In [None]:
# Raw scatter of every performance value over time. The very low alpha makes
# dense regions show up darker than sparse ones.
plt.scatter(df["time_step"],df["performance"],alpha=0.01)
# Label the axes and end with plt.show(), as the other pyplot cells do, so the
# figure stands alone and the artist repr is not echoed as cell output.
plt.xlabel("time_step")
plt.ylabel("performance")
plt.show()

In [None]:
# performance over time_step, colored by group_share_normal ("gnuplot" colormap).
# (A leftover fragment of an earlier variant — colored by
# group_share_charismatic_idiot with alpha=0.4 — used to trail this line.)
fig,ax=af.color_scatterplot(df,"time_step","performance","group_share_normal","gnuplot",alpha=0.3,figsize=[12,7])

In [None]:
# performance over time_step, colored by group_share_overachiever.
# This is the only one of the three share-colored plots that passes
# clims=[0,1]. NOTE(review): presumably this pins the color scale to the full
# share range; consider passing it to the other two so colors are comparable.
# (A leftover fragment of an earlier variant — colored by
# group_share_charismatic_idiot with alpha=0.4 — used to trail this line.)
fig,ax=af.color_scatterplot(df,"time_step","performance","group_share_overachiever","gnuplot",alpha=0.3,clims=[0,1],figsize=[12,7])

In [None]:
# performance over time_step, colored by group_share_charismatic_idiot
# ("gnuplot" colormap).
fig,ax=af.color_scatterplot(df,"time_step","performance","group_share_charismatic_idiot","gnuplot",alpha=0.3,figsize=[12,7])

In [None]:
# group_score over time_step, colored by group_share_normal. Uses a different
# colormap ("gist_rainbow") and a lower alpha than the performance plots.
fig,ax=af.color_scatterplot(df,"time_step","group_score","group_share_normal","gist_rainbow",alpha=0.1,figsize=[12,7])

In [None]:
# Note: each group has 5 performance values at each time step.

In [None]:
# performance against group_score for all rows, colored by time_step.
fig,ax=af.color_scatterplot(df,"group_score","performance","time_step","gnuplot",alpha=0.3,figsize=[12,7])

In [None]:
# performance against group_score, colored by time_step, with the
# `group_winning` column passed as filter_bool.
fig,ax=af.color_scatterplot(df,"group_score","performance","time_step","gnuplot",filter_bool=df["group_winning"],alpha=0.3,figsize=[12,7])
fig.suptitle("winning groups")
plt.show()

In [None]:
# performance against group_score, colored by time_step, meant to show promoted
# trainees only.
# NOTE(review): filter_bool receives `promotion_time`, which is used as a number
# elsewhere in this notebook (subtracted from entry_time, plotted on an axis),
# not as a boolean mask. If the intent is "rows that have a promotion time",
# something like df["promotion_time"].notna() is probably wanted — confirm how
# color_scatterplot applies filter_bool before changing it.
fig,ax=af.color_scatterplot(df,"group_score","performance","time_step","gnuplot",filter_bool=df["promotion_time"],alpha=0.3,figsize=[12,7])
fig.suptitle("promoted trainees (from winning groups)")
plt.show()

In [None]:
# performance against group_score, colored by personality_effect, meant to show
# promoted trainees only.
# NOTE(review): filter_bool receives `promotion_time`, which is used as a number
# elsewhere in this notebook (subtracted from entry_time, plotted on an axis),
# not as a boolean mask. If the intent is "rows that have a promotion time",
# something like df["promotion_time"].notna() is probably wanted — confirm how
# color_scatterplot applies filter_bool before changing it.
fig,ax=af.color_scatterplot(df,"group_score","performance","personality_effect","gnuplot",filter_bool=df["promotion_time"],alpha=0.3,figsize=[12,7])
fig.suptitle("promoted trainees (from winning groups)")
plt.show()

In [None]:
# performance against promotion_time, colored by personality_effect. No
# filter_bool is passed.
# NOTE(review): rows without a promotion_time presumably drop out because they
# have no x value — confirm.
fig,ax=af.color_scatterplot(df,"promotion_time","performance","personality_effect","gnuplot",alpha=0.3,figsize=[12,7])
fig.suptitle("performance at time of promotion (from winning groups)")
plt.show()

In [None]:
# promotion_time against group_score, colored by personality_effect. No
# filter_bool is passed.
# NOTE(review): the title says "promoted trainees", so the restriction
# presumably comes from promotion_time being missing for everyone else — confirm.
fig,ax=af.color_scatterplot(df,"group_score","promotion_time","personality_effect","gnuplot",alpha=0.3,figsize=[12,7])
fig.suptitle("promoted trainees (from winning groups)")
plt.show()

In [None]:


# time_in_company (promotion_time minus entry_time) against performance,
# colored by personality_effect. No filter_bool is passed.
fig,ax=af.color_scatterplot(df,"performance","time_in_company","personality_effect","gnuplot",alpha=0.3,figsize=[12,7])
fig.suptitle("promoted trainees (from winning groups)")
plt.show()

In [None]:
# Mask for the rows whose composition equals the last entry of
# unique_grouptypes (np.unique sorts, so this is the last one in sort order).
grouptype=df["group_composition_n-o-c"]==unique_grouptypes[-1]
# Descriptive statistics of performance with that mask passed as filter_bool.
# NOTE(review): confirm what the two returned arrays contain.
order_stats,normal_stats=af.descriptive_stats(df,"performance",filter_bool=grouptype)

In [None]:
# First column of normal_stats, plotted against its row index.
# NOTE(review): presumably the mean per time step — confirm what
# descriptive_stats puts in column 0.
plt.plot(normal_stats[:,0])

In [None]:
# Distribution of performance at each time step for the selected group type.
# NOTE(review): confirm the axis order of `result` and the meaning of `cbins`.
result,cbins=af.distribution_at_timesteps(df,"performance",filter_bool=grouptype)
# Transpose, then reverse the rows, so the image is flipped vertically relative
# to the plain transpose.
plt.imshow(result.T[::-1,:])

In [None]:
# Overlay every group's composition trajectory (datachaosrepr_linear, first
# argument 0.02), then mark and label the margin points.
N=len(all_grouptypes)
# Iterate over the sequences themselves rather than a hard-coded range(200), so
# this cell cannot drift out of sync with the cell that builds indiv_groups.
for group_sequence in indiv_groups:
    testx,testy,margin_x,margin_y=af.datachaosrepr_linear(0.02,N,group_sequence)
    plt.plot(testx,testy,alpha=0.1,c='b')
# margin_x/margin_y are taken from the last call.
# NOTE(review): presumably they depend only on N and are the same for every group.
plt.scatter(margin_x,margin_y,c='r')
# Push each label slightly outward from its margin point and nudge it down.
text_shift=1.15
yoffset=-0.05
for i in range(len(margin_x)):
    plt.text(margin_x[i]*text_shift,margin_y[i]*text_shift+yoffset,group_codes[i],ha="center")
size=1.3
plt.xlim([-size,size])
plt.ylim([-size,size])
plt.axis("off")
plt.show()

In [None]:
# Same overlay using af.datachaosrepr with first argument 0.5. N comes from the
# preceding cell (N=len(all_grouptypes)).
# Iterate over the sequences themselves rather than a hard-coded range(200), so
# this cell cannot drift out of sync with the cell that builds indiv_groups.
for group_sequence in indiv_groups:
    testx,testy,margin_x,margin_y=af.datachaosrepr(0.5,N,group_sequence)
    plt.plot(testx,testy,alpha=0.02,c='b')
# margin_x/margin_y are taken from the last call.
# NOTE(review): presumably they depend only on N and are the same for every group.
plt.scatter(margin_x,margin_y,c='r')
# Push each label slightly outward from its margin point and nudge it down.
text_shift=1.15
yoffset=-0.05
for i in range(len(margin_x)):
    plt.text(margin_x[i]*text_shift,margin_y[i]*text_shift+yoffset,group_codes[i],ha="center")
size=1.3
plt.xlim([-size,size])
plt.ylim([-size,size])
plt.axis("off")
plt.show()

In [None]:
# Linear variant (first argument .125) on an 8x8 figure. Each sequence is first
# indexed with the mask from af.filter_consecutive_repetitions.
# NOTE(review): the mask presumably drops consecutive repeats so only actual
# composition changes are drawn — confirm.
plt.figure(figsize=[8,8])
# Iterate over the sequences themselves rather than a hard-coded range(200), so
# this cell cannot drift out of sync with the cell that builds indiv_groups.
for group_sequence in indiv_groups:
    rep_filter=af.filter_consecutive_repetitions(group_sequence)
    testx,testy,margin_x,margin_y=af.datachaosrepr_linear(.125,N,group_sequence[rep_filter])
    plt.plot(testx,testy,'-',alpha=.05,c='b')
# margin_x/margin_y are taken from the last call.
plt.scatter(margin_x,margin_y,c='r')
# Push each label slightly outward from its margin point and nudge it down.
text_shift=1.15
yoffset=-0.05
for i in range(len(margin_x)):
    plt.text(margin_x[i]*text_shift,margin_y[i]*text_shift+yoffset,group_codes[i],ha="center")
size=1.3
plt.xlim([-size,size])
plt.ylim([-size,size])
plt.axis("off")
plt.show()

In [None]:
# af.datachaosrepr variant (first argument 1). Each sequence is first indexed
# with the mask from af.filter_consecutive_repetitions.
# Iterate over the sequences themselves rather than a hard-coded range(200), so
# this cell cannot drift out of sync with the cell that builds indiv_groups.
for group_sequence in indiv_groups:
    rep_filter=af.filter_consecutive_repetitions(group_sequence)
    testx,testy,margin_x,margin_y=af.datachaosrepr(1,N,group_sequence[rep_filter])
    plt.plot(testx,testy,alpha=0.01,c='b')
# margin_x/margin_y are taken from the last call.
plt.scatter(margin_x,margin_y,c='r')
# Push each label slightly outward from its margin point and nudge it down.
text_shift=1.15
yoffset=-0.05
for i in range(len(margin_x)):
    plt.text(margin_x[i]*text_shift,margin_y[i]*text_shift+yoffset,group_codes[i],ha="center")
size=1.3
plt.xlim([-size,size])
plt.ylim([-size,size])
plt.axis("off")
plt.show()