In [None]:
import pandas as pd
from tqdm.notebook import tqdm

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import preprocessing
from scipy.ndimage import gaussian_filter

def heat_map(dataG, transformation='minmax',col_ini=0,col_end=None,gaussian=True):
    """Draw a heatmap of a column window of ``dataG`` with rows sorted by their mean.

    Parameters
    ----------
    dataG : pandas.DataFrame
        Numeric frame; every row becomes a heatmap row, every selected column
        a heatmap column.
    transformation : str, default 'minmax'
        Row-wise rescaling applied after the column window is selected:

        * ``'standardization'`` - ``StandardScaler`` applied per row
        * ``'minmax'``          - ``MinMaxScaler`` applied per row
        * ``'submean'``         - subtract the row *median* (despite the name)
        * ``'divsum'``          - divide each row by its sum
        * anything else         - relative change with respect to the first
          selected column, ``(x - x0) / x0``
    col_ini, col_end : int or None
        Positional column bounds handed to ``iloc``; ``col_end`` is exclusive
        and ``None`` means "up to the last column".
    gaussian : bool, default True
        If true, smooth the transformed matrix with a 2-D Gaussian filter
        (sigma=0.9) before plotting. The filter acts across rows as well as
        across columns.

    Returns
    -------
    matplotlib.axes.Axes
        The axes returned by ``seaborn.heatmap``.
    """
    # Reuse (and wipe) the current pyplot figure instead of opening a new one.
    plt.clf()

    # A single call: two consecutive set_theme() calls make the second one
    # reset the style to its default, silently discarding "whitegrid".
    sns.set_theme(style="whitegrid", rc={'figure.figsize':(75,50)})

    # A slice end of None already means "to the end", so no branching is needed.
    dataft1 = dataG.iloc[:, col_ini:col_end]

    # Order rows by ascending mean so similar intensity levels end up adjacent.
    dataft1 = dataft1.reindex(dataft1.mean(axis=1).sort_values().index, axis=0)

    if transformation == 'standardization':
        # The scalers work per column, hence the transpose to scale each row.
        dataft1 = pd.DataFrame(preprocessing.StandardScaler().fit_transform(dataft1.T).T, columns=dataft1.columns, index=dataft1.index)
    elif transformation == 'minmax':
        dataft1 = pd.DataFrame(preprocessing.MinMaxScaler().fit_transform(dataft1.T).T, index=dataft1.index, columns=dataft1.columns)
    elif transformation == 'submean':
        dataft1 = dataft1.sub(dataft1.median(1), axis=0)
    elif transformation == 'divsum':
        dataft1 = dataft1.div(dataft1.sum(axis=1), axis=0)
    else:
        # Baseline is the first *selected* column, taken by position: a label
        # lookup of ``0`` fails for non-integer column labels and whenever the
        # window does not start at column 0.
        baseline = dataft1.iloc[:, 0]
        dataft1 = dataft1.sub(baseline, axis=0).div(baseline, axis=0)

    if gaussian:
        # gaussian_filter returns a bare ndarray, so the labels are re-attached below.
        dataft1gg = gaussian_filter(dataft1, sigma=0.9)
    else:
        dataft1gg = dataft1

    # Keep one x label out of every 15 columns; the rest are blank.
    xticks = [x if  i % 15 == 0 else '' for i,x in enumerate(dataft1.columns.tolist())]
    yticks = dataft1.index.tolist()

    # Passing explicit label lists makes seaborn place one tick per column/row.
    # With the default "auto" labels seaborn may thin the ticks, and the
    # set_*ticklabels calls below would then fail on a length mismatch.
    g = sns.heatmap(dataft1gg,linewidths=0.25, linecolor='black',cbar=False,square=True,
                    cmap=sns.color_palette("rocket_r"),
                    xticklabels=xticks, yticklabels=yticks)

    print(dataft1.index)

    _ = g.set_yticklabels(yticks, rotation=0,fontsize = 20)
    _ = g.set_xticklabels(xticks,rotation=0,fontsize = 20)

    return g


In [None]:
import scipy
import numpy as np

def plot_heatmap(dataftD,col_ini,col_end,length=7):
    """Smooth, log-scale and plot a frame of per-column scores as a heatmap.

    Steps: missing values are replaced by 0, every row is smoothed with a
    ``length``-point moving average along the columns, the result is converted
    to log10, non-finite values are replaced by the column mean, and the frame
    is handed to ``heat_map`` with the 'minmax' transformation and Gaussian
    smoothing enabled.

    Parameters
    ----------
    dataftD : pandas.DataFrame
        One row per series to plot, one column per time step.
    col_ini, col_end : int or None
        Positional column window forwarded unchanged to ``heat_map``.
    length : int, default 7
        Width of the moving-average window.

    Returns
    -------
    The axes object returned by ``heat_map``.

    Raises
    ------
    ValueError
        If ``length`` is smaller than 1.
    """
    # Imported from its public location: the ``scipy.ndimage.filters``
    # namespace is deprecated.
    from scipy.ndimage import convolve1d

    if length < 1:
        raise ValueError("length must be a positive integer")

    filled = dataftD.fillna(0)

    # Uniform kernel == moving average; axis=1 filters every row along the
    # columns, which is what the former per-row loop did one series at a time.
    kernel = [1/length]*length
    smoothed = convolve1d(filled.to_numpy(), weights=kernel, axis=1)

    dataG = pd.DataFrame(smoothed, index=dataftD.index, columns=dataftD.columns)

    # Zeros (including the filled gaps) become -inf and negatives become NaN
    # under log10; both are expected here and repaired right below, so the
    # numpy warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        dataG = np.log10(dataG)

    # Replace every non-finite value by the mean of its column.
    dataG = dataG.replace([np.inf, -np.inf], np.nan)
    dataG = dataG.fillna(dataG.mean())

    gg = heat_map(dataG, transformation='minmax',col_ini=col_ini,col_end=col_end,gaussian=True)

    return gg

In [None]:
# Base directory for the input CSV files and for the figures written by the
# cells below. It is used as a plain string prefix inside f-strings, so it has
# to end with a path separator.
dir_path = './'

In [None]:
# Emotion labels analysed under each category. The four crisis phases are
# declared first because the combined 'crisis' entry is derived from them.
_crisis_preparedness = ['fear','sadness','nervousness','horror','neglect','aggression','anticipation','disappointment','communication','trust']
_crisis_response = ['sadness','suffering','nervousness','fear','shame','neglect','disgust','anger','health','disappointment']
_crisis_recovery = ['sadness','suffering','disgust','anger','nervousness','irritability','disappointment','fear','neglect','rage']
_crisis_mitigation = ['love','anticipation']

dict_categories = {
    # None means "no filtering": use every row of the data.
    'all': None,

    'anxiety': ['sadness','nervousness','fear','suffering','horror','disappointment','health','confusion','shame','anger'],
    'depression': ['sadness','suffering','shame','neglect','emotional','disgust','torment','nervousness','disappointment','pain'],
    'stress': ['sadness','nervousness','anger','suffering','fear','shame','torment','neglect','disgust','health'],

    # Union of the four phases, de-duplicated in first-seen order.
    # dict.fromkeys keeps insertion order, so the list is identical on every
    # run, whereas list(set(...)) changes order with string-hash randomisation.
    'crisis': list(dict.fromkeys(
        _crisis_preparedness + _crisis_response + _crisis_recovery + _crisis_mitigation
    )),
    'crisis-preparedness': _crisis_preparedness,
    'crisis-response': _crisis_response,
    'crisis-recovery': _crisis_recovery,
    'crisis-mitigation': _crisis_mitigation,
}


In [None]:
# Heatmaps for every category in dict_categories and every date window.
datapath = f'{dir_path}10-Tweets_em.csv'
dataftD = pd.read_csv(datapath,index_col=0)

cc = list(dataftD.columns)

# First and last day (both meant to be plotted) of every window.
date_windows = {'march-october':('2020-03-01','2020-10-31'),
                'march-june':('2020-03-01','2020-06-30'),
                'march':('2020-03-01','2020-03-31'),
                'april':('2020-04-01','2020-04-30'),
                'may':('2020-05-01','2020-05-31'),
                'june':('2020-06-01','2020-06-30'),
                'july':('2020-07-01','2020-07-31'),
                'august':('2020-08-01','2020-08-31'),
                'september':('2020-09-01','2020-09-30'),
                'october':('2020-10-01','2020-10-31'),
                'march-may':('2020-03-01','2020-05-31'),
                'june-august':('2020-06-01','2020-08-31'),
                'september-october':('2020-09-01','2020-10-31')}

# The window is consumed as a positional slice whose end is exclusive, so the
# end index is shifted by one to keep the last day of each window in the plot.
confs = {name: (cc.index(first_day), cc.index(last_day) + 1)
         for name, (first_day, last_day) in date_windows.items()}


for which in tqdm(dict_categories.keys()):

    categories = dict_categories[which]
    # None ('all') means no filtering. Without this branch df_which was left
    # undefined (or stale from an earlier run) for that category.
    df_which = dataftD if categories is None else dataftD.loc[categories]

    for date, (col_ini, col_end) in confs.items():
        print(which,date)
        gg = plot_heatmap(df_which,col_ini,col_end)
        gg.figure.savefig(f'{dir_path}heatmap_{which}_{date}.png',dpi=600,bbox_inches='tight')

In [None]:
# Heatmaps of the emotions table, one figure per date window.
datapath = f'{dir_path}10-Tweets_emotions.csv'
dataftD = pd.read_csv(datapath,index_col=0)

cc = list(dataftD.columns)

# First and last day (both meant to be plotted) of every window.
date_windows = {'march-october':('2020-03-01','2020-10-31'),
                'march-june':('2020-03-01','2020-06-30'),
                'march':('2020-03-01','2020-03-31'),
                'april':('2020-04-01','2020-04-30'),
                'may':('2020-05-01','2020-05-31'),
                'june':('2020-06-01','2020-06-30'),
                'july':('2020-07-01','2020-07-31'),
                'august':('2020-08-01','2020-08-31'),
                'september':('2020-09-01','2020-09-30'),
                'october':('2020-10-01','2020-10-31'),
                'march-may':('2020-03-01','2020-05-31'),
                'june-august':('2020-06-01','2020-08-31'),
                'september-october':('2020-09-01','2020-10-31')}

# The window is consumed as a positional slice whose end is exclusive, so the
# end index is shifted by one to keep the last day of each window in the plot.
confs = {name: (cc.index(first_day), cc.index(last_day) + 1)
         for name, (first_day, last_day) in date_windows.items()}

for date, (col_ini, col_end) in confs.items():
    # Fixed label: this cell does not iterate over categories, so it must not
    # rely on a `which` variable left behind by another cell's loop.
    print('emotions',date)
    gg = plot_heatmap(dataftD,col_ini,col_end)
    gg.figure.savefig(f'{dir_path}heatmap_emotions_{date}.png',dpi=600,bbox_inches='tight')

#### Boxplot creation

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
def get_long_dataset(dataftD):
    """Turn a wide frame into a long table with one record per (row, column) cell.

    Each output record holds:
      * ``category`` - the index label of the source row
      * ``day``      - the source column label
      * ``month``    - the column label with everything from its last '-' onwards removed
      * ``score``    - the cell value

    Records are emitted row by row, columns in their original order.
    """
    day_labels = list(dataftD.columns)

    records = [
        {
            'category': category,
            'day': day,
            # rpartition keeps what precedes the last '-' ('' if there is none)
            'month': day.rpartition('-')[0],
            'score': scores[day],
        }
        for category, scores in dataftD.iterrows()
        for day in day_labels
    ]

    return pd.DataFrame(records)

In [None]:
# Monthly distribution of the emotion scores: a white boxplot per month with
# the individual daily scores overlaid as points coloured by category.
# NOTE(review): the heatmap cell reads '10-Tweets_emotions.csv' - confirm the
# '11-' prefix here is intended.
datapath = f'{dir_path}11-Tweets_emotions.csv'
dataftD = pd.read_csv(datapath,index_col=0)
dd = get_long_dataset(dataftD)

fig, ax = plt.subplots(figsize=(10, 3))

# Both layers share the same data and axis mapping.
shared = dict(data=dd, x='month', y='score', ax=ax)

ax = sns.boxplot(
    **shared,
    color='white',
    width=0.3,
    dodge=True,
    whis=(1,99.8),
    flierprops={"marker": ".",'markersize':1.5},
)

sns.stripplot(**shared, hue='category', alpha=0.7, jitter=True, size=1)

# Legend goes below the axes, in up to seven columns.
_ = ax.legend(
    title='',
    loc='lower center',
    bbox_to_anchor=(0.5, -0.3),
    ncol=7,
    fontsize=8,
    frameon=False,
    markerscale=5,
)

ax.set_xlabel('')
ax.set_ylabel('Emotions prevalence')

fig.savefig(f'{dir_path}boxplot_emotions_prevalence.png',dpi=600,bbox_inches='tight', transparent=False,pad_inches=0)

In [None]:
# One monthly boxplot (with the daily scores overlaid) per category group,
# restricted to months that sort after '2020-02'.
datapath = f'{dir_path}10-Tweets_em.csv'
dataftD = pd.read_csv(datapath,index_col=0)
dd = get_long_dataset(dataftD)
dd = dd.loc[dd['month'] > '2020-02']

for which, members in tqdm(dict_categories.items()):

    # 'all' has no member list to filter on, so it gets no boxplot.
    if which != 'all':

        df_which = dd[dd['category'].isin(members)]

        fig, ax = plt.subplots(figsize=(10, 3))

        # Both layers share the same data and axis mapping.
        shared = dict(data=df_which, x='month', y='score', ax=ax)

        ax = sns.boxplot(
            **shared,
            color='white',
            width=0.3,
            dodge=True,
            whis=(1,99.8),
            flierprops={"marker": ".",'markersize':1.5},
        )

        sns.stripplot(**shared, hue='category', alpha=0.7, jitter=True, size=1)

        # Legend goes below the axes, in up to seven columns.
        _ = ax.legend(
            title='',
            loc='lower center',
            bbox_to_anchor=(0.5, -0.3),
            ncol=7,
            fontsize=8,
            frameon=False,
            markerscale=5,
        )

        ax.set_xlabel('')
        ax.set_ylabel(f'{which} prevalence')

        fig.savefig(f'{dir_path}boxplot_{which}_prevalence.png',dpi=600,bbox_inches='tight', transparent=False,pad_inches=0)