# SETUP

In [None]:
# mount drive folder
from google.colab import drive
drive.mount('/content/drive')
%cd drive/MyDrive/TESI

In [None]:
%%capture
!pip install -U plotly
!pip install -U scipy
!pip install git+https://github.com/RaffaeleMorganti/gsdmm.git

In [None]:
import pandas as pd, numpy as np, numba as nb
from scipy import stats
from wordcloud import WordCloud
import pickle, warnings
import matplotlib.pyplot as plt
import plotly.express as px, plotly.graph_objects as go
from plotly.subplots import make_subplots
from gsdmm import GSDMM
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
%cd testi/parquet
esp = pd.read_parquet("ESPERTI_PS.pqt")
ist = pd.read_parquet("ISTITUZIONI_PS.pqt")
reg = pd.read_parquet("REGIONI_PS.pqt")
new = pd.read_parquet("NEWS_PS.pqt")
twi = twi = pd.concat((pd.read_parquet("TWEET_P.pqt"),pd.read_parquet("TWEET_S.pqt")),1)
%cd ../..

In [None]:
# Tag every corpus with the label used later for grouping and plot legends.
for corpus, label in ((esp, "Esperti"), (ist, "Istituzioni"), (reg, "Regioni"),
                      (twi, "Tweet"), (new, "Notizie")):
  corpus["dataset"] = label
# The news corpus calls its timestamp column "date"; align it with the others.
new.rename(columns={"date":"datetime"},inplace=True)

In [None]:
def sub(x,r=None,c=["dataset","datetime","preprocess","sentiment","emotion"]):
  """Return columns ``c`` of ``x``, restricted to rows ``r`` when ``r`` is given."""
  rows = slice(None) if r is None else r
  return x.loc[rows,c]

# Stack the five corpora on their shared columns into one long frame.
full = pd.concat([sub(twi),sub(reg),sub(new),sub(esp),sub(ist)],ignore_index=True)
# Per-row weight: 1000 divided by the number of non-null texts in the row's dataset.
full["weight"] = 1e3/full.groupby("dataset").preprocess.transform("count")

In [None]:
#@title Parole di rilievo

def wordRelevance(words,scores,**kwargs):
  """Score how much each word shifts the distribution of ``scores``.

  A Beta distribution on [0, 1] is fitted to all ``scores`` and, for every word
  of the vocabulary, to the scores of the documents containing that word.
  The relevance of a word is::

      (skew(all scores) - skew(scores of documents with the word)) * log(n_documents_with_word)

  Parameters
  ----------
  words : iterable of str
      Documents, passed to ``CountVectorizer.fit_transform``.
  scores : array-like
      One score per document, positionally aligned with ``words``.
  **kwargs
      Forwarded to ``CountVectorizer`` (e.g. ``min_df``, ``token_pattern``).

  Returns
  -------
  dict
      ``{word: relevance}`` for every word kept by the vectorizer.
  """
  def betaSkew(x):
    """Skewness of the Beta fitted to ``x`` (loc=0, scale=1); 0 if no fit succeeds."""
    for method in ("mm","mle"):
      try:
        beta = stats.beta.fit(x,floc=0,fscale=1,method=method)
      except Exception:
        # Fit errors are expected on degenerate samples; try the next method.
        # (Unlike a bare ``except`` this lets KeyboardInterrupt through.)
        continue
      return stats.beta.stats(*beta,moments="s")
    print("Impossible fit beta on data:")
    print(x)
    return 0

  scores = np.asarray(scores)
  cv = CountVectorizer(**kwargs)
  # Keep the document-term matrix sparse: only the row indices of the documents
  # containing each word are needed, so a dense copy is wasted memory.
  counts = cv.fit_transform(words).tocsc()
  counts.eliminate_zeros()
  counts.sort_indices()
  # ``get_feature_names`` was removed from recent scikit-learn releases.
  if hasattr(cv,"get_feature_names_out"):
    names = [str(n) for n in cv.get_feature_names_out()]
  else:
    names = list(cv.get_feature_names())
  with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=RuntimeWarning)
    base = betaSkew(scores)
    print("Estimating importance of %d words" % len(names))
    par = []
    for i in range(counts.shape[1]):
      # Rows (documents) in which word i occurs at least once.
      rows = counts.indices[counts.indptr[i]:counts.indptr[i+1]]
      par.append((base - betaSkew(scores[rows])) * np.log(len(rows)))
  return dict(zip(names, par))

def wordRelPlot(val,title="Relevant words", cloud={"background_color": "white"}, plot={}):
  """Draw a word cloud of the ``{word: weight}`` mapping ``val``.

  ``cloud`` is forwarded to ``WordCloud`` and ``plot`` to ``plt.figure``.
  Returns the matplotlib figure.
  """
  fig = plt.figure(**plot)
  image = WordCloud(**cloud).generate_from_frequencies(val)
  plt.imshow(image, interpolation='bilinear')
  plt.axis('off')
  plt.suptitle(title)
  return fig

In [None]:
#@title SentiPlot
def plotSentiment(data,freq="W"):
  """Stacked-area plot of the share of each sentiment class over time.

  Each row is assigned the class with the highest value in its ``sentiment``
  vector (position 0 = Positivo, 1 = Neutrale, 2 = Negativo); rows are binned
  on ``datetime`` with ``freq`` and every class count is divided by the bin total.

  Parameters
  ----------
  data : DataFrame with ``datetime`` and ``sentiment`` columns. Not modified.
  freq : pandas offset alias for the time bins. Bin labels are shifted back by
         3 days for "W" and 7 days for "2W"; other values are left unshifted.

  Returns
  -------
  plotly Figure. The x ticks are hard-coded to the months Feb 2020 - Aug 2021.
  """
  labels = ["Positivo","Neutrale","Negativo"]
  codes = np.stack(data.sentiment.tolist()).argmax(1)
  # Build the categorical from fixed codes so a class that is never the argmax
  # in this subset does not break the labelling, and work on a new frame so the
  # caller's (possibly sliced) DataFrame is left untouched.
  frame = data.assign(senti=pd.Categorical.from_codes(codes, categories=labels))
  df = (frame.groupby([pd.Grouper(key="datetime",freq=freq),"senti"], observed=False)
             .size().reset_index(name="N"))
  # Denominator: total rows in the same time bin (bins are already labelled).
  df["D"] = df.groupby("datetime").N.transform("sum")
  df["freq"] = df.N / df.D
  if freq =="W":
    df["datetime"] = df["datetime"] - pd.DateOffset(days=3)
  if freq =="2W":
    df["datetime"] = df["datetime"] - pd.DateOffset(days=7)
  fig = px.area(df, x="datetime", y="freq", color="senti", line_group="senti", line_shape="spline",
                color_discrete_sequence=('#00CC96', '#636EFA', '#EF553B'), range_y=(0,1))
  fig.update_layout(yaxis_title='Frequenza',
            legend_title="Sentiment",
          xaxis_title='',
          xaxis = dict(
            ticktext = ["","Mar20","","Mag20","","Lug20","","Set20","","Nov20","","Gen21","","Mar21","","Mag21","","Lug21",""],
            tickvals = pd.date_range("2020-02-01","2021-08-01",freq='MS'),
            tickmode = "array"
          ), width=1100,height=420)
  return fig

def plotSentiments(data,freq="W"):
  """Line plot, per dataset, of the mean sentiment polarity over time.

  Polarity of a row is ``sentiment[0] - sentiment[2]`` (positive minus negative),
  averaged per ``dataset`` and per ``freq`` time bin of ``datetime``.
  ``data`` is not modified. Returns a plotly Figure.
  """
  probs = np.stack(data.sentiment.tolist())
  # Assign positionally: rebuilding a frame with a fresh RangeIndex and relying
  # on index alignment yields NaN/wrong rows whenever ``data`` is a filtered subset.
  scored = data.assign(senti=probs[:,0] - probs[:,2])
  scored = scored.groupby(["dataset",pd.Grouper(key="datetime",freq=freq)]).agg(senti=("senti","mean")).reset_index()
  fig = px.line(scored, x="datetime", y="senti", line_shape="spline", color="dataset", line_group="dataset", range_y=(-1,1))
  return fig

def plotFreq(df,freq="W"):
  """Line plot of how each dataset's rows are distributed over time.

  For every ``dataset`` the rows are counted per ``freq`` bin of ``datetime``
  (column ``N``) and expressed as a percentage of that dataset's total rows.
  Returns a plotly Figure.

  Note: this notebook defines ``plotFreq`` several times with different
  signatures; the most recently executed definition is the one in effect.
  """
  df = df.groupby(["dataset",pd.Grouper(key="datetime",freq=freq)]).size().reset_index(name="N")
  # Sum only N: summing every column also hits ``datetime``, which pandas 2 rejects.
  df["D"] = df.groupby("dataset").N.transform("sum")
  df["freq"] = df.N / df.D * 100
  fig = px.line(df, x="datetime", y="freq",hover_data=["N"], color="dataset", line_group="dataset", line_shape="spline")
  return fig

def plotEmotion(data,freq="W"):
  """Stacked-area plot of the mean emotion scores over time.

  The ``emotion`` vectors are expanded into the columns Rabbia, Paura, Gioia,
  Tristezza (positions 0-3) and averaged per ``freq`` bin of ``datetime``; the
  number of rows per bin is exposed as hover data (``count``). Empty bins are
  plotted as 0.

  Parameters
  ----------
  data : DataFrame with ``datetime`` and ``emotion`` columns. Not modified.
  freq : pandas offset alias. Bin labels are shifted back by 3 days for "W"
         and 7 days for "2W"; other values are left unshifted.

  Returns
  -------
  plotly Figure. The x ticks are hard-coded to the months Feb 2020 - Aug 2021.
  """
  emo = pd.DataFrame(data.emotion.tolist()).rename(columns={0:"Rabbia",1:"Paura",2:"Gioia",3:"Tristezza"})
  # Positional assignment: ``emo`` has a fresh RangeIndex, ``data`` may not.
  emo["date"] = list(data.datetime)
  grouped = emo.groupby(pd.Grouper(key="date",freq=freq))
  # ``axis`` must be a keyword: pandas 2 no longer accepts it positionally.
  df = pd.concat((grouped.size().rename("count"),grouped.mean()),axis=1).reset_index().fillna(0)
  if freq =="W":
    df["date"] = df["date"] - pd.DateOffset(days=3)
  if freq =="2W":
    df["date"] = df["date"] - pd.DateOffset(days=7)
  fig = px.area(df, x="date", y=["Rabbia","Paura","Tristezza","Gioia"],hover_data=["count"], line_shape="spline", range_y=(0,1))
  fig.update_layout(yaxis_title='Frequenza',
              legend_title="Emotion",
            xaxis_title='',
            xaxis = dict(
              ticktext = ["","Mar20","","Mag20","","Lug20","","Set20","","Nov20","","Gen21","","Mar21","","Mag21","","Lug21",""],
              tickvals = pd.date_range("2020-02-01","2021-08-01",freq='MS'),
              tickmode = "array"
            ), width=1100,height=420)

  return fig

In [None]:
# Load the pickled object stored in MODELS/GSDMM.pkl (used below via .predict).
# NOTE(review): pickle.load can execute arbitrary code while unpickling -
# only load files produced by this project / from a trusted source.
with open("MODELS/GSDMM.pkl","rb") as f:
  gsdmm = pickle.load(f)

In [None]:
full["clust"] = gsdmm.predict(full.preprocess)

In [None]:
def _relevancesByLabel(texts,probabilities,labels,min_df):
  """Run ``wordRelevance`` once per column of ``probabilities``, keyed by ``labels``."""
  pattern = r"(?u)\b(?<!\.|\/|\?|#)\w{3,}(?!:|\.)\b"
  return {
      label: wordRelevance(texts,probabilities[:,k],min_df=min_df,token_pattern=pattern)
      for k,label in enumerate(labels)
  }

def getEmotionRelevances(df,min_df=0.003):
  """Word relevances for each emotion (rabbia, paura, gioia, tristezza) of ``df``."""
  return _relevancesByLabel(df.preprocess,np.stack(df.emotion),
                            ("rabbia","paura","gioia","tristezza"),min_df)

def getSentimentRelevances(df,min_df=0.003):
  """Word relevances for each sentiment class (positivo, neutrale, negativo) of ``df``."""
  return _relevancesByLabel(df.preprocess,np.stack(df.sentiment),
                            ("positivo","neutrale","negativo"),min_df)

In [None]:
def plotEmotionRelevances(ls,ax=None,plot={},cloud={"background_color": "white"},single=False):
  """Grid of word clouds, one row per entry of ``ls`` and one column per emotion.

  Parameters
  ----------
  ls : list of dicts with keys "rabbia", "paura", "gioia", "tristezza", each
       mapping words to weights (as returned by ``getEmotionRelevances``).
  ax : optional sequence of row labels, one per entry of ``ls``; shown as the
       y label of the first panel of each row.
  plot : keyword arguments forwarded to ``plt.subplots``.
  cloud : keyword arguments forwarded to ``WordCloud``.
  single : lay the four clouds out on a 2x2 grid (meant for ``len(ls) == 1``).

  Returns
  -------
  matplotlib Figure.
  """
  emotions = (("rabbia","Rabbia"),("paura","Paura"),("gioia","Gioia"),("tristezza","Tristezza"))
  wc = WordCloud(**cloud)
  if single:
    fig, axs = plt.subplots(2,2, **plot)
  else:
    fig, axs = plt.subplots(len(ls), 4, **plot)
  axs = np.array(axs).reshape(-1)
  for panel in axs:
    # Hide ticks and frame but keep the axis itself: ``axis("off")`` would also
    # hide the row labels set below.
    panel.set_xticks([])
    panel.set_yticks([])
    for spine in panel.spines.values():
      spine.set_visible(False)
  for col,(_,title) in enumerate(emotions):
    axs[col].set_title(title)
  if ax is not None:
    for row,label in enumerate(ax):
      # Row labels belong on the first panel of each row, not on panels 0..n-1.
      axs[row*4].set_ylabel(label)
  for row,relevances in enumerate(ls):
    for col,(key,_) in enumerate(emotions):
      axs[row*4+col].imshow(wc.generate_from_frequencies(relevances[key]), interpolation='bilinear')

  fig.tight_layout()
  return fig

def plotSentimentRelevances(ls,plot={},cloud={"background_color": "white"}):
  """Grid of word clouds: one row per entry of ``ls``, columns Positivo/Neutrale/Negativo.

  Each entry of ``ls`` is a dict with keys "positivo", "neutrale", "negativo"
  mapping words to weights. ``plot`` goes to ``plt.subplots``, ``cloud`` to
  ``WordCloud``. Returns the matplotlib figure.
  """
  classes = (("positivo","Positivo"),("neutrale","Neutrale"),("negativo","Negativo"))
  cloudMaker = WordCloud(**cloud)
  fig, grid = plt.subplots(len(ls), 3, **plot)
  panels = np.array(grid).reshape(-1)
  for panel in panels:
    panel.axis("off")
  # Column titles go on the first row only.
  for col,(_,title) in enumerate(classes):
    panels[col].set_title(title)
  for row,relevances in enumerate(ls):
    for col,(key,_) in enumerate(classes):
      image = cloudMaker.generate_from_frequencies(relevances[key])
      panels[row*3+col].imshow(image, interpolation='bilinear')

  fig.tight_layout()
  return fig

* Risposta alle misure contenitive
* Opinioni sull'app Immuni
* Fiducia in istituzioni ed esperti
* Efficacia della campagna vaccinale

# App immuni

cluster #41 gsdmm

In [None]:
tamponi = pd.read_csv("dataset_accessori/tamponi.csv",sep=";") #nuovi_positivi
tamponi.data = pd.DatetimeIndex(pd.to_datetime(tamponi.data)).normalize()
immuni = pd.read_csv("dataset_accessori/immuni.csv",sep=";")# notifiche_inviate e utenti_positivi
immuni.data = pd.to_datetime(immuni.data)
# 10 mln download al 10/12/2020

In [None]:
  df = pd.merge(tamponi,immuni,how="outer",on="data")
  df = df.groupby([pd.Grouper(key="data",freq="2W")]).sum().reset_index()
  df.data -= pd.DateOffset(days=7)
  #df.utenti_positivi *= 200
  #fig = px.line(df, x="data", y=["nuovi_positivi","utenti_positivi"], line_shape="spline")
  

In [None]:
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(y=df.nuovi_positivi, x=df.data, name="tamponi positivi",line_shape='spline'),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(y=df.utenti_positivi, x=df.data, name="utenti positivi",line_shape='spline'),
    secondary_y=True,
)

# Set y-axes titles
fig.update_yaxes(title_text="Numero tamponi positivi", range=(0,500000), secondary_y=False)
fig.update_yaxes(title_text="Numero utenti positivi", range=(0,2500), secondary_y=True)

fig.update_layout(
        xaxis_range=("2020-03-01","2021-07-01"),
        xaxis = dict(
          ticktext = ["","Apr20","","Giu20","","Ago20","","Ott20","","Dic20","","Feb21","","Apr21","","Giu21",""],
          tickvals = pd.date_range("2020-03-01","2021-07-01",freq='MS'),
          tickmode = "array"
        ), width=1100,height=420)

fig.show()

In [None]:
block = full.loc[full.clust==41,]

In [None]:
def plotFreq(dfn,dfd,freq="2W"):
  """Line plot, per dataset, of the percentage of ``dfd`` rows that are also in ``dfn``.

  Both frames are first counted per dataset and week, then re-binned with
  ``freq``; ``freq = N / D * 100`` where N counts ``dfn`` rows and D counts
  ``dfd`` rows. Bin labels are always shifted back by 7 days (half of the
  default two-week bin), whatever ``freq`` is.

  Note: this definition replaces any ``plotFreq`` defined earlier in the notebook.
  """
  def weekly(frame):
    return frame.groupby(["dataset",pd.Grouper(key="datetime",freq="W")]).size()

  # ``axis`` must be a keyword (pandas 2); ``keys`` names the two columns directly.
  df = pd.concat([weekly(dfn),weekly(dfd)],axis=1,keys=["N","D"]).reset_index().fillna(0)
  df = df.groupby(["dataset",pd.Grouper(key="datetime",freq=freq)]).sum().reset_index()
  df["freq"] = df.N / df.D * 100
  df["datetime"] = df["datetime"] - pd.DateOffset(days=7)
  fig = px.line(df, x="datetime", y="freq", hover_data=["N"], color="dataset", line_group="dataset", line_shape="spline")
  return fig

In [None]:
fig = plotFreq(block,full)
fig.update_layout(yaxis_title='Frequenza',
          legend_title="Dataset",
        xaxis_title='',
        xaxis_range=("2020-03-01","2021-01-01"),
        xaxis = dict(
          ticktext = ["","Apr20","","Giu20","","Ago20","","Ott20","","Dic20",""],
          tickvals = pd.date_range("2020-03-01","2021-01-01",freq='MS'),
          tickmode = "array"
        ), width=1100,height=420)

In [None]:
fig = plotSentiment(block.loc[block.dataset=="Tweet",],"2W")
fig.update_layout(yaxis_title='Frequenza',
          legend_title="Sentiment",
        xaxis_title='',
        xaxis_range=("2020-03-01","2021-01-01"),
        xaxis = dict(
          ticktext = ["","Apr20","","Giu20","","Ago20","","Ott20","","Dic20",""],
          tickvals = pd.date_range("2020-03-01","2021-01-01",freq='MS'),
          tickmode = "array"
        ), width=1100,height=420)

In [None]:
fig = plotEmotion(block.loc[block.dataset=="Tweet",],"2W")
fig.update_layout(yaxis_title='Frequenza',
          legend_title="Emotion",
        xaxis_title='',
        xaxis_range=("2020-03-01","2021-01-01"),
        xaxis = dict(
          ticktext = ["","Apr20","","Giu20","","Ago20","","Ott20","","Dic20",""],
          tickvals = pd.date_range("2020-03-01","2021-01-01",freq='MS'),
          tickmode = "array"
        ), width=1100,height=420)

In [None]:
sset = block.loc[block.dataset=="Tweet",]

In [None]:
def plotRelevances(ls,le,plot={},cloud={"background_color": "white"}):
  """2x2 grid of word clouds: Positivo and Negativo from ``ls``, Paura and Rabbia from ``le``.

  ``ls`` is a sentiment-relevance dict (keys "positivo", "negativo"), ``le`` an
  emotion-relevance dict (keys "paura", "rabbia"). ``plot`` goes to
  ``plt.subplots``, ``cloud`` to ``WordCloud``. Returns the matplotlib figure.
  """
  # (panel title, source dict, key) in panel order.
  layout = (("Positivo",ls,"positivo"),("Negativo",ls,"negativo"),
            ("Paura",le,"paura"),("Rabbia",le,"rabbia"))
  cloudMaker = WordCloud(**cloud)
  fig, grid = plt.subplots(2, 2, **plot)
  panels = np.array(grid).reshape(-1)
  for panel in panels:
    panel.axis("off")
  for panel,(title,_,_) in zip(panels,layout):
    panel.set_title(title)
  for panel,(_,source,key) in zip(panels,layout):
    panel.imshow(cloudMaker.generate_from_frequencies(source[key]), interpolation='bilinear')

  fig.tight_layout()
  return fig

In [None]:
imms = getSentimentRelevances(sset,2)
plotSentimentRelevances([imms],plot={'figsize':(14,3)}).show()

In [None]:
imme = getEmotionRelevances(sset,2)
plotEmotionRelevances([imme],plot={'figsize':(9,5)},single=True).show()

# ondate

In [None]:
ricoveri = pd.read_csv("dataset_accessori/tamponi.csv",sep=";") #ricoverati_con_sintomi terapia_intensiva
ricoveri.data = pd.DatetimeIndex(pd.to_datetime(ricoveri.data)).normalize()
df = ricoveri.groupby([pd.Grouper(key="data",freq="W")]).sum().reset_index()
df.data -= pd.DateOffset(days=3)

In [None]:
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(y=df.ricoverati_con_sintomi, x=df.data, name="altri reparti",line_shape='spline'),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(y=df.terapia_intensiva, x=df.data, name="terapia intensiva",line_shape='spline'),
    secondary_y=True,
)

# Set y-axes titles
fig.update_yaxes(title_text="Pazienti ricoverati in altri reparti", range=(0,250000), secondary_y=False)
fig.update_yaxes(title_text="Pazienti in terapia intensiva", range =(0,30000), secondary_y=True,
                 tickvals = np.linspace(0, 30000, num=6), tickmode = "array")

fig.update_layout(
        xaxis_range=("2020-03-01","2021-07-01"),
        xaxis = dict(
          ticktext = ["","Apr20","","Giu20","","Ago20","","Ott20","","Dic20","","Feb21","","Apr21","","Giu21",""],
          tickvals = pd.date_range("2020-03-01","2021-07-01",freq='MS'),
          tickmode = "array"
        ), width=1100,height=420)

fig.show()

terapia intensiva / altri reparti = 12%
- prima ondata: 01/03–08/04 e 09/04–31/08
- seconda ondata: 01/09–26/11 e 27/11–20/02
- terza ondata: 21/02–06/04 e 07/04–31/07

In [None]:
# Cluster ids (values of full.clust) flagged as "of interest" for this section.
# NOTE(review): presumably hand-picked by inspecting the GSDMM topics - confirm.
clusters = [0,4,6,12,21,22,23,27,31,35,38,44,52,59,63,64,82,88,100,104,121,122,137,139,148,149,153,157,162,164,173,176,191,192,194,197,198]
# Work on an explicit copy: adding a column to a filtered view of ``full``
# triggers SettingWithCopyWarning and may not stick.
tw = full[full.dataset=="Tweet"].copy()
tw["interesse"] = tw.clust.isin(clusters)

In [None]:
words = pd.DataFrame(list(twi.pos))
noun = words.NOUN.str.join(' ')
propn = words.PROPN.str.join(' ')
verb = words.VERB.str.join(' ')
string = noun + ' ' + propn + ' ' + verb
tw["words"] = string.fillna("")

In [None]:
def plotFreq(df,freq="W"):
  """Line plot of the percentage of rows flagged ``interesse`` in each time bin.

  Rows are counted per ``freq`` bin of ``datetime`` and per ``interesse`` value;
  the plotted value is the flagged count over the bin total, times 100. Bin
  labels are shifted back by 3 days. Replaces any earlier ``plotFreq``.
  """
  counts = (df.groupby([pd.Grouper(key="datetime",freq=freq),"interesse"])
              .size()
              .reset_index()
              .rename(columns={0:"N"}))
  # Bin total = flagged + unflagged rows of the same bin.
  counts["D"] = counts.groupby(pd.Grouper(key="datetime",freq=freq))["N"].transform("sum")
  counts["datetime"] = counts["datetime"] - pd.DateOffset(days=3)
  counts["freq"] = counts["N"] / counts["D"] * 100
  flagged = counts[counts.interesse]
  return px.line(flagged, x="datetime", y="freq",hover_data=["N"], line_shape="spline")

In [None]:
fig = plotFreq(tw)
fig.update_layout(yaxis_title='Frequenza',
        xaxis_title='',
        xaxis_range=("2020-02-01","2021-08-01"),
        xaxis = dict(
          ticktext = ["Mar20","","Mag20","","Lug20","","Set20","","Nov20","","Gen21","","Mar21","","Mag21","","Lug21",""],
          tickvals = pd.date_range("2020-03-01","2021-08-01",freq='MS'),
          tickmode = "array"
        ), width=1100,height=420)

In [None]:
rel1 = getEmotionRelevances(tw.loc[tw.interesse&(tw.datetime<"2020-04-09")])
plotEmotionRelevances([rel1])

In [None]:
rel2 = getEmotionRelevances(tw.loc[tw.interesse&(tw.datetime>="2020-04-09")&(tw.datetime<"2020-09-01")])
plotEmotionRelevances([rel2])

In [None]:
rel3 = getEmotionRelevances(tw.loc[tw.interesse&(tw.datetime>="2020-09-01")&(tw.datetime<"2020-11-27")])
plotEmotionRelevances([rel3])

In [None]:
rel4 = getEmotionRelevances(tw.loc[tw.interesse&(tw.datetime>="2020-11-27")&(tw.datetime<"2021-02-21")])
plotEmotionRelevances([rel4])

In [None]:
rel5 = getEmotionRelevances(tw.loc[tw.interesse&(tw.datetime>="2021-02-21")&(tw.datetime<"2021-04-07")])
plotEmotionRelevances([rel5])

In [None]:
rel6 = getEmotionRelevances(tw.loc[tw.interesse&(tw.datetime>="2021-04-07")])
plotEmotionRelevances([rel6])

In [None]:
rel = [rel1,rel2,rel3,rel4,rel5,rel6]

In [None]:
# Date ranges (dd/mm/yy) of the six periods in ``rel``, used as row labels.
# (A stray double comma between the last two entries was a SyntaxError.)
date = ["01/01/20-08/04/20","09/04/20-31/08/20","01/09/20-26/11/20",
        "27/11/20-20/02/21","21/02/21-06/04/21","07/04/21-31/07/21"]
plotEmotionRelevances(rel,date,plot={'figsize':(18,15)}).show()

In [None]:
for i in range(len(rel)):
  for w in ("paura","ansia"):
    rel[i]["paura"][w]=0
  for w in ("piangere","triste"):
    rel[i]["tristezza"][w]=0
  for w in ("bello","felice","gioia"):
    rel[i]["gioia"][w]=0
  for w in ("coglione","culo","cazzo","merda"):
    rel[i]["rabbia"][w]=0
plotEmotionRelevances(rel,plot={'figsize':(18,15)}).show()

In [None]:
# Zero the weight of generic insults / negative words in the negative-sentiment
# relevance dicts so they do not dominate the word clouds.
# The rel*s dicts are only built further down in the notebook (and rel1s/rel2s
# are not built anywhere in the visible cells), so skip the ones that do not
# exist yet instead of failing with NameError on Restart & Run All.
for name in ("rel1s","rel2s","rel3s","rel4s","rel5s","rel6s"):
  relevance = globals().get(name)
  if relevance is None:
    continue
  for w in ("cazzo","merda","triste","schifo","inutile","odiare","cagare","coglione","porco"):
    relevance[w] = 0

In [None]:
# Zero the weight of generic insults in the anger relevance dicts so they do
# not dominate the word clouds.
# The rel*e dicts are only built further down in the notebook (and rel1e/rel2e
# are not built anywhere in the visible cells), so skip the ones that do not
# exist yet instead of failing with NameError on Restart & Run All.
for name in ("rel1e","rel2e","rel3e","rel4e","rel5e","rel6e"):
  relevance = globals().get(name)
  if relevance is None:
    continue
  for w in ("cazzo","coglione","culo","merda","porco"):
    relevance[w] = 0

# vaccini

In [None]:
# 3 16 17 32 50 54 56 86 87 89 103 113 125 140 147 150 151 161 172 189 190 195
clusters = [3,16,17,32,50,54,56,86,87,89,103,113,125,140,147,150,151,161,172,189,190,195]
full["interesse"] = full.clust.isin(clusters)

In [None]:
words = pd.DataFrame(list(twi.pos))
noun = words.NOUN.str.join(' ')
propn = words.PROPN.str.join(' ')
verb = words.VERB.str.join(' ')
string = noun + ' ' + propn + ' ' + verb
tw["words"] = string.fillna("")

In [None]:
def plotFreq(df,freq="2W"):
  """Line plot, per dataset, of the percentage of rows flagged ``interesse`` per time bin.

  Rows are counted per ``dataset``, ``freq`` bin of ``datetime`` and
  ``interesse`` value; the plotted value is the flagged count over the
  dataset's bin total, times 100. Bin labels are shifted back by 7 days.
  Replaces any earlier ``plotFreq``.
  """
  counts = (df.groupby(["dataset",pd.Grouper(key="datetime",freq=freq),"interesse"])
              .size()
              .reset_index()
              .rename(columns={0:"N"}))
  # Bin total per dataset = flagged + unflagged rows.
  counts["D"] = counts.groupby(["dataset",pd.Grouper(key="datetime",freq=freq)])["N"].transform("sum")
  counts["datetime"] = counts["datetime"] - pd.DateOffset(days=7)
  counts["freq"] = counts["N"] / counts["D"] * 100
  flagged = counts[counts.interesse]
  return px.line(flagged, x="datetime", y="freq",hover_data=["N"],color="dataset", line_group="dataset", line_shape="spline")

In [None]:
fig = plotFreq(full)
fig.update_layout(yaxis_title='Frequenza',
        xaxis_title='',
        xaxis_range=("2020-02-01","2021-08-01"),
        xaxis = dict(
          ticktext = ["Mar20","","Mag20","","Lug20","","Set20","","Nov20","","Gen21","","Mar21","","Mag21","","Lug21",""],
          tickvals = pd.date_range("2020-03-01","2021-08-01",freq='MS'),
          tickmode = "array"
        ), width=1100,height=420)

In [None]:
tw = full.loc[full.interesse&(full.dataset=="Tweet")]

In [None]:
fig = plotSentiment(tw,"2W")
fig.update_layout(yaxis_title='Frequenza',
        xaxis_title='Sentiment',
       # xaxis_range=("2020-01-01","2021-07-25"),
        xaxis = dict(
          ticktext = ["","Mar20","","Mag20","","Lug20","","Set20","","Nov20","","Gen21","","Mar21","","Mag21","","Lug21",""],
          tickvals = pd.date_range("2020-02-01","2021-08-01",freq='MS'),
          tickmode = "array"
        ), width=1100,height=420)
fig.add_vline(x="2020-03-01", line_color="red")
fig.add_vline(x="2020-04-08", line_color="green")
fig.add_vline(x="2020-09-01", line_color="red")
fig.add_vline(x="2020-11-26", line_color="green")
fig.add_vline(x="2021-02-20", line_color="red")
fig.add_vline(x="2021-04-06", line_color="green")

In [None]:
fig = plotEmotion(tw,"2W")
fig.update_layout(yaxis_title='Frequenza',
        xaxis_title='Emotion',
        #xaxis_range=("2020-02-01","2021-08-01"),
        xaxis = dict(
          ticktext = ["","Mar20","","Mag20","","Lug20","","Set20","","Nov20","","Gen21","","Mar21","","Mag21","","Lug21",""],
          tickvals = pd.date_range("2020-02-01","2021-08-01",freq='MS'),
          tickmode = "array"
        ), width=1100,height=420)
fig.add_vline(x="2020-03-01", line_color="red")
fig.add_vline(x="2020-04-08", line_color="green")
fig.add_vline(x="2020-09-01", line_color="red")
fig.add_vline(x="2020-11-26", line_color="green")
fig.add_vline(x="2021-02-20", line_color="red")
fig.add_vline(x="2021-04-06", line_color="green")

In [None]:
vacs = getSentimentRelevances(tw)
plotSentimentRelevances([vacs],plot={'figsize':(14,3)}).show()

In [None]:
vace = getEmotionRelevances(tw)
plotEmotionRelevances([vace],plot={'figsize':(9,5)},single=True).show()

In [None]:
plotRelevances(vacs,vace,plot={'figsize':(9,5)}).show()

In [None]:
for w in ("felice","bello","contento","ottimo","buon","buono"):
  vacs["positivo"][w]=0
vacs["negativo"]["merda"]=0
vace["paura"]["paura"]=0
vace["paura"]["ansia"]=0
vace["rabbia"]["coglione"]=0
plotRelevances(vacs,vace,plot={'figsize':(9,5)}).show()

In [None]:
vacei = getEmotionRelevances(tw.loc[tw.datetime<"2021-05-01"])
plotEmotionRelevances([vacei],plot={'figsize':(9,5)},single=True).show()

In [None]:
vacef = getEmotionRelevances(tw.loc[tw.datetime>="2021-05-01"])
plotEmotionRelevances([vacef],plot={'figsize':(9,5)},single=True).show()

In [None]:
vacs["positivo"]["felice"]=0
vacs["positivo"]["bello"]=0
vacs["positivo"]["contento"]=0
plotSentimentRelevances([vacs],plot={'figsize':(14,3)}).show()

In [None]:
vacsf = getSentimentRelevances(tw.loc[tw.datetime>="2021-05-01"])
plotSentimentRelevances([vacsf],plot={'figsize':(14,3)}).show()

In [None]:
sset = tw.loc[tw.interesse&(tw.datetime>="2020-09-01")&(tw.datetime<"2020-11-27")]
rel3s = wordRelevance(sset.preprocess,np.stack(sset.sentiment)[:,2],min_df=0.003,token_pattern=r"(?u)\b(?<!\.|\/|\?|#)\w{3,}(?!:|\.)\b")
rel3e = wordRelevance(sset.preprocess,np.stack(sset.emotion)[:,0],min_df=0.003,token_pattern=r"(?u)\b(?<!\.|\/|\?|#)\w{3,}(?!:|\.)\b")
wordRelPlot(rel3s,"Sentiment: Negativo").show()
wordRelPlot(rel3e,"Emotion: Rabbia").show()

In [None]:
sset = tw.loc[tw.interesse&(tw.datetime>="2020-11-27")&(tw.datetime<"2021-02-21")]
rel4s = wordRelevance(sset.preprocess,np.stack(sset.sentiment)[:,2],min_df=0.003,token_pattern=r"(?u)\b(?<!\.|\/|\?|#)\w{3,}(?!:|\.)\b")
rel4e = wordRelevance(sset.preprocess,np.stack(sset.emotion)[:,0],min_df=0.003,token_pattern=r"(?u)\b(?<!\.|\/|\?|#)\w{3,}(?!:|\.)\b")
wordRelPlot(rel4s,"Sentiment: Negativo").show()
wordRelPlot(rel4e,"Emotion: Rabbia").show()

In [None]:
sset = tw.loc[tw.interesse&(tw.datetime>="2021-02-21")&(tw.datetime<"2021-04-07")]
rel5s = wordRelevance(sset.preprocess,np.stack(sset.sentiment)[:,2],min_df=0.003,token_pattern=r"(?u)\b(?<!\.|\/|\?|#)\w{3,}(?!:|\.)\b")
rel5e = wordRelevance(sset.preprocess,np.stack(sset.emotion)[:,0],min_df=0.003,token_pattern=r"(?u)\b(?<!\.|\/|\?|#)\w{3,}(?!:|\.)\b")
wordRelPlot(rel5s,"Sentiment: Negativo").show()
wordRelPlot(rel5e,"Emotion: Rabbia").show()

In [None]:
sset = tw.loc[tw.interesse&(tw.datetime>="2021-04-07")]
rel6s = wordRelevance(sset.preprocess,np.stack(sset.sentiment)[:,2],min_df=0.003,token_pattern=r"(?u)\b(?<!\.|\/|\?|#)\w{3,}(?!:|\.)\b")
rel6e = wordRelevance(sset.preprocess,np.stack(sset.emotion)[:,0],min_df=0.003,token_pattern=r"(?u)\b(?<!\.|\/|\?|#)\w{3,}(?!:|\.)\b")
wordRelPlot(rel6s,"Sentiment: Negativo").show()
wordRelPlot(rel6e,"Emotion: Rabbia").show()

In [None]:
# Zero the weight of generic insults / negative words in the negative-sentiment
# relevance dicts so they do not dominate the word clouds.
# rel1s and rel2s are not built anywhere in the visible cells: only touch the
# dicts that actually exist instead of failing with NameError.
for name in ("rel1s","rel2s","rel3s","rel4s","rel5s","rel6s"):
  relevance = globals().get(name)
  if relevance is None:
    continue
  for w in ("cazzo","merda","triste","schifo","inutile","odiare","cagare","coglione","porco"):
    relevance[w] = 0

In [None]:
# One negative-sentiment word cloud per period.
# rel1s and rel2s are not built anywhere in the visible cells: plot only the
# periods whose relevance dict exists instead of failing with NameError.
for name in ("rel1s","rel2s","rel3s","rel4s","rel5s","rel6s"):
  if name in globals():
    wordRelPlot(globals()[name],"Sentiment: Negativo").show()

In [None]:
# Zero the weight of generic insults in the anger relevance dicts so they do
# not dominate the word clouds.
# rel1e and rel2e are not built anywhere in the visible cells: only touch the
# dicts that actually exist instead of failing with NameError.
for name in ("rel1e","rel2e","rel3e","rel4e","rel5e","rel6e"):
  relevance = globals().get(name)
  if relevance is None:
    continue
  for w in ("cazzo","coglione","culo","merda","porco"):
    relevance[w] = 0

In [None]:
# One anger word cloud per period.
# rel1e and rel2e are not built anywhere in the visible cells: plot only the
# periods whose relevance dict exists instead of failing with NameError.
for name in ("rel1e","rel2e","rel3e","rel4e","rel5e","rel6e"):
  if name in globals():
    wordRelPlot(globals()[name],"Emotion: Rabbia").show()

# esperti vs politica

In [None]:
# Regular expressions used to flag tweets that mention experts / politics.
# The alternatives are wrapped in word boundaries: plain substring matching
# makes "conte" hit "contento" or "contesto" and "cts" hit any word containing
# it, while the spaces previously placed around "galli" and "rezza" missed
# those names at the start or end of a text. The group is non-capturing so
# that ``Series.str.contains`` does not warn about match groups.
stresp=r"\b(?:bassetti|brusaferro|burioni|capua|cartabellotta|crisanti|galli|gismondo|locatelli|lopalco|palù|pregliasco|rezza|sileri|zangrillo|cts)\b"
strpol=r"\b(?:conte|draghi|governo)\b"

In [None]:
twi = full[full.dataset=="Tweet"]

In [None]:
# Boolean masks over ``twi``: tweets mentioning experts / politics.
# ``na=False`` keeps the masks strictly boolean when a text is missing
# (a NaN in the mask would make ``twi[esp]`` fail).
# NOTE: ``esp`` no longer refers to the experts DataFrame loaded at the top.
esp = twi.preprocess.str.contains(stresp, na=False)
pol = twi.preprocess.str.contains(strpol, na=False)

In [None]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Number of tweets per two-week bin mentioning experts (dfe) and politics (dfp);
# bin labels are shifted back by 7 days, as in the other 2W plots.
dfe = twi[esp].groupby(pd.Grouper(key="datetime",freq="2W")).size().reset_index(name="N")
dfe["datetime"] = dfe["datetime"] - pd.DateOffset(days=7)
dfp = twi[pol].groupby(pd.Grouper(key="datetime",freq="2W")).size().reset_index(name="N")
dfp["datetime"] = dfp["datetime"] - pd.DateOffset(days=7)

# Trace names and axis titles describe what is plotted (tweet counts); the
# previous labels were copied from the hospital-admissions figure.
fig.add_trace(
    go.Scatter(y=dfe.N, x=dfe.datetime, name="esperti",line_shape='spline'),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(y=dfp.N, x=dfp.datetime, name="politica",line_shape='spline'),
    secondary_y=True,
)

# Set y-axes titles
fig.update_yaxes(title_text="Numero di tweet sugli esperti", range=(0,1000), secondary_y=False)
fig.update_yaxes(title_text="Numero di tweet sulla politica", range =(0,1000), secondary_y=True)
                 #tickvals = np.linspace(0, 300, num=6), tickmode = "array")
# Disabled x-axis layout, kept for reference:
# fig.update_layout(
#         xaxis_range=("2020-03-01","2021-07-01"),
#         xaxis = dict(
#           ticktext = ["","Apr20","","Giu20","","Ago20","","Ott20","","Dic20","","Feb21","","Apr21","","Giu21",""],
#           tickvals = pd.date_range("2020-03-01","2021-07-01",freq='MS'),
#           tickmode = "array"
#         ), width=1100,height=420)
fig.show()

In [None]:
fig = plotSentiment(twi[esp],"2W")
fig.update_layout(yaxis_title='Frequenza',
        xaxis_title='Esperti',
       # xaxis_range=("2020-01-01","2021-07-25"),
        xaxis = dict(
          ticktext = ["","Mar20","","Mag20","","Lug20","","Set20","","Nov20","","Gen21","","Mar21","","Mag21","","Lug21",""],
          tickvals = pd.date_range("2020-02-01","2021-08-01",freq='MS'),
          tickmode = "array"
        ), width=900,height=420)

In [None]:
fig = plotSentiment(twi[pol],"2W")
fig.update_layout(yaxis_title='Frequenza',
        xaxis_title='Istituzioni',
       # xaxis_range=("2020-01-01","2021-07-25"),
        xaxis = dict(
          ticktext = ["","Mar20","","Mag20","","Lug20","","Set20","","Nov20","","Gen21","","Mar21","","Mag21","","Lug21",""],
          tickvals = pd.date_range("2020-02-01","2021-08-01",freq='MS'),
          tickmode = "array"
        ), width=900,height=420)
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

In [None]:
fig = plotEmotion(twi[esp],"2W")
fig.update_layout(yaxis_title='Frequenza',
        xaxis_title='Esperti',
       # xaxis_range=("2020-01-01","2021-07-25"),
        xaxis = dict(
          ticktext = ["","Mar20","","Mag20","","Lug20","","Set20","","Nov20","","Gen21","","Mar21","","Mag21","","Lug21",""],
          tickvals = pd.date_range("2020-02-01","2021-08-01",freq='MS'),
          tickmode = "array"
        ), width=900,height=420)

In [None]:
fig = plotEmotion(twi[pol],"2W")
fig.update_layout(yaxis_title='Frequenza',
        xaxis_title='Istituzioni',
       # xaxis_range=("2020-01-01","2021-07-25"),
        xaxis = dict(
          ticktext = ["","Mar20","","Mag20","","Lug20","","Set20","","Nov20","","Gen21","","Mar21","","Mag21","","Lug21",""],
          tickvals = pd.date_range("2020-02-01","2021-08-01",freq='MS'),
          tickmode = "array"
        ), width=900,height=420)
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

In [None]:
esps = getSentimentRelevances(twi[esp])
plotSentimentRelevances([esps],plot={'figsize':(14,3)}).show()

In [None]:
pols = getSentimentRelevances(twi[pol])
plotSentimentRelevances([pols],plot={'figsize':(14,3)}).show()

In [None]:
pole = getEmotionRelevances(twi[pol])
plotEmotionRelevances([pole],plot={'figsize':(9,5)},single=True).show()

In [None]:
espe = getEmotionRelevances(twi[esp])
plotEmotionRelevances([espe],plot={'figsize':(9,5)},single=True).show()