In [40]:
import pandas as pd
import plotly.express as px

In [2]:
# Load the four article collections from their pickle files (one per outlet).
# NOTE: unpickling executes arbitrary code, so these files must come from a
# trusted source.
resilience, emag, icn, grist = [
    pd.read_pickle(pickle_path)
    for pickle_path in (
        '../Data/Resilience/final.pkl',
        '../Data/Emagazine/emag_final.pkl',
        '../Data/Inside Climate News/icn.pkl',
        '../Data/Grist/grist.pkl',
    )
]

In [13]:
# EMagazine keeps its article address under ``link``; expose it as ``url`` so
# the column selection made when the frames are stacked finds it.
emag['url'] = emag['link']

# Label every row with the outlet it came from.
for frame, label in (
    (resilience, 'Resilience'),
    (emag, 'EMagazine'),
    (icn, 'InsideClimateNews'),
    (grist, 'Grist'),
):
    frame['source'] = label

In [24]:
# Convert the ``date`` column of these three frames to pandas datetimes.
# NOTE(review): ``resilience`` is not converted here -- presumably its ``date``
# column is already datetime-typed; confirm against the pickle.
for frame in (emag, icn, grist):
    frame['date'] = pd.to_datetime(frame['date'])

In [25]:
# Stack the four outlets into one frame restricted to the shared columns.
# ``ignore_index=True`` gives the result a fresh, unique RangeIndex; without it
# each input keeps its own row labels, so the combined frame carries duplicate
# labels that make later label-based lookups and alignment ambiguous.
stack = pd.concat(
    [resilience, emag, icn, grist],
    ignore_index=True,
)[['source', 'url', 'title', 'date', 'author', 'text']]

In [35]:
# Keep only articles that actually have body text. Filter first and copy the
# (smaller) result, rather than deep-copying the whole frame -- including the
# rows about to be discarded -- and filtering afterwards. The trailing copy
# makes ``stack`` an independent frame for the column assignments that follow.
stack = stack[stack['text'].notna()].copy()

In [33]:
# Number of rows contributed by each outlet, most frequent first.
stack['source'].value_counts()

Resilience           33370
Grist                22566
EMagazine             7915
InsideClimateNews     5742
Name: source, dtype: int64

In [36]:
# Calendar year of publication, used as the x-axis of the plots below.
stack['year'] = stack['date'].dt.year

In [49]:
# Articles per (source, year). ``size()`` counts rows, so an article whose
# ``url`` is missing is still counted; counting the non-null values of ``url``
# instead would silently under-report such articles.
articles_per_year = (
    stack.groupby(['source', 'year'])
         .size()
         .reset_index(name='articles')
)

fig = px.line(
    articles_per_year,
    x='year', y='articles', color='source',
    labels={'year': 'Year', 'articles': 'Articles Published', 'source': 'Source'},
    title='Environmental Discourse: Number of Articles Published by Source',
)

# Black Times New Roman text on a transparent plot background, with the legend
# anchored by its top-left corner at (0.05, 0.9).
fig.update_layout(
    font_family='Times New Roman',
    font_color='black',
    plot_bgcolor='rgba(0, 0, 0, 0)',
    legend=dict(yanchor='top', xanchor='left', y=0.9, x=0.05),
)

# Thin black axis lines, mirrored onto the opposite side so the plot is boxed
# in, with tick marks pointing outwards. All axis styling lives here so no
# setting is specified twice.
axis_frame = dict(
    mirror=True,
    ticks='outside',
    showline=True,
    linewidth=1,
    linecolor='black',
)
fig.update_xaxes(**axis_frame)
# Keep this call last: it returns the figure, which the notebook then renders.
fig.update_yaxes(**axis_frame)

In [58]:
def _count_words(text):
    """Return the number of whitespace-separated tokens in ``text``."""
    return len(text.split())


# Length of every article in whitespace-delimited words.
stack['wordcount'] = stack['text'].apply(_count_words)

In [None]:
# Mean article length per (source, year).
mean_wordcount = (
    stack.groupby(['source', 'year'])['wordcount']
         .mean()
         .reset_index()
)

fig = px.line(
    mean_wordcount,
    x='year',
    y='wordcount',
    color='source',
    labels={'year': 'Year', 'wordcount': 'Average Word Count', 'source': 'Source'},
    title='Environmental Discourse: Words per Article by Source',
)

# Mirrored axis lines with outward ticks, shared by both axes.
boxed_axis = dict(mirror=True, ticks='outside', showline=True)

# Black Times New Roman text on a transparent plot background, with the legend
# anchored by its top-right corner at (0.95, 0.9).
fig.update_layout(
    font_family='Times New Roman',
    font_color='black',
    plot_bgcolor='rgba(0, 0, 0, 0)',
    legend=dict(yanchor="top", xanchor="right", y=0.9, x=0.95),
    yaxis=boxed_axis,
    xaxis=boxed_axis,
)

# Thin black axis lines; the final call returns the figure so the notebook
# renders it.
axis_line = dict(showline=True, linewidth=1, linecolor='black')
fig.update_xaxes(**axis_line)
fig.update_yaxes(**axis_line)

In [None]:
# Lower-case each article and tokenise it on word characters rather than on
# whitespace. Splitting on whitespace alone leaves punctuation glued to the
# neighbouring word ("science," / "science." / "(science)"), which hides those
# occurrences from the exact-match count below. An apostrophe (straight or
# curly, U+2019) between word characters is kept so contractions and
# possessives remain single tokens.
stack['cleaned_wordlist'] = (
    stack['text']
    .str.lower()
    .str.findall(r"\w+(?:['\u2019]\w+)*")
)

# Number of tokens in each article that are exactly 'science'.
stack['science_count'] = stack['cleaned_wordlist'].apply(
    lambda words: words.count('science')
)

In [None]:
# Preview the first five rows, including the derived columns.
stack.head(5)