In [1]:
import pandas as pd
import plotly.express as px

from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [2]:
# Load the three corpora used throughout the notebook.
# The magazine corpus is a CSV whose first column is used as the index.
env = pd.read_csv(
    '../Data/Environmental Discourse/env_processed_text.csv',
    index_col=0,
)
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.
sci = pd.read_pickle('../Data/Scientific-Discourse/env_sci.pkl')
wos = pd.read_pickle('../Data/WoS/wos_processed.pkl')

In [3]:
# Derive helper columns on the magazine corpus:
#   tokens - whitespace-split pieces of `text_processed`
#   date   - parsed into datetimes
#   year   - calendar year taken from the parsed date
env['tokens'] = env['text_processed'].map(str.split)
env['date'] = pd.to_datetime(env['date'])
env['year'] = env['date'].dt.year

In [4]:
# Keep only scientific articles with year <= 2021, and expose the WoS
# publication year under the same `year` column name the other frames use.
sci = sci.loc[sci['year'] <= 2021]
wos['year'] = wos['pubyear']

In [134]:
# Toy example: rescale a series so its first element equals 100.
ex = pd.Series([4, 5, 2, 10, 11, 7])
ex.mul(100).div(ex.iloc[0])

0    100.0
1    125.0
2     50.0
3    250.0
4    275.0
5    175.0
dtype: float64

In [5]:
def avg_mentioning(word, datasets=None, start_year=2005):
    """Yearly percentage of documents whose token list contains ``word``.

    Parameters
    ----------
    word : str
        Token to look for (membership test against each row's ``tokens``).
    datasets : mapping of str -> DataFrame, optional
        Frames to analyse, keyed by the label used for the output column.
        Each frame must provide ``tokens`` and ``year`` columns. When
        omitted, the notebook-level ``env``, ``sci`` and ``wos`` frames are
        used, in that order, with those names as column labels.
    start_year : int, optional
        First year (inclusive) kept in the result. Defaults to 2005.

    Returns
    -------
    DataFrame
        Indexed by year (ascending), one column per dataset, values in
        percent (0-100). A year absent from a dataset is NaN in its column.
    """
    if datasets is None:
        datasets = {'env': env, 'sci': sci, 'wos': wos}

    shares = {}
    for label, data in datasets.items():
        # Work on a temporary Series instead of writing a `word` column into
        # the frame: the inputs stay untouched, and a keyword that happens to
        # equal an existing column name (e.g. 'year') cannot overwrite it.
        mentions = data['tokens'].apply(lambda toks: int(word in toks))
        shares[label] = 100 * mentions.groupby(data['year']).mean()

    df = pd.concat(shares, axis=1)
    # The datasets may cover different years, so the combined index is not
    # guaranteed to be ordered; sort before the label-based slice so that
    # `start_year:` really means "from that year onwards".
    df = df.sort_index()
    return df.loc[start_year:]

In [6]:
# Keywords plotted in the first figure (12 entries).
kw1 = [
    'climate_change', 'justice', 'racism',
    'activism', 'indigenous', 'community',
    'food', 'agriculture', 'water',
    'energy', 'coal', 'solar',
]

# Keywords plotted in the second figure (10 entries).
kw2 = [
    'global_warming', 'emission', 'temperature',
    'politic', 'minority', 'energy',
    'oil', 'natural_gas', 'wind',
    'electric',
]

# 1-based (row, column) coordinates of the 12 cells of a 4 x 3 grid,
# enumerated left-to-right, top-to-bottom.
r = [cell // 3 + 1 for cell in range(12)]
c = [cell % 3 + 1 for cell in range(12)]

# Coordinates for the first ten cells only.
r2 = r[:10]
c2 = c[:10]

In [11]:
# Figure for the kw1 keywords: a 4 x 3 grid with one panel per keyword.
# Every panel is created with a secondary y-axis.
fig = make_subplots(
    rows=4,
    cols=3,
    subplot_titles=kw1,
    shared_xaxes=True,
    vertical_spacing=0.07,
    horizontal_spacing=0.06,
    specs=[[{"secondary_y": True} for _ in range(3)] for _ in range(4)],
)

# One entry per line drawn in each panel:
# (column of the avg_mentioning frame, line colour, legend label, extra add_trace kwargs)
trace_specs = [
    ('env', 'green', 'Environmental Magazines (left axis)', {}),
    ('sci', 'black', 'Environmental Social Science (left axis)', {}),
    ('wos', '#66cce5', 'Social Science (right axis)', {'secondary_y': True}),
]

for w, row, col in zip(kw1, r, c):
    df = avg_mentioning(w)
    # Only the 'climate_change' panel contributes legend entries, so each
    # label appears once in the shared legend.
    show_legend = w == 'climate_change'

    for column, colour, label, axis_kwargs in trace_specs:
        fig.add_trace(
            go.Scatter(
                x=df.index,
                y=df[column],
                line=dict(color=colour),
                name=label,
                showlegend=show_legend,
            ),
            row=row,
            col=col,
            **axis_kwargs
        )

fig.update_layout(
    title='Figure XX: Proportion of Documents Mentioning Keywords Across Datasets',
    font_family='Times New Roman',
    font_color='black',
    plot_bgcolor='#E8E7E7',
    height=900,
    width=1000,
    # horizontal legend placed just below the plotting area
    legend={
        'orientation': 'h',
        'yanchor': 'bottom',
        'y': -0.08,
        'xanchor': 'left',
        'x': 0.2,
    },
)
fig.update_xaxes(showgrid=False)

fig.show()

In [10]:
# Appendix figure for the kw2 keywords, drawn on a 4 x 3 grid; kw2 has ten
# entries, so only the first ten grid cells (r2/c2) receive traces.
# Every panel is created with a secondary y-axis.
fig = make_subplots(
    rows=4,
    cols=3,
    subplot_titles=kw2,
    shared_xaxes=True,
    vertical_spacing=0.07,
    horizontal_spacing=0.06,
    specs=[[{"secondary_y": True} for _ in range(3)] for _ in range(4)],
)

# One entry per line drawn in each panel:
# (column of the avg_mentioning frame, line colour, legend label, extra add_trace kwargs)
trace_specs = [
    ('env', 'green', 'Environmental Magazines (left axis)', {}),
    ('sci', 'black', 'Environmental Social Science (left axis)', {}),
    ('wos', '#66cce5', 'Social Science (right axis)', {'secondary_y': True}),
]

for w, row, col in zip(kw2, r2, c2):
    df = avg_mentioning(w)
    # Only the 'global_warming' panel contributes legend entries, so each
    # label appears once in the shared legend.
    show_legend = w == 'global_warming'

    for column, colour, label, axis_kwargs in trace_specs:
        fig.add_trace(
            go.Scatter(
                x=df.index,
                y=df[column],
                line=dict(color=colour),
                name=label,
                showlegend=show_legend,
            ),
            row=row,
            col=col,
            **axis_kwargs
        )

fig.update_layout(
    title='Appendix Figure XX: Proportion of Documents Mentioning Keywords Across Datasets',
    font_family='Times New Roman',
    font_color='black',
    plot_bgcolor='#E8E7E7',
    height=900,
    width=1000,
    # horizontal legend placed just below the plotting area
    legend={
        'orientation': 'h',
        'yanchor': 'bottom',
        'y': -0.08,
        'xanchor': 'left',
        'x': 0.2,
    },
)
fig.update_xaxes(showgrid=False)

fig.show()

In [131]:
# Debug view: display the trace objects currently attached to `fig`.
fig.data

(Scatter({
     'line': {'color': 'green'},
     'name': 'Environmental Magazines (left axis)',
     'showlegend': True,
     'x': array([2005., 2006., 2007., 2008., 2009., 2010., 2011., 2012., 2013., 2014.,
                 2015., 2016., 2017., 2018., 2019., 2020., 2021.]),
     'xaxis': 'x',
     'y': array([15.59251559, 19.96805112, 29.13172176, 24.77933262, 39.96844596,
                 30.47385621, 23.38370077, 23.76108503, 23.99883075, 31.31825273,
                 42.99578059, 43.44400675, 50.34272658, 54.33198381, 58.86871508,
                 54.60565924, 53.66847826]),
     'yaxis': 'y'
 }),
 Scatter({
     'line': {'color': 'black'},
     'name': 'Environmental Social Science (left axis)',
     'showlegend': True,
     'x': array([2005., 2006., 2007., 2008., 2009., 2010., 2011., 2012., 2013., 2014.,
                 2015., 2016., 2017., 2018., 2019., 2020., 2021.]),
     'xaxis': 'x',
     'y': array([28.33333333, 21.73913043, 30.1369863 , 29.59183673, 36.55913978,
         

In [98]:
# Rebinds `fig` to a new figure with a single subplot spec that enables a
# secondary y-axis; no traces are added in this cell.
fig = make_subplots(specs=[[{"secondary_y": True}]])

### Number of environmental journal articles with and without an abstract

In [4]:
# Number of rows per journal (`source`) in the scientific corpus.
sci['source'].value_counts()

Ecology and Society                           2708
Applied Ecology and Environmental Sciences     372
Environmental Sociology                        258
Name: source, dtype: int64

In [10]:
# Among 'Ecology and Society' rows, count how many token lists contain the
# literal token 'abstract' (True) versus not (False).
(
    sci.loc[sci['source'] == 'Ecology and Society', 'tokens']
    .map(lambda toks: 'abstract' in toks)
    .value_counts()
)

False    2276
True      432
Name: tokens, dtype: int64

In [17]:
# For every journal other than 'Ecology and Society': how many `abstract`
# values have a length below 10 (True) versus 10 or more (False).
(
    sci.loc[sci['source'] != 'Ecology and Society', 'abstract']
    .map(len)
    .lt(10)
    .value_counts()
)

False    611
True      19
Name: abstract, dtype: int64

In [18]:
# For every journal other than 'Ecology and Society': how many `article`
# values have a length below 10 (True) versus 10 or more (False).
(
    sci.loc[sci['source'] != 'Ecology and Society', 'article']
    .map(len)
    .lt(10)
    .value_counts()
)

False    419
True     211
Name: article, dtype: int64

In [19]:
# (rows, columns) of the scientific corpus at this point.
sci.shape

(3338, 9)

In [20]:
# Scratch arithmetic on hard-coded numbers; they appear to be copied by hand
# from the value_counts outputs above -- TODO confirm and compute from `sci`.
432 + 611 + 419

1462

In [71]:
# 'Ecology and Society' subset.
# Filter first and copy the (smaller) result: this avoids duplicating the
# whole frame and gives `eas` its own data, so the column assignments below
# do not raise pandas' SettingWithCopyWarning.
eas = sci.loc[sci['source'] == 'Ecology and Society'].copy()

# An abstract is considered present when the token list contains the literal
# token 'abstract'.
eas['has_abstract'] = eas['tokens'].apply(lambda toks: 'abstract' in toks)

# Every row of this journal is flagged as having article text.
eas['has_article'] = True

In [72]:
# 'Applied Ecology and Environmental Sciences' subset.
# Filter first and copy the (smaller) result: this avoids duplicating the
# whole frame and gives `aees` its own data, so the column assignments below
# do not raise pandas' SettingWithCopyWarning.
aees = sci.loc[sci['source'] == 'Applied Ecology and Environmental Sciences'].copy()

# A field counts as present when its length exceeds 10.
aees['has_abstract'] = aees['abstract'].apply(lambda value: len(value) > 10)
aees['has_article'] = aees['article'].apply(lambda value: len(value) > 10)

In [73]:
# 'Environmental Sociology' subset.
# Filter first and copy the (smaller) result: this avoids duplicating the
# whole frame and gives `es` its own data, so the column assignments below
# do not raise pandas' SettingWithCopyWarning.
es = sci.loc[sci['source'] == 'Environmental Sociology'].copy()

# A field counts as present when its length exceeds 10.
es['has_abstract'] = es['abstract'].apply(lambda value: len(value) > 10)
es['has_article'] = es['article'].apply(lambda value: len(value) > 10)

In [75]:
# Rebuild `sci` by stacking the three per-journal subsets row-wise, so it now
# carries the `has_abstract` / `has_article` flags.
# NOTE(review): rows from any journal outside these three subsets would not
# be carried over -- confirm there are none.
sci = pd.concat([eas, aees, es], axis=0)

In [50]:
# Keep only rows whose year is before 2022.
sci = sci.loc[sci['year'] < 2022]

In [76]:
# Number of rows per journal (`source`) in the rebuilt scientific corpus.
sci['source'].value_counts()

Ecology and Society                           2708
Applied Ecology and Environmental Sciences     350
Environmental Sociology                        257
Name: source, dtype: int64

In [80]:
# (rows, columns) of the rebuilt scientific corpus.
sci.shape

(3315, 12)

In [77]:
# Row counts for each (has_abstract, has_article) combination.
sci.value_counts(subset=['has_abstract', 'has_article'])

has_abstract  has_article
False         True           2294
True          True            810
              False           211
dtype: int64

In [79]:
# Same combinations as relative frequencies (shares summing to 1).
sci.value_counts(subset=['has_abstract', 'has_article'], normalize=True)

has_abstract  has_article
False         True           0.692006
True          True           0.244344
              False          0.063650
dtype: float64

In [45]:
# Document length: number of whitespace-separated pieces in `text`.
sci['len'] = sci['text'].map(lambda doc: len(doc.split()))

In [84]:
# Per journal and year: number of rows (non-null `url` count) and mean
# document length.
per_source_year = (
    sci.groupby(['source', 'year'])
    .agg({'url': 'count', 'len': 'mean'})
    .reset_index()
)

# Bubble chart: x = year, y = article count, bubble size = mean length,
# colour = journal.
fig = px.scatter(
    per_source_year,
    x='year',
    y='url',
    color='source',
    size='len',
    title='Figure XX. Number and Average Length of Environmental Social Science Articles',
    labels={'source': '', 'year': 'Year', 'url': 'Number of Articles'},
)

# Black frame on both axes.
# NOTE(review): tick0=4 with dtick=2 presumably anchors the year ticks on
# even values -- confirm against the rendered axis.
x_axis_style = {
    'tickmode': 'linear',
    'tick0': 4,
    'dtick': 2,
    'showline': True,
    'mirror': True,
    'linecolor': 'black',
}
y_axis_style = {
    'showline': True,
    'mirror': True,
    'linecolor': 'black',
}
# Legend anchored by its top-left corner inside the plotting area.
legend_style = {
    'xanchor': 'left',
    'x': 0.1,
    'yanchor': 'top',
    'y': 0.95,
}

fig.update_layout(
    font_family='Times New Roman',
    plot_bgcolor='rgba(0,0,0,0)',
    font_color='black',
    xaxis=x_axis_style,
    yaxis=y_axis_style,
    legend=legend_style,
)

fig.show()

### Average number of unique tokens per document

In [142]:
# Mean number of distinct tokens per document in the scientific corpus.
sci['tokens'].map(lambda toks: len(set(toks))).mean()

874.0180995475113

In [143]:
# Mean number of distinct tokens per document in the magazine corpus.
env['tokens'].map(lambda toks: len(set(toks))).mean()

275.347122263748

In [144]:
# Mean number of distinct tokens per document in the WoS corpus.
wos['tokens'].map(lambda toks: len(set(toks))).mean()

58.93180749900568