# Responses and third-party responses per harvest

    – input data: responses_total_combined.json - contains, per harvest, the total number of responses, the number of EU/EEA responses, and the number of EU/EEA responses with TP communication
    – output plot: bar chart with harvest date on x-axis and number of responses on y-axis
    – purpose: Visualise the distribution of the number of responses per harvest based on three criteria: (i) the total number of responses, (ii) all responses to EU/EEA first-party requests, and (iii) only third-party (TP) responses to EU/EEA first-party requests. This offers a more granular overview of the responses and allows comparisons, e.g. whether a decrease in the total number of responses also affects the EU/EEA responses or only the non-EU ones, or whether a decrease in the number of TP responses to EU/EEA-origin first-party requests influences the overall number of EU/EEA responses.

In [1]:
# Import
import pandas as pd
import csv
import json
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objects as go
from itertools import cycle
import plotly.express as px

In [2]:
# Jupyter setup
# Initialise plotly's offline notebook mode so that figures passed to iplot()
# are rendered inline in this notebook.
# NOTE(review): per the plotly.offline docs, connected=True loads plotly.js
# from a CDN instead of embedding it in the notebook, so rendering needs
# internet access — confirm this is acceptable for the target environment.
init_notebook_mode(connected=True)

In [3]:
# Set up the path to data
# Directory that holds the input JSON for this notebook. The trailing slash is
# required: the load cell builds the full file path by plain string
# concatenation (f_path + file name).
f_path = '/home/ubuntu/data/processed/crawls/response_enriched/analysis_v.3/'

In [4]:
# Load the per-harvest response counts
total_res_name = 'responses_total_combined.json'

# f_path ends with a slash, so plain concatenation yields the full file path
with open(f_path + total_res_name) as f:
    json_res = json.load(f)

# Flatten the JSON records into a DataFrame and order the rows by harvest date.
# The dates are ISO-formatted strings (YYYY-MM-DD), so sorting them as text
# also orders them chronologically. The original row index is kept as-is.
df_res = pd.json_normalize(json_res).sort_values('date')
df_res.head()

Unnamed: 0,date,total_res_EU_FT_TP,total_res_EU,total_res
34,2018-02-07,738642,1174656,1492074
55,2018-02-09,732836,1168590,1484670
15,2018-02-14,729397,1165955,1488655
20,2018-03-21,733288,1164850,1483718
30,2018-03-29,734508,1165377,1491943


In [5]:
# Build the x-axis label frame: a copy of the harvest dates (in the sorted
# order of df_res) with a fresh 0..n-1 index so rows can be addressed by position
df_dates = df_res[['date']].reset_index(drop=True)

# Render the label in row 9 in bold on the x-axis.
# NOTE(review): this assumes row 9 is the 2018-05-25 harvest; the plot's
# "GDPR" annotation points at the same position (x=9) — confirm if harvests
# are ever added or removed.
df_dates.at[9, 'date'] = '<b>2018-05-25</b>'
df_dates.head()

Unnamed: 0,date
0,2018-02-07
1,2018-02-09
2,2018-02-14
3,2018-03-21
4,2018-03-29


In [7]:
# Colours for the three bar series, handed out in order by a cycling iterator
colors = ['#1F77B4', '#FF7F0E', '#9467BD']
palette = cycle(colors)

# One entry per bar series: (legend label, df_res column, bar width).
# A width of None leaves the bar width to plotly's default; each following
# series uses a narrower bar so that, with barmode='overlay', the series
# added before it stays visible behind it.
bar_series = [
    ('All responses', 'total_res', None),
    ('All EU/EEA responses', 'total_res_EU', 0.5),
    ('EU/EEA responses with TP communication', 'total_res_EU_FT_TP', 0.35),
]

# Initialise the figure and add one bar trace per series, in the order above
fig = go.Figure()
for series_label, series_column, series_width in bar_series:
    fig.add_trace(
        go.Bar(
            name=series_label,
            x=df_dates['date'],
            y=df_res[series_column],
            width=series_width,
            marker_color=next(palette),
        )
    )

# Treat the harvest dates as categories and use the labels from df_dates
# (including the bold one) as the tick values
fig.update_xaxes(type='category', tickvals=df_dates['date'])

# Set y-axis title
fig.update_yaxes(title_text="Number of <b>responses</b>")

# Overlay the bars, add a centred figure title, set the font, and place a
# horizontal legend centred above the plot area
fig.update_layout(
    barmode='overlay',
    title=dict(
        text="Number of <i>total</i>, <i>EU/EEA</i>, and <i>EU/EEA TP visited site responses</i> per harvest<br>",
        y=0.9,
        x=0.5,
        xanchor='center',
        yanchor='top',
        font=dict(size=20),
    ),
    font=dict(family="Courier New, monospace"),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="center",
        x=0.5,
    ),
)

# Mark the GDPR harvest with an arrow annotation. x=9 is the same position
# whose label is set to the bold 2018-05-25 date in df_dates.
fig.add_annotation(
    x=9,
    y=1430000,
    xref="x",
    yref="y",
    text="GDPR",
    showarrow=True,
    font=dict(family="Courier New, monospace", size=14, color="#ffffff"),
    align="center",
    arrowhead=2,
    arrowsize=1,
    arrowwidth=2,
    arrowcolor="#636363",
    ax=20,
    ay=-30,
    bordercolor="#c7c7c7",
    borderwidth=2,
    borderpad=4,
    bgcolor="#ff7f0e",
    opacity=0.8,
)

# Set plot size - Use when also exporting
#fig.update_layout(
#    autosize=False,
#    width=1100,
#    height=600,
#    )

# Render the figure inline
iplot(fig)

In [7]:
# Export the figure built in the plotting cell to a static PDF file.
# NOTE(review): static image export depends on plotly's image-export backend
# (e.g. kaleido) being installed — confirm for this environment.
fig_out_path = "/home/ubuntu/Plots/FIG_1.pdf"
fig.write_image(fig_out_path)