In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
# from jupyter_dash import JupyterDash
# from dash import dcc, html
# from dash.dependencies import Input, Output
import csv
import os

In [None]:
# Survey responses; columns arrive under their original names.
df = pd.read_csv('https://raw.githubusercontent.com/maddikia/euphoria-data/main/euphoria.csv')

# Header-less lookup table: column 0 holds the original column name,
# column 1 the short label used throughout this notebook.
dict_labels_df = pd.read_csv(
    'https://raw.githubusercontent.com/maddikia/euphoria-data/main/dict_labels.csv',
    header=None,
    index_col=0,
)
q_to_col = dict_labels_df[1].to_dict()

# Re-index the same table by short label to obtain the reverse mapping.
dict_labels_df = dict_labels_df.reset_index()
dict_labels_df.index = dict_labels_df[1]
col_to_q = dict_labels_df[0].to_dict()

df = df.rename(columns=q_to_col)

In [None]:
# Column labels of the two Likert question groups.
euphoria_questions = list(map(str, range(1, 20)))
general_questions = list(map(str, range(22, 35)))
all_questions = [*euphoria_questions, *general_questions]

# Pin every Likert column to the 1-5 answer scale.
likert_levels = [1, 2, 3, 4, 5]
for i in all_questions:
  df[i] = pd.Categorical(df[i], categories=likert_levels)

# The single-letter columns become categoricals with whatever levels occur.
for i in ('A', 'B', 'C', 'D'):
  df[i] = pd.Categorical(df[i])

In [None]:
# Root folder that receives every exported figure.
images_root = "images"
if not os.path.exists(images_root):
  os.mkdir(images_root)

# Chart Generation

In [152]:
def gen_chart(title, x_data, y_data, l_padding=150, t_padding=100, width_mod=0):
    """Build a horizontal stacked Likert bar chart with in-bar percentage labels.

    Args:
        title: Figure title (Plotly pseudo-HTML such as <b>/<br> is passed through).
        x_data: One sequence per bar holding the share (as a fraction) of each
            answer level; index 0 is drawn first and labelled 'Strongly Disagree'.
        y_data: One label per bar, parallel to `x_data`.
        l_padding: Left margin in pixels.
        t_padding: Top margin in pixels; the value 130 also selects the larger
            title padding used for two-line titles.
        width_mod: Pixels added to the base figure width of 1000.

    Returns:
        The assembled plotly Figure.
    """
    top_labels = ['Strongly Agree', 'Agree', 'Neutral', 'Disagree',
                  'Strongly Disagree']

    colors = ['rgb(158,34,162)', 'rgb(198,156,234)', 'rgb(200, 192, 234)', 'rgb(114,133,225)', 'rgb(51, 51, 154)']

    fig = go.Figure()

    # One trace per (answer level, bar); levels are added in order so that
    # barmode='stack' lays them out left to right.
    n_levels = len(x_data[0]) if len(x_data) else 0
    for level in range(n_levels):
        for idx, (xd, yd) in enumerate(zip(x_data, y_data)):
            fig.add_trace(go.Bar(
                x=[xd[level]], y=[yd],
                orientation='h',
                marker=dict(
                    color=colors[level],
                    line=dict(color='white', width=0)
                ),
                name=top_labels[-(level + 1)],
                # only the first bar contributes legend entries
                showlegend=idx == 0,
            ))

    y_val = 50 if t_padding == 130 else 30

    fig.update_layout(
        title=dict(
            text=title,
            font=dict(family='Proxima Nova', size=25, color='white'),
            pad=dict(t=y_val),
            y=1,
            yanchor='top'
        ),
        xaxis=dict(
            tickmode="linear",
            tick0=0,
            dtick=.25,
            tickfont=dict(family='PT Sans', size=14, color='white'),
            showticklabels=False,
            tickformat=',.0%',
            gridwidth=0,
            linewidth=0,
            gridcolor='#151E27',
            linecolor='#151E27',
            zeroline=False,
            domain=[0.08, 1]
        ),
        yaxis=dict(
            autorange='reversed',
            showticklabels=False,
        ),
        barmode='stack',
        bargap=0.2,
        paper_bgcolor='#151E27',
        plot_bgcolor='#151E27',
        margin=dict(l=l_padding, r=10, t=t_padding, b=50),
        showlegend=True,
        width=1000 + width_mod,
        height=200 + len(x_data) * 60,
        legend=dict(
            traceorder='normal',
            font=dict(family='PT Sans', size=16, color='white'),
            orientation="h",
            yanchor="bottom",
            y=1,
            xanchor="right",
            x=.96,
            bgcolor='#151E27'
        )
    )

    annotations = []

    for yd, xd in zip(y_data, x_data):
        # bar label, right-aligned in the strip left of the plot area
        annotations.append(dict(xref='paper', yref='y',
                                x=.065, y=yd,
                                xanchor='right',
                                text=str(yd),
                                font=dict(family='PT Sans', size=18,
                                          color='white'),
                                showarrow=False, align='right'))
        # percentage label centred on every non-empty segment; `offset` is the
        # running left edge of the current segment
        offset = 0
        for share in xd:
            if share > 0:
                annotations.append(dict(xref='x', yref='y',
                                        x=offset + share / 2, y=yd,
                                        text='<b>' + str(np.rint(share * 100))[:-2] + '%</b>',
                                        font=dict(family='PT Sans', size=15,
                                                  color='rgb(255, 255, 255)'),
                                        showarrow=False))
            offset += share

    fig.update_layout(annotations=annotations)

    return fig

In [None]:
def gen_diverging_chart(title, x_data, y_data, l_padding=150, t_padding=120, width_mod=0, split=.88):
  """Build a diverging Likert chart with the middle answer level in a side panel.

  The left subplot plots answer levels 1 and 0 as negative values (left of
  zero) and levels 3 and 4 as positive values; the right subplot shows
  level 2 ("Neutral") on its own x axis.

  Args:
    title: Not applied to the layout (no title is set on the figure).
    x_data: One sequence of five shares (fractions) per bar.
    y_data: One label per bar, parallel to `x_data`.
    l_padding: Left margin in pixels.
    t_padding: Top margin in pixels.
    width_mod: Pixels added to the base figure width of 1600.
    split: Fraction of the plot width given to the diverging panel.

  Returns:
    The assembled plotly Figure.
  """
  top_labels = ['Strongly Agree', 'Agree', 'Neutral', 'Disagree',
                'Strongly Disagree']

  colors = ['rgb(158,34,162)', 'rgb(198,156,234)', 'rgb(200, 192, 234)', 'rgb(114,133,225)', 'rgb(51, 51, 154)']

  pct_font = dict(family='PT Sans', size=15, color='white')

  def pct_label(share):
    # share 0.34 -> '<b>34%</b>'
    return '<b>' + str(np.rint(share * 100))[:-2] + '%</b>'

  fig = make_subplots(rows=1, cols=2, column_widths=[split, 1-split], shared_yaxes=True, horizontal_spacing=0)

  def add_level(level, mirrored):
    # Adds one segment per bar for the given answer level to the left panel.
    for idx, (xd, yd) in enumerate(zip(x_data, y_data)):
      if mirrored:
        # plotted negative; hover shows the true (positive) share
        position = dict(x=[-xd[level]],
                        customdata=[xd[level]],
                        hovertemplate="%{y}: %{customdata}")
      else:
        position = dict(x=[xd[level]],
                        hovertemplate="%{y}: %{x}")
      fig.add_trace(
        go.Bar(
          y=[yd],
          orientation='h',
          marker=dict(
            color=colors[level],
            line=dict(color='white', width=0)
          ),
          text=pct_label(xd[level]),
          textfont=pct_font,
          textposition='inside',
          insidetextanchor='middle',
          name=top_labels[-(level + 1)],
          legendgroup=top_labels[-(level + 1)],
          showlegend=idx == 0,
          **position
        ),
        row=1, col=1)

  for level in (1, 0):
    add_level(level, mirrored=True)
  for level in (3, 4):
    add_level(level, mirrored=False)

  neutrals = np.array(x_data)[:, 2]

  fig.add_trace(
    go.Bar(
      x=neutrals,
      y=y_data,
      orientation='h',
      marker=dict(
        color=colors[2],
        line=dict(color='white', width=0)
      ),
      text=[pct_label(x) for x in neutrals],
      textfont=pct_font,
      textposition='inside',
      insidetextanchor='middle',
      name="Neutral",
      hovertemplate="%{y}: %{x}",
      xaxis="x2"
    ), row=1, col=2)

  tick_font = dict(family='PT Sans', size=14, color='white')

  fig.update_layout(
    xaxis=dict(
      showgrid=False,
      showline=False,
      tickmode="linear",
      tick0=0,
      automargin=True,
      dtick=.25,
      showticklabels=False,
      tickformat=',.0%',
      tickfont=tick_font,
      domain=[.1, split-.005]
    ),
    xaxis2=dict(
      showgrid=False,
      showline=False,
      tickmode="linear",
      tick0=0,
      automargin=True,
      dtick=.25,
      showticklabels=False,
      tickfont=tick_font,
      tickformat=',.0%',
    ),
    yaxis=dict(
      autorange='reversed',
      showgrid=False,
      automargin=True,
      showline=False,
      showticklabels=True,
      tickfont=dict(family='PT Sans', size=20, color=colors[2]),
      zeroline=False,
    ),
    barmode='relative',
    bargap=0.2,
    paper_bgcolor='#151E27',
    plot_bgcolor='#151E27',
    margin=dict(l=l_padding, r=30, t=t_padding, b=70, pad=5),
    showlegend=False,
    width=1600 + width_mod,
    height=200 + len(x_data) * 100,
  )

  fig.update_layout(annotations=[])
  # white divider at zero
  fig.add_vline(x=0, line_width=3, line_color='white')

  return fig

# 100% Stacked Bar Charts

In [150]:
def get_pcts(data):
  """Return the per-question share of each answer level.

  Args:
    data: DataFrame whose columns are answer columns (one per question).

  Returns:
    DataFrame with one row per input column (renamed through `col_to_q`)
    and one column per answer level; each row is divided by its own sum.
  """
  # Series.value_counts replaces the deprecated top-level pd.value_counts;
  # sort_index keeps the answer levels in a fixed order regardless of which
  # answer happens to be most frequent in each column.
  counts = data.apply(lambda col: col.value_counts()).sort_index()
  return counts.transpose().apply(lambda x: x/sum(x), axis=1).rename(col_to_q)

def gen_comparisons(x1, x2, y, mk_dir_name, text_pad=150):
  """Render one two-bar stacked chart per question and save each as a PNG.

  Args:
    x1: DataFrame of answer shares, one row per question (index = question text).
    x2: DataFrame of answer shares for the comparison group; expected to share
      the index of `x1`.
    y: Two bar labels, for the `x1` row and the `x2` row respectively.
    mk_dir_name: Sub-folder of ./images that receives the PNG files.
    text_pad: Left margin (pixels) passed to `gen_chart`.

  Returns:
    List of the generated figures, in row order.
  """
  dir_path = f'./images/{mk_dir_name}'
  # makedirs also creates missing parents, so nested names such as
  # "group/general_qs" work without preparing the parent folder first.
  os.makedirs(dir_path, exist_ok=True)
  x1_data = x1.values.tolist()
  x2_data = x2.values.tolist()
  titles = x1.index.tolist() # indexes should be the same
  figures = []
  for i in range(len(x1_data)):
    top_padding = 100
    title = titles[i].strip()
    if len(title) > 80:
      # Long titles are broken once: words are added to the first line while
      # it stays under 80 characters, the remainder goes on a second line.
      top_padding = 130
      title_words = title.split(" ")
      title = ""
      for j in range(len(title_words)):
        if len(title) + len(title_words[j]) < 80:
          title += title_words[j] + " "
        else:
          title += "<br>" + " ".join(title_words[j:])
          break
    fig = gen_chart('<b>' + title + '</b>', [x1_data[i], x2_data[i]], y, l_padding=text_pad, t_padding=top_padding)
    # File name: row number plus the first six words of the title (last
    # character dropped). A "/" inside a word would be read as a directory
    # separator, so it is replaced.
    img_name = f'{i}_{"_".join(titles[i].strip()[:-1].split(" ")[:6])}'.replace("/", "-")
    fig.write_image(f"{dir_path}/{img_name}.png", scale=5)
    figures.append(fig)
  return figures

In [None]:
def gen_comparisons2(x1, x2, y, mk_dir_name, text_pad=150):
  """Build one two-bar diverging chart per question (nothing is written to disk).

  Args:
    x1: DataFrame of answer shares, one row per question (index = question text).
    x2: DataFrame of answer shares for the comparison group; expected to share
      the index of `x1`.
    y: Two bar labels, for the `x1` row and the `x2` row respectively.
    mk_dir_name: Currently unused (image export is disabled in this variant).
    text_pad: Left margin (pixels) passed to `gen_diverging_chart`.

  Returns:
    List of the generated figures, in row order.
  """
  x1_data = x1.values.tolist()
  x2_data = x2.values.tolist()
  titles = x1.index.tolist() # indexes should be the same
  figures = []
  for i in range(len(x1_data)):
    top_padding = 100
    title = titles[i].strip()
    if len(title) > 80:
      top_padding = 130
      title_words = title.split(" ")
      title = ""
      for j in range(len(title_words)):
        # Wrap at the same 80-character width that triggers wrapping above
        # (the previous threshold of 7 broke the title after the first word).
        if len(title) + len(title_words[j]) < 80:
          title += title_words[j] + " "
        else:
          title += "<br>" + " ".join(title_words[j:])
          break
    fig = gen_diverging_chart('<b>' + title + '</b>', [x1_data[i], x2_data[i]], y, l_padding=text_pad, t_padding=top_padding)
    figures.append(fig)
  return figures

In [153]:
# Answer shares on the general questions, split by column 'C' (Yes / No).
watched = get_pcts(df.loc[df['C'] == 'Yes', general_questions])
not_watched = get_pcts(df.loc[df['C'] == 'No', general_questions])
figures = gen_comparisons(
    watched,
    not_watched,
    ["Watched", "Not Watched"],
    "watchedcomp2",
    text_pad=90,
)
for fig in figures:
    fig.show()

In [None]:
# Same comparison as a diverging chart; only the first question is displayed.
figures = gen_comparisons2(
    watched,
    not_watched,
    ["Watched", "Not Watched"],
    "watched_vs_not",
    text_pad=50,
)
figures[0].show()

In [None]:
# Split respondents on column 'A' and compute answer shares for both question groups.
drugs_gen = get_pcts(df.loc[df['A'] == 'Yes', general_questions])
no_drugs_gen = get_pcts(df.loc[df['A'] == 'No', general_questions])
drugs_euphoria = get_pcts(df.loc[df['A'] == 'Yes', euphoria_questions])
no_drugs_euphoria = get_pcts(df.loc[df['A'] == 'No', euphoria_questions])
y_labels = ["Has Used Drugs<br>and/or Had Alcohol", "Has Not Used Drugs<br>and/or Had Alcohol"]

# Parent folder of the two output sub-folders used below.
if not os.path.exists("images/drugs_vs_no"):
  os.mkdir("images/drugs_vs_no")

# Show the wording of the splitting question.
print(col_to_q['A'])

figures_gen = gen_comparisons(drugs_gen, no_drugs_gen, y_labels, "drugs_vs_no/general_qs")
figures_gen[0].show()

figures_euphoria = gen_comparisons(drugs_euphoria, no_drugs_euphoria, y_labels, "drugs_vs_no/euphoria_qs")
for fig in figures_euphoria:
  fig.show()

In [None]:
# Split respondents on column 'B' and compute answer shares for both question groups.
dv_gen = get_pcts(df.loc[df['B'] == 'Yes', general_questions])
no_dv_gen = get_pcts(df.loc[df['B'] == 'No', general_questions])
dv_euphoria = get_pcts(df.loc[df['B'] == 'Yes', euphoria_questions])
no_dv_euphoria = get_pcts(df.loc[df['B'] == 'No', euphoria_questions])
y_labels = ["Experienced<br>Dating Violence", "Did Not Experience<br>Dating Violence"]

# Parent folder of the two output sub-folders used below.
if not os.path.exists("images/dating_violence_vs_no"):
  os.mkdir("images/dating_violence_vs_no")

# Show the wording of the splitting question.
print(col_to_q['B'])

# Figures are written to disk by gen_comparisons; they are not displayed here.
figures_gen = gen_comparisons(dv_gen, no_dv_gen, y_labels, "dating_violence_vs_no/general_qs")
figures_euphoria = gen_comparisons(dv_euphoria, no_dv_euphoria, y_labels, "dating_violence_vs_no/euphoria_qs")

In [None]:
# Answer shares for the show-specific questions among respondents with C == 'Yes'.
# Uses the shared helper instead of repeating its pipeline (which relied on the
# deprecated pd.value_counts).
euphoria_likert = get_pcts(df[df['C'] == 'Yes'][euphoria_questions])

In [None]:
# Collects the summary figures built below; they are exported together later.
general_figures = list()

In [None]:
# Rows 0-1 of the show-specific answer shares.
x_data = euphoria_likert.values[:2].tolist()
y_data = [
    'I enjoyed watching the show.',
    'I believe the TV show<br>depicted teenage life accurately.',
]
title = "<b>General Perceptions on <i>Euphoria</i></b>"
fig = gen_chart(title, x_data, y_data, l_padding=250, width_mod=200)
general_figures.append(fig)
fig.show()

In [None]:
# Rows 2-6 of the show-specific answer shares.
x_data = euphoria_likert.values[2:7].tolist()
y_data = [
    'The show made<br>substances look <b>fun</b>.',
    'The show made<br>substances look <b>desirable</b>.',
    'The cinematography of the show<br>influenced how I perceived drugs.',
    "The show's narrative made me<br>change how I experience and/or<br>perceive substance abuse.",
    "After watching the show, I felt<br><b>more inclined to experiment with<br>the usage of substances.</b>",
]
title = "<b>Impact of <i>Euphoria</i> on Youth Perceptions of Substance Use</b>"
fig = gen_chart(title, x_data, y_data, l_padding=260, width_mod=200)
general_figures.append(fig)
fig.show()

In [None]:
# Rows 7-10 of the show-specific answer shares.
x_data = euphoria_likert.values[7:11].tolist()
y_data = [
    'I believe the show depicted<br>substance abuse <b>accurately</b>.',
    'The show was <b>educational</b> in regards<br>to substance abuse and helped me<br>understand the topic better.',
    'I believe the show would make others<br><b>more likely to reach out for help</b> if<br>they are struggling with substance use.',
    'The show portrayed substance<br>abuse treatments in a positive light.',
]
title = "<b>Impact of <i>Euphoria</i> on Substance Abuse Education and Awareness</b>"
fig = gen_chart(title, x_data, y_data, l_padding=300, width_mod=200)
general_figures.append(fig)
fig.show()

In [None]:
# Rows 7-10 again, this time as a diverging chart with bold bar labels.
x_data = euphoria_likert.values.tolist()[7:11]
y_data = ['<b>I believe the show depicted<br>substance abuse accurately</b>.', 
          '<b>The show was <b>educational</b> in regards<br>to substance abuse and helped me<br>understand the topic better.</b>',
          '<b>I believe the show would make others<br><b>more likely to reach out for help</b> if<br>they are struggling with substance use.</b>',
          '<b>The show portrayed substance<br>abuse treatments in a positive light.</b>']
title = "<b>Impact of <i>Euphoria</i> on Substance Abuse Education and Awareness</b>"
fig2 = gen_diverging_chart(title, x_data, y_data, l_padding=250, width_mod=-300, split=.75)
# The summary folder is only created in a later cell; create it here so this
# export works on a fresh top-to-bottom run.
os.makedirs("images/summary", exist_ok=True)
fig2.write_image("images/summary/diverging_education_awareness.png", scale=5)
fig2.show()

In [None]:
# Rows 11-12 of the show-specific answer shares.
x_data = euphoria_likert.values[11:13].tolist()
y_data = [
    'The show positively affected<br>how I viewed romantic relationships.',
    'I have more positive views of sex<br>after watching the show.',
]
title = "<b>Impact of <i>Euphoria</i> on Youth Perceptions of Relationships and Sex</b>"
fig = gen_chart(title, x_data, y_data, l_padding=280, width_mod=200)
general_figures.append(fig)
fig.show()

In [None]:
# Rows 13-15 of the show-specific answer shares.
x_data = euphoria_likert.values.tolist()[13:16]
y_data = ["Overall, I believe that Nate and<br>Maddy's relationship showcases<br>signs of an unhealthy relationship.",
          "I think Nate and Maddy’s relationship<br>is desirable despite its pitfalls.",
          "The attractiveness of the actors<br>who play Nate and Maddy influenced<br>how I saw their relationship."]
title = "<b>Youth Perceptions on Nate and Maddy's Relationship in <i>Euphoria</i></b>"
fig = gen_chart(title, x_data, y_data, l_padding=280, width_mod=200)
general_figures.append(fig)
# The summary folder is only created in a later cell; create it here so this
# export works on a fresh top-to-bottom run.
os.makedirs("images/summary", exist_ok=True)
fig.write_image("images/summary/nate_maddy_smaller.png", scale=5)
fig.show()

In [None]:
# Rows 16-18 of the show-specific answer shares.
x_data = euphoria_likert.values.tolist()[16:19]
y_data = ["<b>I believe Euphoria is informative of<br>the signs of teen dating violence.</b>",
          "<b>I believe Euphoria depicts teen<br>dating violence accurately.</b>",
          "<b>I believe that the show would make others<br>more likely to reach out for help if they<br>are struggling with teen dating violence.</b>"]
title = "<b>Impact of <i>Euphoria</i> on Youth Perceptions of Dating Violence</b>"
# Stacked version joins the summary set; it is not displayed here.
fig = gen_chart(title, x_data, y_data, l_padding=320, width_mod=200)
general_figures.append(fig)
# Diverging version is exported on its own and displayed.
fig2 = gen_diverging_chart(title, x_data, y_data, l_padding=250, width_mod=-300, split=.82)
# The summary folder is only created in a later cell; create it here so this
# export works on a fresh top-to-bottom run.
os.makedirs("images/summary", exist_ok=True)
fig2.write_image("images/summary/diverging_education_dv.png", scale=5)
fig2.show()

In [None]:
# Export the collected summary figures as 0.png, 1.png, ... in list order.
summary_dir = 'images/summary'
if not os.path.exists(summary_dir):
    os.mkdir(summary_dir)
for i, fig in enumerate(general_figures):
    fig.write_image(f"{summary_dir}/{i}.png", scale=5)

In [None]:
# Single chart covering every show-specific question.
x_data = euphoria_likert.values.tolist()
y_data = ['I enjoyed watching the show.',
          'I believe the TV show<br>depicted teenage life accurately.',
          'The show made<br>substances look fun.', 'The show made<br>substances look desirable.',
          'The cinematography of the show<br>influenced how I perceived drugs.',
          "The show's narrative made me<br>change how I experience and/or<br>perceive substance abuse.",
          "After watching the show, I felt<br>more inclined to experiment with<br>the usage of substances.",
          'I believe the show depicted<br>substance abuse accurately.', 
          'The show was educational<br>in regards to substance abuse and<br>helped me understand the topic better.',
          'I believe the show would make others<br>more likely to reach out for help if<br>they are struggling with substance use.',
          'The show portrayed substance<br>abuse treatments in a positive light.',
          'The show positively affected<br>how I viewed romantic relationships.', 
          'I have more positive views of sex<br>after watching the show.',
          "Overall, I believe that Nate and<br>Maddy's relationship showcases<br>signs of an unhealthy relationship.",
          "I think Nate and Maddy’s relationship<br>is desirable despite its pitfalls.",
          "The attractiveness of the actors<br>who play Nate and Maddy influenced<br>how I saw their relationship.",
          "I believe Euphoria is informative of<br>the signs of teen dating violence.",
          "I believe Euphoria depicts teen<br>dating violence accurately.",
          "I believe that the show would make others<br>more likely to reach out for help if they<br>are struggling with teen dating violence."]
title = "<b>Youth Responses to Questions About <i>Euphoria</i></b>"
# Named all_euphoria_fig rather than `all`, which would shadow the builtin
# all() for the rest of the kernel session.
all_euphoria_fig = gen_chart(title, x_data, y_data, l_padding=320, width_mod=200)
os.makedirs("images/summary", exist_ok=True)
all_euphoria_fig.write_image("images/summary/all_euphoria_questions.png", scale=5)

In [None]:
# Answer shares for the general questions among respondents with C == 'Yes',
# via the shared helper (the inline copy relied on the deprecated pd.value_counts).
general_likert = get_pcts(df[df['C'] == 'Yes'][general_questions])
x_data = general_likert.values.tolist()
y_data = [
    'It is acceptable to use substances.',
    'I have thought about using substances.',
    'Using substances is fun or desirable.',
    'I am aware of resources available<br>to those struggling with addiction.',
    'I am knowledgeable about<br>the danger of lacing drugs.',
    'I am knowledgeable about drug<br>harm reduction methods such as<br>narcan and test strips.',
    'I believe substance abuse<br>is common among teens.',
    'I am knowledgeable about consent<br>in romantic relationships.',
    'I am aware of resources available to<br>those facing teen dating violence.',
    'I am knowledgeable about healthy<br>attachment in relationships.',
    'I am more inclined to overlook<br>dating violence if the other party’s<br>physical appearance is attractive.',
    'I am aware of the signs of<br>an abusive relationship.',
    'Dating violence is only physical.'
]
title = "<b>Youth Responses to Questions About Substance Use & Dating Violence</b>"
gen = gen_chart(title, x_data, y_data, l_padding=300, width_mod=200)
os.makedirs("images/summary", exist_ok=True)
gen.write_image("images/summary/all_general_questions.png", scale=5)

# Pie Charts

In [None]:
# Respondent counts per age, ordered by age.
df['Age'].value_counts().sort_index().to_numpy()

In [None]:
# Five-colour palette used for the Likert levels and two-slice pies.
colors = [
    'rgb(158,34,162)',
    'rgb(198,156,234)',
    'rgb(200, 192, 234)',
    'rgb(114,133,225)',
    'rgb(51, 51, 154)',
]
# Seven-colour palette for pies with more slices.
all_euphoria_colors = [
    'rgb(51, 51, 154)',
    'rgb(114,133,225)',
    'rgb(198,156,234)',
    'rgb(178,99,210)',
    'rgb(158,34,162)',
    'rgb(112,48,154)',
    'rgb(86,24,96)',
]

In [None]:
# Respondent counts per age, ordered by age; computed once and reused for
# both the slice values and the slice labels.
age_counts = df['Age'].value_counts().sort_index()

fig = go.Figure()
fig.add_trace(go.Pie(
    hole=0.5,
    sort=False,  # keep the index order instead of sorting by size
    values=age_counts.values,
    labels=age_counts.index.to_list(),
    texttemplate='<b>%{percent}</b>',
    marker=dict(
        colors=all_euphoria_colors,
        line=dict(color='white', width=2),
    ),
    textfont=dict(family="Proxima Nova", size=30, color='white'),
    textposition='inside',
))
fig.update_layout(
    font=dict(family="PT Sans", size=25, color='white'),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=600,
    height=600,
)

os.makedirs("images/pie_charts", exist_ok=True)
fig.write_image("images/pie_charts/age_pie.png", scale=5)

fig.show()

In [None]:
# Respondent counts per gender in index order; computed once and reused for
# both the slice values and the slice labels.
gender_counts = df['Gender'].value_counts().sort_index()

fig = go.Figure()
fig.add_trace(go.Pie(
    hole=0.4,
    sort=False,  # keep the index order instead of sorting by size
    values=gender_counts.values,
    labels=gender_counts.index.to_list(),
    texttemplate='%{label}<br><b>%{percent}</b>',
    marker=dict(
        colors=all_euphoria_colors,
        line=dict(color='white', width=2),
    ),
    textfont=dict(family="Proxima Nova", size=20, color='white'),
    textposition='auto',
))
fig.update_layout(
    font=dict(family="PT Sans", size=15, color='white'),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=600,
    height=600,
    showlegend=False,
)

os.makedirs("images/pie_charts", exist_ok=True)
fig.write_image("images/pie_charts/gender_pie.png", scale=5)

fig.show()

In [None]:
# Answer counts for column 'B' in index order; computed once and reused for
# both the slice values and the slice labels.
dv_counts = df['B'].value_counts().sort_index()

fig = go.Figure()
fig.add_trace(go.Pie(
    hole=0,
    sort=False,  # keep the index order instead of sorting by size
    values=dv_counts.values,
    labels=dv_counts.index.to_list(),
    texttemplate='%{label}<br><b>%{percent}</b>',
    marker=dict(
        colors=[colors[0], colors[4]],
        line=dict(color='white', width=2),
    ),
    pull=[0, .1],  # offset the second slice
))
fig.update_layout(
    title=dict(
        text="<b>Have you ever experienced<br>dating violence?</b>",
        font=dict(family="Proxima Nova", size=25, color=colors[2]),
    ),
    showlegend=False,
    font=dict(family="Proxima Nova", size=25, color='white'),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=500,
    height=500,
    legend=dict(
        font=dict(family="Proxima Nova", size=25, color='white'),
        traceorder='reversed',
    ),
)

os.makedirs("images/pie_charts", exist_ok=True)
fig.write_image("images/pie_charts/dv_pie.png", scale=5)

fig.show()

In [None]:
# Answer counts for column 'A' in index order; computed once and reused for
# both the slice values and the slice labels.
drug_counts = df['A'].value_counts().sort_index()

fig = go.Figure()
fig.add_trace(go.Pie(
    hole=0,
    sort=False,  # keep the index order instead of sorting by size
    values=drug_counts.values,
    labels=drug_counts.index.to_list(),
    texttemplate='%{label}<br><b>%{percent}</b>',
    marker=dict(
        colors=[colors[0], colors[4]],
        line=dict(color='white', width=2),
    ),
    pull=[0, .05],  # offset the second slice
))
fig.update_layout(
    title=dict(
        text="<b>Have you ever used drugs<br>and/or alcohol?</b>",
        font=dict(family="Proxima Nova", size=25, color=colors[2]),
    ),
    showlegend=False,
    legend=dict(
        traceorder='reversed',
    ),
    font=dict(family="Proxima Nova", size=25, color='white'),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=500,
    height=500,
)

os.makedirs("images/pie_charts", exist_ok=True)
fig.write_image("images/pie_charts/drugs_pie.png", scale=5)

fig.show()

In [None]:
# Answer counts for column 'C' in index order; computed once and reused for
# both the slice values and the slice labels.
watched_counts = df['C'].value_counts().sort_index()

fig = go.Figure()
fig.add_trace(go.Pie(
    hole=0,
    sort=False,  # keep the index order instead of sorting by size
    values=watched_counts.values,
    labels=watched_counts.index.to_list(),
    texttemplate='%{label}<br><b>%{percent}</b>',
    marker=dict(
        colors=[colors[0], colors[4]],
        line=dict(color='white', width=2),
    ),
    pull=[0, .05],  # offset the second slice
))
fig.update_layout(
    showlegend=False,
    legend=dict(
        traceorder='reversed',
        font=dict(family="PT Sans", size=20, color='white'),
    ),
    font=dict(family="Proxima Nova", size=30, color='white'),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    width=500,
    height=500,
)

os.makedirs("images/pie_charts", exist_ok=True)
fig.write_image("images/pie_charts/watched_pie.png", scale=5)

fig.show()

In [None]:
# Sunburst with column 'C' as the inner ring and 'B' as the outer ring, one
# unit per respondent, coloured by the answer to question 24.
# (plotly.express is already imported as px in the imports cell.)
curr_df = df.sort_values('24')
fig = px.sunburst(curr_df, path=['C', 'B'],
                  values=[1] * len(curr_df), color='24')
fig.show()

In [None]:
# Work on an explicit copy: assigning into a bare column selection of `df`
# triggers pandas' SettingWithCopy warning and may or may not write through.
df_copy = df[['C', 'B']].copy()
df_copy['C'] = df_copy['C'].replace({"Yes": "Watched", "No": "Didn't Watch"})
fig = px.sunburst(df_copy, path=['C', 'B'], 
                  values=[1] * len(df_copy), color='C',
                  color_discrete_map={
                      "Watched": colors[0],
                      "Didn't Watch": colors[4]
                  })
fig.show()

In [None]:
# Plain-list counts in value_counts order; the last list is reversed.
inner_values = df['C'].value_counts().tolist() # yes, no order
yes_values = df.loc[df['C'] == "Yes", 'A'].value_counts().tolist()
no_values = df.loc[df['C'] == "No", 'A'].value_counts().tolist()[::-1]

In [None]:
# All six counts as a fraction of 62.
np.array([*inner_values, *yes_values, *no_values]) / 62

In [None]:
# All six counts as a percentage of 62, one decimal place.
np.around(np.array([*inner_values, *yes_values, *no_values]) * 100 / 62, 1)

In [None]:
# The three count sequences are plain lists at this point, so they are wrapped
# in np.asarray before dividing (list / int raises TypeError).
np.around(
    (np.asarray(inner_values) / 62 * 100).tolist() + 
    (np.asarray(yes_values) / 34 * 100).tolist() + 
    (np.asarray(no_values) / 28 * 100).tolist()
, 1)

In [None]:
# list() works for both plain lists and arrays; lists have no .tolist().
list(inner_values) + list(yes_values) + list(no_values)

In [None]:
# Counts in a fixed [Yes, No] order. reindex makes the order independent of
# which answer is more frequent (value_counts sorts by count), so no manual
# reversal is needed.
answer_order = ["Yes", "No"]
inner_values = df['C'].value_counts().reindex(answer_order, fill_value=0).values
yes_values = df[df['C'] == "Yes"]['A'].value_counts().reindex(answer_order, fill_value=0).values
no_values = df[df['C'] == "No"]['A'].value_counts().reindex(answer_order, fill_value=0).values
values = inner_values.tolist() + yes_values.tolist() + no_values.tolist()
# Inner ring: share of all respondents. Outer ring: share of the parent group.
# Denominators come from the data instead of hard-coded respondent counts.
percents = np.around(
    (inner_values / inner_values.sum() * 100).tolist() + 
    (yes_values / inner_values[0] * 100).tolist() + 
    (no_values / inner_values[1] * 100).tolist()
, 0)
text = ["Watched", "Not<br>Watched", "Yes", "No", "Yes", "No"]
fig = go.Figure(
    go.Sunburst(
        # labels must be unique, hence the 1/2 suffixes; the visible text comes from texttemplate
        labels=["Watched Euphoria", "Didn't Watch<br>Euphoria", "Yes1", "No1", "Yes2", "No2"],
        parents=["", "", "Watched Euphoria", "Watched Euphoria", "Didn't Watch<br>Euphoria", "Didn't Watch<br>Euphoria"],
        values=values,
        branchvalues='total',
        marker=dict(
            colors=[colors[4], colors[0]],
            line=dict(color='white', width=4)
        ),
        textfont=dict(
            family="Proxima Nova",
            size=25
        ),
        # str(p)[:-2] drops the trailing ".0" of the rounded float
        texttemplate=[f'{t}<br><b>{str(p)[:-2]}%</b>' for t, p in zip(text, percents)],
        rotation=90,
        insidetextorientation="horizontal",
        domain=dict(x=[0,1])
    ),
    layout=dict(
        width=700,
        height=700,
        paper_bgcolor='#151E27',
        plot_bgcolor='#151E27',
    )
)
fig.show()
fig.write_image("test.png", scale=5)

In [None]:
# Counts in a fixed [Yes, No] order. reindex makes the order independent of
# which answer is more frequent (value_counts sorts by count), so no manual
# reversal is needed.
answer_order = ["Yes", "No"]
inner_values = df['C'].value_counts().reindex(answer_order, fill_value=0).values
yes_values = df[df['C'] == "Yes"]['B'].value_counts().reindex(answer_order, fill_value=0).values
no_values = df[df['C'] == "No"]['B'].value_counts().reindex(answer_order, fill_value=0).values
values = inner_values.tolist() + yes_values.tolist() + no_values.tolist()
# Inner ring: share of all respondents. Outer ring: share of the parent group.
# Denominators come from the data instead of hard-coded respondent counts.
percents = np.around(
    (inner_values / inner_values.sum() * 100).tolist() + 
    (yes_values / inner_values[0] * 100).tolist() + 
    (no_values / inner_values[1] * 100).tolist()
, 0)
text = ["Watched", "Not<br>Watched", "Yes", "No", "Yes", "No"]
fig = go.Figure(
    go.Sunburst(
        # labels must be unique, hence the 1/2 suffixes; the visible text comes from texttemplate
        labels=["Watched Euphoria", "Didn't Watch<br>Euphoria", "Yes1", "No1", "Yes2", "No2"],
        parents=["", "", "Watched Euphoria", "Watched Euphoria", "Didn't Watch<br>Euphoria", "Didn't Watch<br>Euphoria"],
        values=values,
        branchvalues='total',
        marker=dict(
            colors=[colors[4], colors[0]],
            line=dict(color='white', width=4)
        ),
        textfont=dict(
            family="Proxima Nova",
            size=25
        ),
        # str(p)[:-2] drops the trailing ".0" of the rounded float
        texttemplate=[f'{t}<br><b>{str(p)[:-2]}%</b>' for t, p in zip(text, percents)],
        rotation=90,
        insidetextorientation="horizontal",
        domain=dict(x=[0,1])
    ),
    layout=dict(
        width=700,
        height=700,
        paper_bgcolor='#151E27',
        plot_bgcolor='#151E27',
    )
)
fig.show()
# NOTE(review): "test.png" is also the output name of the drugs/alcohol
# sunburst in the cell before this one, so this export overwrites that file.
fig.write_image("test.png", scale=5)

In [None]:
# Text labels for the numeric answer levels 1-5.
likert_text = {
    1: "Strongly<br>Disagree",
    2: "Disagree",
    3: "Neutral",
    4: "Agree",
    5: "Strongly<br>Agree",
}
new_df = df.replace(likert_text)
new_df.head()

In [None]:
# Number of rows in column '22'.
new_df['22'].shape[0]

# Histograms

In [None]:
def format_title(title, max_len=60):
    """Wrap a long title onto several lines and make it bold.

    Titles of at most `max_len` characters are returned on one line as given.
    Longer titles are stripped, split on single spaces and packed greedily:
    a word joins the current line while the line length plus the word length
    stays below `max_len`. Each packed line keeps a trailing space.

    Args:
        title: Title text.
        max_len: Line-width threshold in characters.

    Returns:
        Tuple of (title wrapped in <b>...</b> with lines joined by <br>,
        number of lines).
    """
    if len(title) <= max_len:
        return f'<b>{title}</b>', 1

    title_lines = []
    curr_line = ""
    for word in title.strip().split(" "):
        if len(curr_line) + len(word) < max_len:
            curr_line += word + " "
        else:
            # An over-long first word arrives here with nothing collected yet;
            # skip the flush so no empty leading line is emitted.
            if curr_line:
                title_lines.append(curr_line)
            curr_line = word + " "
    title_lines.append(curr_line)

    wrapped = "<br>".join(title_lines)
    return f'<b>{wrapped}</b>', len(title_lines)

In [None]:
# Answer counts per question (rows = answer level, columns = question).
# Series.value_counts replaces the deprecated pd.value_counts; sort_index
# fixes the row order so it does not depend on answer frequencies.
just_counts = df[all_questions].apply(lambda col: col.value_counts()).sort_index()

In [None]:
# Answer percentages per question, one decimal place (rows = answer level).
# Series.value_counts replaces the deprecated pd.value_counts; sort_index
# fixes the row order so it does not depend on answer frequencies.
just_percents = (
    df[all_questions]
    .apply(lambda col: col.value_counts())
    .sort_index()
    .apply(lambda x: np.round(x/sum(x)*100, 1), axis=0)
)

In [None]:
# Output folder for the per-question histograms.
histogram_dir = "images/histograms"
if not os.path.exists(histogram_dir):
  os.mkdir(histogram_dir)

In [None]:
# Preview of the bar labels ("count (percent%)") for question 7.
[
    f'<b>{count}</b> ({pct}%)'
    for count, pct in zip(just_counts['7'].to_list(), just_percents['7'].to_list())
]

In [None]:
# x-axis order for the answer labels.
categories = ['Strongly<br>Disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly<br>Agree']

for col in ['7']:
    new_df = new_df.sort_values(col)
    # one "count (percent%)" label per bar
    text = [
        f'<b>{i}</b> ({j}%)'
        for i, j in zip(just_counts[col].to_list(), just_percents[col].to_list())
    ]
    # pad_mod is the number of title lines; it scales the top margin and height
    title, pad_mod = format_title(col_to_q[col])

    histogram = go.Histogram(
        x=new_df[col],
        marker=dict(color=colors),
        text=text,
        textfont=dict(color='white', family='PT Sans'),
        textposition='auto',
    )
    fig = go.Figure(histogram)

    fig.update_layout(
        title=dict(
            text=title,
            font=dict(family="Proxima Nova", size=25, color=colors[2]),
        ),
        xaxis=dict(
            categoryorder='array',
            categoryarray=categories,
            range=[-.5, 4.5],
        ),
        yaxis=dict(
            linecolor='#151E27',
            gridcolor='rgb(210,210,210)',
        ),
        font=dict(family="Proxima Nova", size=15, color='white'),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        margin=dict(t=60*pad_mod),
        width=800, #normally 600
        height=300 + 60*pad_mod,
        bargap=.1,
    )
    fig.write_image(f"images/histograms/{col}.png", scale=5)
    fig.show()