In [44]:
import pandas as pd
import numpy as np
import plotly.express as px
from dateparser import parse

In [45]:
column_names = ["timestamp", "learning_stage", "started_learning", "tools", "scoring_time", "lack_of_software", "desire_to_use", "adt_desirable", "knows_adt_apps", "adt_example"]
responses_df = pd.read_excel("responses.xlsx", header=0, names=column_names)
responses_df

Unnamed: 0,timestamp,learning_stage,started_learning,tools,scoring_time,lack_of_software,desire_to_use,adt_desirable,knows_adt_apps,adt_example
0,2023-04-18 18:50:53.390,1,3 месяца,Не расписываю,1,Да,Да,Да,Нет,
1,2023-04-18 18:52:28.978,2,3 месяца,Софтом для создания табулатур в формате GuitarPro,3,Да,Нет,Да,Нет,
2,2023-04-18 18:55:00.704,1,3 месяца,Ручкой и бумагой,4,Не задумывался (-ась),Да,Да,Нет,
3,2023-04-18 19:01:22.496,1,2 месяца,Не расписываю,1,Не задумывался (-ась),Да,Да,Нет,
4,2023-04-18 19:10:13.247,1,3 месяца,Ручкой и бумагой,5,Не задумывался (-ась),Да,Да,Нет,
...,...,...,...,...,...,...,...,...,...,...
257,2023-04-21 01:27:03.000,2,7 месяцев,Ручкой и бумагой,3,Не задумывался (-ась),Да,Да,Нет,
258,2023-04-21 06:21:20.000,Преподаватель,20 лет,Ручкой и бумагой,2,Да,Да,Да,Нет,
259,2023-04-21 07:58:15.000,2,Полтора года,Ручкой и бумагой,2,Не задумывался (-ась),Да,Да,"Да, не пользовался (-ась)",Groove scribe
260,2023-04-21 10:14:27.000,Преподаватель,6 лет,"Ручкой и бумагой, Софтом для создания табулату...",4,Да,Да,Нет,Нет,


In [46]:
def parse_yn(value):
    if value == "Да":
        return "Yes"
    elif value == "Нет":
        return "No"
    elif value == "Не задумывался (-ась)":
        return "Did not consider"
    elif value == "Да, пользовался (-ась)":
        return "Yes, did use"
    elif value == "Да, не пользовался (-ась)":
        return "Yes, did not use"
    else:
        return None
    
def translate_value(value):
    if value == "Ручкой и бумагой":
        return "Pen and paper"
    elif value == "Софтом для создания табулатур в формате GuitarPro":
        return "GuitarPro"
    elif value == "Цифровая звуковая рабочая станция (FL Studio Ableton и т.д.)":
        return "DAW"
    elif value == "Не расписываю":
        return "Does not transcribe"
    elif value == "Другое":
        return "Other"
    else:
        return value
    
def parse_stage(value):
    if value == "Преподаватель":
        return "Instructor"
    elif value == 1:
        return "First stage"
    elif value == 2:
        return "Second stage"
    elif value == 3:
        return "Third stage"
    elif value == 4:
        return "Fourth stage"
    elif value == 5:
        return "Fifth stage"
    elif value == "Самоучка":
        return "Self-taught"
    elif value == "Индивидуальные занятия":
        return "Personal tutoring"
    else:
        return "Other"
    
group_mapping = {
    'First stage': 'Students',
    'Second stage': 'Students',
    'Third stage': 'Students',
    'Fourth stage': 'Students',
    'Fifth stage': 'Students',
    'Instructor': 'Instructors'
}

In [47]:
delta_df = responses_df.copy()
delta_df['started_learning'] = delta_df['started_learning'].apply(lambda x: parse(x))
delta_df['lack_of_software'] = delta_df['lack_of_software'].apply(lambda x: parse_yn(x))
delta_df['desire_to_use'] = delta_df['desire_to_use'].apply(lambda x: parse_yn(x))
delta_df['adt_desirable'] = delta_df['adt_desirable'].apply(lambda x: parse_yn(x))
delta_df['knows_adt_apps'] = delta_df['knows_adt_apps'].apply(lambda x: parse_yn(x))
delta_df['learning_stage'] = delta_df['learning_stage'].apply(lambda x: parse_stage(x))
delta_df["tools"] = delta_df["tools"].str.split(", ")
delta_df["tools"] = delta_df["tools"].apply(lambda x: [translate_value(v) for v in x])
delta_df['group'] = delta_df['learning_stage'].map(group_mapping).fillna('Other')
delta_df

Unnamed: 0,timestamp,learning_stage,started_learning,tools,scoring_time,lack_of_software,desire_to_use,adt_desirable,knows_adt_apps,adt_example,group
0,2023-04-18 18:50:53.390,First stage,2023-01-21 11:49:00.661640,[Does not transcribe],1,Yes,Yes,Yes,No,,Students
1,2023-04-18 18:52:28.978,Second stage,2023-01-21 11:49:00.664632,[GuitarPro],3,Yes,No,Yes,No,,Students
2,2023-04-18 18:55:00.704,First stage,2023-01-21 11:49:00.667623,[Pen and paper],4,Did not consider,Yes,Yes,No,,Students
3,2023-04-18 19:01:22.496,First stage,2023-02-21 11:49:00.669618,[Does not transcribe],1,Did not consider,Yes,Yes,No,,Students
4,2023-04-18 19:10:13.247,First stage,2023-01-21 11:49:00.672611,[Pen and paper],5,Did not consider,Yes,Yes,No,,Students
...,...,...,...,...,...,...,...,...,...,...,...
257,2023-04-21 01:27:03.000,Second stage,2022-09-21 11:49:01.310419,[Pen and paper],3,Did not consider,Yes,Yes,No,,Students
258,2023-04-21 06:21:20.000,Instructor,2003-04-21 11:49:01.311417,[Pen and paper],2,Yes,Yes,Yes,No,,Instructors
259,2023-04-21 07:58:15.000,Second stage,2021-10-21 11:49:01.314408,[Pen and paper],2,Did not consider,Yes,Yes,"Yes, did not use",Groove scribe,Students
260,2023-04-21 10:14:27.000,Instructor,2017-04-21 11:49:01.315438,"[Pen and paper, GuitarPro]",4,Yes,Yes,No,No,,Instructors


In [48]:
import plotly.io as pio

fig = px.sunburst(delta_df, path=['group', 'learning_stage'])
fig.update_layout(
    margin=dict(t=30, l=0, r=0, b=0),
    sunburstcolorway=['#636efa', '#EF553B', '#00cc96', '#ab63fa', '#19d3f3', '#FFA15A', '#FF6692', '#B6E880', '#FF97FF', '#FECB52']
)
fig.update_traces(
    textinfo="label+percent root",
    hovertemplate="%{label}<br>%{percentRoot:.2f}%<br>Count: %{value}"
)
fig.show()
pio.write_image(fig, 'grouped_pie_chart.png', width=400, height=400, scale=3)

In [49]:
df = delta_df.explode('tools')
df

Unnamed: 0,timestamp,learning_stage,started_learning,tools,scoring_time,lack_of_software,desire_to_use,adt_desirable,knows_adt_apps,adt_example,group
0,2023-04-18 18:50:53.390,First stage,2023-01-21 11:49:00.661640,Does not transcribe,1,Yes,Yes,Yes,No,,Students
1,2023-04-18 18:52:28.978,Second stage,2023-01-21 11:49:00.664632,GuitarPro,3,Yes,No,Yes,No,,Students
2,2023-04-18 18:55:00.704,First stage,2023-01-21 11:49:00.667623,Pen and paper,4,Did not consider,Yes,Yes,No,,Students
3,2023-04-18 19:01:22.496,First stage,2023-02-21 11:49:00.669618,Does not transcribe,1,Did not consider,Yes,Yes,No,,Students
4,2023-04-18 19:10:13.247,First stage,2023-01-21 11:49:00.672611,Pen and paper,5,Did not consider,Yes,Yes,No,,Students
...,...,...,...,...,...,...,...,...,...,...,...
258,2023-04-21 06:21:20.000,Instructor,2003-04-21 11:49:01.311417,Pen and paper,2,Yes,Yes,Yes,No,,Instructors
259,2023-04-21 07:58:15.000,Second stage,2021-10-21 11:49:01.314408,Pen and paper,2,Did not consider,Yes,Yes,"Yes, did not use",Groove scribe,Students
260,2023-04-21 10:14:27.000,Instructor,2017-04-21 11:49:01.315438,Pen and paper,4,Yes,Yes,No,No,,Instructors
260,2023-04-21 10:14:27.000,Instructor,2017-04-21 11:49:01.315438,GuitarPro,4,Yes,Yes,No,No,,Instructors


In [50]:
# create a new DataFrame with a row for each tool and time estimation combination
tool_counts = pd.DataFrame(
    [(t, time) for t_list, time in zip(delta_df['tools'], delta_df['scoring_time']) for t in t_list],
    columns=['tools', 'scoring_time']
)

# create a grouped bar chart
fig = px.histogram(tool_counts, x='scoring_time', color='tools', barmode='group')

# customize the layout
fig.update_layout(
    title='Scoring Method and Personal Time Estimation',
    xaxis_title='Time Estimation',
    yaxis_title='Number of Responses',
    legend_title='Scoring Method'
)

fig.update_xaxes(
    tickvals=['1', '2', '3', '4', '5'],
    ticktext=['Very short time', 'Short time', 'Medium time', 'Long time', 'Very long time']
)

# show the chart
fig.show()
pio.write_image(fig, 'histogram.png', width=800, height=400, scale=3)

In [18]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

data = {"type": "pie"}

rows = 1
cols = 2

pie_data = [[data for j in range(cols)] for i in range(rows)]

fig = make_subplots(rows=rows, cols=cols, specs=pie_data)
row = 'lack_of_software'
#lack_of_software	desire_to_use	adt_desirable	knows_adt_apps

for i, group in enumerate(["Students", "Instructors"]):
    fig.add_trace(
        go.Pie(labels=df[df['group']==group][row], title=group),
        row=1, col=i+1, 
    )

fig.update_layout(
    title='Do you feel a lack of available drum transcription software?',
    legend_title='Responses'
)       

fig.show(width=800, height=400)
pio.write_image(fig, 'piess.png', width=800, height=500, scale=1)

In [25]:
grouped_df = df.groupby(['tools', 'group']).size().reset_index(name='count')

# Plot the stacked bar chart
fig = px.bar(grouped_df, x="tools", y="count", color="group", barmode="stack")

# Add axis labels and title
fig.update_layout(xaxis_title="Learning Stage", yaxis_title="Number of Responses",
                  title="Distribution of Scoring Methods by Group",
                      xaxis=dict(
                    categoryorder='array',
                    categoryarray=['Does not transcribe', 'DAW', 'GuitarPro', "Pen and paper"]))
# Show the chart
fig.show()
pio.write_image(fig, 'stacked.png', width=800, height=400, scale=3)

In [51]:
# create a dataframe with the count of tools for each group
tool_counts = df.groupby(['group', 'tools']).size().reset_index(name='count')

# create the plot
fig = px.bar(tool_counts, x='tools', y='count', color='group', barmode='group', 
             title='Popularity of Tools by Group',
             category_orders={'group': ["Students", "Instructors", "Other"]})

# update the x-axis labels
fig.update_xaxes(title_text='Scoring Methods')

fig.update_layout(
    legend_title='Groups',
                          xaxis=dict(
                    categoryorder='array',
                    categoryarray=['Does not transcribe', 'DAW', 'GuitarPro', "Pen and paper"])
)  
# update the y-axis labels
fig.update_yaxes(title_text='Number of Responses')

fig.show()
pio.write_image(fig, 'barz2.png', width=800, height=400, scale=3)

In [52]:
# create a dataframe with the count of tools for each group
tool_counts = df.groupby(['group', 'lack_of_software']).size().reset_index(name='count')

# create the plot
fig = px.bar(tool_counts, x='lack_of_software', y='count', color='group', barmode='group', 
             title='Have you felt a lack of dedicated drum transcription software?',
             category_orders={'group': ["Students", "Instructors", "Other"]})

# update the x-axis labels
fig.update_xaxes(title_text='Response')

fig.update_layout(
    legend_title='Groups'
)  
# update the y-axis labels
fig.update_yaxes(title_text='Number of Responses')

fig.show()
pio.write_image(fig, 'barz3.png', width=800, height=400, scale=3)

In [53]:
# create a dataframe with the count of tools for each group
tool_counts = df.groupby(['lack_of_software', 'tools']).size().reset_index(name='count')

# create the plot
fig = px.bar(tool_counts, x='tools', y='count', color='lack_of_software', barmode='group', 
             title='Have you felt a lack of dedicated drum transcription software?')

# update the x-axis labels
fig.update_xaxes(title_text='Scoring Methods')

fig.update_layout(
    legend_title='Responses',
                          xaxis=dict(
                    categoryorder='array',
                    categoryarray=['Does not transcribe', 'DAW', 'GuitarPro', "Pen and paper"])
)  
# update the y-axis labels
fig.update_yaxes(title_text='Number of Responses')

fig.show()
pio.write_image(fig, 'barz.png', width=800, height=400, scale=3)

In [70]:
# create a dataframe with the count of tools for each group
tool_counts = df.groupby(['group', 'adt_desirable']).size().reset_index(name='count')

# create the plot
fig = px.bar(tool_counts, x='adt_desirable', y='count', color='group', barmode='group', 
             title='Would you use software specifically created for drum transcription?',
             category_orders={'learning_stage': ["First stage", "Second stage","Third stage","Fourth stage","Fifth stage","Instructor"]})

# update the x-axis labels
fig.update_xaxes(title_text='Answer')

fig.update_layout(
    legend_title='Groups')
  
# update the y-axis labels
fig.update_yaxes(title_text='Number of Responses')

fig.show()
pio.write_image(fig, 'barz2.png', width=800, height=400, scale=3)

In [77]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

data = {"type": "pie"}

rows = 1
cols = 3

pie_data = [[data for j in range(cols)] for i in range(rows)]

fig = make_subplots(rows=rows, cols=cols, specs=pie_data)
row = 'adt_desirable'
#lack_of_software	desire_to_use	adt_desirable	knows_adt_apps

for i, group in enumerate(["Students", "Instructors", "Other"]):
    fig.add_trace(
        go.Pie(labels=df[(df['group']==group) | (df['tools']=="GuitarPro")][row], title=group),
        row=1, col=i+1, 
    )

fig.update_layout(
    title='Would you show increased interest in that application given ADT functionality?',
    legend_title='Responses'
)       

fig.show(width=800, height=400)
pio.write_image(fig, 'piess.png', width=800, height=500, scale=1)