In [1]:
# code snippet below from @lrowe: https://github.com/jtpio/jupyterlite/issues/110#issuecomment-850916083
import micropip
# Work around https://github.com/pyodide/pyodide/issues/1614 which is now fixed in pyodide
await micropip.install('Jinja2')
#micropip.PACKAGE_MANAGER.builtin_packages['jinja2'] = micropip.PACKAGE_MANAGER.builtin_packages['Jinja2']
# Last version of jsonschema before it added the pyrsistent dependency (native code, no wheel)
await micropip.install("jsonschema")
# await micropip.install("https://files.pythonhosted.org/packages/77/de/47e35a97b2b05c2fadbec67d44cfcdcd09b8086951b331d82de90d2912da/jsonschema-2.6.0-py2.py3-none-any.whl")
await micropip.install("altair")

In [2]:
import altair as alt
import numpy as np
import pandas as pd
from js import fetch
from urllib.request import urlopen
import math



In [3]:
URL= 'https://raw.githubusercontent.com/qcitizen/sample-data/main/samples_daily_data.csv'

results = await fetch(URL)
text = await results.text()

filename = 'daily_data_clean_0618.csv'

with open(filename, 'w') as f:
    f.write(text)

data = pd.read_csv(filename, sep= ',')

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 90 entries, 0 to 89
Data columns (total 36 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   level_0          90 non-null     int64  
 1   participantnum   90 non-null     object 
 2   participantid    90 non-null     object 
 3   responseid       90 non-null     object 
 4   scoring          90 non-null     object 
 5   mdyest           90 non-null     bool   
 6   whatsubstyest    1 non-null      object 
 7   othersubstyest   90 non-null     bool   
 8   lsddosageyest    0 non-null      float64
 9   psildosageyest   1 non-null      object 
 10  otherdosageyest  90 non-null     bool   
 11  preparedprodyes  1 non-null      object 
 12  stack            1 non-null      object 
 13  stackedother     90 non-null     bool   
 14  mdtoday          90 non-null     bool   
 15  whatsubsttod     66 non-null     object 
 16  othersubsttod    90 non-null     bool   
 17  psildosagetod    6

In [5]:
columns_to_recode = ['connectedq','contemplativeq','creativeq','focusq']

for col in columns_to_recode:
    data[col] = data[col].replace([0,1,2,3,4], [1,2,3,4,5])

In [6]:
dependent_measures = ['connectedq','contemplativeq','creativeq','focusq','productiveq','wellbeingq','sleepq','dreamsq']

In [7]:
days_md_df = data[['Task_Days','mdtoday']]
days_md_df.to_csv('days_md_df.csv')

In [8]:
for i, row in data.iterrows():
    if row['mdtoday'] == True:
        cell = 'Yes'
    elif row['mdtoday'] == False:
        cell = 'No'
    else:
        cell = np.nan
    data.at[i, 'Microdosing'] = cell

In [9]:
md_col_df = pd.DataFrame(data["Microdosing"])

In [10]:
data['dreamsq'].value_counts()

False    79
True     11
Name: dreamsq, dtype: int64

In [11]:
#To use for columns that are not encoded as bool
# answer_list = [True, False,"prefernotsay",'']
# convert_list=[1,-1,0,0]
# n2s = dict(zip(answer_list, convert_list))

# data['dreamsq'] = data['dreamsq'].replace(n2s)

In [12]:
data['dreamsq'] = data['dreamsq'].replace([True, False], [1, 0])

# Daily Data

## Visualizations with no missing data

In [13]:
values = []
for measure in dependent_measures:
    values.append({
        'Measure': measure,
        'Overall Mean': round(data[measure].mean(),2),
        'Microdosing Days Mean': round(data[data['mdtoday'] == True][measure].mean(),2),
        'Non-microdosing Days Mean': round(data[data['mdtoday'] == False][measure].mean(),2),
        'Median': data[measure].median(),
        'Missing Values (%)': ((data[measure].isna().sum())/len(data)) * 100,
        'Number of Responses': data[measure].notnull().sum(),
        'Maximal Value': data[measure].max()
    })
summary_df = pd.DataFrame(values)    
summary_df

Unnamed: 0,Measure,Overall Mean,Microdosing Days Mean,Non-microdosing Days Mean,Median,Missing Values (%),Number of Responses,Maximal Value
0,connectedq,4.67,4.61,4.83,5.0,0.0,90,5
1,contemplativeq,4.52,4.48,4.62,5.0,0.0,90,5
2,creativeq,4.13,4.03,4.42,4.0,0.0,90,5
3,focusq,4.02,3.98,4.12,4.0,0.0,90,5
4,productiveq,3.47,3.36,3.75,4.0,0.0,90,5
5,wellbeingq,3.58,3.55,3.67,4.0,0.0,90,5
6,sleepq,3.79,3.79,3.79,4.0,0.0,90,5
7,dreamsq,0.12,0.11,0.17,0.0,0.0,90,1


In [14]:
sub_df = data[data['Microdosing'] == 'Yes']

alt.Chart(sub_df).mark_line(point=True).encode(
    x='Task_Days',
    y='psildosagetod'
).properties(
    width=900,
    title= 'Microdosing Days and Doses'
)

In [15]:
print("Stacking substance and number of times reported:", data['stack'].value_counts())

Stacking substance and number of times reported: lionsmane, niacin    1
Name: stack, dtype: int64


In [16]:
a = alt.Chart(data).mark_circle(size=80).encode(
            x='Task_Days:N',
            y=alt.Y('dreamsq:Q', scale=alt.Scale(domain=(0,1)), axis=alt.Axis(values=[0,1], ticks=True, labels=True, grid=False)),
            color=alt.Color('Microdosing', scale=alt.Scale(domain=['No', 'Yes'], range=['orange', 'navy']))
        ).properties(
            width=900,
            height=100
        )
b = alt.Chart(data).mark_line(point=False, color='black',opacity=0.3).encode(
        x='Task_Days:N',
        y='dreamsq:Q'
    ).properties(
        width=900,
        height=100,
        title= ' Days with Vivid Dreams'
    )

a

In [17]:
dependent_measures2 = ['connectedq','contemplativeq','creativeq','focusq','productiveq','wellbeingq','sleepq']

In [18]:
charts = []
titles = ['Connectedness to Nature', 'Contemplation', 'Creativity', 'Focus', 'Productivity', 'Well-being', 'Sleep Quality']

for title, measure in zip(titles, dependent_measures2):
    row = []
    for col in range(1, 2):
        a = alt.Chart(data).mark_circle(size=80).encode(
            x='Task_Days:N',
            y=alt.Y(measure, scale=alt.Scale(domain=(1,5)), axis=alt.Axis(values=[1, 2, 3, 4, 5])),
            color=alt.Color('Microdosing', scale=alt.Scale(domain=['No', 'Yes'], range=['orange', 'navy']))
        ).properties(
            width=900,
            title= title
        )
        b = alt.Chart(data).mark_line(point=False, color='black',opacity=0.3).encode(
            x='Task_Days:N',
            y=measure
        ).properties(
            width=900,
        )
        row.append(a+b)
    charts.append(alt.hconcat(*row))
alt.vconcat(*charts)

In [19]:
protocol = 1
for i, row in data.iterrows():
    if i == 0:
        protocol_end = False
    else:
        prev_row = data.iloc[i - 1]
        protocol_end = prev_row.mdtoday == False and row.mdtoday == True
        
    if protocol_end:
        protocol += 1
        
    data.at[i, "Protocol"] = f"Cluster {protocol}"

In [20]:
charts = []
for row_nr in range(1,5):
    row = []
    for col in range(1, 5):
        subplot_data = data[data.Protocol == f"Cluster {(row_nr - 1) * 4 + col}"]
        
        if subplot_data.shape[0] == 1:
            break
            
        a = alt.Chart(subplot_data).mark_circle(size=80).encode(
            x='Task_Days:N',
            y=alt.Y('creativeq:Q',scale=alt.Scale(domain=(1,5))),
            color=alt.Color('mdtoday',
                   scale=alt.Scale(
            domain=[False, True],
            range=['orange', 'navy']))
        ).properties(
            width=200,
            height=200,
        )
        b = alt.Chart(subplot_data).mark_line(point=False, color='black',opacity=0.3).encode(
            x='Task_Days:N',
            y='creativeq:Q'
        ).properties(
            width=200,
            height=200,
        )
        row.append(a+b)
    charts.append(alt.hconcat(*row))
alt.vconcat(*charts)

In [21]:
for window in range(2,8):
    for i, row in data.iterrows():
        if i+1 <= window:
            start_index = 0
        else:
            start_index = i + 1 - window

        rows_in_window = data[start_index:i+1]
        try:
            md_in_window = rows_in_window['Microdosing'].value_counts()['Yes']
        except KeyError:
            md_in_window = 0
        data.at[i, f'Microdosing_in_last_{window}_days (%)'] = round(md_in_window / window, 2)
    

In [22]:
for window in range(2,8):
    for i, row in data.iterrows():
        if i+1 <= window:
            start_index = 0
        else:
            start_index = i + 1 - window

        rows_in_window = data[start_index:i+1]
        try:
            md_in_window = rows_in_window['Microdosing'].value_counts()['Yes']
        except KeyError:
            md_in_window = 0
        data.at[i, f'Num_microdosing_days_in_last_{window}_days'] = md_in_window
    

In [23]:
alt.Chart(data).mark_circle(color='navy',size=200, opacity=0.35).encode(
    alt.X('Microdosing_in_last_7_days (%):Q', 
        axis=alt.Axis(format='%'),
    ),
    alt.Y('creativeq:Q', scale=alt.Scale(domain=(1,5)), axis=alt.Axis(values=[1, 2, 3, 4, 5])),
)

In [24]:
data.to_csv('N_of_1_data_0729.csv', index=False)

In [25]:
main = []
for window in range(2,8):   
    stripplot = alt.Chart(data, width=140).mark_circle(size=50, opacity=0.35).encode(
        x=alt.X(
            'jitter:Q',
            title=None,
            axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False,),
            scale=alt.Scale(),
        ),
        y=alt.Y('creativeq:Q', scale=alt.Scale(domain=(1,5)), axis=alt.Axis(values=[1, 2, 3, 4, 5])),
        color=alt.Color(f'Microdosing_in_last_{window}_days (%):N', legend=None),
        column=alt.Column(
            f'Microdosing_in_last_{window}_days (%):N',
            header=alt.Header(
                labelAngle=0,
                titleOrient='top',
                labelOrient='bottom',
                labelAlign='center',
                labelPadding=323,
            ),
        ),
    ).transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
    ).configure_facet(
        spacing=0
    ).configure_view(
        stroke=None
    )
    main.append(stripplot)
main[5]

In [26]:
main = []
for window in range(2,8):   
    stripplot = alt.Chart(data, width=140).mark_circle(size=50, opacity=0.35).encode(
        x=alt.X(
            'jitter:Q',
            title=None,
            axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
            scale=alt.Scale(),
        ),
        y=alt.Y('creativeq:Q', scale=alt.Scale(domain=(1,5)), axis=alt.Axis(values=[1, 2, 3, 4, 5])),
        color=alt.Color(f'Num_microdosing_days_in_last_{window}_days:N', legend=None),
        column=alt.Column(
            f'Num_microdosing_days_in_last_{window}_days:N',
            header=alt.Header(
                labelAngle=0,
                titleOrient='top',
                labelOrient='bottom',
                labelAlign='right',
                labelPadding=326,
            ),
        ),
    ).transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
    ).configure_facet(
        spacing=0
    ).configure_view(
        stroke=None
    )
    main.append(stripplot)
main[5]

In [27]:
# main = alt.hconcat(data=data).transform_calculate(
#         # Generate Gaussian jitter with a Box-Muller transform
#         jitter='sqrt(-2*log(random()))*cos(2*PI*random())'
#     ).configure_facet(
#         spacing=0
#     ).configure_view(
#         stroke=None
#     )

# for window in range(2,8):   
#     stripplot = alt.Chart(data, width=140).mark_circle(size=50, opacity=0.3).encode(
#         x=alt.X(
#             'jitter:Q',
#             title=None,
#             axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
#             scale=alt.Scale(),
#         ),
#         y=alt.Y('creativeq:Q'),
#         color=alt.Color(f'Microdosing_last_{window}_days:N', legend=None),
#         column=alt.Column(
#             f'Microdosing_last_{window}_days:N',
#             header=alt.Header(
#                 labelAngle=-90,
#                 titleOrient='top',
#                 labelOrient='bottom',
#                 labelAlign='right',
#                 labelPadding=3,
#             ),
#         ),
#     )
#     main |= stripplot
# main

In [28]:
one_month_data = data[data['Task_Days'] < 29]
month1_yes = one_month_data[one_month_data['Microdosing'] == 'Yes']
month1_no = one_month_data[one_month_data['Microdosing'] == 'No']
md_data = data[data['Microdosing'] == 'Yes']
no_md_data = data[data['Microdosing'] == 'No']

In [29]:
a = alt.Chart(one_month_data).mark_circle(size=60).encode(
    x='Task_Days:N',
    y=alt.Y('creativeq:Q', 
            scale=alt.Scale(domain=(1,5)), axis=alt.Axis(values=[1, 2, 3, 4, 5])),
            color=alt.Color('mdtoday:N', legend=alt.Legend(orient="left"))
)

b = alt.Chart(month1_yes, width=160).mark_circle(opacity=0.4, size=60).encode(
    x=alt.X(
            'jitter:Q',
            title='Microdosing Days',
            axis=alt.Axis(values=[0], ticks=False, grid=False, labels=False),
            scale=alt.Scale(),
        ),
    y=alt.Y(
        'creativeq:Q',
        axis=alt.Axis(title=None, values=[1, 2, 3, 4, 5],ticks=False, grid=True,labels=True),
        scale=alt.Scale(domain=(1,5))
    ),
    color=alt.Color('mdtoday',
            scale=alt.Scale(
            domain=[False, True],
            range=['orange', 'navy'])),
    order='mdtoday:N'
).transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        jitter='(sqrt(-2*log(random()))*cos(2*PI*random())*17)'
    )

c = alt.Chart(month1_no, width=160).mark_circle(size=60).encode(
    x=alt.X(
            'jitter:Q',
            title='Non-microdosing Days',
            axis=alt.Axis(values=[0], ticks=False, grid=False, labels=False),
            scale=alt.Scale(),
        ),
    y=alt.Y(
        'creativeq:Q',
        axis=alt.Axis(title=None, values=[1, 2, 3, 4, 5],ticks=False, grid=True,labels=True),
        scale=alt.Scale(domain=(1,5))
    ),
    color=alt.Color('mdtoday',
            scale=alt.Scale(
            domain=['false', 'true'],
            range=['orange', 'navy'])),
    order='mdtoday:N'
).transform_calculate(
        # Generate Gaussian jitter with a Box-Muller transform
        jitter='(sqrt(-2*log(random()))*cos(2*PI*random())*1)'
    )

a|b|c

In [30]:
def calc_offset(df, col):
    counts = df[col].value_counts()
    interval = 2
    for key, value in zip(counts.index, counts):
        if value % 2 == 0:
            dist_from_mid = interval / 2
        else:
            dist_from_mid = 0

        right_offsets = []
        for i in range(0, math.ceil(value / 2)):
            right_offsets.append(dist_from_mid + i * interval)
        left_offsets = np.array(right_offsets[::-1]) * -1
        offsets = np.concatenate((left_offsets, np.array(right_offsets)))
        offsets = np.unique(offsets)

        j = 0
        for i, row in df[df[col] == key].iterrows():
            df.at[i, f'Offset_{col}'] = offsets[j]
            j += 1
            

In [31]:
for measure in dependent_measures2:
    calc_offset(month1_yes, measure)
    calc_offset(month1_no, measure)
    calc_offset(md_data, measure)
    calc_offset(no_md_data, measure)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [32]:
# hard-coding samples (population) means based on the full df from deepnote

d = {'connectedq': [3.35, 3.35],
     'contemplativeq': [3.33, 3.33],
     'creativeq': [3.18, 3.18],
     'focusq': [3.28, 3.28],
     'productiveq': [3.27, 3.27],
     'wellbeingq': [3.36, 3.36],
     'sleepq': [3.49, 3.49],
     'Task_Days': [0, 28]}

sample_means = pd.DataFrame(d)

In [33]:
charts = []
titles = ['Connectedness to Nature', 'Contemplation', 'Creativity', 'Focus', 'Productivity', 'Well-being', 'Sleep Quality']

for title, measure in zip(titles, dependent_measures2):
    one_month_data[f"y_mean_{measure}"] = one_month_data[measure].mean()
    row = []
    for col in range(1, 2):
        a = alt.Chart(one_month_data).mark_circle(size=60).encode(
            x='Task_Days:N',
            y=alt.Y(measure, scale=alt.Scale(domain=(1,5)), axis=alt.Axis(values=[1, 2, 3, 4, 5])),
            color=alt.Color('mdtoday:N', legend=alt.Legend(orient="left"))
        ).properties(
            title=title)
        
        means_ppt = alt.Chart(one_month_data).mark_line(color='blue', point=False, strokeDash=[1,4], strokeWidth=2.9).encode(
        x='Task_Days:N',
        y=f"y_mean_{measure}:Q"
        )

        # population means
        means_sample = alt.Chart(sample_means).mark_line(color='grey', point=False).encode(
        x='Task_Days:N',
        y=alt.Y(measure, axis=alt.Axis(title=''))
        )
            
        b = alt.Chart(month1_yes, width=160).mark_circle(size=60).encode(
            x=alt.X(
                    f'Offset_{measure}:Q',
                    title='Microdosing Days',
                    axis=alt.Axis(values=[0], ticks=False, grid=False, labels=False),
                    scale=alt.Scale(),
                ),
            y=alt.Y(
                measure, scale=alt.Scale(domain=(1,5)),
                axis=alt.Axis(title=None, values=[1, 2, 3, 4, 5], ticks=False, grid=True,labels=False),
            ),
            color=alt.Color('mdtoday',
                    scale=alt.Scale(
                    domain=[False, True],
                    range=['orange', 'navy'])),
            order='mdtoday:N'
        )

        c = alt.Chart(month1_no, width=160).mark_circle(size=60).encode(
            x=alt.X(
                    f'Offset_{measure}:Q',
                    title='Non-microdosing Days',
                    axis=alt.Axis(values=[0], ticks=False, grid=False, labels=False),
                    scale=alt.Scale(),
                ),
            y=alt.Y(
                measure, scale=alt.Scale(domain=(1,5)),
                axis=alt.Axis(title=None, values=[1, 2, 3, 4, 5], ticks=False, grid=True,labels=False),
            ),
            color=alt.Color('mdtoday',
                    scale=alt.Scale(
                    domain=[False, True],
                    range=['orange', 'navy'])),
            order='mdtoday:N'
        )
        row.append(a+means_ppt+means_sample|b|c)
    charts.append(alt.hconcat(*row))
alt.vconcat(*charts)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  one_month_data[f"y_mean_{measure}"] = one_month_data[measure].mean()


In [34]:
d = {'connectedq': [3.35, 3.35],
     'contemplativeq': [3.33, 3.33],
     'creativeq': [3.18, 3.18],
     'focusq': [3.28, 3.28],
     'productiveq': [3.27, 3.27],
     'wellbeingq': [3.36, 3.36],
     'sleepq': [3.49, 3.49],
     'Task_Days': [0, 90]}

sample_means2 = pd.DataFrame(d)

In [35]:
charts = []
titles = ['Connectedness to Nature', 'Contemplation', 'Creativity', 'Focus', 'Productivity', 'Well-being', 'Sleep Quality']

for title, measure in zip(titles, dependent_measures2):
    data[f"y_mean_{measure}"] = data[measure].mean()
    row = []
    for col in range(1, 2):
        a = alt.Chart(data, width=600).mark_circle(size=30).encode(
            x=alt.X('Task_Days:N', axis=alt.Axis(values=[0, 90], ticks=True, grid=False, labels=True)),
            y=alt.Y(measure, scale=alt.Scale(domain=(1,5)), axis=alt.Axis(values=[1, 2, 3, 4, 5])),
            color=alt.Color('mdtoday:N', legend=alt.Legend(orient="left"))
        ).properties(
            title=title)

        means_ppt = alt.Chart(data).mark_line(color='blue', point=False, strokeDash=[1,4], strokeWidth=2.9).encode(
        x='Task_Days:N',
        y=f"y_mean_{measure}:Q"
        )

        means_sample = alt.Chart(sample_means2).mark_line(color='grey', point=False).encode(
        x='Task_Days:N',
        y=alt.Y(measure, axis=alt.Axis(title=''))
        )
        
        b = alt.Chart(md_data, width=160).mark_circle(size=30).encode(
            x=alt.X(
                    f'Offset_{measure}:Q',
                    title='Microdosing Days',
                    axis=alt.Axis(values=[0], ticks=False, grid=False, labels=False),
                    scale=alt.Scale(),
                ),
            y=alt.Y(
                measure, scale=alt.Scale(domain=(1,5)),
                axis=alt.Axis(title=None, values=[1, 2, 3, 4, 5], ticks=False, grid=True,labels=False),
            ),
            color=alt.Color('mdtoday',
                    scale=alt.Scale(
                    domain=[False, True],
                    range=['orange', 'navy'])),
            order='mdtoday:N'
        )

        c = alt.Chart(no_md_data, width=160).mark_circle(size=30).encode(
            x=alt.X(
                    f'Offset_{measure}:Q',
                    title='Non-microdosing Days',
                    axis=alt.Axis(values=[0], ticks=False, grid=False, labels=False),
                    scale=alt.Scale(),
                ),
            y=alt.Y(
                measure, scale=alt.Scale(domain=(1,5)),
                axis=alt.Axis(title=None, values=[1, 2, 3, 4, 5], ticks=False, grid=True,labels=False),
            ),
            color=alt.Color('mdtoday',
                    scale=alt.Scale(
                    domain=[False, True],
                    range=['orange', 'navy'])),
            order='mdtoday:N'
        )
        row.append(a+means_ppt+means_sample|b|c)
    charts.append(alt.hconcat(*row))
alt.vconcat(*charts)

## Visualizations with the missing data

In [36]:
df_nans = data[dependent_measures].mask(np.random.random(data[dependent_measures].shape) < .3)

df_nans = df_nans.join(md_col_df)

df_nans

Unnamed: 0,connectedq,contemplativeq,creativeq,focusq,productiveq,wellbeingq,sleepq,dreamsq,Microdosing
0,,,,,,3.0,,0.0,Yes
1,4.0,,3.0,5.0,3.0,4.0,,0.0,Yes
2,5.0,5.0,4.0,4.0,,,,1.0,Yes
3,5.0,5.0,5.0,5.0,,4.0,3.0,0.0,Yes
4,5.0,4.0,4.0,5.0,4.0,,4.0,0.0,No
...,...,...,...,...,...,...,...,...,...
85,5.0,5.0,4.0,4.0,,,5.0,,Yes
86,5.0,,3.0,3.0,3.0,3.0,,,Yes
87,5.0,4.0,5.0,4.0,3.0,4.0,,0.0,No
88,5.0,5.0,4.0,4.0,,4.0,4.0,0.0,No


In [37]:
df_nans = df_nans.rename({"connectedq": "Connectedness", "contemplativeq": "Contemplation", "creativeq": "Creativity",
                          "focusq": "Focus", "productive": "Productivity", "wellbeingq": "Well-being", "sleepq": "Sleep", 
                          "dreamsq": "Dreams"}, axis=1)

df_nans

Unnamed: 0,Connectedness,Contemplation,Creativity,Focus,productiveq,Well-being,Sleep,Dreams,Microdosing
0,,,,,,3.0,,0.0,Yes
1,4.0,,3.0,5.0,3.0,4.0,,0.0,Yes
2,5.0,5.0,4.0,4.0,,,,1.0,Yes
3,5.0,5.0,5.0,5.0,,4.0,3.0,0.0,Yes
4,5.0,4.0,4.0,5.0,4.0,,4.0,0.0,No
...,...,...,...,...,...,...,...,...,...
85,5.0,5.0,4.0,4.0,,,5.0,,Yes
86,5.0,,3.0,3.0,3.0,3.0,,,Yes
87,5.0,4.0,5.0,4.0,3.0,4.0,,0.0,No
88,5.0,5.0,4.0,4.0,,4.0,4.0,0.0,No


In [38]:
values2 = []
dependent_measures2 = df_nans.columns.to_list()[:-1]

for measure in dependent_measures2:
    values2.append({
        'Measure': measure,
        'Overall Mean': round(df_nans[measure].mean(),2),
        'Microdosing Days Mean': round(df_nans[df_nans['Microdosing'] == "Yes"][measure].mean(),2),
        'Non-microdosing Days Mean': round(df_nans[df_nans['Microdosing'] == "No"][measure].mean(),2),
        'Median': df_nans[measure].median(),
        'Missing Values (%)': round(((df_nans[measure].isna().sum())/len(df_nans)) * 100, 2),
        'Number of Responses': df_nans[measure].notnull().sum(),
        'Maximal Value': df_nans[measure].max()
    })
summary_df2 = pd.DataFrame(values2)    
summary_df2

Unnamed: 0,Measure,Overall Mean,Microdosing Days Mean,Non-microdosing Days Mean,Median,Missing Values (%),Number of Responses,Maximal Value
0,Connectedness,4.63,4.57,4.82,5.0,30.0,63,5.0
1,Contemplation,4.56,4.55,4.58,5.0,26.67,66,5.0
2,Creativity,4.27,4.19,4.5,4.0,34.44,59,5.0
3,Focus,4.09,4.02,4.28,4.0,26.67,66,5.0
4,productiveq,3.43,3.36,3.73,4.0,35.56,58,4.0
5,Well-being,3.53,3.49,3.63,4.0,22.22,70,5.0
6,Sleep,3.85,3.87,3.82,4.0,38.89,55,5.0
7,Dreams,0.11,0.1,0.13,0.0,28.89,64,1.0


In [39]:
task_days = pd.DataFrame(data.Task_Days)

df_nans = df_nans.join(task_days)

for i, row in df_nans.iterrows():
    if np.isnan(row['Dreams']):
        cell = row['Task_Days']
    else:
        cell = np.nan
    df_nans.at[i, 'Task_Days_Missing'] = cell    
        
df_nans       

Unnamed: 0,Connectedness,Contemplation,Creativity,Focus,productiveq,Well-being,Sleep,Dreams,Microdosing,Task_Days,Task_Days_Missing
0,,,,,,3.0,,0.0,Yes,0.0,
1,4.0,,3.0,5.0,3.0,4.0,,0.0,Yes,1.0,
2,5.0,5.0,4.0,4.0,,,,1.0,Yes,2.0,
3,5.0,5.0,5.0,5.0,,4.0,3.0,0.0,Yes,3.0,
4,5.0,4.0,4.0,5.0,4.0,,4.0,0.0,No,4.0,
...,...,...,...,...,...,...,...,...,...,...,...
85,5.0,5.0,4.0,4.0,,,5.0,,Yes,86.0,86.0
86,5.0,,3.0,3.0,3.0,3.0,,,Yes,87.0,87.0
87,5.0,4.0,5.0,4.0,3.0,4.0,,0.0,No,88.0,
88,5.0,5.0,4.0,4.0,,4.0,4.0,0.0,No,89.0,


In [40]:
a = alt.Chart(df_nans).mark_circle(size=80, color='navy').encode(
            x='Task_Days:N',
            y=alt.Y('Dreams:Q', scale=alt.Scale(domain=(0,1)), axis=alt.Axis(values=[0,1], ticks=True, labels=True, grid=False)),
            color=alt.Color('Microdosing:N', scale=alt.Scale(domain=['No', 'Yes'], range=['orange', 'navy']))
        ).properties(
            width=900,
            height=100
        )
b = alt.Chart(df_nans).mark_line(point=False, color='blue',opacity=0.4).encode(
        x='Task_Days:N',
        y='Dreams:Q'
    ).properties(
        width=900,
        height=100,
        title= 'Days with Vivid Dreams'
    )

c = alt.Chart(df_nans[~df_nans["Task_Days_Missing"].isna()]).mark_rule(color='gray').encode(
    x=alt.X('Task_Days_Missing:N'))

a+b+c

In [41]:
df_nans.Dreams = df_nans.Dreams.replace([0, 1],[1, 2])

In [42]:
a = alt.Chart(df_nans).mark_bar().encode(
    x='Task_Days:N',
    y=alt.Y('Dreams:Q', scale=alt.Scale(domain=(0,2)), axis=alt.Axis(values=[1,2], ticks=True, labels=True, grid=False)),
    color=alt.Color('Microdosing:N', scale=alt.Scale(domain=['No', 'Yes'], range=['orange', 'navy']))
).properties(
        width=1000,
        height=150,
        title= 'Days with Vivid Dreams'
    )

c = alt.Chart(df_nans[~df_nans["Task_Days_Missing"].isna()]).mark_rule(color='white').encode(
    x=alt.X('Task_Days_Missing:N'))

a+c

In [43]:
for i, row in df_nans.iterrows():
    if np.isnan(row['Creativity']):
        cell = row['Task_Days']
    else:
        cell = np.nan
    df_nans.at[i, 'Task_Days_Missing'] = cell     

In [44]:
a = alt.Chart(df_nans).mark_circle(size=80, color='navy').encode(
            x='Task_Days:N',
            y=alt.Y('Creativity:Q', scale=alt.Scale(domain=(1,5)), axis=alt.Axis(values=[1,2,3,4,5], ticks=True, labels=True, grid=False)),
            color=alt.Color('Microdosing:N', scale=alt.Scale(domain=['No', 'Yes'], range=['orange', 'navy']))
        ).properties(
            width=1000,
            height=200
        )
b = alt.Chart(df_nans).mark_line(point=False, color='blue',opacity=0.4).encode(
        x='Task_Days:N',
        y='Creativity:Q'
    ).properties(
        width=1000,
        height=200,
        title= 'Creativity ratings over the course of study'
    )

c = alt.Chart(df_nans[~df_nans["Task_Days_Missing"].isna()]).mark_rule(color='gray').encode(
    x=alt.X('Task_Days_Missing:N'))

b+a+c

In [45]:
a = alt.Chart(df_nans).mark_bar(opacity=0.8).encode(
    x='Task_Days:N',
    y=alt.Y('Creativity:Q', scale=alt.Scale(domain=(0,5)), axis=alt.Axis(values=[1,2,3,4,5], ticks=True, labels=True, grid=False)),
    color=alt.Color('Microdosing:N', scale=alt.Scale(domain=['No', 'Yes'], range=['orange', 'navy']))
).properties(
        width=1000,
        height=200,
        title= 'Creativity ratings over the course of study'
    )

c = alt.Chart(df_nans[~df_nans["Task_Days_Missing"].isna()]).mark_rule(color='white').encode(
    x=alt.X('Task_Days_Missing:N'))

a+c

In [46]:
df_nans["Microdosing"] = df_nans["Microdosing"].mask(np.random.random(df_nans["Microdosing"].shape) < .3)

df_nans["Microdosing"] = df_nans["Microdosing"].replace(np.nan, "Unknown")

df_nans

Unnamed: 0,Connectedness,Contemplation,Creativity,Focus,productiveq,Well-being,Sleep,Dreams,Microdosing,Task_Days,Task_Days_Missing
0,,,,,,3.0,,1.0,Unknown,0.0,0.0
1,4.0,,3.0,5.0,3.0,4.0,,1.0,Yes,1.0,
2,5.0,5.0,4.0,4.0,,,,2.0,Unknown,2.0,
3,5.0,5.0,5.0,5.0,,4.0,3.0,1.0,Yes,3.0,
4,5.0,4.0,4.0,5.0,4.0,,4.0,1.0,Unknown,4.0,
...,...,...,...,...,...,...,...,...,...,...,...
85,5.0,5.0,4.0,4.0,,,5.0,,Unknown,86.0,
86,5.0,,3.0,3.0,3.0,3.0,,,Unknown,87.0,
87,5.0,4.0,5.0,4.0,3.0,4.0,,1.0,No,88.0,
88,5.0,5.0,4.0,4.0,,4.0,4.0,1.0,Unknown,89.0,


In [47]:
a = alt.Chart(df_nans).mark_circle(size=80, color='navy').encode(
            x='Task_Days:N',
            y=alt.Y('Creativity:Q', scale=alt.Scale(domain=(1,5)), axis=alt.Axis(values=[1,2,3,4,5], ticks=True, labels=True, grid=False)),
            color=alt.Color('Microdosing:N', scale=alt.Scale(domain=['No', 'Yes', "Unknown"], range=['orange', 'navy', 'gray']))
        ).properties(
            width=1000,
            height=200
        )
b = alt.Chart(df_nans).mark_line(point=False, color='blue',opacity=0.4).encode(
        x='Task_Days:N',
        y='Creativity:Q'
    ).properties(
        width=1000,
        height=200,
        title= 'Creativity ratings during the course of study'
    )

c = alt.Chart(df_nans[~df_nans["Task_Days_Missing"].isna()]).mark_rule(color='gray').encode(
    x=alt.X('Task_Days_Missing:N'))

b+a+c

In [48]:
a = alt.Chart(df_nans).mark_bar(opacity=0.8).encode(
    x='Task_Days:N',
    y=alt.Y('Creativity:Q', scale=alt.Scale(domain=(0,5)), axis=alt.Axis(values=[1,2,3,4,5], ticks=True, labels=True, grid=False)),
    color=alt.Color('Microdosing:N', scale=alt.Scale(domain=['No', 'Yes', "Unknown"], range=['orange', 'navy', 'gray']))
).properties(
        width=1000,
        height=200,
        title= 'Creativity ratings over the course of study'
    )

c = alt.Chart(df_nans[~df_nans["Task_Days_Missing"].isna()]).mark_rule(color='white').encode(
    x=alt.X('Task_Days_Missing:N'))

a+c

## Baseline and Monthly Data

In [49]:
URL= 'https://raw.githubusercontent.com/qcitizen/sample-data/b1e87026688752d21cac4da1f42a47e78afeeefe/samples_monthly_data.csv'

results = await fetch(URL)
text = await results.text()

filename = 'monthly_data.csv'

with open(filename, 'w') as f:
    f.write(text)

monthly_data = pd.read_csv(filename, sep= ',')

In [50]:
dass_df = monthly_data.iloc[:12, :]

panas_df = monthly_data.iloc[12:20, :]

tapping_df = monthly_data.iloc[20:24, :]

pasat_df = monthly_data.iloc[24:28, :]

spatial_df = monthly_data.iloc[28:, :]
spatial_df

Unnamed: 0,value,Task,Time
28,285.0,Spatial Span,Baseline
29,255.0,Spatial Span,Month 1
30,260.0,Spatial Span,Month 2
31,325.0,Spatial Span,Month 3


In [51]:
dass_df["Microdosing"] = [True, True, False, False, True, True, False, False, True, True, False, False]
panas_df["Microdosing"] = [True, True, False, False, True, True, False, False]
tapping_df["Microdosing"] = [True, True, False, False]
pasat_df["Microdosing"] = [True, True, False, False]
spatial_df["Microdosing"] = [True, True, False, False]
spatial_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dass_df["Microdosing"] = [True, True, False, False, True, True, False, False, True, True, False, False]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  panas_df["Microdosing"] = [True, True, False, False, True, True, False, False]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tapping_df["Microdosin

Unnamed: 0,value,Task,Time,Microdosing
28,285.0,Spatial Span,Baseline,True
29,255.0,Spatial Span,Month 1,True
30,260.0,Spatial Span,Month 2,False
31,325.0,Spatial Span,Month 3,False


In [52]:
highlight = alt.selection(type='single', on='mouseover', fields=['Task'], nearest=True)

base = alt.Chart(dass_df).encode(
    x='Time:N',
    y=alt.Y('value:Q', scale=alt.Scale(domain=(0, 42)),
           axis=alt.Axis(title="DASS Score",ticks=True, grid=True,labels=True)),
    color=alt.Color('Task:N')
)

points = base.mark_circle().encode(
    opacity=alt.value(30)
).add_selection(
    highlight
).properties(
    width=900
)

lines = base.mark_line().encode(
    size=alt.condition(~highlight, alt.value(1), alt.value(3))
)

points + lines

In [53]:
highlight = alt.selection(type='single', on='mouseover', fields=['Task'], nearest=True)

points = alt.Chart(panas_df).mark_circle(size=70).encode(
    x='Time:N',
    y=alt.Y('value:Q', scale=alt.Scale(domain=(10, 50)), axis=alt.Axis(title="PANAS Score",ticks=True, grid=True,labels=True)),
    opacity=alt.value(30),
    color=alt.Color('Microdosing',scale=alt.Scale(domain=[True, False],range=['navy','orange'])),  
).add_selection(
    highlight
).properties(
    width=900
)

lines = alt.Chart(panas_df).mark_line().encode(
    x='Time:N',
    y=alt.Y('value:Q', scale=alt.Scale(domain=(10, 50)), axis=alt.Axis(title="PANAS Score",ticks=True, grid=True,labels=True)),
    color=alt.Color('Task:N',scale=alt.Scale(domain=["Negative", "Positive"],range=['gray','purple'])),
    size=alt.condition(~highlight, alt.value(1), alt.value(3))
)


alt.layer(
    points,
    lines
).resolve_scale(
    color='independent'
)

In [54]:
dass_df['value'] = dass_df['value'].replace(0, 0.01)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dass_df['value'] = dass_df['value'].replace(0, 0.01)


In [55]:
alt.Chart(dass_df).mark_circle(
    opacity=0.8,
    stroke='black',
    strokeWidth=1
).encode(
    alt.X('Time:O', axis=alt.Axis(labelAngle=0)),
    alt.Y('Task:N'),
    alt.Size('value:Q',
        scale=alt.Scale(range=[0, 4200]),
        legend=alt.Legend(title='Mean Score')
    ),
    alt.Color('Task:N', legend=None)
).properties(
    width=550,
    height=320
)

In [56]:
for i, row in dass_df.iterrows():
    score = (row['value'] / 42) * 100
    dass_df.at[i, "Score (%)"] = round(score)
    
for i, row in panas_df.iterrows():
    if row["Task"] == "Positive":
        score = (row['value'] / 50) * 100
    else:
        score = (row['value'] / 45) * 100
    panas_df.at[i, "Score (%)"] = round(score)
    
dass_panas_df = pd.concat([dass_df, panas_df])

dass_panas_df['Score (%)'] = dass_panas_df['Score (%)'].replace(0, 0.05)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [57]:
alt.Chart(dass_panas_df).mark_circle(
    opacity=0.8,
    stroke='black',
    strokeWidth=1
).encode(
    alt.X('Time:O', axis=alt.Axis(labelAngle=0)),
    alt.Y("Task:N", sort=["Anxiety","Depression","Stress","Positive","Negative"]),
    alt.Size('Score (%):Q',
        scale=alt.Scale(range=[0, 4200]),
        legend=alt.Legend(title='Percentage of the Maximum Score')
    ),
    alt.Color('Task:N', legend=None)
).properties(
    width=550,
    height=320
)

In [58]:
a = alt.Chart(tapping_df).mark_circle(size=80).encode(
            x='Time:N',
            y=alt.Y('value:Q', scale=alt.Scale(domain=(80,100)),
                axis=alt.Axis(title="Total Taps",values=[80,85,90,95,100],ticks=True, grid=True,labels=True)),
                color=alt.Color('Microdosing',scale=alt.Scale(domain=[True, False],range=['navy','orange']))
        ).properties(
            width=900,
            height=200
        )
b = alt.Chart(tapping_df).mark_line(point=False, color='black',opacity=0.3).encode(
        x='Time:N',
        y=alt.Y('value:Q', scale=alt.Scale(domain=(80,100)))
    ).properties(
        width=900,
        height=200,
        title= 'Tapping Task with Alternating Fingers'
    )

b+a

In [59]:
a = alt.Chart(pasat_df).mark_circle(size=80).encode(
            x='Time:N',
            y=alt.Y('value:Q', scale=alt.Scale(domain=(25,35)),
                axis=alt.Axis(title="PASAT Score",ticks=True, grid=True,labels=True)),
                color=alt.Color('Microdosing',scale=alt.Scale(domain=[True, False],range=['navy','orange']))
        ).properties(
            width=900,
            height=200
        )
b = alt.Chart(pasat_df).mark_line(point=False, color='black',opacity=0.3).encode(
        x='Time:N',
        y=alt.Y('value:Q', scale=alt.Scale(domain=(25,35)))
    ).properties(
        width=900,
        height=200,
        title= 'PASAT Scores over the Study Course'
    )

b+a

In [60]:
a = alt.Chart(spatial_df).mark_circle(size=80).encode(
            x='Time:N',
            y=alt.Y('value:Q', scale=alt.Scale(domain=(250,330)),
                axis=alt.Axis(title="Spatial Span Score",ticks=True, grid=True,labels=True)),
                color=alt.Color('Microdosing',scale=alt.Scale(domain=[True, False],range=['navy','orange']))
        ).properties(
            width=900,
            height=200
        )
b = alt.Chart(spatial_df).mark_line(point=False, color='black',opacity=0.3).encode(
        x='Time:N',
        y=alt.Y('value:Q', scale=alt.Scale(domain=(250,330)))
    ).properties(
        width=900,
        height=200,
        title= 'Scores for Spatial Span Task over the Study Course'
    )

b+a

In [61]:
spatial_df_nan = spatial_df.replace(325.0, np.nan)
spatial_df_nan

Unnamed: 0,value,Task,Time,Microdosing
28,285.0,Spatial Span,Baseline,True
29,255.0,Spatial Span,Month 1,True
30,260.0,Spatial Span,Month 2,False
31,,Spatial Span,Month 3,False


In [62]:
for i, row in spatial_df_nan.iterrows():
    if np.isnan(row['value']):
        cell = 3
    else:
        cell = ""
    spatial_df_nan.at[i, 'Task_Days_Missing'] = cell    
        
spatial_df_nan['Task_Days_Missing'] = spatial_df_nan['Task_Days_Missing'].replace(3.0, "Month 3")

spatial_df_nan

Unnamed: 0,value,Task,Time,Microdosing,Task_Days_Missing
28,285.0,Spatial Span,Baseline,True,
29,255.0,Spatial Span,Month 1,True,
30,260.0,Spatial Span,Month 2,False,
31,,Spatial Span,Month 3,False,Month 3


In [63]:
a = alt.Chart(spatial_df_nan).mark_circle(size=80).encode(
            x='Time:N',
            y=alt.Y('value:Q', scale=alt.Scale(domain=(250,330)),
                axis=alt.Axis(title="Spatial Span Score",ticks=True, grid=True,labels=True)),
                color=alt.Color('Microdosing',scale=alt.Scale(domain=[True, False],range=['navy','orange']))
        ).properties(
            width=900,
            height=200
        )
b = alt.Chart(spatial_df_nan).mark_line(point=False, color='black',opacity=0.3).encode(
        x='Time:N',
        y='value:Q'
    ).properties(
        width=900,
        height=200,
        title= 'Scores on Spatial Span Task over the Study Course'
    )
c = alt.Chart(spatial_df_nan[spatial_df_nan["Task_Days_Missing"] != '']).mark_rule(color='white').encode(
    x=alt.X('Task_Days_Missing:N'))

a+b+c

In [64]:
chart = alt.vconcat().configure_axisX(
            labelAngle=0
        )

for df, scale, title in [[spatial_df_nan, alt.Scale(domain=(250,330)), "Mean Spatial Span"], 
                  [tapping_df, alt.Scale(domain=(80,100)), "Total Taps"], 
                  [pasat_df, alt.Scale(domain=(25,35)), "Mean PASAT"]]:

    a = alt.Chart(df).mark_circle(size=80).encode(
                x=alt.X('Time:N'),
                y=alt.Y(
                    'value:Q', scale=scale,
                    axis=alt.Axis(title="Score",ticks=True, grid=True,labels=True)
                ),
                color=alt.Color('Microdosing',scale=alt.Scale(domain=[True, False],range=['navy','orange']))
            ).properties(
                width=900,
                height=200
            )
    b = alt.Chart(df).mark_line(point=False, color='black',opacity=0.3).encode(
            x=alt.X('Time:N'),
            y=alt.Y('value:Q', scale=scale)
        ).properties(
            width=900,
            height=200,
            title= f'{title} Score over the Study Course'
        )

    chart &= b+a

chart

## Visual and Hearing Data

In [65]:
URL= 'https://raw.githubusercontent.com/qcitizen/sample-data/main/visual_acuity_sample.csv'

results = await fetch(URL)
text = await results.text()

filename = 'visual_acuity.csv'

with open(filename, 'w') as f:
    f.write(text)

visual_acuity = pd.read_csv(filename, sep= ',')

visual_acuity

Unnamed: 0,Participant Num,Submitted Date,Task,Score,Time
0,174.0,2019-11-15T22:22:41.896000+00:00,Visual Acuity,90,Baseline
1,174.0,2019-12-13T10:16:22.456000+00:00,Visual Acuity,90,Month 1
2,174.0,2020-01-12T21:22:43.048000+00:00,Visual Acuity,100,Month 2
3,174.0,2020-02-06T22:46:55.129000+00:00,Visual Acuity,110,Month 3


In [66]:
URL= 'https://raw.githubusercontent.com/qcitizen/sample-data/main/contrast_sensitivity_sample.csv'

results = await fetch(URL)
text = await results.text()

filename = 'contrast_sensitivity.csv'

with open(filename, 'w') as f:
    f.write(text)

contrast_sensitivity = pd.read_csv(filename, sep= ',')

contrast_sensitivity

Unnamed: 0,Participant Num,Submitted Date,Task,Score,Time
0,315.0,2019-11-16T00:51:55.320000+00:00,Contrast Sensitivity,100,Baseline
1,315.0,2019-12-14T23:25:04.158000+00:00,Contrast Sensitivity,100,Month 1
2,315.0,2020-01-12T18:09:52.282000+00:00,Contrast Sensitivity,100,Month 2
3,315.0,2020-02-10T18:03:17.981000+00:00,Contrast Sensitivity,100,Month 3


In [67]:
visual_data = pd.concat([contrast_sensitivity, visual_acuity])

visual_data

Unnamed: 0,Participant Num,Submitted Date,Task,Score,Time
0,315.0,2019-11-16T00:51:55.320000+00:00,Contrast Sensitivity,100,Baseline
1,315.0,2019-12-14T23:25:04.158000+00:00,Contrast Sensitivity,100,Month 1
2,315.0,2020-01-12T18:09:52.282000+00:00,Contrast Sensitivity,100,Month 2
3,315.0,2020-02-10T18:03:17.981000+00:00,Contrast Sensitivity,100,Month 3
0,174.0,2019-11-15T22:22:41.896000+00:00,Visual Acuity,90,Baseline
1,174.0,2019-12-13T10:16:22.456000+00:00,Visual Acuity,90,Month 1
2,174.0,2020-01-12T21:22:43.048000+00:00,Visual Acuity,100,Month 2
3,174.0,2020-02-06T22:46:55.129000+00:00,Visual Acuity,110,Month 3


In [68]:
alt.Chart(visual_data).mark_circle(
    opacity=0.8,
    stroke='black',
    strokeWidth=1
).encode(
    alt.X('Time:O', axis=alt.Axis(labelAngle=0)),
    alt.Y("Task:N"),
    alt.Size('Score:Q',
        scale=alt.Scale(range=[0, 4200]),
        legend=alt.Legend(title='Score')
    ),
    alt.Color('Task:N', legend=None)
).properties(
    width=550,
    height=320
)

In [69]:
visual_data["Microdosing"] = ["Yes", "Yes", "No", "No", "Yes", "Yes", "No", "No"]

In [70]:
alt.Chart(visual_data).mark_circle(
    opacity=0.8,
    stroke='black',
    strokeWidth=1
).encode(
    alt.X('Time:O', axis=alt.Axis(labelAngle=0)),
    alt.Y("Task:N"),
    alt.Size('Score:Q',
        scale=alt.Scale(range=[0, 3900]),
        legend=alt.Legend(title='Score')
    ),
    alt.Color('Microdosing',scale=alt.Scale(domain=["Yes", "No"],range=['navy','orange']))
).properties(
    width=550,
    height=320,
    title='Scores on Visual Tests over the Study Course'
)

## Hearing Data


In [71]:
URL= 'https://raw.githubusercontent.com/qcitizen/sample-data/main/hearing_test_sample.csv'

results = await fetch(URL)
text = await results.text()

filename = 'hearing_test.csv'

with open(filename, 'w') as f:
    f.write(text)

hearing_test = pd.read_csv(filename, sep= ',')

hearing_test

Unnamed: 0,Participant Num,Submitted Date,Task,Calculated Threshold,Frequency,Time
0,1375.0,2019-11-16T04:28:10.472000+00:00,Hearing Test,5.0,1000,Baseline
1,1375.0,2019-11-16T04:28:10.472000+00:00,Hearing Test,5.0,2000,Baseline
2,1375.0,2019-11-16T04:28:10.472000+00:00,Hearing Test,10.0,3000,Baseline
3,1375.0,2019-11-16T04:28:10.472000+00:00,Hearing Test,10.0,4000,Baseline
4,1375.0,2019-11-16T04:28:10.472000+00:00,Hearing Test,45.0,8000,Baseline
5,1375.0,2019-11-16T04:28:10.472000+00:00,Hearing Test,5.0,1000,Baseline
6,1375.0,2019-11-16T04:28:10.472000+00:00,Hearing Test,10.0,500,Baseline
7,1375.0,2019-11-16T04:28:10.472000+00:00,Hearing Test,15.0,250,Baseline
8,1375.0,2019-12-16T04:20:24.690000+00:00,Hearing Test,,1000,Month 1
9,1375.0,2019-12-16T04:20:24.690000+00:00,Hearing Test,5.0,2000,Month 1


In [82]:
hearing_test_base = hearing_test[hearing_test["Time"] == "Baseline"]
hearing_test_m1 = hearing_test[hearing_test["Time"] == "Month 1"]
hearing_test_m2 = hearing_test[hearing_test["Time"] == "Month 2"]
hearing_test_m3 = hearing_test[hearing_test["Time"] == "Month 3"]

hearing_test_base['Microdosing'] = ["Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"]
hearing_test_m1['Microdosing'] = ["Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"]
hearing_test_m2['Microdosing'] = ["No","No","No","No","No","No","No","No"]
hearing_test_m3['Microdosing'] = ["No","No","No","No","No","No","No","No"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hearing_test_base['Microdosing'] = ["Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hearing_test_m1['Microdosing'] = ["Yes","Yes","Yes","Yes","Yes","Yes","Yes","Yes"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hearing_test_m2['Microdosing'] = ["No

In [92]:
chart = alt.vconcat().configure_axisX(
            labelAngle=0
        )

for df, scale, title in [
           [hearing_test_base, alt.Scale(domain=(0,50)), "Baseline"], 
           [hearing_test_m1, alt.Scale(domain=(-10,30)), "Month 1"], 
           [hearing_test_m2, alt.Scale(domain=(-15,35)), "Month 2"], 
           [hearing_test_m3, alt.Scale(domain=(-5,35)), "Month 3"]
        ]:
    
    a = alt.Chart(df).mark_bar(opacity=0.8).encode(
    x=alt.X("Frequency:N", sort=[250, 500, 1000, 2000, 3000, 4000, 8000]),
    y=alt.Y('Calculated Threshold:Q', scale=scale, axis=alt.Axis(values=[-20,-10,0,10,20,30,40,50], ticks=True, labels=True, grid=False)),
    color=alt.Color('Microdosing:N', scale=alt.Scale(domain=['No', 'Yes'], range=['orange', 'navy']))
        ).properties(
                width=700,
                height=200,
                title= f'{title} Hearing Test Scores'
            ) 

    # a = alt.Chart(df).mark_circle(size=80).encode(
    #             x=alt.X('Time:N'),
    #             y=alt.Y(
    #                 'value:Q', scale=scale,
    #                 axis=alt.Axis(title="Score",ticks=True, grid=True,labels=True)
    #             ),
    #             color=alt.Color('Microdosing',scale=alt.Scale(domain=[True, False],range=['navy','orange']))
    #         ).properties(
    #             width=900,
    #             height=200
    #         )
    # b = alt.Chart(df).mark_line(point=False, color='black',opacity=0.3).encode(
    #         x=alt.X('Time:N'),
    #         y=alt.Y('value:Q', scale=scale)
    #     ).properties(
    #         width=900,
    #         height=200,
    #         title= f'{title} Score over the Study Course'
    #     )

    chart &= a

chart