In [72]:
import os
import re
import pandas as pd

# Directory containing your EDF annotation files
data_dir = "../Data/raw-sleep-telemetry"

# Prepare a list to collect all data
all_dfs = []

# Loop through all files in the directory
for filename in os.listdir(data_dir):
    if filename.endswith(".edf"):
        filepath = os.path.join(data_dir, filename)
        with open(filepath, "r", encoding="latin1") as f:
            text = f.read()
        # Extract annotations
        pattern = r"\+(\d+)\x15(\d+)\x14Sleep stage ([\w ]+)\x14"
        matches = re.findall(pattern, text)
        df = pd.DataFrame(matches, columns=["start_sec", "duration_sec", "stage"])
        df["start_sec"] = df["start_sec"].astype(int)
        df["duration_sec"] = df["duration_sec"].astype(int)
        df["file"] = filename
        all_dfs.append(df)

# Combine all into one DataFrame
combined_df = pd.concat(all_dfs, ignore_index=True)


combined_df

Unnamed: 0,start_sec,duration_sec,stage,file
0,0,480,W,ST7152JA-Hypnogram.edf
1,480,480,1,ST7152JA-Hypnogram.edf
2,960,120,2,ST7152JA-Hypnogram.edf
3,1080,60,3,ST7152JA-Hypnogram.edf
4,1140,60,2,ST7152JA-Hypnogram.edf
...,...,...,...,...
5424,28530,2160,2,ST7241JO-Hypnogram.edf
5425,30690,510,R,ST7241JO-Hypnogram.edf
5426,31200,30,1,ST7241JO-Hypnogram.edf
5427,31230,240,R,ST7241JO-Hypnogram.edf


In [73]:
import json
import pandas as pd

# Load the JSON data
with open("../Data/processed_sleep_data/combined_sleep_data_fixed.json", "r") as f:
    data = json.load(f)

# Extract filename, subject_id, night_id for each entry
records = []
for entry in data:
    meta = entry.get("metadata", {})
    records.append({
        "filename": meta.get("filename"),
        "subject_id": meta.get("subject_id"),
        "night_id": meta.get("night_id")
    })

# Create DataFrame
df = pd.DataFrame(records)

merged_df = combined_df.merge(df, left_on='file', right_on='filename', how='left')
merged_df = merged_df.drop(columns=['filename'])
merged_df


Unnamed: 0,start_sec,duration_sec,stage,file,subject_id,night_id
0,0,480,W,ST7152JA-Hypnogram.edf,15,2
1,480,480,1,ST7152JA-Hypnogram.edf,15,2
2,960,120,2,ST7152JA-Hypnogram.edf,15,2
3,1080,60,3,ST7152JA-Hypnogram.edf,15,2
4,1140,60,2,ST7152JA-Hypnogram.edf,15,2
...,...,...,...,...,...,...
5424,28530,2160,2,ST7241JO-Hypnogram.edf,24,1
5425,30690,510,R,ST7241JO-Hypnogram.edf,24,1
5426,31200,30,1,ST7241JO-Hypnogram.edf,24,1
5427,31230,240,R,ST7241JO-Hypnogram.edf,24,1


In [74]:
import pandas as pd

subject_csv = pd.read_csv("../Data/ST-subjects.csv", skiprows=1).iloc[:21]

subject_df = subject_csv[["Nr", "Age", "M1/F2"]].rename(columns={
     "Nr": "subject_id",
     "Age": "age",
     "M1/F2": "gender"
 })

merged_df['subject_id'] = merged_df['subject_id'].astype(float)

subject_df = subject_df.dropna(subset=["subject_id"])
#subject_df["subject_id"] = subject_df["subject_id"].astype(str).str.zfill(2)
subject_df['subject_id'] = subject_df['subject_id'].astype(float)
#subject_df["gender"] = subject_df["gender"].map({1: "M", 2: "F"})

subject_df

final_df = merged_df.merge(subject_df, left_on="subject_id", right_on='subject_id', how="left")


In [75]:
final_df

Unnamed: 0,start_sec,duration_sec,stage,file,subject_id,night_id,age,gender
0,0,480,W,ST7152JA-Hypnogram.edf,15.0,2,66.0,2.0
1,480,480,1,ST7152JA-Hypnogram.edf,15.0,2,66.0,2.0
2,960,120,2,ST7152JA-Hypnogram.edf,15.0,2,66.0,2.0
3,1080,60,3,ST7152JA-Hypnogram.edf,15.0,2,66.0,2.0
4,1140,60,2,ST7152JA-Hypnogram.edf,15.0,2,66.0,2.0
...,...,...,...,...,...,...,...,...
5424,28530,2160,2,ST7241JO-Hypnogram.edf,24.0,1,,
5425,30690,510,R,ST7241JO-Hypnogram.edf,24.0,1,,
5426,31200,30,1,ST7241JO-Hypnogram.edf,24.0,1,,
5427,31230,240,R,ST7241JO-Hypnogram.edf,24.0,1,,


In [76]:
final_df = final_df.dropna()

In [77]:
import pandas as pd

subject_csv = pd.read_csv("../Data/ST-subjects.csv", skiprows=1)
subject_csv = subject_csv.iloc[:21]

# For Placebo night
placebo = subject_csv[["Nr", "Age", "M1/F2", "night nr", "lights off"]].copy()
placebo["condition"] = "placebo"
placebo = placebo.rename(columns={"night nr": "night_id", "lights off": "lights_off", "Nr": "subject_id", "M1/F2": "gender"})

# For Temazepam night
temazepam = subject_csv[["Nr", "Age", "M1/F2", "night nr.1", "lights off.1"]].copy()
temazepam["condition"] = "temazepam"
temazepam = temazepam.rename(columns={"night nr.1": "night_id", "lights off.1": "lights_off", "Nr": "subject_id", "M1/F2": "gender"})

# Combine
long_df = pd.concat([placebo, temazepam], ignore_index=True)
long_df = long_df.dropna(subset=["night_id"])  # Remove empty rows

# Make sure types match your main DataFrame
long_df["subject_id"] = long_df["subject_id"].astype(float)
long_df["night_id"] = long_df["night_id"].astype(float)

# Ensure types match
long_df["subject_id"] = long_df["subject_id"].astype(float)
long_df["night_id"] = long_df["night_id"].astype(float)
final_df["subject_id"] = final_df["subject_id"].astype(float)
final_df["night_id"] = final_df["night_id"].astype(float)

# Set multi-index (optional, for lookup; not required for merge)
long_df = long_df.set_index(["subject_id", "night_id"])


# Merge on subject_id and night_id
final_df = final_df.merge(
    long_df.reset_index()[["subject_id", "night_id", "condition", "lights_off"]],
    on=["subject_id", "night_id"],
    how="left"
)

final_df



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df["subject_id"] = final_df["subject_id"].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df["night_id"] = final_df["night_id"].astype(float)


Unnamed: 0,start_sec,duration_sec,stage,file,subject_id,night_id,age,gender,condition,lights_off
0,0,480,W,ST7152JA-Hypnogram.edf,15.0,2.0,66.0,2.0,temazepam,23:33
1,480,480,1,ST7152JA-Hypnogram.edf,15.0,2.0,66.0,2.0,temazepam,23:33
2,960,120,2,ST7152JA-Hypnogram.edf,15.0,2.0,66.0,2.0,temazepam,23:33
3,1080,60,3,ST7152JA-Hypnogram.edf,15.0,2.0,66.0,2.0,temazepam,23:33
4,1140,60,2,ST7152JA-Hypnogram.edf,15.0,2.0,66.0,2.0,temazepam,23:33
...,...,...,...,...,...,...,...,...,...,...
5180,25200,360,1,ST7141JE-Hypnogram.edf,14.0,1.0,20.0,1.0,placebo,0:40
5181,25560,30,W,ST7141JE-Hypnogram.edf,14.0,1.0,20.0,1.0,placebo,0:40
5182,25590,90,1,ST7141JE-Hypnogram.edf,14.0,1.0,20.0,1.0,placebo,0:40
5183,25680,180,2,ST7141JE-Hypnogram.edf,14.0,1.0,20.0,1.0,placebo,0:40


In [None]:
final_df.set_index(['subject_id'])

Unnamed: 0,start_sec,duration_sec,stage,file,subject_id,night_id,age,gender,condition,lights_off
0,0,480,W,ST7152JA-Hypnogram.edf,15.0,2.0,66.0,2.0,temazepam,23:33
1,480,480,1,ST7152JA-Hypnogram.edf,15.0,2.0,66.0,2.0,temazepam,23:33
2,960,120,2,ST7152JA-Hypnogram.edf,15.0,2.0,66.0,2.0,temazepam,23:33
3,1080,60,3,ST7152JA-Hypnogram.edf,15.0,2.0,66.0,2.0,temazepam,23:33
4,1140,60,2,ST7152JA-Hypnogram.edf,15.0,2.0,66.0,2.0,temazepam,23:33
...,...,...,...,...,...,...,...,...,...,...
5180,25200,360,1,ST7141JE-Hypnogram.edf,14.0,1.0,20.0,1.0,placebo,0:40
5181,25560,30,W,ST7141JE-Hypnogram.edf,14.0,1.0,20.0,1.0,placebo,0:40
5182,25590,90,1,ST7141JE-Hypnogram.edf,14.0,1.0,20.0,1.0,placebo,0:40
5183,25680,180,2,ST7141JE-Hypnogram.edf,14.0,1.0,20.0,1.0,placebo,0:40


In [89]:
# Group by subject, night, condition, and stage, summing duration
stage_summary = (
    final_df
    .groupby(['subject_id', 'night_id', 'condition', 'age', 'gender', 'lights_off', 'stage'], as_index=False)
    .agg({'duration_sec': 'sum'})
)

# Pivot so each stage is a column
stage_pivot = stage_summary.pivot_table(
    index=['subject_id', 'night_id', 'condition', 'age', 'gender', 'lights_off'],
    columns='stage',
    values='duration_sec',
    fill_value=0
).reset_index()

stage_pivot.columns.name = None  # Remove the columns' name

In [93]:
stage_pivot['total'] = stage_pivot[['1','2','3','4','R','W']].sum(axis=1)

In [156]:
for stage in ['1', '2', '3', '4', 'R', 'W']:
    stage_pivot[f'{stage}_pct'] = stage_pivot[stage] / stage_pivot['total'] * 100

stage_pivot

Unnamed: 0,subject_id,night_id,condition,age,gender,lights_off,1,2,3,4,R,W,total,1_pct,2_pct,3_pct,4_pct,R_pct,W_pct
0,1.0,1.0,placebo,60.0,1.0,23:01,3030.0,15870.0,2790.0,1290.0,3660.0,6120.0,32760.0,9.249084,48.443223,8.516484,3.937729,11.172161,18.681319
1,4.0,2.0,temazepam,18.0,2.0,22:37,1800.0,18660.0,2400.0,1260.0,6480.0,3300.0,33900.0,5.309735,55.044248,7.079646,3.716814,19.115044,9.734513
2,5.0,1.0,temazepam,32.0,2.0,23:34,3090.0,11040.0,2370.0,5070.0,6750.0,2220.0,30540.0,10.117878,36.149312,7.760314,16.601179,22.102161,7.269155
3,5.0,2.0,placebo,32.0,2.0,23:23,3630.0,11880.0,1590.0,3810.0,6780.0,3330.0,31020.0,11.702128,38.297872,5.125725,12.282398,21.856867,10.73501
4,6.0,1.0,placebo,35.0,2.0,23:28,2670.0,17670.0,300.0,0.0,8130.0,1470.0,30240.0,8.829365,58.43254,0.992063,0.0,26.884921,4.861111
5,6.0,2.0,temazepam,35.0,2.0,23:26,1410.0,17430.0,780.0,0.0,7380.0,1560.0,28560.0,4.936975,61.029412,2.731092,0.0,25.840336,5.462185
6,7.0,1.0,placebo,51.0,2.0,0:02,2640.0,6030.0,1500.0,2460.0,2040.0,9960.0,24630.0,10.718636,24.482339,6.090134,9.98782,8.282582,40.43849
7,7.0,2.0,temazepam,51.0,2.0,23:24,1710.0,10350.0,3540.0,1260.0,3360.0,7230.0,27450.0,6.229508,37.704918,12.896175,4.590164,12.240437,26.338798
8,8.0,1.0,temazepam,66.0,2.0,23:53,2370.0,14040.0,5040.0,0.0,2640.0,4920.0,29010.0,8.169597,48.397104,17.37332,0.0,9.10031,16.959669
9,8.0,2.0,placebo,66.0,2.0,23:20,5130.0,13500.0,1350.0,0.0,3330.0,4620.0,27930.0,18.367347,48.335124,4.833512,0.0,11.922664,16.541353


In [103]:
# Count the number of nights per subject
nights_per_subject = stage_pivot.groupby('subject_id')['night_id'].nunique()

# Get subject_ids with at least 2 nights
subjects_with_2_nights = nights_per_subject[nights_per_subject >= 2].index

# Filter the pivot table
filtered_stage_pivot = stage_pivot[stage_pivot['subject_id'].isin(subjects_with_2_nights)]

my_filter = filtered_stage_pivot.set_index(['subject_id', 'night_id'])

In [171]:
my_filter['efficiency'] = (my_filter['total'] - my_filter['W']) / my_filter['total'] * 100

eff_pivot = my_filter.pivot_table(
    index='subject_id',
    columns='condition',
    values='efficiency'
).reset_index()

# Calculate the difference: temazepam - placebo
eff_pivot['efficiency_diff'] = eff_pivot['temazepam'] - eff_pivot['placebo']

# Get age for each subject (assuming age is constant per subject)
age_df = my_filter.reset_index().groupby('subject_id')['age'].first().reset_index()

# Merge with efficiency difference
result = eff_pivot[['subject_id', 'efficiency_diff']].merge(age_df, on='subject_id', how='left')

result[['subject_id', 'age', 'efficiency_diff']]

Unnamed: 0,subject_id,age,efficiency_diff
0,5.0,32.0,3.465854
1,6.0,35.0,-0.601074
2,7.0,51.0,14.099692
3,8.0,66.0,-0.418316
4,9.0,47.0,6.822237
5,10.0,20.0,6.959732
6,11.0,21.0,0.585446
7,12.0,21.0,-2.701197
8,13.0,22.0,4.946794
9,14.0,20.0,1.002491


In [177]:
my_filter

placebo_efficiency = my_filter[my_filter['condition'] == 'placebo'][['efficiency']]
placebo_efficiency

Unnamed: 0_level_0,Unnamed: 1_level_0,efficiency
subject_id,night_id,Unnamed: 2_level_1
5.0,2.0,89.26499
6.0,1.0,95.138889
7.0,1.0,59.56151
8.0,2.0,83.458647
9.0,2.0,78.331528
10.0,1.0,87.428023
11.0,2.0,94.989775
12.0,1.0,93.177388
13.0,2.0,92.957746
14.0,1.0,94.988345


In [152]:
summary = my_filter.groupby("condition")[['W_pct','1_pct', '2_pct', '3_pct', '4_pct', 'R_pct', 'age']].mean()
summary

Unnamed: 0_level_0,W_pct,1_pct,2_pct,3_pct,4_pct,R_pct,age
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
placebo,12.251641,10.241429,43.049945,8.09115,6.72895,19.636885,40.166667
temazepam,9.643924,8.122355,47.019191,7.746214,7.488811,19.979504,40.166667


In [137]:
# Use filtered_pivot_diff from before
pivot_diff = filtered_pivot_diff.pivot_table(
    index=['subject_id', 'age', 'gender'],
    columns='condition',
    values=['W_pct', 'R_pct', '1_pct', '2_pct', '3_pct', '4_pct'],
)

# Flatten the columns
pivot_diff.columns = ['_'.join(col).strip() for col in pivot_diff.columns.values]
pivot_diff = pivot_diff.reset_index()

# Calculate difference: temazepam - placebo
for stage in ['W_pct', 'R_pct', '1_pct', '2_pct', '3_pct', '4_pct']:
    pivot_diff[f'{stage}_diff'] = pivot_diff.get(f'{stage}_temazepam', 0) - pivot_diff.get(f'{stage}_placebo', 0)

w_diff_df = pivot_diff.sort_values('W_pct_diff')[['W_pct_diff', 'age', "gender","1_pct_placebo","2_pct_placebo","3_pct_placebo","4_pct_placebo", "R_pct_placebo",
    "W_pct_placebo"]]

In [138]:
# Let's say you have a DataFrame w_diff_df indexed by subject_id and W_pct_diff column
responder_ids = [9, 6, 3, 15, 1, 16, 7, 14, 13]

w_diff_df['group'] = w_diff_df.index.to_series().apply(lambda x: 'Low responder' if x in responder_ids else 'High responder')


In [151]:
w_diff_df.groupby('group').mean()

Unnamed: 0_level_0,W_pct_diff,age,gender,1_pct_placebo,2_pct_placebo,3_pct_placebo,4_pct_placebo,R_pct_placebo,W_pct_placebo
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
High responder,-6.874855,46.777778,1.666667,12.256244,36.173626,9.731898,7.077789,17.196041,17.564402
Low responder,1.65942,33.555556,1.666667,8.226613,49.926264,6.450401,6.380111,22.077729,6.938881


In [179]:
my_filter

Unnamed: 0_level_0,Unnamed: 1_level_0,condition,age,gender,lights_off,1,2,3,4,R,W,total,1_pct,2_pct,3_pct,4_pct,R_pct,W_pct,efficiency
subject_id,night_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
5.0,1.0,temazepam,32.0,2.0,23:34,3090.0,11040.0,2370.0,5070.0,6750.0,2220.0,30540.0,10.117878,36.149312,7.760314,16.601179,22.102161,7.269155,92.730845
5.0,2.0,placebo,32.0,2.0,23:23,3630.0,11880.0,1590.0,3810.0,6780.0,3330.0,31020.0,11.702128,38.297872,5.125725,12.282398,21.856867,10.73501,89.26499
6.0,1.0,placebo,35.0,2.0,23:28,2670.0,17670.0,300.0,0.0,8130.0,1470.0,30240.0,8.829365,58.43254,0.992063,0.0,26.884921,4.861111,95.138889
6.0,2.0,temazepam,35.0,2.0,23:26,1410.0,17430.0,780.0,0.0,7380.0,1560.0,28560.0,4.936975,61.029412,2.731092,0.0,25.840336,5.462185,94.537815
7.0,1.0,placebo,51.0,2.0,0:02,2640.0,6030.0,1500.0,2460.0,2040.0,9960.0,24630.0,10.718636,24.482339,6.090134,9.98782,8.282582,40.43849,59.56151
7.0,2.0,temazepam,51.0,2.0,23:24,1710.0,10350.0,3540.0,1260.0,3360.0,7230.0,27450.0,6.229508,37.704918,12.896175,4.590164,12.240437,26.338798,73.661202
8.0,1.0,temazepam,66.0,2.0,23:53,2370.0,14040.0,5040.0,0.0,2640.0,4920.0,29010.0,8.169597,48.397104,17.37332,0.0,9.10031,16.959669,83.040331
8.0,2.0,placebo,66.0,2.0,23:20,5130.0,13500.0,1350.0,0.0,3330.0,4620.0,27930.0,18.367347,48.335124,4.833512,0.0,11.922664,16.541353,83.458647
9.0,1.0,temazepam,47.0,1.0,23:42,2730.0,13920.0,1860.0,0.0,5580.0,4200.0,28290.0,9.650053,49.204666,6.574761,0.0,19.724284,14.846235,85.153765
9.0,2.0,placebo,47.0,1.0,0:30,4590.0,10620.0,1680.0,0.0,4800.0,6000.0,27690.0,16.576381,38.353196,6.067172,0.0,17.334778,21.668472,78.331528


In [185]:
# Find the first night for each subject
first_night = my_filter.reset_index().groupby('subject_id')['night_id'].min().reset_index()

# Merge to get the condition for the first night
first_night_cond = my_filter.reset_index().merge(first_night, on=['subject_id', 'night_id'], how='inner')

# Filter for those where the first night was temazepam
temazepam_first = first_night_cond[first_night_cond['condition'] == 'temazepam']

result = my_filter.loc[my_filter.index.get_level_values('subject_id').isin(temazepam_first['subject_id'])]
result

Unnamed: 0_level_0,Unnamed: 1_level_0,condition,age,gender,lights_off,1,2,3,4,R,W,total,1_pct,2_pct,3_pct,4_pct,R_pct,W_pct,efficiency
subject_id,night_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
5.0,1.0,temazepam,32.0,2.0,23:34,3090.0,11040.0,2370.0,5070.0,6750.0,2220.0,30540.0,10.117878,36.149312,7.760314,16.601179,22.102161,7.269155,92.730845
5.0,2.0,placebo,32.0,2.0,23:23,3630.0,11880.0,1590.0,3810.0,6780.0,3330.0,31020.0,11.702128,38.297872,5.125725,12.282398,21.856867,10.73501,89.26499
8.0,1.0,temazepam,66.0,2.0,23:53,2370.0,14040.0,5040.0,0.0,2640.0,4920.0,29010.0,8.169597,48.397104,17.37332,0.0,9.10031,16.959669,83.040331
8.0,2.0,placebo,66.0,2.0,23:20,5130.0,13500.0,1350.0,0.0,3330.0,4620.0,27930.0,18.367347,48.335124,4.833512,0.0,11.922664,16.541353,83.458647
9.0,1.0,temazepam,47.0,1.0,23:42,2730.0,13920.0,1860.0,0.0,5580.0,4200.0,28290.0,9.650053,49.204666,6.574761,0.0,19.724284,14.846235,85.153765
9.0,2.0,placebo,47.0,1.0,0:30,4590.0,10620.0,1680.0,0.0,4800.0,6000.0,27690.0,16.576381,38.353196,6.067172,0.0,17.334778,21.668472,78.331528
11.0,1.0,temazepam,21.0,2.0,23:38,750.0,12240.0,870.0,5850.0,9450.0,1350.0,30510.0,2.45821,40.117994,2.851524,19.174041,30.973451,4.424779,95.575221
11.0,2.0,placebo,21.0,2.0,23:52,1110.0,11640.0,1920.0,4410.0,8790.0,1470.0,29340.0,3.783231,39.672802,6.543967,15.030675,29.9591,5.010225,94.989775
13.0,1.0,temazepam,22.0,1.0,0:38,1800.0,14100.0,4080.0,390.0,4860.0,540.0,25770.0,6.984866,54.714785,15.832363,1.513388,18.859139,2.09546,97.90454
13.0,2.0,placebo,22.0,1.0,0:31,2670.0,11520.0,2490.0,600.0,6480.0,1800.0,25560.0,10.446009,45.070423,9.741784,2.347418,25.352113,7.042254,92.957746


In [None]:
result.reset_index().drop(columns=['subject_id', 'condition', 'lights_off']).groupby('night_id').mean()

Unnamed: 0_level_0,age,gender,1,2,3,4,R,W,total,1_pct,2_pct,3_pct,4_pct,R_pct,W_pct,efficiency
night_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1.0,42.444444,1.777778,2323.333333,13843.333333,2523.333333,2230.0,5533.333333,3053.333333,29506.666667,7.90941,47.140395,8.640078,7.36282,18.725832,10.221465,89.778535
2.0,42.444444,1.777778,3006.666667,12833.333333,1996.666667,1986.666667,5710.0,3180.0,28713.333333,10.485564,44.834251,6.987776,6.69191,19.961791,11.038708,88.961292


In [197]:
my_filter.loc[my_filter.index.get_level_values('subject_id').isin([6, 7, 10, 12, 14, 15, 17, 20, 22])].reset_index().drop(columns=['subject_id', 'condition', 'lights_off']).groupby('night_id').mean()

Unnamed: 0_level_0,age,gender,1,2,3,4,R,W,total,1_pct,2_pct,3_pct,4_pct,R_pct,W_pct,efficiency
night_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1.0,37.888889,1.555556,2863.333333,11850.0,2623.333333,1886.666667,5606.666667,3743.333333,28573.333333,9.997293,41.26564,9.194524,6.765989,19.311978,13.464575,86.535425
2.0,37.888889,1.555556,2413.333333,13313.333333,1973.333333,2210.0,6053.333333,2586.666667,28550.0,8.3353,46.897988,6.85235,7.614803,21.233175,9.066383,90.933617


In [241]:
low_res = responder_ids
final_df[final_df['subject_id'].isin(low_res)].query('start_sec < 100')[['duration_sec', 'condition']].groupby('condition').mean()

Unnamed: 0_level_0,duration_sec
condition,Unnamed: 1_level_1
placebo,792.857143
temazepam,595.0


In [242]:
final_df[~final_df['subject_id'].isin(low_res)].query('start_sec < 100')[['duration_sec', 'condition']].groupby('condition').mean()

Unnamed: 0_level_0,duration_sec
condition,Unnamed: 1_level_1
placebo,1210.0
temazepam,1278.75


In [207]:
res

[9, 6, 15, 1, 16, 7, 14, 13]

In [250]:
result.set_index('condition').select_dtypes(['int', 'float']).reset_index().groupby('condition').mean()

Unnamed: 0_level_0,age,gender,1,2,3,4,R,W,total,1_pct,2_pct,3_pct,4_pct,R_pct,W_pct,efficiency
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
placebo,42.444444,1.777778,3006.666667,12833.333333,1996.666667,1986.666667,5710.0,3180.0,28713.333333,10.485564,44.834251,6.987776,6.69191,19.961791,11.038708,88.961292
temazepam,42.444444,1.777778,2323.333333,13843.333333,2523.333333,2230.0,5533.333333,3053.333333,29506.666667,7.90941,47.140395,8.640078,7.36282,18.725832,10.221465,89.778535


In [None]:
result

Unnamed: 0_level_0,Unnamed: 1_level_0,condition,age,gender,lights_off,1,2,3,4,R,W,total,1_pct,2_pct,3_pct,4_pct,R_pct,W_pct,efficiency
subject_id,night_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
5.0,1.0,temazepam,32.0,2.0,23:34,3090.0,11040.0,2370.0,5070.0,6750.0,2220.0,30540.0,10.117878,36.149312,7.760314,16.601179,22.102161,7.269155,92.730845
5.0,2.0,placebo,32.0,2.0,23:23,3630.0,11880.0,1590.0,3810.0,6780.0,3330.0,31020.0,11.702128,38.297872,5.125725,12.282398,21.856867,10.73501,89.26499
8.0,1.0,temazepam,66.0,2.0,23:53,2370.0,14040.0,5040.0,0.0,2640.0,4920.0,29010.0,8.169597,48.397104,17.37332,0.0,9.10031,16.959669,83.040331
8.0,2.0,placebo,66.0,2.0,23:20,5130.0,13500.0,1350.0,0.0,3330.0,4620.0,27930.0,18.367347,48.335124,4.833512,0.0,11.922664,16.541353,83.458647
9.0,1.0,temazepam,47.0,1.0,23:42,2730.0,13920.0,1860.0,0.0,5580.0,4200.0,28290.0,9.650053,49.204666,6.574761,0.0,19.724284,14.846235,85.153765
9.0,2.0,placebo,47.0,1.0,0:30,4590.0,10620.0,1680.0,0.0,4800.0,6000.0,27690.0,16.576381,38.353196,6.067172,0.0,17.334778,21.668472,78.331528
11.0,1.0,temazepam,21.0,2.0,23:38,750.0,12240.0,870.0,5850.0,9450.0,1350.0,30510.0,2.45821,40.117994,2.851524,19.174041,30.973451,4.424779,95.575221
11.0,2.0,placebo,21.0,2.0,23:52,1110.0,11640.0,1920.0,4410.0,8790.0,1470.0,29340.0,3.783231,39.672802,6.543967,15.030675,29.9591,5.010225,94.989775
13.0,1.0,temazepam,22.0,1.0,0:38,1800.0,14100.0,4080.0,390.0,4860.0,540.0,25770.0,6.984866,54.714785,15.832363,1.513388,18.859139,2.09546,97.90454
13.0,2.0,placebo,22.0,1.0,0:31,2670.0,11520.0,2490.0,600.0,6480.0,1800.0,25560.0,10.446009,45.070423,9.741784,2.347418,25.352113,7.042254,92.957746


In [257]:
# List of subject_ids to exclude
exclude_ids = [9, 6, 15, 1, 16, 7, 14, 13]

# If 'result' has a MultiIndex with 'subject_id' as the first level:
filtered_result = result.loc[result.index.get_level_values('subject_id').isin(exclude_ids)]
needed_for_new = filtered_result.set_index('condition').select_dtypes(['int', 'float']).reset_index().groupby('condition').mean()

In [261]:
needed_for_new[['W', 'R', '3', 'total']]

Unnamed: 0_level_0,W,R,3,total
condition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
placebo,4180.0,4560.0,2670.0,27630.0
temazepam,2820.0,5400.0,3130.0,28590.0
