In [1]:
import numpy as np
import pandas as pd
import logging

In [2]:
from glob import glob
import re

def natural_key(string):
    """ provides a human-like sorting key of a string """
    p = r'(\d+)'
    key = [int(t) if t.isdigit() else None for t in re.split(p, string)]
    return key

# check whether this can be replaced by natural key
def _session_key(string):
    """ sort the file name by session """
    p = r'(s\d*)_'
    return re.findall(p, string)


def _time_key(file_name):
    """ provides a time-based sorting key """
    # the splits are specific to tuh abnormal eeg data set
    splits = file_name.split('/')
    p = r'(\d{4}_\d{2}_\d{2})'
    [date] = re.findall(p, splits[-2])
    date_id = [int(token) for token in date.split('_')]
    recording_id = natural_key(splits[-1])
    session_id = _session_key(splits[-2])
    return date_id + session_id + recording_id


def read_all_file_names(path, extension, key="time"):
    """ read all files with specified extension from given path
    :param path: parent directory holding the files directly or in
    subdirectories
    :param extension: the type of the file, e.g. '.txt' or '.edf'
    :param key: the sorting of the files. natural e.g. 1, 2, 12, 21
        (machine 1, 12, 2, 21) or by time since this is
        important for cv. time is specified in the edf file names
    """
    assert key in ["natural", "time"], "unknown sorting key"
    file_paths = glob(path + '**/*' + extension, recursive=True)
    if key == "time":
        sorting_key = _time_key
    else:
        sorting_key = natural_key
    file_names = sorted(file_paths, key=sorting_key)

    assert len(file_names) > 0, ("something went wrong. Found no {} files in {}"
                                 .format(extension, path))
    return file_names

In [3]:
def uniqe_ids(path):
    all_reports = np.array(read_all_file_names(path, extension=".txt", key="time"))
    ids = [r.split("/")[-3] for r in all_reports]
    return all_reports, np.unique(ids)

In [4]:
all_reports, unique_patient_ids = uniqe_ids("/data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/")

In [5]:
len(all_reports)

2993

In [6]:
len(unique_patient_ids)

2329

In [7]:
train_normal_reports, train_normal_unique_patient_ids = uniqe_ids("/data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/eval/")

In [8]:
len(train_normal_reports), len(train_normal_unique_patient_ids)

(276, 253)

In [9]:
train_abnormal_reports, train_abnormal_unique_patient_ids = uniqe_ids("/data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/")

In [10]:
len(train_abnormal_reports), len(train_abnormal_unique_patient_ids)

(2717, 2076)

In [11]:
len(np.intersect1d(train_normal_unique_patient_ids, train_abnormal_unique_patient_ids))

0

In [6]:
def remove_exccessive_white_space(string):
    replaces = re.sub(r'\s+', ' ', string)
    return replaces.strip()

In [7]:
def merge_columns(df, merge_to, merge_from, drop=True):
    # TODO: check that no data is overwritten by merging?
    assert merge_to in df.columns, "column {} not found in dataframe".format(merge_to)
    assert merge_from in df.columns, "column {} not found in dataframe".format(merge_from)
    df[merge_to][pd.isna(df[merge_to])] = df[merge_from][pd.isna(df[merge_to])]
    if drop:
        df = df.drop(merge_from, axis=1)
    return df

In [8]:
def merge_several_columns(df, merge_to, merge_from_several, drop=True):
    for column in merge_from_several:
        df = merge_columns(df, merge_to, column, drop=drop)
    return df

In [9]:
# assume sections are given in all caps seperated only by white space characters and followed by a colon
# assume that section text starts with a colon and is everything in between two sections
categoy_pattern = r"^([A-Z\s]{2,}):{1}"
content_pattern = r":(.*)"

In [19]:
i_start = 998
i_stop = 1001
df = pd.DataFrame()
for report in all_reports[i_start:i_stop]:
    with open(report, "rb") as f:
        content = f.readlines()
    assert content, "error reading {}".format(report)
    content = b'\n'.join([line.strip() for line in content]).strip().decode("latin-1")
    categories = re.findall(categoy_pattern, content, re.MULTILINE)   
    assert len(categories) > 0, "no categories found"

    splits = report.split('/')
    # add subject id, session id, label, data path and set information
    df_row = {"SUBJECT": splits[-3], "SESSION": splits[-2], "LABEL": splits[-6], 
              "PATH": report, "SET": splits[-7]}
    
    # some recordings have multiple entries per category. skip these files for now, add later manually
    if len(np.unique(categories)) != len(categories):
        df = df.append(df_row, ignore_index=True)
        continue
    
    # go through all subsequent pairs of categories, extract text inbetween and assign it to start category
    for j in range(len(categories) - 1):
        start = categories[j]
        stop = categories[j + 1]
        match = re.findall(start + content_pattern + stop, content, re.DOTALL)
        assert len(match) == 1, "found more than one match!"
        # remove multiple spaces and newlines
        start = ' '.join(start.split())
        df_row.update({start: remove_exccessive_white_space(match[0])})
        
    # take all text that appears after last category and assign 
    match = re.findall(stop + content_pattern, content, re.DOTALL)
    assert len(match) == 1, "found more than one match!"
    # remove multiple spaces and newlines
    stop = ' '.join(stop.split())
    df_row.update({stop: remove_exccessive_white_space(match[0])})
    df = df.append(df_row, ignore_index=True)

In [20]:
len(df)

3

In [21]:
df.head(3)

Unnamed: 0,CLINICAL CORRELATION,CLINICAL HISTORY,DESCRIPTION OF THE RECORD,HR,IMPRESSION,INTRODUCTION,LABEL,MEDICATIONS,PATH,SESSION,SET,SUBJECT
0,There are no epileptiform features noted. If e...,50 year old left handed male with an episode o...,In wakefulness there is a 10 Hz alpha rhythm w...,60 bpm,Normal EEG.,Digital video EEG was performed in lab using s...,normal,"Acai Berry Extract, Calcium, Vitamin D3 Melatonin",/data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2...,s001_2011_10_11,train,8243
1,,,,,,,abnormal,,/data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2...,s014_2011_10_11,train,5265
2,There are no epileptiform features identified ...,39 year old right handed woman with 2 episodes...,"In wakefulness, there is an 11 Hz alpha rhythm...",72 bpm,Normal EEG in a relatively sleepy patient.,Digital video EEG was performed in lab using s...,normal,"Wellbutrin, Xanax, Percocet, Singulair, Advair",/data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2...,s001_2011_10_12,train,8235


In [39]:
df = merge_several_columns(df, "CLINICAL CORRELATION", ["CORRELATION", "CLINICAL COURSE", "CLINICAL CORRELATIONS", "CLINICAL CORRELATE",
                                                        "CLINICAL INTERPRETATION", "CLINICAL CORR ELATION", "NOTE"])  # not sure about merging note

In [40]:
df = merge_several_columns(df, "CLINICAL HISTORY", ["HISTORY", "CLINICAL", "M CLINICAL HISTORY", "EEG REPORT CLINICAL HISTORY", 
                                                    "HOSPITAL COURSE", "BASELINE EEG CLINICAL HISTORY", "EEG NUMBER",
                                                    "ORIGINAL CLINICAL HISTORY"])

In [41]:
df = merge_several_columns(df, "DESCRIPTION OF THE RECORD", ["DESCRIPTION OF RECORD", "DESCRIPTION RECORD", "DESCRIPTION OF RECORDING", 
                                                             "DESCRIPTION OF THE RECORDING", "DESCRIPTION OF THE PROCEDURE", 
                                                             "DESCRIPTION OF BACKGROUND", "DESCRIPTION OF PROCEDURE", "OF THE RECORD",
                                                             "DESCRIPTION THE RECORD"])

In [42]:
df = merge_several_columns(df, "MEDICATIONS", ["MEDICATION", "CURRENT MEDICATIONS", "MEDICINES"])

In [43]:
df = merge_several_columns(df, "HEART RATE", ["HEAR RATE", "HR"])

In [44]:
df = merge_several_columns(df, "IMPRESSION", ["CLINICAL IMPRESSION"])

In [45]:
df = merge_several_columns(df, "FINDINGS", ["ABNORMAL FINDINGS"])

In [46]:
df = merge_several_columns(df, "EVENTS", ['SEIZURE EVENTS', 'SEIZURES OR EPISODES', 'EVENT', 'EPISODES', 'CLINICAL EVENTS',
                                          "EPISODES OR EVENTS", "EPISODES DURING THE RECORDING", "REFERRING FOR STUDY",
                                          "EVENTS OF PUSHBUTTON", "SEIZURES", "SEIZURE ACTIVITY"])

In [47]:
df = merge_several_columns(df, "TECHNICAL DIFFICULTIES", ["TECHNICAL PROBLEMS", "TECHNICAL DIFFICULTY", "CLINICAL DIFFICULTIES", "TECHNICAL DISCHARGES", 
                                                          "TECHNICAL NOTES", "TECHNICAL ISSUES", "TECHNIQUE DIFFICULTIES", "TECHNICAL CONSIDERATIONS",
                                                          "TECHNICAL QUALITY", "TECHNICAL", "ARTIFACTS"])

In [48]:
df = merge_several_columns(df, "CONDITION OF THE RECORDING", ["CONDITIONS OF THE RECORDING", "CONDITION OF RECORDING", 
                                                              "CONDITIONS OF RECORDING"])

In [49]:
df = merge_several_columns(df, "REASON FOR STUDY", ["REASON", "REASON FOR STUDIES", "REASON FOR EGG", "REASON FOR THE STUDY"])

In [50]:
df = merge_several_columns(df, "FINDINGS", ["DIAGNOSES", "DIAGNOSIS", "ABNORMAL DISCHARGES", "ABNORMAL DISCHARGE", 
                                            "EEG", "RECOMMENDATIONS"])  # not sure about merging recommendations

In [51]:
df = merge_several_columns(df, "PAST MEDICAL HISTORY", ["PAST HISTORY"])

In [52]:
df = merge_several_columns(df, "ACTIVATION PROCEDURES", ["ACTIVATION PROCEDURE", "ACTIVATING PROCEDURES", ])

In [53]:
df = merge_several_columns(df, "REASON FOR STUDY", ["REASON FOR EEG", "REASON FOR PROCEDURE"])

In [54]:
for drop_column in ["RECORDING TIMES", "RECORDING START TIME", "RECORDING END TIME", "RECORD FINISH TIME", "RECORD START TIME", 
                    "TOTAL LENGTH OF THE RECORDING", "RECORDING LENGTH", "TIME OF RECORDING", "LENGTH OF ELECTROENCEPHALOGRAM", 
                    "EEG LENGTH", "LENGTH OF EEG", "LENGTH OF PROCEDURE", "LENGTH OF THE RECORDING", "LENGTH OF THE EEG", 
                    "LENGTH OF RECORDING", "STUDY DATE", "DATE OF RECORDING", "EGG LENGTH", "TIME", "DURATION OF STUDY", 
                    "STUDY DURATION", "DATE OF THE RECORDING", "DATE OF STUDY", "DATES OF STUDY", "DT", "DD", "DENTAL PROBLEMS", 
                    "STAGES", "REASON FOR SEIZURES", "SEIZURES OR PUSHBUTTON EVENTS", "FEATURES", "INPATIENT ROOM", "EKG",
                    "DATE", "SLEEP"]:
    df = df.drop(drop_column, axis=1)

In [55]:
df.head(3)

Unnamed: 0,CLINICAL CORRELATION,CLINICAL HISTORY,DESCRIPTION OF THE RECORD,IMPRESSION,INTRODUCTION,LABEL,MEDICATIONS,PATH,SESSION,SET,...,REASON FOR STUDY,TECHNICAL DIFFICULTIES,NOTE,EVENTS,FINDINGS,ACTIVATION PROCEDURES,PAST MEDICAL HISTORY,TYPE OF STUDY,CONDITION OF THE RECORDING,HEART RATE
0,This prolonged outpatient sleep deprived EEG w...,"Schizophrenia, memory loss.","In wakefulness, background EEG is well organiz...",Normal EEG.,Digital video EEG is performed in the lab usin...,normal,"Geodon, simvastatin, benztropine, Norvasc.",/data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2...,s001_2009_09_03,eval,...,,,,,,,,,,
1,This is the third normal EEG for this individu...,Seizures.,"In wakefulness, there is a 9.8 Hz alpha rhythm...",Normal electroencephalogram.,Digital video EEG is performed in the lab usin...,normal,"Dilantin, Lipitor.",/data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2...,s003_2009_09_03,eval,...,,,,,,,,,,
2,No focal nor epileptiform features are identif...,Sixty-five-year-old woman with a previous eval...,The background EEG is appropriately organized ...,Normal EEG in wakefulness.,Digital video EEG is performed in the lab usin...,normal,Norvasc.,/data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2...,s001_2009_09_04,eval,...,,,,,,,,,,


In [56]:
print([c for c in df.columns])

['CLINICAL CORRELATION', 'CLINICAL HISTORY', 'DESCRIPTION OF THE RECORD', 'IMPRESSION', 'INTRODUCTION', 'LABEL', 'MEDICATIONS', 'PATH', 'SESSION', 'SET', 'SUBJECT', 'REASON FOR STUDY', 'TECHNICAL DIFFICULTIES', 'NOTE', 'EVENTS', 'FINDINGS', 'ACTIVATION PROCEDURES', 'PAST MEDICAL HISTORY', 'TYPE OF STUDY', 'CONDITION OF THE RECORDING', 'HEART RATE']


In [75]:
len(df) - pd.isna(df).sum()

CLINICAL CORRELATION          2666
CLINICAL HISTORY              2952
DESCRIPTION OF THE RECORD     2956
IMPRESSION                    2970
INTRODUCTION                  2826
LABEL                         2993
MEDICATIONS                   2920
PATH                          2993
SESSION                       2993
SET                           2993
SUBJECT                       2993
REASON FOR STUDY               716
TECHNICAL DIFFICULTIES         681
NOTE                             1
EVENTS                         563
FINDINGS                       888
ACTIVATION PROCEDURES            8
PAST MEDICAL HISTORY            18
TYPE OF STUDY                   15
CONDITION OF THE RECORDING     115
HEART RATE                    1478
dtype: int64

checking individual cases

In [81]:
column = "INTRODUCTION"

In [82]:
df[column][~pd.isna(df[column])]

0       Digital video EEG is performed in the lab usin...
1       Digital video EEG is performed in the lab usin...
2       Digital video EEG is performed in the lab usin...
3       A 38-year-old right-handed woman with epilepsy...
4       Digital video EEG was performed in lab using s...
5       Digital video EEG was performed in lab using s...
6       Digital video EEG was performed in lab using s...
7       Digital video EEG was performed at bedside usi...
8       Digital video EEG was performed in lab using s...
9       Digital video EEG was performed in lab using s...
10      Digital video EEG was performed in lab/bed usi...
11      Digital video EEG was performed in lab using s...
12      Digital video EEG was performed in lab using s...
13      Digital video EEG was performed in lab using s...
14      Digital video EEG was performed in lab using s...
15      Digital video EEG was performed in lab using s...
16      Digital video EEG was performed in lab using s...
17      Digita

In [65]:
df.iloc[1943]

CLINICAL CORRELATION                                                        NaN
CLINICAL HISTORY              The patient is a 16-year-old man with episodes...
DESCRIPTION OF THE RECORD     The posterior dominant rhythm consists of a we...
IMPRESSION                    This is a normal awake and briefly drowsy 22-m...
INTRODUCTION                  Digital video EEG is performed using standard ...
LABEL                                                                    normal
MEDICATIONS                                                             Keppra.
PATH                          /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2...
SESSION                                                         s001_2012_09_24
SET                                                                       train
SUBJECT                                                                00009703
REASON FOR STUDY                                                            NaN
TECHNICAL DIFFICULTIES                  

In [305]:
df["CLINICAL COURSE"][~pd.isna(df["CLINICAL COURSE"])]

471    These findings are similar to a previous study...
Name: CLINICAL COURSE, dtype: object

In [269]:
df["TECHNICAL DISCHARGES"][~pd.isna(df["TECHNICAL DISCHARGES"])]

300    : None
Name: TECHNICAL DISCHARGES, dtype: object

In [270]:
df["ABNORMAL DISCHARGE"][~pd.isna(df["ABNORMAL DISCHARGE"])]

297    None.
299    None.
Name: ABNORMAL DISCHARGE, dtype: object

In [277]:
df["CLINICAL DIFFICULTIES"][~pd.isna(df["CLINICAL DIFFICULTIES"])]

219    Increased temporalis muscle artifact throughou...
350                                                None.
Name: CLINICAL DIFFICULTIES, dtype: object

In [272]:
df["PATH"].iloc[299]

'/data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/normal/01_tcp_ar/083/00008366/s001_2011_10_24/00008366_s001.txt'

In [273]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/normal/01_tcp_ar/083/00008366/s001_2011_10_24/00008366_s001.txt

CLINICAL HISTORY:  71 year old right handed man presents for left CEA.  Past medical history significant for diabetes, hypertension, coronary artery disease, gout, BPH, anxiety, depression, and GERD.
MEDICATIONS:  Metformin, Zocor, Topamax.
REASON FOR EEG:  Baseline for CEA.
INTRODUCTION:  Digital video EEG was performed in lab using standard 10-20 electrode placement system with anterior temporal and EKG electrodes.  The EEG was recorded in wakefulness and photic stimulation was performed.
TECHNICAL DIFFICULTIES:  None.
DESCRIPTION OF THE RECORD:  The record opens to a well-formed posterior dominant rhythm at 9 to10 Hz and amplitude of 20-60 microvolts.  This is reactive to eye opening.  Normal frontocentral beta is seen.  The EEG is only recorded during wakefulness.  Photic stimulation does not reveal any abnormal discharges.
ABNORMAL DISCHARGE:  None.
SEIZURES:    None.
IMPRESSION:  Normal awake EEG.
CLINICAL CORRELATION:  No seizures or epileptiform discharges were seen.





In [334]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/normal/01_tcp_ar/067/00006766/s001_2010_07_06/00006766_s001.txt

CLINICAL HISTORY: 26 year old right handed woman with epilepsy, multiple different types of seizures occurring quite frequently, increasing over the past week.

MEDICATIONS: Keppra.

INTRODUCTION: Digital video EEG was performed in lab using standard 10-20 system of electrode placement with 1 channel EKG. Hyperventilation and photic stimulation are performed.

DESCRIPTION OF THE RECORD: In wakefulness, there Is a 10Hz, 40 microvolt posterior dominant rhythm. Features of drowsiness include slow rolling eye movements followed by POSTS. Vertex waves are observed in stage I sleep. Stage II sleep was characterized by vertex waves and spindles. Hyperventilation is performed with good effort, producing an increase in amplitude of the background. Photic stimulation elicits bilateral driving.

HR: 80 bpm

IMPRESSION: Normal EEG.

CLINICAL CORRELATION: These findings are identical to previous studies which include a 2007 prolonged EEG monitoring session which identified multiple behaviors withou

df.to_csv("/data/schirrmr/gemeinl/tuh-abnormal-eeg/reports/reports_{}_{}.csv".format(i_start, i_stop))

In [115]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/normal/01_tcp_ar/103/00010316/s001_2013_05_14/00010316_s001.txt

CLINICAL HISTORY:  A 24-year-old male with new onset seizures one year ago after toxic ingestion.  The patient subsequently had 4 seizures after alcohol consumption on May 11 and a seizure May 12, a 15-20 minute tonic clonic seizure with postictal confusion.
MEDICATIONS:  Depakote, buspirone, trazodone, Cymbalta, and others.
INTRODUCTION:  Digital video EEG was performed in the lab using standard 10-20 system of electrode placement with 1 channel EKG.  Hyperventilation and photic stimulation were completed.
TECHNICAL:  There is some muscle artifact at T4, otherwise satisfactory.
DESCRIPTION OF THE RECORD:  In wakefulness, there is a 10 Hz 40 mV alpha rhythm with a small amount of low voltage, frontal central beta.
Hyperventilation was performed early in the record producing an increase in amplitude.  Drowsiness is characterized by slowing of the alpha rhythm with rhythmic central theta, multiple arousals in and out of wakefulness and drowsiness were noted.  Deeper stages of sleep were 

In [178]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/100/00010026/s001_2013_02_19/00010026_s001.txt

MEDICATIONS:  Tegretol
HISTORY:  A 28-year-old woman with recurrent black outs with an aura of left posterior headaches since the age of 13, date of most recent event unknown.
INTRODUCTION:  Digital video EEG was performed in the lab using standard 10.20  system with 1-channel EKG.  Hyperventilation was completed.  Photic stimulation was not completed.
DESCRIPTION OF BACKGROUND:  In apparent wakefulness, there is an 8 Hertz alpha rhythm with a background with excess theta.  Testing was performed to assure maximal wakefulness and there is some improvement that the background remains with a small amount of excess theta.  Hyperventilation was performed early on in the record producing an increase in amplitude of the background as well as some slowing and head rocking artifact.  Head rocking artifact is noted in the occipitoparietal region, right more so than the left.
Deeper stages of sleep were not sustained.  The technologist did extend the EEG but did not capture sleep, although testin

In [203]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/022/00002253/s003_2011_05_12/00002253_s003.txt

EEG REPORT
CLINICAL HISTORY: 29 year old right handed male with a past gunshot wound to the head, left encephalomalacia, parietal lobe injury, now with recurrent seizures after the patient stopped his medications.
MEDICATIONS: Keppra, Lamictal, Lovenox
INTRODUCTION: Digital video EEG was performed in lab using standard 10-20 system of electrode placement with 1 channel of EKG. hyperventilation and photic stimulation were completed.
DESCRIPTION OF THE RECORD: In wakefulness, there is an 8.3 Hz alpha rhythm present bilaterally but somewhat disrupted on the left compared to the right. A mu rhythm is also noted in the central regions, although specific testing is not performed. Focal slowing is noted from the left hemisphere with disruption of faster frequency activity and the presence of theta activity in the left mid to posterior quadrant. Hyperventilation accentuates the focal slowing. Vertex waves, spindles and POSTS are observed in stage II sleep. As the patient transitions to sleep, 

In [215]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/000/00000068/s010_2013_01_21/00000068_s010.txt

BASELINE EEG
CLINICAL HISTORY:  A 45-year-old woman with a strong family history of epilepsy, nocturnal tonic-clonic seizures, myoclonic jerks.  Vagal nerve stimulator.  There is a history of head trauma.
MEDICATIONS:  Dilantin, Lamictal.
INTRODUCTION:  Digital video EEG is performed in the lab using standard 10-20 system of electrode placement with one channel of EKG.  Hyperventilation and photic stimulation were completed.
DESCRIPTION OF THE RECORD:  In wakefulness, there is a 10-Hz alpha rhythm but there is a small amount of 7-Hz theta noted in the background.  Hyperventilation was performed early on in the record and produced an increase in theta.  As the patient transitioned in and out of stage 2 sleep, rare spike and slow-wave complexes were identified.  There was a burst of high-amplitude spike and polyspike activity in sleep which was generalized.  There was an additional subtle spike in sleep noted in the right frontal region.  Sleep was not sustained.
Photic stimulation was p

In [248]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/085/00008512/s001_2012_01_09/00008512_s001.txt

CLINICAL HISTORY:  60 year old male with multiple focal seizures involving right arm with chewing movements.  Past MCA stroke, hypertension, aphasia, pacemaker.
MEDICATIONS:  Depakote, Ativan
INTRODUCTION:  Digital video EEG was performed at bedside using standard 10-20 system of electrode placement with 1 channel EKG.  The patient has clinical seizures characterized by facial twitching and involuntary movements of the right arm.
DESCRIPTION OF THE RECORD:  As the tracing begins the patient has been having seizures and continues to have seizures.  The overall EEG is sharply contoured, almost a chaotic pattern, and intermittently discontinuous.  Left frontal sharp waves precede the seizure and the seizure includes a mixture of frontal and left hemispheric sharp activity and fast activity.  The activity is maximum in the front, but is really seen throughout the left hemisphere in the parasagittal region.  The fast activity is prominent in the central regions.  Some of the activity spread

In [261]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/085/00008512/s001_2012_01_09/00008512_s001.txt

CLINICAL HISTORY:  60 year old male with multiple focal seizures involving right arm with chewing movements.  Past MCA stroke, hypertension, aphasia, pacemaker.
MEDICATIONS:  Depakote, Ativan
INTRODUCTION:  Digital video EEG was performed at bedside using standard 10-20 system of electrode placement with 1 channel EKG.  The patient has clinical seizures characterized by facial twitching and involuntary movements of the right arm.
DESCRIPTION OF THE RECORD:  As the tracing begins the patient has been having seizures and continues to have seizures.  The overall EEG is sharply contoured, almost a chaotic pattern, and intermittently discontinuous.  Left frontal sharp waves precede the seizure and the seizure includes a mixture of frontal and left hemispheric sharp activity and fast activity.  The activity is maximum in the front, but is really seen throughout the left hemisphere in the parasagittal region.  The fast activity is prominent in the central regions.  Some of the activity spread

In [268]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/082/00008251/s001_2011_11_26/00008251_s001.txt

68 year old female with a history of PCA infant who presents with
MEDICATIONS: None.
REASON FOR EEG: Seizures.
Long term digital video EEG monitoring was performed at bedside over 20
hours using the standard 20 electrode placement with additional interior temporal and EKG
electrodes. The patient was unresponsive during the recording. No activating procedures were
performed.
TECHNICAL DIFFICULTIES : None.
DESCRIPTION OF RECORD: The record opens to a diffusely background in the 2-4
Hz frequency. No abnormal posterior dominant rhythm is seen or central beta. No
abnormal sleep architecture is seen. No activating procedures are
ABNORMAL DISCHARGES: Generalized slow waves at 2-4 Hz an amplitude 20-30 mV with
frontal intermittent rhythmic delta (FIRDA).
NOTE: During the recording the EEG continues to increase in the frequencies to a more of a
theta range in the 5-7 Hz frequency, which still remains diffusely During stimulation there
is an increase in the frequency and there is multiple artifa

In [77]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/normal/01_tcp_ar/083/00008337/s001_2011_06_30/00008337_s001.txt

EEG NUMBER: 11-915  HISTORY: This Is a 62-year-old right-handed male admitted 6/27/2011 with difficulty speaking, dysphagia, facial droop. PMH: migraine headaches and CABG.
MEDICATIONS: None reported.
INTRODUCTION: Digital video EEG Is performed in the lab using standard 10-20 system of electrode placement with one-channel EKG. Photic stimulation is performed.
DESCRIPTION OF RECORD: In wakefulness, there is a 10Hz alpha rhythm, but there is a small amount of 8Hz activity In the background. In drowsiness, there is a small amount of shifting temporal theta. Brief epochs of stage I sleep include vertex waves. Photic stimulation elicits scant driving. The driving is visualized with digital analysis of the EEG.
Heart rate 90 bpm.
IMPRESSION: This EEG is within normal limits for age.
CLINICAL CORRELATION: No epileptiform features are observed. Normal EEG doesn't exclude a diagnosis of epilepsy.






In [99]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/065/00006535/s005_2012_07_18/00006535_s005.txt

CLINICAL HISTORY:   44 year old man with recurrent seizures.
MEDICATIONS:  Vimpat, phenobarbital, and Topamax.
REASON FOR STUDY:  Seizure.
INTRODUCTION:  Digital video long-term EEG was performed using the standard 10-20 electrode placement system with additional anterior temporal and single-lead EKG electrodes.  The patient was recorded during wakefulness and sleep.  Activating procedures were not performed.  Continuous spike detection software as well as seizure detection software and nurses' alarm noted events were used to review the EEG.
TECHNICAL DIFFICULTIES:  None.
DESCRIPTION OF THE RECORD:  The record opens to a well-defined posterior dominant rhythm of 9 to 10 Hz which is reactive to eye opening.  There is a normal amount of frontocentral beta.  Activating procedures were not performed.  The patient is recorded in wakefulness and stage I, stage II, and deeper stages of sleep.
ABNORMAL DISCHARGES:
Focal sharp waves at F7/T3 seen occasionally in wakefulness and sleep.
Focal slo

In [140]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/106/00010639/s001_2013_08_22/00010639_s001.txt

REASON:  Evaluate for encephalopathy.
CLINICAL HISTORY:  A 61-year-old woman with previous left MCA CVA, MR, CHF, and CAD.
INTRODUCTION:  Digital video EEG was performed in the lab using standard 10-20 system of electrode placement with one channel of EKG.  Hyperventilation was deferred.  Photic stimulation was completed.  This is routine 24 minute  EEG which is technically acceptable.
DESCRIPTION OF THE RECORD:  In wakefulness, there is a 9 Hz alpha rhythm which is disrupted on the right compared to the left.  The right hemisphere demonstrates nearly continuous focal slowing with disruption of faster frequency activity and a mixture of arrhythmic delta and theta.  The focal slowing is of medium amplitude.  Features of drowsiness include bilateral slowing, but the asymmetry persists into stage 1 sleep.  Photic stimulation elicits bilateral driving.
Heart rate 72 BPM.
IMPRESSION:  This is an abnormal EEG due to:
Continuous right-sided focal slowing with disruption of faster frequency ac

In [178]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/normal/01_tcp_ar/089/00008913/s001_2012_07_12/00008913_s001.txt

CLINICAL HISTORY: 64 year old woman with a history of seizures since she age 15, now having seizures at night, incontinent at times, postictal confusion lasting 5-10 minutes.
MEDICATIONS:  Tegretol
REASON FOR SEIZURES:  Seizures.
INTRODUCTION:  Digital video routine EEG was performed using the standard 10-20 electrode placement system with additional anterior temporal and single lead EKG electrode.  The patient was recorded in wakefulness, stage I, and stage II sleep. Activating procedures, including photic stimulation and hyperventilation were performed.
TECHNICAL DIFFICULTIES:  None.
DESCRIPTION OF THE RECORD:  The record opens to a well defined posterior dominant rhythm that reaches 9.5 Hz and is reactive to eye opening.  There is normal amount of frontal central beta.  Activating procedures, including hyperventilation and photic stimulation, produce no abnormal discharges.  The patient reached stage I and stage II sleep.
ABNORMAL DISCHARGES:  None.
SEIZURES:  None.
IMPRESSION:  Nor

In [360]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/092/00009231/s002_2012_10_06/00009231_s002.txt

CLINICAL HISTORY:  Adult female with advanced dementia, schizophrenia, epilepsy, status post aspiration, found comatose in the early morning hours of the 4th, initially comatose lacking in brainstem signs now with repetitive eye blink.
MEDICATIONS:  Depakote and Versed.  Many others.
INTRODUCTION:  Digital video EEG is performed at the bedside using standard 10-20 system of electrode placement with 1 channel of EKG.  Hyperventilation and photic stimulation are performed.
October 6-7
DESCRIPTION OF THE RECORD:
As the tracing began, there was a great deal of muscle artifact in the EEG.  In addition to the eye blink artifact, there is a periodic complex identified in the frontocentral regions with a broad field of spread in the parasagittal regions.  When reviewed with an a CZ reference, CZ appears to be active.  There are frontal, periodic, discrete sharp waves or spikes noted.
Throughout the recording, a Versed drip is initiated.  The Versed drip is initiated at 11:29.  At 11:48, the pa

In [540]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/eval/abnormal/01_tcp_ar/100/00010003/s001_2013_02_18/00010003_s001.txt

INPATIENT ROOM:  779
CLINICAL HISTORY:  This is a 79-year-old female with recent left MCA stroke, on warfarin with change in mental status with abrupt onset.
PAST MEDICAL HISTORY:  Significant for the chronic kidney disease, on hemodialysis; myocardial infarction; hypertension.
MEDICATIONS:  Levetiracetam, Norvasc, terazosin, labetalol, aspirin.
INTRODUCTION:  Digital video EEG was performed in lab using standard 10-20 system of electrode placement with one channel EKG.  Hyperventilation was not performed.  Photic stimulation was performed.
DESCRIPTION OF THE RECORD:  In the most alert state, the posterior dominant rhythm was 8.5 to 9 Hz performed in the right occipital region, which attenuates with eye opening.  There is hemispheric asymmetry, which is characterized by left hemispheric attenuation and slowing with loss of faster frequencies.  There is also almost continuous left temporal delta activity maximal at T3/T1 during this recording.  There are occasional sharp wave discharges

In [549]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/092/00009231/s002_2012_10_06/00009231_s002.txt

CLINICAL HISTORY:  Adult female with advanced dementia, schizophrenia, epilepsy, status post aspiration, found comatose in the early morning hours of the 4th, initially comatose lacking in brainstem signs now with repetitive eye blink.
MEDICATIONS:  Depakote and Versed.  Many others.
INTRODUCTION:  Digital video EEG is performed at the bedside using standard 10-20 system of electrode placement with 1 channel of EKG.  Hyperventilation and photic stimulation are performed.
October 6-7
DESCRIPTION OF THE RECORD:
As the tracing began, there was a great deal of muscle artifact in the EEG.  In addition to the eye blink artifact, there is a periodic complex identified in the frontocentral regions with a broad field of spread in the parasagittal regions.  When reviewed with an a CZ reference, CZ appears to be active.  There are frontal, periodic, discrete sharp waves or spikes noted.
Throughout the recording, a Versed drip is initiated.  The Versed drip is initiated at 11:29.  At 11:48, the pa

In [639]:
re.findall(categoy_pattern, "INTRODUCTION:  Digital video EEG is performed at the bedside using standard 10-20 system of electrode placement with 1 channel of EKG.  Hyperventilation and photic stimulation are performed."
"October 6-7\n"
"DESCRIPTION OF THE RECORD:\n"
"As the tracing began, there was a great deal of muscle artifact in the EEG.  In addition to the eye blink artifact, there", re.MULTILINE)

['INTRODUCTION', 'DESCRIPTION OF THE RECORD']

In [760]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/092/00009213/s002_2012_09_17/00009213_s002.txt

CLINICAL HISTORY:  53 year old left handed male with history of traumatic brain injury in 2002 with subsequent seizures, having about 2 generalized tonic-clonic seizure a week, reports recent seizure activity.
MEDICATIONS:  Dilantin
REASON FOR STUDY:  Seizures.
INTRODUCTION:  Digital video routine EEG was performed using the standard 10-20 electrode placement system with additional anterior temporal and single-lead EKG electrode.  The patient was recorded during wakefulness and drowsiness.  Activating procedures were performed including hyperventilation and photic stimulation.
TECHNICAL DIFFICULTIES:  Some T3 and T4 electrode artifact.
DESCRIPTION OF THE RECORD:  The record opens to a posterior dominant rhythm that at best reach 7-8 Hz which appears to be somewhat reactive to eye opening.  There is some frontocentral beta seen.  Activating procedures including hyperventilation produces some accentuation of the focal slowing, which I will mention below.  Photic stimulation produced no a

In [836]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/065/00006535/s005_2012_07_18/00006535_s005.txt

CLINICAL HISTORY:   44 year old man with recurrent seizures.
MEDICATIONS:  Vimpat, phenobarbital, and Topamax.
REASON FOR STUDY:  Seizure.
INTRODUCTION:  Digital video long-term EEG was performed using the standard 10-20 electrode placement system with additional anterior temporal and single-lead EKG electrodes.  The patient was recorded during wakefulness and sleep.  Activating procedures were not performed.  Continuous spike detection software as well as seizure detection software and nurses' alarm noted events were used to review the EEG.
TECHNICAL DIFFICULTIES:  None.
DESCRIPTION OF THE RECORD:  The record opens to a well-defined posterior dominant rhythm of 9 to 10 Hz which is reactive to eye opening.  There is a normal amount of frontocentral beta.  Activating procedures were not performed.  The patient is recorded in wakefulness and stage I, stage II, and deeper stages of sleep.
ABNORMAL DISCHARGES:
Focal sharp waves at F7/T3 seen occasionally in wakefulness and sleep.
Focal slo

In [70]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/abnormal/01_tcp_ar/095/00009578/s003_2013_03_01/00009578_s003.txt

REASON FOR STUDY:  Seizures.



CLINICAL HISTORY:  This is a 25-year-old man with history of a left cerebellar tumor resection status post chemo and radiation, and epilepsy secondary to the tumor, presents with recurrent seizures with right-sided shaking and confusion, as well as paralysis.



MEDICATIONS:  Keppra, Tylenol, Lovenox, Dilantin and clindamycin.



INTRODUCTION:  A routine EEG was performed using the standard 10-20 electrode placement system with anterior temporal and single lead EKG electrode.  The patient was recorded while awake.  Activating procedures were not performed.



DESCRIPTION OF THE RECORD:  The record opens to a posterior dominant rhythm that can only be seen over the right hemisphere that reaches 8.5 Hz.  There is some frontocentral beta seen over the right hemisphere only.  The patient was recorded only in wakefulness.  Activating procedures were not performed.



HEART RATE:  74.



ABNORMAL DISCHARGES:  Focal slow wave seen continuously over the left hem

In [83]:
!cat /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2.0.0/edf/train/normal/01_tcp_ar/098/00009896/s001_2013_04_24/00009896_s001.txt

REASON FOR STUDY:  Seizure.
CLINICAL HISTORY:  A 56-year-old woman with complaints of anxiety for one year, increased recently and now possible seizures.  Past medical history of hypertension and thyroid disorder.
MEDICATIONS:  Clonazepam, lisinopril, levothyroxine, simvastatin, Travatan.
INTRODUCTION:  A routine EEG was performed using a standard 10-20 electrode placement system with anterior temporal single lead EKG electrode.  The patient was recorded during wakefulness and sleep stage I.  Activating procedures included photic stimulation and hyperventilation.
TECHNICAL DIFFICULTIES:  Some T5 electrode artifact throughout the recording.
DESCRIPTION OF THE RECORD:  The record opens to a posterior dominant rhythm in the range of 9-10 Hz which is reactive to eye opening.  There is an excessive amount of beta throughout the recording.  Activating procedures including hyperventilation, photic stimulation produced no abnormal discharges.  The patient was recorded in wakefulness and stage 

In [86]:
df.iloc[2561]

CLINICAL CORRELATION                                                        NaN
CLINICAL HISTORY                                                            NaN
DESCRIPTION OF THE RECORD                                                   NaN
IMPRESSION                                                                  NaN
INTRODUCTION                                                                NaN
LABEL                                                                    normal
MEDICATIONS                                                                 NaN
PATH                          /data/schirrmr/gemeinl/tuh-abnormal-eeg/raw/v2...
SESSION                                                         s001_2013_04_24
SET                                                                       train
SUBJECT                                                                00009896
REASON FOR STUDY                                                            NaN
TECHNICAL DIFFICULTIES                  