1. Import Libraries

In [33]:
import pandas as pd
import csv

2. Load the EEG Data

In [34]:
import pandas as pd
import re
from collections import Counter  # (optional)

# -------- CONFIG --------
# Location of the raw EEG export that the rest of this cell parses.
path = r"C:\Self Learning\Research Papers\UOW Research Papers\eSport Players with EEG Data\EEG Data\Amin1\Amin1_EPOC_186248_2023.05.25T01.15.44+01.00.md.mc.pm.fe.bp.csv"
# Lower bound on the number of parsed columns; checked once the file has been read.
expected_min_cols = 170
# Encodings attempted, in this order, when decoding the file.
encodings_to_try = ["utf-8-sig", "utf-8", "latin-1"]

# Separator candidates: the literal single-character ones first, then the regex fallback.
candidates = {literal_sep: {"is_regex": False} for literal_sep in (",", ";", "\t", "|")}
candidates[r"\s+"] = {"is_regex": True}

# -------- STEP 1: decode a text sample for analysis --------
# Read the raw bytes exactly once. A missing or unreadable file raises its own
# OSError here instead of being reported later as a decoding failure.
with open(path, "rb") as f:
    raw = f.read()

text = None
used_encoding = None

# Try the encodings in order; only a genuine decode failure moves on to the next one.
for enc in encodings_to_try:
    try:
        text = raw.decode(enc, errors="strict")
    except UnicodeDecodeError:
        continue
    used_encoding = enc
    break

if text is None:
    raise RuntimeError("Could not decode file with utf-8-sig, utf-8, or latin-1.")

# Only the first 200 lines (or fewer, for short files) are inspected afterwards.
lines = text.splitlines()
sample_n = min(200, len(lines))
sample_lines = lines[:sample_n]

# -------- STEP 2: detect the header line and the best delimiter --------
def field_count(line, sep, is_regex):
    """Count the fields that `line` splits into under separator `sep`.

    Parameters
    ----------
    line : str
        One line of text from the file.
    sep : str
        The separator: a literal string, or a regular expression when
        `is_regex` is true.
    is_regex : bool
        Whether `sep` must be applied with `re.split` instead of `str.split`.

    Returns
    -------
    int
        0 for an empty or whitespace-only line, otherwise the number of pieces
        produced by the split.
    """
    if not line.strip():
        return 0
    if is_regex:
        # Surrounding whitespace is removed first so that a whitespace pattern
        # does not produce empty fields at the edges of the line.
        return len(re.split(sep, line.strip()))
    # Literal separator: remove only the line terminator. A full strip() would
    # also eat leading/trailing tabs and under-count empty edge fields of
    # tab-separated lines.
    return len(line.rstrip("\r\n").split(sep))

# Every (field_count, line_index, separator, is_regex) combination for the
# non-blank sample lines, produced line by line and, within a line, in the
# order in which the separators are listed in `candidates`.
scored = (
    (field_count(text_line, delim, info["is_regex"]), line_no, delim, info["is_regex"])
    for line_no, text_line in enumerate(sample_lines)
    if text_line.strip()
    for delim, info in candidates.items()
)

# max() keeps the first of several equally high entries, so the earliest
# line/separator combination wins a tie. `None` means no non-blank line existed.
best = max(scored, key=lambda entry: entry[0], default=None)

if best is None:
    raise RuntimeError("Could not identify any plausible header line.")

best_count, header_idx, best_sep, best_is_regex = best

if best_count < 50:
    print(f"⚠️ Detected only {best_count} fields on line {header_idx}. Continuing anyway.")

# -------- STEP 3: read the file from the detected header line --------
# The separator is passed unchanged in both cases; only the parser engine
# depends on whether the separator is a regex.
read_kwargs = dict(
    encoding=used_encoding,
    header=0,                                     # first row left after skipping is the header
    skiprows=header_idx,                          # drop every line above the detected header
    dtype=str,                                    # read every column as text
    quotechar='"',
    doublequote=True,
    na_filter=False,                              # no missing-value detection
    sep=best_sep,
    engine="python" if best_is_regex else "c",
)
# Add on_bad_lines="skip" to the arguments above if malformed rows must be skipped.

df = pd.read_csv(path, **read_kwargs)

# -------- STEP 4: diagnostics and hard check --------
# Report what was detected and the shape of the parsed frame.
for report_line in (
    f"Detected encoding: {used_encoding}",
    f"Detected header line index: {header_idx}",
    f"Detected separator: {'REGEX ' if best_is_regex else ''}{best_sep}",
    f"Shape: {df.shape}",
):
    print(report_line)

# When too few columns were parsed, show the start of the header line
# (tabs made visible) before the assertion below stops the cell.
too_few_cols = df.shape[1] < expected_min_cols
if too_few_cols:
    hdr_preview = sample_lines[header_idx][:200].replace("\t", "\\t")
    print(f"⚠️ Columns < {expected_min_cols}. Header preview: {hdr_preview}")

assert not too_few_cols, (
    f"Only {df.shape[1]} columns parsed; expected ≥ {expected_min_cols} (A..FN)."
)

# Quick look at the first column names and rows.
print(df.columns.tolist()[:12])
print(df.head(3))


Detected encoding: utf-8-sig
Detected header line index: 1
Detected separator: ,
Shape: (55538, 170)
['Timestamp', 'OriginalTimestamp', 'EEG.Counter', 'EEG.Interpolated', 'EEG.AF3', 'EEG.F7', 'EEG.F3', 'EEG.FC5', 'EEG.T7', 'EEG.P7', 'EEG.O1', 'EEG.O2']
           Timestamp  OriginalTimestamp EEG.Counter EEG.Interpolated  \
0  1684973744.610299  1684973744.610602    4.000000         0.000000   
1  1684973744.618119  1684973744.618322    5.000000         0.000000   
2  1684973744.625939  1684973744.626141    6.000000         0.000000   

       EEG.AF3       EEG.F7       EEG.F3      EEG.FC5       EEG.T7  \
0  3728.205078  4473.846191  4032.820557  4274.871582  4424.102539   
1  3716.410156  4469.743652  4026.666748  4272.307617  4424.102539   
2  3720.512939  4472.307617  4032.307617  4274.871582  4425.641113   

        EEG.P7       EEG.O1       EEG.O2       EEG.P8       EEG.T8  \
0  4455.897461  4358.974121  3968.205078  4309.743652  3738.974365   
1  4459.487305  4342.051270  3962.564

In [35]:
# Work on a separate copy so that `df` keeps the frame exactly as it was loaded.
data = df.copy()

3. Data Preprocessing

3.1. Extract the information of the dataset

In [36]:
# Print the frame summary: index range, number of columns, dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55538 entries, 0 to 55537
Columns: 170 entries, Timestamp to POW.AF4.Gamma
dtypes: object(170)
memory usage: 72.0+ MB


In [37]:
# Display the column labels of the loaded frame.
data.columns

Index(['Timestamp', 'OriginalTimestamp', 'EEG.Counter', 'EEG.Interpolated',
       'EEG.AF3', 'EEG.F7', 'EEG.F3', 'EEG.FC5', 'EEG.T7', 'EEG.P7',
       ...
       'POW.F8.Theta', 'POW.F8.Alpha', 'POW.F8.BetaL', 'POW.F8.BetaH',
       'POW.F8.Gamma', 'POW.AF4.Theta', 'POW.AF4.Alpha', 'POW.AF4.BetaL',
       'POW.AF4.BetaH', 'POW.AF4.Gamma'],
      dtype='object', length=170)

In [38]:
# NOTE: pd.set_option changes the display settings for the whole session,
# so every later DataFrame display in this kernel is affected as well.
pd.set_option('display.max_columns', None)  # Show every column (no truncation)
pd.set_option('display.width', 0)           # Let pandas auto-size the table to the notebook width
# Preview the first rows with all columns visible.
data.head()

Unnamed: 0,Timestamp,OriginalTimestamp,EEG.Counter,EEG.Interpolated,EEG.AF3,EEG.F7,EEG.F3,EEG.FC5,EEG.T7,EEG.P7,EEG.O1,EEG.O2,EEG.P8,EEG.T8,EEG.FC6,EEG.F4,EEG.F8,EEG.AF4,EEG.RawCq,EEG.Battery,EEG.BatteryPercent,MarkerIndex,MarkerType,MarkerValueInt,EEG.MarkerHardware,CQ.AF3,CQ.F7,CQ.F3,CQ.FC5,CQ.T7,CQ.P7,CQ.O1,CQ.O2,CQ.P8,CQ.T8,CQ.FC6,CQ.F4,CQ.F8,CQ.AF4,CQ.Overall,EQ.SampleRateQuality,EQ.OVERALL,EQ.AF3,EQ.F7,EQ.F3,EQ.FC5,EQ.T7,EQ.P7,EQ.O1,EQ.O2,EQ.P8,EQ.T8,EQ.FC6,EQ.F4,EQ.F8,EQ.AF4,MOT.CounterMems,MOT.InterpolatedMems,MOT.GyroX,MOT.GyroY,MC.Action,MC.ActionPower,MC.IsActive,PM.Engagement.IsActive,PM.Engagement.Scaled,PM.Engagement.Raw,PM.Engagement.Min,PM.Engagement.Max,PM.Excitement.IsActive,PM.Excitement.Scaled,PM.Excitement.Raw,PM.Excitement.Min,PM.Excitement.Max,PM.LongTermExcitement,PM.Stress.IsActive,PM.Stress.Scaled,PM.Stress.Raw,PM.Stress.Min,PM.Stress.Max,PM.Relaxation.IsActive,PM.Relaxation.Scaled,PM.Relaxation.Raw,PM.Relaxation.Min,PM.Relaxation.Max,PM.Interest.IsActive,PM.Interest.Scaled,PM.Interest.Raw,PM.Interest.Min,PM.Interest.Max,PM.Focus.IsActive,PM.Focus.Scaled,PM.Focus.Raw,PM.Focus.Min,PM.Focus.Max,FE.BlinkWink,FE.HorizontalEyesDirection,FE.UpperFaceAction,FE.UpperFaceActionPower,FE.LowerFaceAction,FE.LowerFaceActionPower,POW.AF3.Theta,POW.AF3.Alpha,POW.AF3.BetaL,POW.AF3.BetaH,POW.AF3.Gamma,POW.F7.Theta,POW.F7.Alpha,POW.F7.BetaL,POW.F7.BetaH,POW.F7.Gamma,POW.F3.Theta,POW.F3.Alpha,POW.F3.BetaL,POW.F3.BetaH,POW.F3.Gamma,POW.FC5.Theta,POW.FC5.Alpha,POW.FC5.BetaL,POW.FC5.BetaH,POW.FC5.Gamma,POW.T7.Theta,POW.T7.Alpha,POW.T7.BetaL,POW.T7.BetaH,POW.T7.Gamma,POW.P7.Theta,POW.P7.Alpha,POW.P7.BetaL,POW.P7.BetaH,POW.P7.Gamma,POW.O1.Theta,POW.O1.Alpha,POW.O1.BetaL,POW.O1.BetaH,POW.O1.Gamma,POW.O2.Theta,POW.O2.Alpha,POW.O2.BetaL,POW.O2.BetaH,POW.O2.Gamma,POW.P8.Theta,POW.P8.Alpha,POW.P8.BetaL,POW.P8.BetaH,POW.P8.Gamma,POW.T8.Theta,POW.T8.Alpha,POW.T8.BetaL,POW.T8.BetaH,POW.T8.Gamma,POW.FC6.Theta,POW.FC6.Alpha,POW.FC6.BetaL,POW.FC6.BetaH,POW.FC6.Gamma,POW.F4.T
heta,POW.F4.Alpha,POW.F4.BetaL,POW.F4.BetaH,POW.F4.Gamma,POW.F8.Theta,POW.F8.Alpha,POW.F8.BetaL,POW.F8.BetaH,POW.F8.Gamma,POW.AF4.Theta,POW.AF4.Alpha,POW.AF4.BetaL,POW.AF4.BetaH,POW.AF4.Gamma
0,1684973744.610299,1684973744.610602,4.0,0.0,3728.205078,4473.846191,4032.820557,4274.871582,4424.102539,4455.897461,4358.974121,3968.205078,4309.743652,3738.974365,3954.871826,4508.205078,4233.846191,4268.717773,0.0,4.0,77.0,,,,0.0,4.0,4.0,4.0,4.0,0.0,4.0,4.0,4.0,4.0,0.0,4.0,4.0,4.0,4.0,33.333332,,,,,,,,,,,,,,,,,4.0,0.0,2185.0,1466.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1684973744.618119,1684973744.618322,5.0,0.0,3716.410156,4469.743652,4026.666748,4272.307617,4424.102539,4459.487305,4342.05127,3962.564209,4306.153809,3738.974365,3949.743652,4503.077148,4227.692383,4258.461426,537.0,4.0,77.0,,,,0.0,4.0,4.0,4.0,4.0,0.0,4.0,4.0,4.0,4.0,0.0,4.0,4.0,4.0,4.0,33.333332,1.0,25.0,4.0,3.0,4.0,4.0,0.0,4.0,4.0,4.0,4.0,0.0,4.0,4.0,3.0,4.0,5.0,0.0,2186.0,1467.0,1.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,64.0,0.072763,1.0,0.0,2.752913,0.586717,0.8056,0.708167,0.178872,1.860343,1.025542,0.844825,0.429433,0.128652,2.626009,0.768515,0.544751,0.628888,0.179261,3.233267,0.669053,1.16054,0.533477,0.15731,0.025542,0.02189,0.01701,0.027438,0.037172,0.82282,0.730236,0.990768,0.427635,0.47874,1.0708,0.970814,0.595627,0.632517,0.248471,2.393476,2.992843,0.693447,1.172032,0.698712,1.707458,1.819474,0.956803,0.803948,0.248087,0.0,0.0,0.0,0.0,0.0,2.193424,1.7109,1.281903,0.939407,0.252571,1.646644,1.745216,0.89928,1.361151,0.249985,3.761983,1.698913,1.141317,0.896365,0.267288,2.519924,1.171301,0.972003,0.813173,0.25897
2,1684973744.625939,1684973744.626141,6.0,0.0,3720.512939,4472.307617,4032.307617,4274.871582,4425.641113,4449.743652,4350.256348,3956.410156,4308.205078,3738.974365,3953.846191,4501.538574,4236.410156,4263.077148,549.0,4.0,77.0,,,,0.0,4.0,4.0,4.0,4.0,0.0,4.0,4.0,4.0,4.0,0.0,4.0,4.0,4.0,4.0,33.333332,,,,,,,,,,,,,,,,,6.0,0.0,2185.0,1469.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1684973744.633758,1684973744.633861,7.0,0.0,3738.974365,4484.615234,4048.205078,4285.128418,4426.153809,4441.538574,4368.205078,3964.615479,4316.410156,3738.974365,3963.076904,4507.692383,4248.205078,4276.922852,447.0,4.0,77.0,,,,0.0,4.0,4.0,4.0,4.0,0.0,4.0,4.0,4.0,4.0,0.0,4.0,4.0,4.0,4.0,33.333332,,,,,,,,,,,,,,,,,7.0,0.0,2186.0,1469.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1684973744.641578,1684973744.641681,8.0,0.0,3748.205078,4491.794922,4056.410156,4290.256348,4425.128418,4452.307617,4374.871582,3981.538574,4321.025879,3738.974365,3968.718018,4517.94873,4250.769043,4284.102539,470.0,4.0,77.0,,,,0.0,4.0,4.0,4.0,4.0,0.0,4.0,4.0,4.0,4.0,0.0,4.0,4.0,4.0,4.0,33.333332,,,,,,,,,,,,,,,,,8.0,0.0,2186.0,1470.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


3.2. Keep only the columns from E-R

In [39]:
# Keep only columns E..R (Excel letters).
# A=0,B=1,C=2,D=3,E=4,...,R=17 (0-based). iloc stop is exclusive, so use 18.
start_idx = 4           # E
stop_idx_exclusive = 18 # R + 1

# Clamp the stop index in case the file has fewer columns
stop_idx_exclusive = min(stop_idx_exclusive, df.shape[1])

# Slice from the untouched loaded frame `df` instead of from `data` itself.
# `data` is overwritten here, so slicing `data` would make a second run of this
# cell cut columns 4..17 out of the already reduced frame and silently return
# the wrong channels. Slicing `df` gives the same result on every run.
data = df.iloc[:, start_idx:stop_idx_exclusive].copy()

# (Optional) quick check
print(data.shape)

(55538, 14)


In [40]:
# Preview the first rows of the frame after the column selection.
data.head()

Unnamed: 0,EEG.AF3,EEG.F7,EEG.F3,EEG.FC5,EEG.T7,EEG.P7,EEG.O1,EEG.O2,EEG.P8,EEG.T8,EEG.FC6,EEG.F4,EEG.F8,EEG.AF4
0,3728.205078,4473.846191,4032.820557,4274.871582,4424.102539,4455.897461,4358.974121,3968.205078,4309.743652,3738.974365,3954.871826,4508.205078,4233.846191,4268.717773
1,3716.410156,4469.743652,4026.666748,4272.307617,4424.102539,4459.487305,4342.05127,3962.564209,4306.153809,3738.974365,3949.743652,4503.077148,4227.692383,4258.461426
2,3720.512939,4472.307617,4032.307617,4274.871582,4425.641113,4449.743652,4350.256348,3956.410156,4308.205078,3738.974365,3953.846191,4501.538574,4236.410156,4263.077148
3,3738.974365,4484.615234,4048.205078,4285.128418,4426.153809,4441.538574,4368.205078,3964.615479,4316.410156,3738.974365,3963.076904,4507.692383,4248.205078,4276.922852
4,3748.205078,4491.794922,4056.410156,4290.256348,4425.128418,4452.307617,4374.871582,3981.538574,4321.025879,3738.974365,3968.718018,4517.94873,4250.769043,4284.102539


* The first row is the header row, so it is not used in the simulation.

3.3. Extract last 53k rows

In [41]:
# Keep the final 53,000 rows and renumber the index from zero.
data_last_53k = data.iloc[-53000:].reset_index(drop=True)

print("data_last_53k shape:", data_last_53k.shape)

data_last_53k shape: (53000, 14)


In [42]:
# Preview the first rows of the 53,000-row extract.
data_last_53k.head()

Unnamed: 0,EEG.AF3,EEG.F7,EEG.F3,EEG.FC5,EEG.T7,EEG.P7,EEG.O1,EEG.O2,EEG.P8,EEG.T8,EEG.FC6,EEG.F4,EEG.F8,EEG.AF4
0,3616.923096,4264.615234,4838.461426,4277.94873,4428.205078,4324.615234,4328.205078,3961.025635,4273.333496,3738.461426,3920.512939,4517.436035,4322.563965,4209.230957
1,3626.666748,4267.692383,4833.846191,4279.487305,4433.333496,4330.256348,4340.512695,3967.692383,4274.358887,3739.487061,3929.230713,4532.307617,4333.333496,4221.538574
2,3638.461426,4279.487305,4839.487305,4283.589844,4433.846191,4314.871582,4322.563965,3961.025635,4285.128418,3740.0,3942.564209,4543.589844,4349.743652,4239.487305
3,3638.461426,4282.563965,4841.025879,4283.077148,4432.820313,4309.230957,4304.102539,3955.384521,4292.307617,3738.974365,3949.230713,4544.615234,4354.358887,4246.153809
4,3628.205078,4274.871582,4827.179688,4278.461426,4433.846191,4323.589844,4307.179688,3956.410156,4280.512695,3738.461426,3937.435791,4530.769043,4344.615234,4230.256348


3.4. Create csv file from filtered data

In [43]:
import os
import csv  # you already imported this above

# Requires `data_last_53k` (built in the previous step) and `path`
# (the location of the original CSV).

# The filtered CSV is written into the folder that holds the source recording.
source_dir = os.path.dirname(path)
output_path = os.path.join(source_dir, "amin-1_eeg_modified.csv")

csv_options = {
    "index": False,                  # do not write the row index as a column
    "encoding": "utf-8-sig",         # UTF-8 with BOM, which Excel recognises
    "quoting": csv.QUOTE_MINIMAL,
    # "lineterminator": "\n",        # optional: normalize newlines
    #                                # (spelled `line_terminator` before pandas 1.5)
}
data_last_53k.to_csv(output_path, **csv_options)

print(f"✅ Saved: {output_path} | shape: {data_last_53k.shape}")


✅ Saved: C:\Self Learning\Research Papers\UOW Research Papers\eSport Players with EEG Data\EEG Data\Amin1\amin-1_eeg_modified.csv | shape: (53000, 14)


4. Create 2D array

4.1. Convert to an array

In [44]:
import numpy as np

# Convert the frame to a numeric matrix.
# The CSV was read with dtype=str, so a plain to_numpy() would return an object
# array of strings. Casting to float64 makes the samples usable as numbers and
# lets np.save store the result as a regular numeric .npy file instead of
# pickled Python objects.
# A cell that is not a valid number (e.g. an empty string) raises ValueError here.
arr = data_last_53k.astype(np.float64).to_numpy(copy=True)
assert arr.shape == (53000, 14)

In [45]:
# Confirm the array dimensions as (rows, columns).
arr.shape

(53000, 14)

4.2. Transpose the array

In [46]:
# Swap the two axes so that the 14 columns become the rows of the array.
arr_T = arr.transpose()
print(arr_T.shape)   # (14, 53000)

(14, 53000)


5. Segment into (14, 200) shape

In [47]:
import numpy as np

seg_len = 200
channels, total_samples = arr_T.shape

# Number of complete windows that fit (53000 // 200 = 265 for this recording).
n_segments = total_samples // seg_len

# Drop trailing samples that do not fill a whole window (none are dropped here).
usable = seg_len * n_segments
arr_T_trim = arr_T[:, :usable]

# Cut every channel into consecutive windows → (channels, segments, seg_len),
# then swap the first two axes so that the window index comes first.
per_channel_windows = arr_T_trim.reshape(channels, n_segments, seg_len)
segments = np.swapaxes(per_channel_windows, 0, 1)

print("segments.shape:", segments.shape)  # (265, 14, 200)


segments.shape: (265, 14, 200)


6. Add target variable

In [48]:
import numpy as np

# One label (0) per segment. The length is taken from `segments` so that the
# label vector always matches the number of windows, instead of relying on a
# hard-coded 265.
y = np.zeros(segments.shape[0], dtype=np.int64)      # shape (n_segments,)
# or as a column vector:
# y = np.zeros((segments.shape[0], 1), dtype=np.int64)


In [49]:
# Confirm the length of the label vector.
y.shape

(265,)

In [50]:
# Show the first ten labels.
y[:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

7. Save the arrays

7.1. Save the segments

In [51]:
import os
import numpy as np
import csv  # if you're also writing CSVs

# Inputs: `path` (location of the original CSV) and `segments`,
# whose shape was printed above as (265, 14, 200).

# Everything is written into the folder that holds the source recording.
out_dir = os.path.dirname(path)

# ---- Save segments as a NumPy array (.npy) ----
# `adam_eeg_path` holds the destination of the amin-1 segment file.
adam_eeg_path = os.path.join(out_dir, "amin-1_eeg.npy")
np.save(file=adam_eeg_path, arr=segments)
print(f"✅ Saved segments → {adam_eeg_path} | shape: {segments.shape}")

# The label vector `y` is saved separately in section 7.2.



✅ Saved segments → C:\Self Learning\Research Papers\UOW Research Papers\eSport Players with EEG Data\EEG Data\Amin1\amin-1_eeg.npy | shape: (265, 14, 200)


7.2. Save the target array

In [52]:
#y = np.ones(segments.shape[0], dtype=np.int64)
# Store the label vector in `out_dir`, the folder set up in the previous cell.
y_path = os.path.join(out_dir, "amin-1_eeg_labels.npy")
np.save(file=y_path, arr=y)
print(f"✅ Saved labels   → {y_path} | shape: {y.shape}")

✅ Saved labels   → C:\Self Learning\Research Papers\UOW Research Papers\eSport Players with EEG Data\EEG Data\Amin1\amin-1_eeg_labels.npy | shape: (265,)
