In [1]:
import pandas as pd
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from scipy import fft

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# NOTE(review): `sc` is not referenced by any cell visible in this notebook —
# confirm whether feature scaling before LDA was intended, or remove it.
sc = StandardScaler()

def get_freq_bands_vec(sig, fs=178, max_freq=45):
    """Return the magnitude spectrum of each signal, truncated at ``max_freq``.

    Parameters
    ----------
    sig : array-like, shape (n_samples,) or (..., n_samples)
        One signal, or a batch of signals stored along the last axis.
    fs : float, default 178
        Sampling frequency of ``sig``.
    max_freq : float, default 45
        Highest frequency (same unit as ``fs``) to keep, inclusive.

    Returns
    -------
    numpy.ndarray
        ``abs(rfft(sig))`` restricted to the bins whose frequency is
        ``<= max_freq``. Leading dimensions match those of ``sig``.
    """
    sig = np.asarray(sig)

    fft_vals = np.abs(fft.rfft(sig))
    # Bin frequencies depend on the actual number of samples; a hard-coded
    # length picks the wrong cut-off bin for any signal that is not 178 long.
    fft_freqs = fft.rfftfreq(sig.shape[-1], 1 / fs)

    # rfftfreq is non-decreasing, so the number of bins <= max_freq is also
    # the exclusive end index of the slice to keep.
    idx_max = np.count_nonzero(fft_freqs <= max_freq)
    return fft_vals[..., :idx_max]

def balance_dataset(X, label, random_state=None):
    """Downsample the larger class so both classes have equally many rows.

    Parameters
    ----------
    X : array-like, shape (n_rows, n_features)
        Feature matrix.
    label : array-like, shape (n_rows,)
        Binary labels; rows are selected where the label equals 1 or 0.
    random_state : None, int, or numpy.random.Generator, default None
        Passed to ``numpy.random.default_rng``. Supply an int to make the
        sampling and the shuffle reproducible.

    Returns
    -------
    Xs, Ys : numpy.ndarray
        Shuffled, balanced features and labels. Features and labels pass
        through one stacked array, so ``Ys`` has that array's dtype.
    """
    # One generator for both sampling and shuffling, so a single seed
    # controls the whole function.
    rng = np.random.default_rng(random_state)

    # Combine X and label into a single 2D array so rows stay aligned
    data = np.column_stack((X, label))

    # Split the data into positives and negatives
    positives = data[data[:, -1] == 1]
    negatives = data[data[:, -1] == 0]

    # Downsample whichever class is larger; the smaller one is kept whole.
    n_keep = min(positives.shape[0], negatives.shape[0])
    if positives.shape[0] > n_keep:
        positives = positives[rng.choice(positives.shape[0], size=n_keep, replace=False)]
    if negatives.shape[0] > n_keep:
        negatives = negatives[rng.choice(negatives.shape[0], size=n_keep, replace=False)]

    # Concatenate the two classes and shuffle the rows in place
    balanced_data = np.vstack((positives, negatives))
    rng.shuffle(balanced_data)

    # Split the balanced data back into X and label
    Xs = balanced_data[:, :-1]
    Ys = balanced_data[:, -1]

    return Xs, Ys

# Prints out a list of sequences in C# array format.
def print_cs_arr(arr, numcols=3):
    """Print each sequence in ``arr`` as a brace-delimited, comma-separated block.

    Parameters
    ----------
    arr : iterable of sequences
        Each element is printed as its own ``{ ... }`` block.
    numcols : int, default 3
        Number of values printed per line inside a block.

    An empty sequence prints as an empty block instead of raising
    ``IndexError``.
    """
    for track in arr:
        n_values = len(track)
        print("{", end='')

        if n_values == 0:
            # Nothing to list: close the brace on the next line.
            print()
            print("}")
            continue

        for col, value in enumerate(track):
            # Start a new indented line every `numcols` values.
            if col % numcols == 0:
                print("\n    ", end='')

            if col < n_values - 1:
                print(f"{value}, ", end='')
            else:
                # Last value: no trailing comma, and end the line.
                print(value)

        print("}")
        
def calc_accuracy(clf, X, y):
    """Return ``clf.score(X, y)`` expressed as a percentage.

    ``clf`` may be any object that exposes ``score(X, y)``.
    """
    percent = clf.score(X, y) * 100
    return percent

In [2]:
# Load the labelled recordings: features are every column after the first,
# labels come from the `y` column.
# NOTE(review): if `y` is not the first column, `iloc[:, 1:]` also puts it
# into X (label leakage) — confirm the CSV column layout.
df = pd.read_csv("assignment_data.csv")
X = df.iloc[:, 1:].values
y = np.array(df.y)

# Convert each row to its truncated magnitude spectrum, then balance classes.
Xf = get_freq_bands_vec(X)
Xf_s, ys = balance_dataset(Xf, y)

# Hold out 20% for testing and fit LDA on the rest.
# No random_state is passed here, so the split (and the accuracy shown
# below) can differ from run to run.
Xf_train, Xf_test, yf_train, yf_test = train_test_split(Xf_s, ys, test_size = 0.2)
lda_f = LinearDiscriminantAnalysis()
lda_f.fit(Xf_train, yf_train)
calc_accuracy(lda_f, Xf_test, yf_test)

89.58333333333334

In [3]:
# Score the already-fitted `lda_f` on a second file. This data goes through
# the same spectral transform but is not class-balanced.
# NOTE(review): same `iloc[:, 1:]` column-layout assumption as for the
# training data — confirm `y` is not included in the features.
df_stream = pd.read_csv("stream_data.csv")
Xstream = df_stream.iloc[:, 1:].values
ystream = np.array(df_stream.y)
Xfstream = get_freq_bands_vec(Xstream)

calc_accuracy(lda_f, Xfstream, ystream)

100.0

In [4]:
# First element of the fitted LDA intercept vector.
lda_f.intercept_[0]

-4.123590025129676

In [5]:
# Dump the fitted LDA coefficient rows in C# array format.
print_cs_arr(lda_f.coef_)

{
    2.393854404613444e-05, 4.43734814333502e-05, 0.00010711998974713549, 
    5.2382267262760616e-05, 3.482292446690348e-05, 5.81644940027338e-05, 
    0.00014283148946524522, 0.00010661045121351692, 5.3886373069000136e-05, 
    3.9374311300536184e-05, 3.5177671655783e-05, -6.248530985487442e-06, 
    -2.2658918575145503e-06, 2.065200237607213e-05, 3.8282278835623364e-05, 
    -3.261125475821274e-06, -3.36659448376386e-05, -9.515207389792815e-05, 
    -6.903709829155665e-05, 3.17153414471976e-05, 4.5664818665058584e-05, 
    1.4159088251044835e-05, 5.8463685581459536e-05, -2.372720736444919e-05, 
    -7.440296071127977e-05, 7.007588744335804e-05, 0.0002168960473265773, 
    0.00022516825670470888, 5.274458347737229e-05, 0.0001242817129292018, 
    -2.771296464553915e-05, 9.314983609394977e-05, 8.519714157483721e-05, 
    0.0004682987239197689, 1.952438002875265e-05, 0.00014540003708971271, 
    0.0005254582627487196, -0.0001477797789196373, 0.00028540061575577514, 
    0.000490163389

In [6]:
# Dump the first stream row (untransformed values) in C# array format.
# It is wrapped in a list because print_cs_arr iterates over rows.
print_cs_arr([Xstream[0]])

{
    22.97906494140625, 28.01885986328125, 28.01885986328125, 
    17.999267578125, -1.97991943359375, -28.01885986328125, 
    -44.9981689453125, -58.97760009765625, -52.97784423828125, 
    -35.99853515625, -16.01934814453125, 8.9996337890625, 
    31.01873779296875, 43.97821044921875, 46.01812744140625, 
    38.9984130859375, 35.99853515625, 37.01849365234375, 
    37.97845458984375, 49.01800537109375, 53.997802734375, 
    67.01727294921875, 68.9971923828125, 76.01690673828125, 
    76.97686767578125, 73.01702880859375, 61.97747802734375, 
    49.97796630859375, 34.97857666015625, 25.97894287109375, 
    28.97882080078125, 28.97882080078125, 31.01873779296875, 
    32.9986572265625, 38.9984130859375, 43.01824951171875, 
    50.9979248046875, 58.01763916015625, 62.9974365234375, 
    67.01727294921875, 65.997314453125, 61.97747802734375, 
    58.01763916015625, 55.01776123046875, 53.997802734375, 
    46.01812744140625, 31.97869873046875, 16.01934814453125, 
    -1.97991943359375, 

In [12]:
# Rows of the stream data labelled y == 1, with the first column dropped.
xstreamPos = df_stream.query('y == 1').iloc[:, 1:].values

# Spectrum of the first positive row, wrapped in a list so it is passed as a
# single-row 2-D batch.
get_freq_bands_vec([xstreamPos[0]])

array([[ 4497.11700439,  3097.28655343,  4438.9910916 ,  4236.42433312,
         7634.68118011,  6769.94985286,  9433.979992  , 55909.22562102,
         6604.02663919,  7781.54913966,  7431.80699409,  5820.95141134,
         5870.64528072,  7761.32222773, 20627.33807099,  4546.43552373,
         6213.10090398,  2673.36456082,  3134.13127487,  1617.38371315,
         1621.33017355,  4236.93007882,  1133.70548905,  2070.20328548,
          816.21913834,  1055.2975814 ,  1502.9800411 ,  2340.94063697,
          829.30274112,  1050.81747406,  1106.29614528,   956.13956641,
          469.16300245,   556.88281324,  1195.37318877,   533.33085344,
          728.66782731,  1233.16572113,   213.55954393,   616.28655349,
          156.0490473 ,   322.25511016,   340.65441484,   320.3850366 ,
          653.59077022,   346.6855354 ]])