In [5]:
import pandas as pd
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from scipy import fft

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# NOTE(review): `sc` is not referenced by any cell visible in this notebook;
# the features are passed to the classifier unscaled. Confirm whether scaling
# was intended or whether this instance can be dropped.
sc = StandardScaler()

def get_freq_bands_vec(sig, fs=178, max_freq=45):
    """Return FFT magnitudes of each signal for the bins at or below ``max_freq``.

    Parameters
    ----------
    sig : array-like, shape (n_samples,) or (..., n_samples)
        Real-valued time-domain signal(s). The FFT is taken along the last
        axis, so a 2-D input is treated as one signal per row.
    fs : float, default 178
        Sampling frequency. Bin frequencies are expressed in the same unit.
    max_freq : float, default 45
        Highest bin frequency (inclusive) to keep.

    Returns
    -------
    numpy.ndarray
        ``abs(rfft(sig))`` truncated along the last axis to the leading bins
        whose frequency is ``<= max_freq``. Leading axes are unchanged.
    """
    sig = np.asarray(sig)
    n_samples = sig.shape[-1]

    fft_vals = np.abs(fft.rfft(sig, axis=-1))
    # Bin frequencies depend on the actual window length; using a fixed
    # length would mislabel the bins of any signal that is not that long.
    fft_freqs = fft.rfftfreq(n_samples, 1 / fs)

    # rfftfreq is ascending, so the bins to keep always form a leading slice.
    idx_max = int(np.count_nonzero(fft_freqs <= max_freq))
    # Ellipsis keeps this valid for 1-D input as well as batched input.
    return fft_vals[..., :idx_max]

def balance_dataset(X, label, seed=None):
    """Down-sample the larger of the two classes so both have the same count.

    Parameters
    ----------
    X : array-like, shape (n_rows, n_features)
        Feature rows. A 1-D input is treated as a single feature column.
    label : array-like, shape (n_rows,)
        Class labels. Only rows labelled ``1`` (positive) or ``0`` (negative)
        are considered; rows with any other label are dropped.
    seed : None, int, or numpy.random.Generator, optional
        Passed to ``numpy.random.default_rng``. Supply a value to make the
        sampling and shuffling reproducible.

    Returns
    -------
    Xs : numpy.ndarray
        The selected feature rows, in shuffled order.
    Ys : numpy.ndarray
        The labels for ``Xs``, row-aligned. Labels keep their own dtype
        rather than being cast to the dtype of ``X``.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = X[:, np.newaxis]
    label = np.asarray(label).ravel()

    rng = np.random.default_rng(seed)

    pos_idx = np.flatnonzero(label == 1)
    neg_idx = np.flatnonzero(label == 0)

    # Size both classes to the smaller one, so it does not matter which class
    # happens to be the majority.
    n_keep = min(pos_idx.size, neg_idx.size)

    # A prefix of a random permutation is a uniform sample without replacement
    # and is well defined even when a class is empty.
    keep = np.concatenate((
        rng.permutation(pos_idx)[:n_keep],
        rng.permutation(neg_idx)[:n_keep],
    ))

    # Interleave the classes so the result is not "all positives, then all negatives".
    rng.shuffle(keep)

    # Selecting by row index keeps X and label paired without stacking them
    # into a single array (which would force a common dtype).
    return X[keep], label[keep]

# Prints out a list of rows in C# array format.
def print_cs_arr(arr, numcols=3):
    """Print each row of ``arr`` as a brace-delimited, comma-separated block.

    Parameters
    ----------
    arr : iterable of sequences
        Rows to print. Each row must support ``len()`` and integer indexing.
    numcols : int, default 3
        Number of values written per output line.

    Notes
    -----
    Every value except the last in a row is followed by ``", "`` (so full
    lines end with a trailing space), and each line of values is indented by
    four spaces. An empty row is printed as an empty pair of braces.
    """
    for track in arr:
        n_items = len(track)

        # Nothing to list: emit empty braces instead of indexing track[-1].
        if n_items == 0:
            print("{")
            print("}")
            continue

        pieces = ["{"]
        last = n_items - 1
        for pos in range(n_items):
            # Start a fresh indented line every `numcols` values.
            if pos % numcols == 0:
                pieces.append("\n    ")

            # The final value carries no separator.
            pieces.append(f"{track[pos]}" if pos == last else f"{track[pos]}, ")

        print("".join(pieces))
        print("}")
        
def calc_accuracy(clf, X, y):
    """Return ``clf.score(X, y)`` expressed as a percentage.

    ``clf`` only needs a ``score(X, y)`` method. The value it returns is
    multiplied by 100 and handed back unchanged otherwise.
    """
    fraction = clf.score(X, y)
    percent = fraction * 100
    return percent

In [12]:
# Load the labelled recordings. Every column after the first is taken as signal data.
# NOTE(review): `iloc[:, 1:]` keeps *all* remaining columns; if the `y` column is
# among them, the label is included in X. Confirm against the CSV layout.
df = pd.read_csv("assignment_data.csv")
X = df.iloc[:, 1:].values
y = np.array(df.y)

# Per-row FFT magnitudes, then down-sampling so both classes have equal counts.
Xf = get_freq_bands_vec(X)
Xf_s, ys = balance_dataset(Xf, y)

# Hold out 20% of the balanced rows for testing.
# No random_state is passed (and balance_dataset is unseeded), so the split and
# the score shown below change from run to run.
Xf_train, Xf_test, yf_train, yf_test = train_test_split(Xf_s, ys, test_size = 0.2)
lda_f = LinearDiscriminantAnalysis()
lda_f.fit(Xf_train, yf_train)
# Last expression: score on the held-out rows, as a percentage.
calc_accuracy(lda_f, Xf_test, yf_test)

87.28070175438597

In [13]:
# Score the already-fitted `lda_f` on a second file, using the same column
# selection and FFT feature extraction as the training cell.
# Unlike the training data, these rows are not passed through balance_dataset.
df_stream = pd.read_csv("stream_data.csv")
Xstream = df_stream.iloc[:, 1:].values
ystream = np.array(df_stream.y)
Xfstream = get_freq_bands_vec(Xstream)

# Last expression: score on the stream rows, as a percentage.
calc_accuracy(lda_f, Xfstream, ystream)

100.0

In [15]:
# Display the first intercept value of the fitted LDA model.
# NOTE(review): presumably exported together with the coefficients printed in
# the next cell; make sure both come from the same fit.
lda_f.intercept_[0]

-4.258939315569634

In [11]:
# Dump the fitted LDA coefficient rows in brace-delimited form.
# NOTE(review): this cell's execution count (11) is lower than that of the cell
# that fits `lda_f` (12), so the output below may come from an earlier fit than
# the intercept shown at In [15]. Re-run top to bottom before exporting.
print_cs_arr(lda_f.coef_)

{
    3.500104585074871e-05, 5.85520200478203e-05, 9.989393392021291e-05, 
    5.393148601654599e-05, 4.4684645186172575e-05, 5.592833711620393e-05, 
    0.00013452801834527866, 0.00011221343218411046, 4.8398395358631006e-05, 
    4.549087792368332e-05, 4.751571170482208e-05, 6.222446336459684e-06, 
    -1.4663895100583316e-05, 5.08692337625175e-05, 2.2529825405109293e-05, 
    -1.0426513735463466e-05, -4.207478260957604e-05, -8.264605921030252e-05, 
    -7.321927250114614e-05, 9.843581115749142e-06, 2.1271624442541146e-05, 
    1.7877840875616958e-05, 5.2307221539283215e-05, -3.500943918055206e-05, 
    -8.940674357586027e-05, 8.881871297638265e-05, 0.00020127312580325407, 
    0.00024156854200927622, 3.305797904671547e-05, 0.00017963132856232012, 
    -3.555549239796646e-05, 0.00014622604169083276, 4.802200142956232e-06, 
    0.0006106409217875772, 0.00013261180381890774, 0.00024858970598001074, 
    0.0006477404650415447, 4.316975585302066e-05, 0.00025965849484846804, 
    0.0003992

In [17]:
# Dump the first stream row (the values as loaded, before the FFT step) in
# brace-delimited form; wrapped in a list because print_cs_arr iterates over rows.
print_cs_arr([Xstream[0]])

{
    17.999267578125, 19.01922607421875, 38.9984130859375, 
    40.01837158203125, 28.01885986328125, 2.9998779296875, 
    -13.01947021484375, -16.97930908203125, -8.9996337890625, 
    1.01995849609375, 4.97979736328125, 1.01995849609375, 
    -4.97979736328125, -32.9986572265625, -32.9986572265625, 
    -31.01873779296875, -29.998779296875, -17.999267578125, 
    -16.97930908203125, -8.9996337890625, 13.01947021484375, 
    2.9998779296875, -10.97955322265625, -46.97808837890625, 
    -58.01763916015625, -34.01861572265625, 13.01947021484375, 
    37.01849365234375, 55.01776123046875, 58.01763916015625, 
    49.01800537109375, 59.99755859375, 79.01678466796875, 
    77.996826171875, 58.01763916015625, 35.99853515625, 
    2.9998779296875, -5.999755859375, -4.01983642578125, 
    -2.9998779296875, 4.01983642578125, -20.9991455078125, 
    -22.01910400390625, -16.01934814453125, -10.01959228515625, 
    4.97979736328125, 10.01959228515625, 0.0, 
    -14.9993896484375, -31.97869873046

In [26]:
# Stream rows whose label column `y` equals 1, with the same column selection as Xstream.
xstreamPos = df_stream.query('y == 1').iloc[:, 1:].values

# FFT-magnitude features of the first such row; wrapped in a list so the input is 2-D (one row).
get_freq_bands_vec([xstreamPos[0]])

array([[ 4497.11700439,  3097.28655343,  4438.9910916 ,  4236.42433312,
         7634.68118011,  6769.94985286,  9433.979992  , 55909.22562102,
         6604.02663919,  7781.54913966,  7431.80699409,  5820.95141134,
         5870.64528072,  7761.32222773, 20627.33807099,  4546.43552373,
         6213.10090398,  2673.36456082,  3134.13127487,  1617.38371315,
         1621.33017355,  4236.93007882,  1133.70548905,  2070.20328548,
          816.21913834,  1055.2975814 ,  1502.9800411 ,  2340.94063697,
          829.30274112,  1050.81747406,  1106.29614528,   956.13956641,
          469.16300245,   556.88281324,  1195.37318877,   533.33085344,
          728.66782731,  1233.16572113,   213.55954393,   616.28655349,
          156.0490473 ,   322.25511016,   340.65441484,   320.3850366 ,
          653.59077022,   346.6855354 ]])