In [1]:
# Approach 1: statistical features computed directly from the raw signals
import numpy as np
import pandas as pd
from scipy.signal import find_peaks
from pywt import wavedec
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import joblib

def extract_stat_features(data):
    """Compute seven summary statistics for every signal (column) of ``data``.

    Parameters
    ----------
    data : array-like
        Either a 2-D table in which each column is one signal, or a 1-D
        sequence holding a single signal.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_signals, 7)``, one row per column of ``data`` in
        column order. The seven values are: mean, standard deviation,
        minimum, maximum, sum of squares, number of local maxima and number
        of local minima (both counted with ``scipy.signal.find_peaks``).
    """
    data = np.asarray(data)
    if data.ndim == 1:
        # The loop below iterates over columns (data.T), so a lone 1-D signal
        # has to become a single column. Reshaping it to a single row would
        # turn every sample into its own one-point "signal".
        data = data.reshape(-1, 1)

    features = []
    for signal in data.T:
        signal = np.ravel(signal)
        peaks, _ = find_peaks(signal)
        # Local minima are the peaks of the negated signal.
        neg_peaks, _ = find_peaks(-signal)
        features.append([
            np.mean(signal), np.std(signal), np.min(signal), np.max(signal),
            np.sum(signal**2), len(peaks), len(neg_peaks)
        ])
    # Keep the result 2-D even when there are no columns, so it can still be
    # stacked row-wise with other feature arrays.
    return np.array(features).reshape(-1, 7)

# Load the four CSV files. header=None means the first line of each file is
# treated as data and the columns are numbered 0..N-1.

train_v = pd.read_csv('/kaggle/input/eogdata/Train-V.csv', header=None)
train_h = pd.read_csv('/kaggle/input/eogdata/Train-H.csv', header=None)
test_v = pd.read_csv('/kaggle/input/eogdata/Test-V.csv', header=None)
test_h = pd.read_csv('/kaggle/input/eogdata/Test-H.csv', header=None)


print("Original Train-V Data:")
print(train_v.head())
print("Original Train-H Data:")
print(train_h.head())

# Extract one feature row per column. For the training files the last row is
# left out here because it is read separately as the labels (see y_train
# below); the test files are passed in full.
X_train_features_v = extract_stat_features(train_v.iloc[:-1, :])
X_train_features_h = extract_stat_features(train_h.iloc[:-1, :])
X_test_features_v = extract_stat_features(test_v)
X_test_features_h = extract_stat_features(test_h)

print(" Train-V Features (From Raw Data Direct):")
print(X_train_features_v[:5])
print(" Train-H Features (From Raw Data Direct):")
print(X_train_features_h[:5])

# Stack the V rows on top of the H rows, then standardise. The scaler is
# fitted on the training features only and re-used unchanged for the test set.
scaler = StandardScaler()
X_train = scaler.fit_transform(np.concatenate((X_train_features_v, X_train_features_h), axis=0))
X_test = scaler.transform(np.concatenate((X_test_features_v, X_test_features_h), axis=0))

print("Scaled Train Features:")
print(X_train[:5])
print("Scaled Test Features:")
print(X_test[:5])

# Create the three classifiers that are compared below
svm_clf = SVC(kernel='rbf', C=10)
rf_clf = RandomForestClassifier(n_estimators=450, random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)

# Training labels: last row of Train-V followed by last row of Train-H, the
# same V-then-H order used when stacking X_train.
y_train = np.concatenate((train_v.iloc[-1, :].values, train_h.iloc[-1, :].values))
# Hard-coded test labels: the 25-entry pattern (five each of 4, 1, 3, 2, 0)
# written out twice, 50 labels in total.
# NOTE(review): presumably the first 25 belong to the Test-V columns and the
# last 25 to the Test-H columns (25 columns per file) - confirm against the data.
y_test = np.array([4] * 5 + [1] * 5 + [3] * 5 + [2] * 5 + [0] * 5 + [4] * 5 + [1] * 5 + [3] * 5 + [2] * 5 + [0] * 5)

svm_clf.fit(X_train, y_train)
y_pred_svm = svm_clf.predict(X_test)
print("SVM Predictions:")
print(y_pred_svm)

rf_clf.fit(X_train, y_train)
y_pred_rf = rf_clf.predict(X_test)
print("Random Forest Predictions:")
print(y_pred_rf)

knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
print("KNN Predictions:")
print(y_pred_knn)

print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))

# Save the fitted SVM and random forest models to disk.
# NOTE(review): the KNN model and the fitted `scaler` are not saved, although
# both saved models were trained on features transformed by `scaler` - confirm
# whether the scaler should be persisted alongside them.
joblib.dump(svm_clf, 'svm_classifier_Raw.joblib')
# Last expression of the cell: its return value is what the notebook displays.
joblib.dump(rf_clf, 'random_forest_classifier_Raw.joblib')


Original Train-V Data:
    0    1    2      3      4      5    6    7      8    9   ...     65  \
0  133  128  127  127.1  133.1  127.2  125  139  127.3  136  ...  125.5   
1  133  133  124  126.0  123.0  130.0  123  141  133.0  133  ...  122.0   
2  126  128  128  129.0  131.0  129.0  127  134  126.0  142  ...  133.0   
3  127  133  125  128.0  128.0  130.0  128  142  127.0  135  ...  126.0   
4  131  131  127  126.0  128.0  125.0  128  147  128.0  137  ...  126.0   

      66     67     68   69     70     71     72     73   74  
0  127.6  137.1  137.2  115  137.3  127.7  130.5  127.8  129  
1  127.0  133.0  131.0  113  134.0  126.0  129.0  119.0  122  
2  127.0  134.0  132.0  116  132.0  129.0  135.0  125.0  130  
3  128.0  133.0  131.0  122  137.0  128.0  123.0  125.0  130  
4  129.0  129.0  131.0  114  132.0  125.0  131.0  126.0  122  

[5 rows x 75 columns]
Original Train-H Data:
    0    1    2    3    4    5    6    7    8    9   ...   65     66   67  \
0  142  132  129  130  14

['random_forest_classifier_Raw.joblib']

In [2]:
# Approach 2: features computed from wavelet coefficients of the band-pass filtered signals
import pandas as pd
import numpy as np
from scipy.signal import butter, filtfilt, find_peaks
from pywt import wavedec
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import joblib

# Load the same four CSV files that the raw-feature cell reads. header=None
# means the first line of each file is treated as data, not as column names.
train_v = pd.read_csv('/kaggle/input/eogdata/Train-V.csv', header=None)
train_h = pd.read_csv('/kaggle/input/eogdata/Train-H.csv', header=None)
test_v = pd.read_csv('/kaggle/input/eogdata/Test-V.csv', header=None)
test_h = pd.read_csv('/kaggle/input/eogdata/Test-H.csv', header=None)

# Show the first five rows of each frame as loaded, before any processing
print("Original Train-V Data:\n", train_v.head())
print("Original Train-H Data:\n", train_h.head())
print("Original Test-V Data:\n", test_v.head())
print("Original Test-H Data:\n", test_h.head())

def preprocess_signals(data, lowcut=1.0, highcut=20.0, fs=176, order=2):
    """Band-pass filter, min-max scale and mean-centre ``data`` along axis 0.

    Parameters
    ----------
    data : array-like
        Signals to process. Filtering and scaling run along axis 0, so in a
        2-D table every column is handled as an independent signal.
    lowcut, highcut : float
        Pass-band edges, in the same frequency units as ``fs``.
    fs : float
        Sampling frequency; the cut-offs are divided by ``fs / 2``.
    order : int
        Order passed to ``scipy.signal.butter``.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``data``. Each column is scaled so that
        its range is 1 and then shifted to zero mean. A column that is
        completely flat after filtering comes back as all zeros instead of NaN.
    """
    nyquist = 0.5 * fs
    # butter() expects cut-offs as fractions of the Nyquist frequency.
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    # filtfilt applies the filter forwards and backwards, which cancels the
    # phase shift a single pass would introduce.
    filtered = filtfilt(b, a, data, axis=0)

    col_min = np.min(filtered, axis=0)
    span = np.max(filtered, axis=0) - col_min
    # A flat column has span 0; dividing by it would give 0/0 = NaN, which
    # then breaks the scaler and classifiers downstream. Dividing by 1
    # instead leaves such a column at 0. Non-flat columns are unaffected.
    safe_span = np.where(span == 0, 1.0, span)
    normalized = (filtered - col_min) / safe_span
    # Shift each column so that its mean is zero.
    return normalized - np.mean(normalized, axis=0)

# Filter and normalise every column with the default preprocess_signals
# settings. For the training files the last row is left out; it is read as
# the labels later in this cell. The test files are passed in full.
X_train_v = preprocess_signals(train_v.iloc[:-1, :])
X_train_h = preprocess_signals(train_h.iloc[:-1, :])
X_test_v = preprocess_signals(test_v)
X_test_h = preprocess_signals(test_h)

# Show the first five rows of each preprocessed array
print("Preprocessed Train-V Data:\n", X_train_v[:5])
print("Preprocessed Train-H Data:\n", X_train_h[:5])
print("Preprocessed Test-V Data:\n", X_test_v[:5])
print("Preprocessed Test-H Data:\n", X_test_h[:5])

def extract_features(data):
    """Summarise the wavelet approximation of each column of ``data``.

    Every column is decomposed with ``wavedec(column, 'db4', level=2)`` and
    only the first array of the returned list is kept. Seven statistics of
    that array form one output row: mean, standard deviation, minimum,
    maximum, sum of squares, number of local maxima and number of local
    minima.

    Returns a NumPy array with one row per column of ``data``, in column order.
    """
    def _summarise(column):
        # Keep only the first coefficient array of the decomposition.
        approx = wavedec(column, 'db4', level=2)[0]
        maxima, _ = find_peaks(approx)
        # Minima are located as the peaks of the negated coefficients.
        minima, _ = find_peaks(-approx)
        return [
            np.mean(approx), np.std(approx), np.min(approx), np.max(approx),
            np.sum(approx**2), len(maxima), len(minima),
        ]

    return np.array([_summarise(column) for column in data.T])

# Compute one row of wavelet-based features per column of each preprocessed array
X_train_features_v = extract_features(X_train_v)
X_train_features_h = extract_features(X_train_h)
X_test_features_v = extract_features(X_test_v)
X_test_features_h = extract_features(X_test_h)

# Show the first five feature rows of each set
print("Extracted Features Train-V (Wavelet):\n", X_train_features_v[:5])
print("Extracted Features Train-H (Wavelet):\n", X_train_features_h[:5])
print("Extracted Features Test-V(Wavelet):\n", X_test_features_v[:5])
print("Extracted Features Test-H(Wavelet):\n", X_test_features_h[:5])

# Stack the V rows on top of the H rows, then standardise. The scaler is
# fitted on the training features only and re-used unchanged for the test set.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(np.concatenate((X_train_features_v, X_train_features_h), axis=0))
X_test_scaled = scaler.transform(np.concatenate((X_test_features_v, X_test_features_h), axis=0))

# Show the first five scaled rows
print("Scaled Train Features:\n", X_train_scaled[:5])
print("Scaled Test Features:\n", X_test_scaled[:5])
# Training labels: last row of Train-V followed by last row of Train-H, the
# same V-then-H order used when stacking the features.
y_train = np.concatenate((train_v.iloc[-1, :].values, train_h.iloc[-1, :].values))
# NOTE(review): "tarin" in the printed label below is a typo for "train".
print("y tarin Labels", y_train)
# Hard-coded test labels: the 25-entry pattern (five each of 4, 1, 3, 2, 0)
# written out twice, 50 labels in total.
# NOTE(review): presumably the first 25 belong to the Test-V columns and the
# last 25 to the Test-H columns (25 columns per file) - confirm against the data.
y_test = np.array([4] * 5 + [1] * 5 + [3] * 5 + [2] * 5 + [0] * 5+[4] * 5 + [1] * 5 + [3] * 5 + [2] * 5 + [0] * 5)
print("y test labels", y_test)
# Fit each classifier on the scaled training features and predict the test set
svm_clf = SVC(kernel='rbf', C=10)
svm_clf.fit(X_train_scaled, y_train)
y_pred_svm = svm_clf.predict(X_test_scaled)

rf_clf = RandomForestClassifier(n_estimators=450, random_state=42)
rf_clf.fit(X_train_scaled, y_train)
y_pred_rf = rf_clf.predict(X_test_scaled)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)
y_pred_knn = knn.predict(X_test_scaled)

# Print classifier predictions
print("Predicted by SVM:\n", y_pred_svm)
print("Predicted by RandomForest:\n", y_pred_rf)
print("Predicted by KNN:\n", y_pred_knn)

# Accuracy of each classifier against the hard-coded test labels
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))

# Save the fitted SVM and random forest models to disk.
# NOTE(review): the KNN model and the fitted `scaler` are not saved, although
# both saved models were trained on features transformed by `scaler` - confirm
# whether the scaler should be persisted alongside them.
joblib.dump(svm_clf, 'svm_classifier_w.joblib')
# Last expression of the cell: its return value is what the notebook displays.
joblib.dump(rf_clf, 'random_forest_classifier_w.joblib')


Original Train-V Data:
     0    1    2      3      4      5    6    7      8    9   ...     65  \
0  133  128  127  127.1  133.1  127.2  125  139  127.3  136  ...  125.5   
1  133  133  124  126.0  123.0  130.0  123  141  133.0  133  ...  122.0   
2  126  128  128  129.0  131.0  129.0  127  134  126.0  142  ...  133.0   
3  127  133  125  128.0  128.0  130.0  128  142  127.0  135  ...  126.0   
4  131  131  127  126.0  128.0  125.0  128  147  128.0  137  ...  126.0   

      66     67     68   69     70     71     72     73   74  
0  127.6  137.1  137.2  115  137.3  127.7  130.5  127.8  129  
1  127.0  133.0  131.0  113  134.0  126.0  129.0  119.0  122  
2  127.0  134.0  132.0  116  132.0  129.0  135.0  125.0  130  
3  128.0  133.0  131.0  122  137.0  128.0  123.0  125.0  130  
4  129.0  129.0  131.0  114  132.0  125.0  131.0  126.0  122  

[5 rows x 75 columns]
Original Train-H Data:
     0    1    2    3    4    5    6    7    8    9   ...   65     66   67  \
0  142  132  129  130  

['random_forest_classifier_w.joblib']