# Imports

In [108]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from scipy.fft import fft
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


# Read Data

In [87]:
# Load the four PEMS files from the working directory. All of them are read
# with ';' as the field separator and without a header row, so pandas labels
# the columns with integers starting at 0.
X_test, X_train, y_test, y_train = (
    pd.read_csv(path, sep=";", header=None)
    for path in ("PEMS_test", "PEMS_train", "PEMS_testlabels", "PEMS_trainlabels")
)

In [88]:
def _parse_label_row(raw_labels):
    """Convert a raw label frame into a one-column numeric frame.

    The first cell of ``raw_labels`` is read as a single string holding all
    labels separated by whitespace and optionally wrapped in square brackets,
    e.g. ``"[1 2 3]"``.

    Parameters
    ----------
    raw_labels : pandas.DataFrame
        Frame as returned by ``pd.read_csv`` for a label file; only the cell
        in the first row and first column is used.

    Returns
    -------
    pandas.DataFrame
        One row per label, in file order, in a single float column ``"label"``.

    Raises
    ------
    ValueError
        If a token cannot be converted to ``float``.
    """
    text = str(raw_labels.values[0][0]).strip()
    # Strip the surrounding brackets only if they are present, and split on
    # any run of whitespace so repeated or trailing blanks do not yield empty
    # tokens.
    tokens = text.strip("[]").split()
    return pd.DataFrame([float(token) for token in tokens], columns=["label"])


# y_train / y_test are overwritten with their parsed versions, so this cell
# expects the freshly loaded frames: reload the data before running it again.
y_train = _parse_label_row(y_train)
y_test = _parse_label_row(y_test)

In [89]:
def _parse_readings(cell):
    """Turn one cell such as ``"[0.1 0.2"`` or ``"0.3 0.4]"`` into a list of floats."""
    # A leading "[" and a trailing "]" are removed when present. Splitting on
    # any whitespace run avoids empty tokens from repeated blanks.
    text = str(cell).strip().lstrip("[").rstrip("]")
    return [float(token) for token in text.split()]


def transposeMeasurments(df):
    """Expand every sensor column of packed readings into one numeric column per reading.

    Each cell of ``df`` holds all readings of one sensor for one sample as a
    whitespace-separated string. The result has one float column per
    (sensor, reading) pair, grouped by sensor in the original column order and
    named ``"Sensor <column label> - Messwert <k>"`` with ``k`` starting at 1.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per sample, one column per sensor. Any number of columns is
        accepted. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index as ``df`` and only the expanded reading
        columns. A frame without columns yields an empty frame on the same
        index.

    Raises
    ------
    ValueError
        If a token is not a valid float, or if the samples of one sensor do
        not all contain the same number of readings.
    """
    sensor_frames = []
    for position, column in enumerate(df.columns):
        # Positional access keeps this independent of the index labels.
        rows = [_parse_readings(cell) for cell in df.iloc[:, position]]

        # Ragged rows would otherwise assign readings to the wrong sample.
        lengths = {len(row) for row in rows}
        if len(lengths) > 1:
            raise ValueError(
                f"Sensor {column}: samples contain differing numbers of readings {sorted(lengths)}"
            )

        readings = pd.DataFrame(rows, index=df.index, dtype=float)
        readings.columns = [
            f"Sensor {column} - Messwert {index}"
            for index in range(1, readings.shape[1] + 1)
        ]
        sensor_frames.append(readings)

    if not sensor_frames:
        return pd.DataFrame(index=df.index)

    # Concatenate once at the end instead of growing the frame inside the loop.
    return pd.concat(sensor_frames, axis=1)

In [90]:
# Expand the packed sensor strings of both splits into numeric columns.
# The raw frames are replaced by their expanded versions, so run this cell
# only once per load of the data.
X_test, X_train = (transposeMeasurments(frame) for frame in (X_test, X_train))

# PCA

In [91]:
# Standardise every feature, then keep as many principal components as are
# needed to explain 95% of the variance.
pipeline = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("pca", PCA(n_components=0.95)),
])

# Scaling statistics and the projection are learned from the training split only.
pipeline.fit(X_train)

# Apply the fitted pipeline to both splits (no refitting on the test data).
X_train_pca = pipeline.transform(X_train)
X_test_pca = pipeline.transform(X_test)

# Wrap the resulting arrays in DataFrames with columns PC1 ... PCk, where k is
# the number of components the PCA step retained.
pc_names = [f"PC{k}" for k in range(1, X_train_pca.shape[1] + 1)]
X_train_pca = pd.DataFrame(X_train_pca, columns=pc_names)
X_test_pca = pd.DataFrame(X_test_pca, columns=pc_names)


# FFT

In [92]:
# Frequency-domain features. scipy's fft works along the last axis by default,
# so every row is transformed across all of its columns; the result is a
# complex-valued array.
# NOTE(review): each row holds the readings of all sensors side by side, so
# this is one FFT over the concatenated sensors rather than one FFT per
# sensor - confirm this is intended.
X_train_fft, X_test_fft = (fft(split) for split in (X_train, X_test))

# Predictions

In [106]:
# Train one random forest per feature representation and report its accuracy
# on the test split, i.e. the fraction of test samples whose predicted label
# equals the true label.
# The labels live in a one-column DataFrame; ravel() flattens them to 1-D.
train_labels = y_train.values.ravel()

# PCA features
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_pca, train_labels)
y_test_pred = rf.predict(X_test_pca)
pca_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Random Forest Classifier with PCA: {pca_accuracy}")

# FFT features: only the real part of the complex spectrum is given to the model.
# NOTE(review): the imaginary part is discarded here - confirm this is intended.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_fft.real, train_labels)
y_test_pred = rf.predict(X_test_fft.real)
fft_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Random Forest Classifier with FFT: {fft_accuracy}")

Random Forest Classifier with PCA: 0.8439306358381503
Random Forest Classifier with FFT: 0.7225433526011561
