In [None]:
import os
import csv
import yaml
import wandb
import pickle
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.layers import Dense, GRU, Dropout, Conv2D, MaxPooling2D, Flatten
from keras.models import Sequential
from src import read_nz_file, read_jg_file, update_meta_data, split_df, aggregate_files, add_moving_window
from sklearn.model_selection import train_test_split
from wandb.keras import WandbCallback

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn import metrics 
from sklearn.metrics import confusion_matrix
# import statsmodels.api as sm
import matplotlib.pyplot as plt

In [None]:
def read_preprocessing(folder, file_type: str = 'parquet'):
    """Load a cached train/test split and its metadata from ``./tmp/<folder>``.

    Args:
        folder: Name of the sub-directory of ``./tmp`` that holds the files.
        file_type: Storage format of the split, either ``'parquet'`` (default)
            or ``'pickle'``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, settings)`` where
        ``settings`` is the parsed content of ``metadata.yaml``.

    Raises:
        ValueError: If ``file_type`` is neither ``'parquet'`` nor ``'pickle'``.
    """
    # Validate before touching the disk: an unknown format previously fell
    # through both branches and only failed at the ``return`` statement with
    # an UnboundLocalError, after the metadata file had already been read.
    if file_type not in ('parquet', 'pickle'):
        raise ValueError(
            f"Unsupported file_type {file_type!r}; expected 'parquet' or 'pickle'."
        )

    base_dir = f'./tmp/{folder}'

    if file_type == 'parquet':
        X_train = pd.read_parquet(f'{base_dir}/X_train.parquet')
        X_test = pd.read_parquet(f'{base_dir}/X_test.parquet')
        # Only the 'y' column of the target files is used.
        y_train = pd.read_parquet(f'{base_dir}/y_train.parquet')['y']
        y_test = pd.read_parquet(f'{base_dir}/y_test.parquet')['y']
    else:
        # NOTE(review): pickle.load can execute arbitrary code; only use this
        # branch with files produced by a trusted preprocessing run.
        def _load_pickle(name):
            with open(f'{base_dir}/{name}.pickle', 'rb') as f:
                return pickle.load(f)

        X_train = _load_pickle('X_train')
        X_test = _load_pickle('X_test')
        y_train = _load_pickle('y_train')
        y_test = _load_pickle('y_test')

    # NOTE(review): yaml.full_load resolves more than plain YAML data types;
    # if the metadata only holds scalars/lists/dicts, yaml.safe_load would be
    # the safer choice -- confirm before switching.
    with open(f'{base_dir}/metadata.yaml') as file:
        settings = yaml.full_load(file)

    return X_train, X_test, y_train, y_test, settings

In [None]:
# Load the cached split stored under ./tmp/basic_20hz_20sec (parquet by default).
(
    X_train,
    X_test,
    y_train,
    y_test,
    settings,
) = read_preprocessing(folder='basic_20hz_20sec')

In [None]:
# Solver for the logistic regression. It is both logged to the W&B run config
# and passed to the estimator, so the logged value always matches the model.
my_solver = 'newton-cg'

wandb.login()
run = wandb.init(
    project="CDL1",
    entity="cdl1",
    name="logistic regression",
    config={
        "architecture": "logistic regression",
        "moving_window_size": settings['MOVING_WINDOW_SIZE'],
        "hz": settings['HZ'],
        "step_size": settings['STEP_SIZE'],
        "test_proportion": settings['TEST_PROPORTION'],
        "aggregation": settings['AGGREGATION'],
        "preprocessing": settings['PREPROCESSING'],
        "features": settings['FEATURES'],
        "solver": my_solver
    }
)

# Everything that logs to W&B happens inside the try block; the run is closed
# in ``finally`` so it is finished exactly once, after all plots were logged,
# and also when training or plotting raises.
try:
    # Sorted so the label order is deterministic and lines up with the sorted
    # class order scikit-learn uses (``logisticRegr.classes_``); a bare ``set``
    # has arbitrary iteration order.
    labels = sorted(set(y_train))

    # NOTE(review): newer scikit-learn releases spell the unpenalised model as
    # ``penalty=None``; the string form is kept for the version this notebook
    # was written against -- confirm before upgrading.
    logisticRegr = LogisticRegression(
        random_state=0,
        multi_class='multinomial',
        penalty='none',
        solver=my_solver,
    )
    logisticRegr.fit(X_train, y_train)

    y_pred = logisticRegr.predict(X_test)
    y_pred_train = logisticRegr.predict(X_train)
    y_proba = logisticRegr.predict_proba(X_test)

    wandb.sklearn.plot_classifier(
        logisticRegr, X_train, X_test, y_train, y_test, y_pred, y_proba, labels,
        model_name='Logistic Regression', feature_names=None
    )

    # 'accuracy' is measured on the training split, 'val_accuracy' on the test split.
    val_acc = metrics.accuracy_score(y_pred=y_pred, y_true=y_test)
    acc = metrics.accuracy_score(y_pred=y_pred_train, y_true=y_train)

    wandb.log({
        'accuracy': acc,
        'val_accuracy': val_acc
    })

    # Visualize single plots. These calls log to the active run, so they have
    # to run before the run is finished.
    wandb.sklearn.plot_confusion_matrix(y_test, y_pred, labels)

    # NOTE(review): the names below are hard-coded; presumably they match the
    # column order of X_train -- verify against the preprocessing output.
    wandb.sklearn.plot_feature_importances(
        logisticRegr, [
            'gyroscope_X(rad/s)_mean', 'gyroscope_X(rad/s)_std',
            'gyroscope_Y(rad/s)_mean', 'gyroscope_Y(rad/s)_std',
            'gyroscope_Z(rad/s)_mean', 'gyroscope_Z(rad/s)_std',
            'magnetometer_X(microT)_mean', 'magnetometer_X(microT)_std',
            'magnetometer_Y(microT)_mean', 'magnetometer_Y(microT)_std',
            'magnetometer_Z(microT)_mean', 'magnetometer_Z(microT)_std',
            'time_since_start(ms)_mean', 'time_since_start(ms)_std',
            'gravity_X(G)_mean', 'gravity_X(G)_std', 'gravity_Y(G)_mean',
            'gravity_Y(G)_std', 'gravity_Z(G)_mean', 'gravity_Z(G)_std',
            'accelerometer_X(G)_mean', 'accelerometer_X(G)_std',
            'accelerometer_Y(G)_mean', 'accelerometer_Y(G)_std',
            'accelerometer_Z(G)_mean', 'accelerometer_Z(G)_std',
            'orientation_X(rad)_mean', 'orientation_X(rad)_std',
            'orientation_Y(rad)_mean', 'orientation_Y(rad)_std',
            'orientation_Z(rad)_mean', 'orientation_Z(rad)_std'
        ]
    )

    wandb.sklearn.plot_summary_metrics(logisticRegr, X_train, X_test, y_train, y_test)

    wandb.sklearn.plot_learning_curve(logisticRegr, X_train, y_train)
finally:
    run.finish()