In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
import xgboost as xgb
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from itertools import combinations

def apply_pca(X_train, X_test, n_components):
    """Project both splits onto principal components learned from the training split.

    Args:
        X_train: Training feature matrix; the PCA is fitted on this data.
        X_test: Test feature matrix; it is only transformed, never fitted on.
        n_components: Forwarded unchanged to ``PCA(n_components=...)``.

    Returns:
        Tuple ``(train_projection, test_projection)``.
    """
    reducer = PCA(n_components=n_components)
    # Fit on the training split only so the test split stays unseen.
    train_projection = reducer.fit_transform(X_train)
    return train_projection, reducer.transform(X_test)


def apply_lda(X_train, y_train, X_test, n_components):
    """Project both splits with an LDA fitted on the labelled training split.

    Args:
        X_train: Training feature matrix used to fit the LDA.
        y_train: Class labels for ``X_train`` (LDA is supervised).
        X_test: Test feature matrix; it is only transformed.
        n_components: Forwarded unchanged to
            ``LinearDiscriminantAnalysis(n_components=...)``.

    Returns:
        Tuple ``(train_projection, test_projection)``.
    """
    discriminant = LinearDiscriminantAnalysis(n_components=n_components)
    # The labels are needed for fitting only; the test split is just projected.
    train_projection = discriminant.fit_transform(X_train, y_train)
    return train_projection, discriminant.transform(X_test)


def read_csv_and_preprocess(csv_file_path, input_columns, output_column):
    """Load a CSV, select features/target, split 80/20 and standardise the features.

    Steps, in order:
      1. Read the CSV and drop every row that contains a missing value.
      2. Select ``input_columns`` as features and ``output_column`` as target.
      3. Replace the string ``"Flexion"`` with 1 and ``"Extension"`` with 0
         wherever they occur in the selected features.
      4. Split into train/test (``test_size=0.2``, ``random_state=42``).
      5. Fit a ``StandardScaler`` on the training features and apply it to
         both splits.

    The target column is returned exactly as read; no label encoding is
    applied here.

    Args:
        csv_file_path: Path handed to ``pandas.read_csv``.
        input_columns: Column names to use as features.
        output_column: Column name to use as the target.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, X, y)`` where the first two
        items are the scaled feature splits and ``X``/``y`` are the full,
        unscaled features (after the Flexion/Extension replacement) and target.
    """
    frame = pd.read_csv(csv_file_path).dropna()

    # Numeric encoding of the movement-direction strings in the features.
    X = frame[input_columns].replace(["Flexion"], 1).replace(["Extension"], 0)
    y = frame[output_column]

    train_features, test_features, train_labels, test_labels = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # The scaler's statistics come from the training split only.
    scaler = StandardScaler()
    train_features = scaler.fit_transform(train_features)
    test_features = scaler.transform(test_features)

    return train_features, test_features, train_labels, test_labels, X, y


def train_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels used as ground truth for the metrics.

    Returns:
        Tuple ``(accuracy, classification_rep)``: the value returned by
        ``accuracy_score`` and the text returned by ``classification_report``
        for the test-split predictions.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return (
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions),
    )


# --- Experiment configuration -------------------------------------------------
csv_file_path = './Data_Non_Functional.csv'

# Candidate feature columns.  The names presumably mirror the CSV header, so
# their spelling is left untouched -- verify against the data file.
input_columns = ['Flexion/Extension','Entropy_1','CoV_1','Intensity_1','Differential Intensity_1','Mean_RMS_1','Max_RMS_1','Mean_ARV_1','Max_ARV_1','Xcg_1','Ycg_1','Entropy_2','CoV_2','Intensity_2','Differential Intensity_2','Mean_RMS_2','Max_RMS_2','Mean_ARV_2','Max_ARV_2','Xcg_2','Ycg_2','Entropy_Ratio','CoV_Ratio','Mean_RMS_Ratio','Max_RMS_Ratio','Mean_ARV_Ratio','Max_ARV_Ratio','Intensity_Ratio','Differential_Instensity_Ratio']  
# Alternative feature sets used in earlier runs:
#input_columns = ['Entropy_Ratio','CoV_Ratio','Mean_RMS_Ratio','Max_RMS_Ratio','Mean_ARV_Ratio','Max_ARV_Ratio','Intensity_Ratio','Differential_Instensity_Ratio']  # 72 decision tree
#input_columns = ['Mean_RMS_1','Max_RMS_1','Xcg_1','Ycg_1','Mean_RMS_2','Max_RMS_2','Xcg_2','Ycg_2']  # Replace with the names of your input columns

print("HEYYY!")

# Smallest feature-subset size to evaluate.
min_combination_length = 3

# Every subset of `input_columns` with at least `min_combination_length`
# members, each yielded as a list, ordered by size and then in
# itertools.combinations order.
#
# This is deliberately a lazy generator: with 29 columns and a minimum size of
# 3 there are 536,870,476 subsets, and building them all as a list exhausts
# memory before any model is trained.  The generator can be iterated once.
all_combinations = (
    list(combo)
    for r in range(min_combination_length, len(input_columns) + 1)
    for combo in combinations(input_columns, r)
)

print("HEYYYYYYYY!")

output_column = 'Exercise type'  # Replace with the name of your output column
#output_column = 'Flexion/Extension'  # Replace with the name of your output column

# Candidate classifiers, each paired with the hyperparameter grid to search.
# The mapping does not depend on the feature subset, so it is built once
# instead of once per subset.  GridSearchCV clones the estimator it receives,
# so the instances below are never fitted by the grid-search branch.
models = {
    'Logistic Regression': (LogisticRegression(), {
        'C': [0.1, 1, 10],
        'solver': ['liblinear', 'lbfgs']
    }),
    'Decision Tree': (DecisionTreeClassifier(), {
        'criterion': ['gini', 'entropy'],
        'max_depth': [None, 5, 10, 15, 20]
    }),
    'Random Forest': (RandomForestClassifier(), {
        'n_estimators': [50, 100, 150],
        'max_depth': [None, 5, 10, 15]
    }),
    'Support Vector Machine': (SVC(), {
        'C': [0.1, 1, 10],
        'kernel': ['linear', 'rbf'],
        'gamma': ['scale', 'auto']
    }),
    # Disabled candidates (an empty grid means "fit with default settings"):
    #'Naive Bayes': (GaussianNB(), {}),
    #'XGBoost': (xgb.XGBClassifier(), {
    #    'learning_rate': [0.01, 0.1, 0.2],
    #    'max_depth': [3, 5, 7, 9, 11],
    #    'n_estimators': [50, 100, 150]
    #}),
    'K-Nearest Neighbors': (KNeighborsClassifier(), {
        'n_neighbors': [1,5,10],
        'weights': ['uniform', 'distance']
    })
}

# Evaluate every candidate model on every feature subset.
for input_comb in all_combinations:

    print("---------------------------\n\nFeatures : ",input_comb)

    X_train, X_test, y_train, y_test, X, y = read_csv_and_preprocess(csv_file_path, input_comb, output_column)

    # Optional dimensionality reduction is not applied; apply_pca / apply_lda
    # could be called on (X_train, X_test) at this point if wanted.

    for model_name, (model, param_grid) in models.items():

        if param_grid:
            # 5-fold cross-validated grid search using all available cores.
            grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1)
            grid_search.fit(X_train, y_train)

            # With the default refit=True, best_estimator_ has already been
            # refitted on the whole training split, so it is scored directly
            # rather than being fitted a second time.
            best_model = grid_search.best_estimator_
            accuracy = accuracy_score(y_test, best_model.predict(X_test))

            print(f'Best {model_name} Model:')
            print(f'Best hyperparameters: {grid_search.best_params_}')
            print(f'Accuracy: {accuracy:.4f}')
        else:
            # No grid to search: fit the estimator as configured and score it.
            accuracy, classification_rep = train_and_evaluate_model(model, X_train, X_test, y_train, y_test)
            print(f'{model_name} Model:')
            print(f'Accuracy: {accuracy:.4f}')
            
            

HEYYY!
