In [6]:
import itertools
import pandas as pd
import numpy as np
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

# Positions whose rows are modelled separately (compared against the 'POS' column).
positions = [
    "qb",
    "rb",
    "wr",
    "te",
]

# Filled in by the search below: position -> best feature list found for it.
optimal_features_dict = dict()

# Every column name the search may draw from, grouped by stat family.
# Order matters: feature combinations are generated in this order.
all_features = [
    # Passing
    'PASSING CMP',
    'PASSING ATT',
    'PASSING PCT',
    'PASSING YDS',
    'PASSING Y/A',
    'PASSING TD',
    'PASSING INT',
    'PASSING SACKS',
    # Rushing
    'RUSHING ATT',
    'RUSHING YDS',
    'RUSHING TD',
    # Receiving
    'RECEIVING REC',
    'RECEIVING TGT',
    'RECEIVING YDS',
    'RECEIVING Y/R',
    'RECEIVING TD',
    'RECEIVING LG',
    'RECEIVING 20+',
    # Miscellaneous / schedule / fantasy-point aggregates
    'MISC FL',
    'WEEK',
    'AVG_FPTS',
    'MAX_FPTS',
    'MIN_FPTS',
    'VAR_FPTS',
    'MISC FPTS',
]

# --- Per-position feature-subset search --------------------------------------
# Column the network is trained to forecast. Its own history is always part of
# the input window, so it is excluded from the candidate features below
# (otherwise the same column would be selected twice).
TARGET_COLUMN = 'MISC FPTS'
DATE_COLUMN = "CONTINUOUS_DATE"
LOOK_BACK = 8  # number of consecutive rows in each input window
TRAIN_FRACTION = 0.8  # leading share of the windows used for training
# Largest subset size to try. None keeps the exhaustive search over every
# size, i.e. 2**n - 1 trainings for n candidates; set a small integer to make
# the search finish in practical time.
MAX_SUBSET_SIZE = None


def create_lstm_dataset(dataset, look_back=1, target_index=None):
    """Slice a 2-D array of time-ordered rows into supervised windows.

    Args:
        dataset: array-like of shape (n_rows, n_columns), oldest row first.
        look_back: number of consecutive rows in each input window.
        target_index: column to predict. ``None`` keeps the full next row as
            the label; an integer selects that single column, returned with
            shape (n_windows, 1) so it matches a one-unit output layer.

    Returns:
        ``(X, Y)`` where ``X[i]`` is rows ``i .. i + look_back - 1`` and
        ``Y[i]`` is (the selected column of) row ``i + look_back``. Both are
        empty along the first axis when ``dataset`` has at most ``look_back``
        rows.
    """
    dataset = np.asarray(dataset)
    n_windows = len(dataset) - look_back
    if n_windows <= 0:
        # Keep the trailing dimensions so callers can still read X.shape[1:].
        X = np.empty((0, look_back) + dataset.shape[1:], dtype=dataset.dtype)
        Y = np.empty((0,) + dataset.shape[1:], dtype=dataset.dtype)
    else:
        X = np.stack([dataset[i:i + look_back] for i in range(n_windows)])
        Y = np.array(dataset[look_back:])
    if target_index is not None:
        Y = Y[:, [target_index]]
    return X, Y


def _train_window_count(n_rows, look_back, train_fraction):
    """Return how many windows go to training, or 0 if a split is impossible."""
    n_windows = n_rows - look_back
    if n_windows < 2:
        return 0
    train_size = int(n_windows * train_fraction)
    # Both the training and the test part need at least one window.
    if train_size < 1 or train_size >= n_windows:
        return 0
    return train_size


def _iter_feature_subsets(candidates, max_size=None):
    """Lazily yield every combination of ``candidates``, smallest sizes first."""
    limit = len(candidates) if max_size is None else min(max_size, len(candidates))
    for size in range(1, limit + 1):
        # Yield lazily: materialising all combinations of a size can mean
        # millions of tuples held in memory at once.
        yield from itertools.combinations(candidates, size)


def _build_model(look_back, n_features):
    """Build and compile the Dense -> LSTM -> Dense(1) regression network."""
    # Function-scope import keeps this block self-contained.
    from tensorflow.keras import backend as keras_backend

    # Drop graph state left over from the previously trained model; without
    # this, memory grows with every model created in the search loop.
    keras_backend.clear_session()

    model = Sequential()
    # Declare the full (time steps, features) input so the Dense layer is
    # applied per time step and the LSTM receives the 3-D tensor it requires.
    model.add(Dense(units=10, input_shape=(look_back, n_features), activation='relu'))
    model.add(LSTM(units=50))
    model.add(Dense(units=1))  # single regression output: the target column
    model.compile(optimizer='adam', loss='mse')
    return model


def _evaluate_feature_subset(frame, feature_names, look_back=LOOK_BACK,
                             train_fraction=TRAIN_FRACTION):
    """Train one model on ``feature_names`` plus target history; return test MSE.

    ``frame`` must already be sorted by date. Returns ``None`` when it has too
    few rows to form at least one training and one test window. The MSE is in
    min-max-scaled units of the target column.
    """
    train_size = _train_window_count(len(frame), look_back, train_fraction)
    if train_size == 0:
        return None

    # The target goes last so it can be addressed as column -1 after scaling.
    columns = list(feature_names) + [TARGET_COLUMN]
    raw = frame[columns].values

    # Fit the imputer and scaler only on rows the training windows can see
    # (inputs plus labels); fitting on everything leaks test statistics.
    imputer = SimpleImputer(strategy='mean')
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(imputer.fit_transform(raw[:train_size + look_back]))
    scaled = scaler.transform(imputer.transform(raw))

    X, y = create_lstm_dataset(scaled, look_back, target_index=-1)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    model = _build_model(look_back, X.shape[2])
    model.fit(X_train, y_train, epochs=100, batch_size=32,
              validation_data=(X_test, y_test), verbose=0)
    predictions = model.predict(X_test)
    return mean_squared_error(y_test, predictions)


def find_best_features(frame, candidates, label, max_size=None,
                       look_back=LOOK_BACK, train_fraction=TRAIN_FRACTION):
    """Return the candidate subset with the lowest test MSE, or ``None``.

    Args:
        frame: date-sorted rows for one position.
        candidates: feature column names to combine (target excluded).
        label: text used in the progress output (the position name).
        max_size: largest subset size to try; ``None`` tries every size.
        look_back: rows per input window.
        train_fraction: leading share of windows used for training.
    """
    # The row count does not depend on the subset, so check it once up front.
    if _train_window_count(len(frame), look_back, train_fraction) == 0:
        print(f'Not enough rows to evaluate {label}; skipping')
        return None

    best_features = None
    best_performance = float('inf')
    for subset in _iter_feature_subsets(candidates, max_size):
        features_to_predict = list(subset)
        mse = _evaluate_feature_subset(frame, features_to_predict,
                                       look_back, train_fraction)
        print(f'Mean Squared Error {label}: {mse}')
        if mse < best_performance:
            best_performance = mse
            best_features = features_to_predict
    return best_features


# The guard is true when run as a script or notebook cell, so the search
# still executes there and still leaves its variables in the global namespace.
if __name__ == "__main__":
    # Read the file once; it does not change between positions.
    data = pd.read_csv("datasets/weekly_scoring.csv")
    candidate_features = [name for name in all_features if name != TARGET_COLUMN]

    for pos in positions:
        # Keep this position's rows with a non-zero target. The explicit copy
        # avoids pandas chained-assignment warnings on the date conversion.
        data_pos = data[(data['POS'] == pos) & (data[TARGET_COLUMN] != 0)].copy()

        # Parse and sort once per position rather than once per subset.
        # NOTE(review): rows are ordered by date only, so a window appears to
        # mix rows from different players of the same position; confirm that
        # is intended.
        data_pos[DATE_COLUMN] = pd.to_datetime(data_pos[DATE_COLUMN])
        data_pos = data_pos.sort_values(by=DATE_COLUMN)

        # Store the optimal features for the current position.
        optimal_features_dict[pos] = find_best_features(
            data_pos, candidate_features, pos, MAX_SUBSET_SIZE)

# Report the feature list stored for each position, in insertion order.
for pos in optimal_features_dict:
    features = optimal_features_dict[pos]
    print(f"Optimal features for {pos}: {features}")


InvalidParameterError: The 'estimator' parameter of SequentialFeatureSelector must be an object implementing 'fit'. Got <class 'keras.src.layers.rnn.lstm.LSTM'> instead.