In [52]:
# Implementation of https://arxiv.org/abs/2101.09460

import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import gymnasium as gym
from gymnasium import spaces
from collections import deque
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import make_scorer, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import random
import matplotlib.pyplot as plt

In [53]:
# Load the dataset
df_raw = pd.read_csv('./data/train_with_dummies_clean.csv', index_col=[0])

# Specify prefixes of columns to drop
prefixes_to_drop = ['Id', 'SaleType', 'SaleCondition', 'SalePrice']

# Drop specified columns before imputation
df_filtered = df_raw.drop([col for col in df_raw.columns if any(col.startswith(prefix) for prefix in prefixes_to_drop)], axis=1)

# Impute missing values in the filtered dataset
imputer = SimpleImputer(strategy='mean')
df_imputed = pd.DataFrame(imputer.fit_transform(df_filtered), columns=df_filtered.columns)

# Extract the SalePrice column from the original dataset for use as the target variable
sale_price_col = df_raw['SalePrice']
sale_price_mean = np.mean(sale_price_col)

# Scale the imputed dataset
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df_imputed)
df_scaled = pd.DataFrame(scaled_data, columns=df_imputed.columns)

# Define data_x and data_y for model input
data_x = df_scaled
data_y = sale_price_col.reset_index(drop=True)  # Reset index to ensure alignment

# Assuming 'df' is your DataFrame
column_to_index = {column: index for index, column in enumerate(data_x.columns)}
index_to_column = {index: column for column, index in column_to_index.items()}

In [54]:
data_x.head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,PoolQC_Fa,PoolQC_Gd,Fence_GdPrv,Fence_GdWo,Fence_MnPrv,Fence_MnWw,MiscFeature_Gar2,MiscFeature_Othr,MiscFeature_Shed,MiscFeature_TenC
0,0.073375,-0.229372,-0.207142,0.651479,-0.5172,1.050994,0.878668,0.511418,0.575425,-0.288653,...,-0.037037,-0.045376,-0.205214,-0.195977,-0.347118,-0.087129,-0.037037,-0.037037,-0.186352,-0.02618
1,-0.872563,0.451936,-0.091886,-0.071836,2.179628,0.156734,-0.429577,-0.57441,1.171992,-0.288653,...,-0.037037,-0.045376,-0.205214,-0.195977,-0.347118,-0.087129,-0.037037,-0.037037,-0.186352,-0.02618
2,0.073375,-0.09311,0.07348,0.651479,-0.5172,0.984752,0.830215,0.32306,0.092907,-0.288653,...,-0.037037,-0.045376,-0.205214,-0.195977,-0.347118,-0.087129,-0.037037,-0.037037,-0.186352,-0.02618
3,0.309859,-0.456474,-0.096897,0.651479,-0.5172,-1.863632,-0.720298,-0.57441,-0.499274,-0.288653,...,-0.037037,-0.045376,-0.205214,-0.195977,-0.347118,-0.087129,-0.037037,-0.037037,-0.186352,-0.02618
4,0.073375,0.633618,0.375148,1.374795,-0.5172,0.951632,0.733308,1.36457,0.463568,-0.288653,...,-0.037037,-0.045376,-0.205214,-0.195977,-0.347118,-0.087129,-0.037037,-0.037037,-0.186352,-0.02618


In [55]:
column_to_index

{'MSSubClass': 0,
 'LotFrontage': 1,
 'LotArea': 2,
 'OverallQual': 3,
 'OverallCond': 4,
 'YearBuilt': 5,
 'YearRemodAdd': 6,
 'MasVnrArea': 7,
 'BsmtFinSF1': 8,
 'BsmtFinSF2': 9,
 'BsmtUnfSF': 10,
 'TotalBsmtSF': 11,
 '1stFlrSF': 12,
 '2ndFlrSF': 13,
 'LowQualFinSF': 14,
 'GrLivArea': 15,
 'BsmtFullBath': 16,
 'BsmtHalfBath': 17,
 'FullBath': 18,
 'HalfBath': 19,
 'BedroomAbvGr': 20,
 'KitchenAbvGr': 21,
 'TotRmsAbvGrd': 22,
 'Fireplaces': 23,
 'GarageYrBlt': 24,
 'GarageCars': 25,
 'GarageArea': 26,
 'WoodDeckSF': 27,
 'OpenPorchSF': 28,
 'EnclosedPorch': 29,
 '3SsnPorch': 30,
 'ScreenPorch': 31,
 'PoolArea': 32,
 'MiscVal': 33,
 'MoSold': 34,
 'YrSold': 35,
 'MSZoning_C (all)': 36,
 'MSZoning_FV': 37,
 'MSZoning_RH': 38,
 'MSZoning_RL': 39,
 'Street_Grvl': 40,
 'Alley_Grvl': 41,
 'Alley_Pave': 42,
 'LotShape_IR1': 43,
 'LotShape_IR2': 44,
 'LotShape_IR3': 45,
 'LandContour_Bnk': 46,
 'LandContour_HLS': 47,
 'LandContour_Low': 48,
 'Utilities_AllPub': 49,
 'LotConfig_Corner': 50,
 '

In [56]:
def create_model(input_dim, output_dim):
    model = models.Sequential([
        layers.Dense(128, activation='relu', input_shape=(input_dim,)),
        layers.Dense(256, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(output_dim)
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(), loss='mse')
    
    target_model = models.clone_model(model)
    target_model.set_weights(model.get_weights())
    
    return model, target_model




class DQNAgent:
    def __init__(self, state_size, action_size, batch_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = batch_size
        self.model, self.target_model = create_model(state_size, action_size)


    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            # Perform a random action, but only consider valid actions
            valid_actions = [i for i, val in enumerate(state[0]) if val == 0]
            return np.random.choice(valid_actions)
        else:
            act_values = self.model.predict(state, verbose=0)
            # Mask out invalid actions by setting their Q-values to a large negative number
            act_values[0][state[0] == 1] = -1e9
            return np.argmax(act_values[0])  # returns action

        
    def replay(self):
        minibatch = random.sample(self.memory, self.batch_size)
        states = []
        next_states = []
        for state, action, reward, next_state, done in minibatch:
            # Reshape state and next_state without the extra singleton dimension
            state = np.squeeze(state)  # Remove the extra singleton dimension
            next_state = np.squeeze(next_state)  # Remove the extra singleton dimension
            states.append(state)
            next_states.append(next_state)
        states = np.array(states)
        next_states = np.array(next_states)
        current_q_values = self.model.predict(states, verbose=0)
        next_q_values = self.target_model.predict(next_states, verbose=0)

        for i, (state, action, reward, next_state, done) in enumerate(minibatch):
            target = reward
            if not done:
                target = (reward + self.gamma * np.amax(next_q_values[i]))
            current_q_values[i][action] = target

        # Set verbose=0 to hide progress bars
        self.model.fit(states, current_q_values, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay




    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

In [64]:
class FeatureSelectionEnv(gym.Env):
    metadata = {'render.modes': ['humanc']}

    def __init__(self, num_features, data_x, data_y, error_metric='rmse', debug=False, max_variables=None, ranker=True):
        super(FeatureSelectionEnv, self).__init__()
        self.num_features = num_features
        self.data_x = data_x
        self.data_y = data_y
        self.sale_price_mean = 180921.19
        self.action_space = spaces.Discrete(num_features + 1)  # Action: select a feature to toggle or END
        self.observation_space = spaces.MultiBinary(num_features + 1)  # State: binary vector of selected features
        self.state = None
        self.debug = debug
        self.error_metric = error_metric
        self.max_variables = max_variables
        
        if self.error_metric == 'rmse':
            self.scorer = make_scorer(mean_squared_error, squared=False, greater_is_better=False)
        else:
            raise ValueError("Unsupported error metric")
        self.num_cvs = 5
        self.previous_error = self.get_initial_error()

    def get_initial_error(self):
        predictions = np.full(self.data_y.shape, self.sale_price_mean)
        if self.error_metric == 'rmse':
            initial_error = mean_squared_error(self.data_y, predictions, squared=False)
        else:
            initial_error = 0
        print(initial_error)
        return initial_error

    def reset(self):
        self.state = np.zeros(self.num_features + 1, dtype=np.int8)  # Start with no features selected
        self.previous_error = self.get_initial_error()
        return self.state

    def step(self, action):
        done = False
        reward = 0
        
        # Check if the maximum number of variables has been reached
        if self.max_variables is not None and np.sum(self.state[:-1]) >= self.max_variables:
            done = True
            
            # Change to evalue_reward if not that good
            reward = self.evaluate_reward()  # Evaluate reward for reaching max variables
        else:
            all_features_selected = all(self.state[:-1])
            if action == self.num_features or all_features_selected:
                # Action is to end the sequence or all features selected
                done = True
                reward = self.evaluate_reward_with_bonus(action)
            else:
                if self.state[action] == 0:  # Ensure action is valid (feature not already included)
                    self.state[action] = 1
                    reward = self.evaluate_reward()  # Optionally, evaluate reward after each feature selection
                else:
                    raise ValueError("Shouldn't get to this because agent handles invalid action")
        
#         if reward == -1000:
#             done = True
        info = {}
        return self.state, reward, done, info

    def render(self, mode='human', close=False):
        pass
    
    
    def evaluate_reward_with_bonus(self, action):
        # Convert action from column name to column index using column_to_index mapping
        # action_index = column_to_index[action]
        
        # If done is first action
        

        selected_features_indices = [i for i, included in enumerate(self.state[:-1]) if included == 1]
        
        if len(selected_features_indices) == 0:
            return -10000
        
        X_selected = self.data_x.iloc[:, selected_features_indices]
        model = LinearRegression()
        scores = cross_val_score(model, X_selected, self.data_y, scoring=self.scorer, cv=self.num_cvs)
        current_error = -np.mean(scores)
        reward = self.previous_error - current_error
        self.previous_error = current_error

        # Define the bonus condition based on the action's column name
        bonus_column_name = 'YearRemodAdd'  # Replace with the actual column name for bonus
        bonus_reward = 10000  # Define the bonus reward amount

        # Apply the bonus reward if the action is on the bonus column
        if action == bonus_column_name:
            reward += bonus_reward

        if self.debug:
            print(f"Action: {action}, Reward: {reward}")
        return reward


    def evaluate_reward(self):
        selected_features_indices = [i for i, included in enumerate(self.state[:-1]) if included == 1]
        X_selected = self.data_x.iloc[:, selected_features_indices]
        model = LinearRegression()
        scores = cross_val_score(model, X_selected, self.data_y, scoring=self.scorer, cv=self.num_cvs)
        current_error = -np.mean(scores)
        reward = self.previous_error - current_error
        self.previous_error = current_error
        
        
#         if current_error > 500000:
#             reward = -1000
        if self.debug:
            print(reward)
        return reward
    
    def evaluate_final_reward(self):
        selected_features_indices = [i for i, included in enumerate(self.state[:-1]) if included == 1]
        X_selected = self.data_x.iloc[:, selected_features_indices]
        model = LinearRegression()
        scores = cross_val_score(model, X_selected, self.data_y, scoring=self.scorer, cv=self.num_cvs)
        current_error = -np.mean(scores)
        reward = self.get_initial_error() - current_error
        self.previous_error = current_error
        if current_error > 500000:
            reward = -1000
        if self.debug:
            print(reward)
        return reward * 4

In [65]:
env = FeatureSelectionEnv(num_features=237, data_x=data_x, data_y=data_y, error_metric='rmse', max_variables=75)
agent = DQNAgent(env.observation_space.shape[0], env.action_space.n, 32)

79415.29188606772


In [None]:
NUM_EPISODES = 1000  # Number of episodes to run
batch_size = 32
total_rewards_list = []  # List to store total rewards for each episode

for e in range(NUM_EPISODES):
    state = env.reset()
    state = np.reshape(state, [1, env.observation_space.shape[0]])
    total_reward = 0  # Initialize total reward for the episode

    for time in range(500):  # Assuming a max timestep per episode
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        total_reward += reward  # Accumulate reward
        next_state = np.reshape(next_state, [1, env.observation_space.shape[0]])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print(f"Episode: {e+1}, Total reward: {total_reward}, Exploration Rate: {agent.epsilon}")  # Print total reward
            total_rewards_list.append(total_reward)  # Append total reward to the list
            break
    
    if e > 10:
        agent.replay()

# Plotting total rewards
import matplotlib.pyplot as plt

plt.plot(total_rewards_list)
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.title('Total Reward per Episode')
plt.grid(True)
plt.show()


79415.29188606772
Episode: 1, Total reward: -2.97078165775032e+16, Exploration Rate: 1.0
79415.29188606772
Episode: 2, Total reward: -2.8363401034256797e+17, Exploration Rate: 1.0
79415.29188606772
Episode: 3, Total reward: -957909172001329.6, Exploration Rate: 1.0
79415.29188606772
Episode: 4, Total reward: -2.875780191601489e+17, Exploration Rate: 1.0
79415.29188606772
Episode: 5, Total reward: -8.934389201160922e+16, Exploration Rate: 1.0
79415.29188606772
Episode: 6, Total reward: -8792376516623334.0, Exploration Rate: 1.0
79415.29188606772
Episode: 7, Total reward: -1.2110034404734981e+17, Exploration Rate: 1.0
79415.29188606772
Episode: 8, Total reward: -1.4873888314847402e+17, Exploration Rate: 1.0
79415.29188606772
Episode: 9, Total reward: -4.546592137128651e+16, Exploration Rate: 1.0
79415.29188606772
Episode: 10, Total reward: 38797.973092485176, Exploration Rate: 1.0
79415.29188606772
Episode: 11, Total reward: 29817.473293203635, Exploration Rate: 1.0
79415.29188606772
Epi

Episode: 81, Total reward: -5.736860579557066e+16, Exploration Rate: 0.7076077347272662
79415.29188606772
Episode: 82, Total reward: -4.74531549011745e+16, Exploration Rate: 0.7040696960536299
79415.29188606772
Episode: 83, Total reward: -1.5607540937328435e+17, Exploration Rate: 0.7005493475733617
79415.29188606772
Episode: 84, Total reward: -2.88916975511379e+17, Exploration Rate: 0.697046600835495
79415.29188606772
Episode: 85, Total reward: -1.4732962170327754e+17, Exploration Rate: 0.6935613678313175
79415.29188606772
Episode: 86, Total reward: 35708.15120824413, Exploration Rate: 0.6900935609921609
79415.29188606772
Episode: 87, Total reward: -9300947414430372.0, Exploration Rate: 0.6866430931872001
79415.29188606772
Episode: 88, Total reward: -1.0933520539953202e+17, Exploration Rate: 0.6832098777212641
79415.29188606772
Episode: 89, Total reward: -2.5717666506165498e+17, Exploration Rate: 0.6797938283326578
79415.29188606772
Episode: 90, Total reward: -3.2505639423273696e+16, E

79415.29188606772
Episode: 159, Total reward: -7.91523160808957e+16, Exploration Rate: 0.47862223409330756
79415.29188606772
Episode: 160, Total reward: -1.0437015914188267e+17, Exploration Rate: 0.47622912292284103
79415.29188606772
Episode: 161, Total reward: -1.2957711921030787e+17, Exploration Rate: 0.4738479773082268
79415.29188606772
Episode: 162, Total reward: 39737.17691296288, Exploration Rate: 0.47147873742168567
79415.29188606772
Episode: 163, Total reward: 27253.0712999541, Exploration Rate: 0.46912134373457726
79415.29188606772
Episode: 164, Total reward: 30815.7361261653, Exploration Rate: 0.46677573701590436
79415.29188606772
Episode: 165, Total reward: -1.303780062108593e+17, Exploration Rate: 0.46444185833082485
79415.29188606772
Episode: 166, Total reward: -6.229784014631933e+16, Exploration Rate: 0.46211964903917074
79415.29188606772
Episode: 167, Total reward: -5.38973915238908e+16, Exploration Rate: 0.4598090507939749
79415.29188606772
Episode: 168, Total reward: -

Episode: 237, Total reward: -1.0476935253925613e+17, Exploration Rate: 0.3237376186352221
79415.29188606772
Episode: 238, Total reward: 19341.143656821318, Exploration Rate: 0.322118930542046
79415.29188606772
Episode: 239, Total reward: 42820.3301429265, Exploration Rate: 0.32050833588933575
79415.29188606772
Episode: 240, Total reward: 41403.277501067256, Exploration Rate: 0.31890579420988907
79415.29188606772
Episode: 241, Total reward: -1.3708523134066022e+17, Exploration Rate: 0.3173112652388396
79415.29188606772
Episode: 242, Total reward: -2.175581971509633e+16, Exploration Rate: 0.3157247089126454
79415.29188606772
Episode: 243, Total reward: -6.786192146927875e+16, Exploration Rate: 0.3141460853680822
79415.29188606772
Episode: 244, Total reward: 41317.22761480819, Exploration Rate: 0.3125753549412418
79415.29188606772
Episode: 245, Total reward: 44375.36312983198, Exploration Rate: 0.31101247816653554
79415.29188606772
Episode: 246, Total reward: 38672.159332977535, Explorati

In [None]:
import matplotlib.pyplot as plt

# Filter out rewards that are 0 or less
filtered_rewards = [reward for reward in total_rewards_list if reward]

plt.plot(filtered_rewards)
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.title('Total Reward per Episode (Positive Rewards Only)')
plt.grid(True)
plt.show()

In [50]:
for e in range(1):
    state = env.reset()
    state = np.reshape(state, [1, env.observation_space.shape[0]])
    total_reward = 0  # Initialize total reward for the episode
    agent.epsilon = 0 # Set exploration rate to 0
    for time in range(500):  # Assuming a max timestep per episode
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        total_reward += reward  # Accumulate reward
        next_state = np.reshape(next_state, [1, env.observation_space.shape[0]])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print(f"Episode: {e+1}, Total reward: {total_reward}")  # Print total reward
            total_rewards_list.append(total_reward)  # Append total reward to the list
            break


79415.29188606772
Episode: 1, Total reward: 45227.233410004046


In [51]:
for i, included in enumerate(env.state):
    if included == 1:
        print("Selected column:", data_x.columns[i])

Selected column: MSSubClass
Selected column: OverallQual
Selected column: MasVnrArea
Selected column: TotalBsmtSF
Selected column: GrLivArea
Selected column: HalfBath
Selected column: Fireplaces
Selected column: GarageYrBlt
Selected column: GarageArea
Selected column: MSZoning_RL
Selected column: LandContour_Low
Selected column: Neighborhood_NoRidge
Selected column: Exterior1st_CemntBd
Selected column: Exterior2nd_CmentBd
Selected column: ExterQual_Ex
Selected column: BsmtQual_Ex
Selected column: BsmtQual_Gd
Selected column: BsmtExposure_No
Selected column: BsmtFinType1_GLQ
Selected column: FireplaceQu_TA


In [15]:
# Base Old dataset


# Selected column: MSSubClass
# Selected column: LotArea
# Selected column: OverallQual
# Selected column: OverallCond
# Selected column: YearBuilt
# Selected column: TotalBsmtSF
# Selected column: GrLivArea
# Selected column: BedroomAbvGr
# Selected column: TotRmsAbvGrd
# Selected column: Fireplaces
# Selected column: GarageCars
# Selected column: ScreenPorch
# Selected column: Alley_Grvl
# Selected column: LandContour_Low
# Selected column: LotConfig_CulDSac
# Selected column: LotConfig_FR2
# Selected column: Neighborhood_BrkSide
# Selected column: Neighborhood_Crawfor
# Selected column: Neighborhood_Mitchel
# Selected column: Neighborhood_NAmes
# Selected column: Neighborhood_NoRidge
# Selected column: Neighborhood_NridgHt
# Selected column: Neighborhood_Somerst
# Selected column: Neighborhood_StoneBr
# Selected column: Condition1_Norm
# Selected column: BldgType_1Fam
# Selected column: HouseStyle_1Story
# Selected column: RoofStyle_Mansard
# Selected column: RoofMatl_ClyTile
# Selected column: RoofMatl_WdShngl
# Selected column: Exterior1st_BrkComm
# Selected column: Exterior2nd_AsbShng
# Selected column: Exterior2nd_CBlock
# Selected column: Exterior2nd_HdBoard
# Selected column: Exterior2nd_MetalSd
# Selected column: ExterCond_Ex
# Selected column: BsmtQual_Ex
# Selected column: BsmtExposure_Gd
# Selected column: BsmtExposure_No
# Selected column: BsmtFinType1_ALQ
# Selected column: BsmtFinType1_GLQ
# Selected column: BsmtFinType1_Unf
# Selected column: Heating_GasW
# Selected column: Electrical_FuseP
# Selected column: Electrical_SBrkr
# Selected column: KitchenQual_Ex
# Selected column: Functional_Sev
# Selected column: GarageFinish_RFn
# Selected column: GarageFinish_Unf
# Selected column: GarageCond_Po
# Selected column: PavedDrive_Y
# Selected column: Fence_GdPrv
# Selected column: MiscFeature_Shed
# Selected column: MiscFeature_TenC

# Num Vars 15

# Selected column: OverallQual
# Selected column: MasVnrArea
# Selected column: TotalBsmtSF
# Selected column: GrLivArea
# Selected column: GarageCars
# Selected column: Neighborhood_NWAmes
# Selected column: Neighborhood_NoRidge
# Selected column: Neighborhood_NridgHt
# Selected column: Exterior1st_CBlock
# Selected column: ExterQual_TA
# Selected column: Foundation_PConc
# Selected column: BsmtFinType1_GLQ
# Selected column: KitchenQual_Ex
# Selected column: KitchenQual_TA
# Selected column: GarageType_BuiltIn

# Possible
# Halfbalth, Fullbath

# Definitely
# Year remodeled - Use binary before 1990, and after 1990
# Screen porch
# Overall Condition

In [None]:
# New dataset(No Multi-collinearity)

# Selected column: MSSubClass
# Selected column: OverallQual
# Selected column: MasVnrArea
# Selected column: TotalBsmtSF
# Selected column: GrLivArea
# Selected column: HalfBath
# Selected column: Fireplaces
# Selected column: GarageYrBlt
# Selected column: GarageArea
# Selected column: MSZoning_RL
# Selected column: LandContour_Low
# Selected column: Neighborhood_NoRidge
# Selected column: Exterior1st_CemntBd
# Selected column: Exterior2nd_CmentBd
# Selected column: ExterQual_Ex
# Selected column: BsmtQual_Ex
# Selected column: BsmtQual_Gd
# Selected column: BsmtExposure_No
# Selected column: BsmtFinType1_GLQ
# Selected column: FireplaceQu_TA