In [51]:
import os

# Keras picks its backend once, at first import, so the environment variable
# has to be set before any `keras` import below.
os.environ["KERAS_BACKEND"] = "torch"

import pandas as pd
import torch
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.utils import set_random_seed
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

In [52]:
# Read the competition train / test CSV files
train_data = pd.read_csv('../../2nd-Comp-Data/train.csv')
test_data = pd.read_csv('../../2nd-Comp-Data/test.csv')

# Running log of the preprocessing / modelling steps applied in this notebook
used = list()

# Target column on one side, every remaining column as predictors on the other;
# the test frame only sheds its 'row ID' column.
y = train_data['price_doc']
X = train_data.drop(columns='price_doc')
X_test = test_data.drop(columns=['row ID'])

In [53]:
# Discard the sub_area column from both feature frames and log the step
X = X.drop(columns=['sub_area'])
X_test = X_test.drop(columns=['sub_area'])
used.append('Removed sub_area')

In [54]:
# One-hot encode the categorical columns of each frame.
X = pd.get_dummies(X)
X_test = pd.get_dummies(X_test)

# The two frames are encoded independently, so a category that appears in only
# one of them produces a column the other lacks. Re-index the test frame onto
# the training columns (same set, same order): dummy columns missing from the
# test data become all-zero columns and test-only columns are dropped. Without
# this the scaler / model fitted on X would receive mismatched features.
X_test = X_test.reindex(columns=X.columns, fill_value=0)
used.append('OneHot Encoding')

In [55]:
# # drop all columns in X_train with dtypes object
# for col in X.columns:
#     if X[col].dtype == 'object':
#         X.drop(col, axis=1, inplace=True)

# # drop all columns in X_test with dtypes object
# for col in X_test.columns:
#     if X_test[col].dtype == 'object':
#         X_test.drop(col, axis=1, inplace=True)

# used.append("Removed Object Dtypes")

In [56]:
# Cast every column of both frames to a single 32-bit float dtype
target_dtype = 'float32'
X = X.astype(target_dtype)
X_test = X_test.astype(target_dtype)
used.append("Converted All Columns To float32")

In [57]:
# Hold out 30% of the rows for validation; the fixed random_state keeps the
# partition identical between runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [58]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transformation to the training, validation and test features.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
used.append("StandardScaler")

<h1>With Keras</h1>

In [59]:
# Number of input features after encoding; reported in the run summary
features_used = X_train.shape[1]

# Seed Python, NumPy and the active Keras backend so weight initialisation,
# dropout masks and batch shuffling repeat on a fresh "Restart & Run All".
set_random_seed(42)

# NOTE: the Keras backend is fixed when `keras` is first imported, so
# KERAS_BACKEND is set in the imports cell; assigning it here has no effect.

# Alternative optimizer: SGD(learning_rate=0.01, momentum=0.9)
# `learning_rate` is the supported keyword; the legacy `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizerUsing = Adam(learning_rate=0.01, epsilon=1e-07)

# Build the neural network: two ReLU hidden layers, each followed by dropout
model = Sequential()
model.add(Dense(100, input_dim=features_used, activation='relu'))
model.add(Dropout(0.3))  # dropout is used together with early stopping below
model.add(Dense(50, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='linear'))  # Output layer with linear activation for regression

# Compile the model
model.compile(loss='mean_squared_error', optimizer=optimizerUsing)

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model, monitoring the held-out validation split
model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=1800,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

# Record the configuration of this run in the step log
used.append("Keras With Torch Backend")
used.append("2 Hidden Layers: Hidden Layer 1 with 100 neurons relu activation, Hidden Layer 2 with 50 neurons relu activation, Dropout 0.3, Output linear")
used.append("Optimizer: Adam(lr=0.01, epsilon=1e-07)")
used.append("Loss Calculation: Mean Squared Error")
used.append("EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)")
used.append("batch_size=1800, epochs=100, early_stopping, dropout=0.3 (used in hidden layers only)")

# Make predictions on the scaled test features
predictions = model.predict(X_test)



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<h1>With Scikit-learn</h1>

In [60]:
# features_used = X_train.shape[1]

# # Build the neural network using MLPRegressor from scikit-learn
# model = MLPRegressor(hidden_layer_sizes=(140), activation='relu', solver='adam', max_iter=100,
#                      batch_size=1800, early_stopping=True, random_state=42)

# # Train the model
# model.fit(X_train, y_train)

# used.append("MLPRegressor")
# used.append("1 Hidden Layer: Hidden Layer 1 with 140 neurons relu activation, Output linear")
# # used.append("2 Hidden Layers: Hidden Layer 1 with 128 neurons relu activation, \nHidden Layer 2 with 64 neurons relu activation, Output linear")
# used.append("Adam Optimizer")
# # used.append("Loss Calculation: Mean Squared Error")
# used.append("EarlyStopping")
# used.append("batch_size=1800, epochs=100, early_stopping, dropout=0.3 (used in hidden layers only)")

# # Make predictions
# predictions = model.predict(X_test)

In [61]:
# Pair each test-row identifier with its predicted price
submission_df = pd.DataFrame(
    {
        'row ID': test_data['row ID'],
        'price_doc': predictions.flatten(),
    }
)

# Write the submission file without the DataFrame index column
submission_df.to_csv('Day7.5.csv', index=False)

In [62]:
# Report the feature count, then every logged step on its own line
print(f"Features Used = {features_used}\n")
for step in used:
    print(step)

Features Used = 287

Removed sub_area
OneHot Encoding
Converted All Columns To float32
StandardScaler
Keras With Torch Backend
2 Hidden Layers: Hidden Layer 1 with 100 neurons relu activation, Hidden Layer 2 with 50 neurons relu activation, Dropout 0.3, Output linear
Optimizer: Adam(lr=0.01, epsilon=1e-07)
Loss Calculation: Mean Squared Error
EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
batch_size=1800, epochs=100, early_stopping, dropout=0.3 (used in hidden layers only)
