In [1]:
import pandas as pd
import numpy as np

# Sample data generation
np.random.seed(42)  # For reproducibility

# Number of samples
num_samples = 200000  # Change this to any number between 10000 and 20000

# Generate data
data = {
    'Last_month_sales': np.random.randint(1000, 5000, size=num_samples),
    'Product_supervisor_review': np.random.choice(['positive', 'negative'], size=num_samples),
    'Product_supervisor_product_recommendation': np.random.choice(['more', 'less', 'same'], size=num_samples),
    'Product_seniority': np.random.choice(['new', 'old'], size=num_samples),
    'Direct_competitor_sales': np.random.randint(500, 4000, size=num_samples),
    'Product_market_position': np.random.uniform(0, 100, size=num_samples),  # percentage of market share
    'Product_competitor_seniority': np.random.choice(['new', 'old'], size=num_samples),
}

# Create DataFrame
df = pd.DataFrame(data)

# Calculate ratio of competitor sales to last month sales
df['Competitor_sales_ratio'] = df['Direct_competitor_sales'] / df['Last_month_sales']

# Product prediction logic
df['Product_prediction'] = 0

# Old products with positive reviews are predicted to perform better
df.loc[(df['Product_seniority'] == 'old') & (df['Product_supervisor_review'] == 'positive'), 'Product_prediction'] = \
    df['Last_month_sales'] + np.random.randint(500, 1000)

# New products with positive reviews are predicted based on their last month sales
df.loc[(df['Product_seniority'] == 'new') & (df['Product_supervisor_review'] == 'positive'), 'Product_prediction'] = \
    df['Last_month_sales'] + np.random.randint(100, 500)

# For negative reviews, you can add a logic (e.g., lower the prediction)
df.loc[df['Product_supervisor_review'] == 'negative', 'Product_prediction'] = \
    df['Last_month_sales'] - np.random.randint(100, 500)

# Display the first few rows of the dataset
print(df.head())


   Last_month_sales Product_supervisor_review  \
0              4174                  negative   
1              4507                  negative   
2              1860                  positive   
3              2294                  positive   
4              2130                  positive   

  Product_supervisor_product_recommendation Product_seniority  \
0                                      more               new   
1                                      same               new   
2                                      more               old   
3                                      more               new   
4                                      less               new   

   Direct_competitor_sales  Product_market_position  \
0                     2337                93.932467   
1                     1734                64.658595   
2                     2716                21.411452   
3                     2380                90.806186   
4                     3774             

In [32]:
df.shape

(200000, 9)

In [35]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Assuming your generated data (df) is already available

# 1. Handle Missing Values (if any) - precautionary step
df.fillna(0, inplace=True)
print(df.shape)
print("*")
# 2. Encoding Categorical Variables (One-hot encoding)
df_encoded = pd.get_dummies(df, columns=[
    'Product_supervisor_review', 
    'Product_supervisor_product_recommendation', 
    'Product_seniority', 
    'Product_competitor_seniority'], drop_first=True)
print(df_encoded.columns)
print('**')
# 3. Feature Scaling (MinMax Scaling for numerical columns)
num_columns = ['Last_month_sales', 'Direct_competitor_sales', 'Product_market_position', 'Competitor_sales_ratio']

# Initialize MinMaxScaler
scaler = MinMaxScaler()

# Apply the scaler to the selected columns
df_encoded[num_columns] = scaler.fit_transform(df_encoded[num_columns])
print(df_encoded.shape)
print('***')
# 4. Splitting Data into Train, Test, and Validation Sets
# Define the target variable (Product_prediction) and features
X = df_encoded.drop(columns=['Product_prediction'])  # Features
y = df_encoded['Product_prediction']  # Target variable
df_encoded = df_encoded.drop(columns=['Product_prediction'])
# First split into train and test (80% train, 20% test)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)

# Further split the temp set into validation and test (50% of 20% = 10% validation, 10% test)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# 5. Check the splits
print(f'Train set: {X_train.shape}, Validation set: {X_val.shape}, Test set: {X_test.shape}')
print(X.columns)

(200000, 9)
*
Index(['Last_month_sales', 'Direct_competitor_sales',
       'Product_market_position', 'Competitor_sales_ratio',
       'Product_prediction', 'Product_supervisor_review_positive',
       'Product_supervisor_product_recommendation_more',
       'Product_supervisor_product_recommendation_same',
       'Product_seniority_old', 'Product_competitor_seniority_old'],
      dtype='object')
**
(200000, 10)
***
Train set: (160000, 9), Validation set: (20000, 9), Test set: (20000, 9)
Index(['Last_month_sales', 'Direct_competitor_sales',
       'Product_market_position', 'Competitor_sales_ratio',
       'Product_supervisor_review_positive',
       'Product_supervisor_product_recommendation_more',
       'Product_supervisor_product_recommendation_same',
       'Product_seniority_old', 'Product_competitor_seniority_old'],
      dtype='object')


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor


# Define models
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42),
    'XGBoost': XGBRegressor(random_state=42),
    'Support Vector Regressor': SVR(),
    'Neural Network (MLP)': MLPRegressor(random_state=42),
    'Ridge Regression': Ridge(random_state=42),
    'Lasso Regression': Lasso(random_state=42),
    'ElasticNet Regression': ElasticNet(random_state=42),
    'KNN': KNeighborsRegressor(),
    'CatBoost': CatBoostRegressor(verbose=0, random_state=42),
    'LightGBM': LGBMRegressor(random_state=42),
}

# Results list to store evaluation metrics
results = []

# Function to evaluate models
def evaluate_model(model, X_train, X_val, y_train, y_val):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    
    mae = mean_absolute_error(y_val, y_pred)
    mse = mean_squared_error(y_val, y_pred)
    r2 = r2_score(y_val, y_pred)
    
    return mae, mse, r2

# Iterate through all models and evaluate them
for name, model in models.items():
    mae, mse, r2 = evaluate_model(model, X_train, X_val, y_train, y_val)
    results.append({
        'Model': name,
        'MAE': mae,
        'MSE': mse,
        'R² Score': r2
    })

# Display results
results_df = pd.DataFrame(results)
print(results_df)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014606 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1030
[LightGBM] [Info] Number of data points in the train set: 160000, number of used features: 9
[LightGBM] [Info] Start training from score 3168.329944
                       Model         MAE            MSE  R² Score
0          Linear Regression   66.000299    4356.666330  0.996944
1              Decision Tree    0.000150       0.000150  1.000000
2              Random Forest    0.003966       0.000216  1.000000
3          Gradient Boosting   17.361534     526.979218  0.999630
4                    XGBoost    4.228534      25.551703  0.999982
5   Support Vector Regressor   63.516701   10122.724675  0.992899
6       Neural Network (MLP)    0.003628       0.000016  1.000000
7           Ridge Regression   65.997601    4356.230732  0.996944
8           Lasso Regression   65.913722    4362.908886  0.99

In [4]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler

# Scaling the features for deep learning models
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Reshaping for LSTM/GRU (which expects 3D input)
X_train_dl = np.expand_dims(X_train_scaled, axis=1)
X_val_dl = np.expand_dims(X_val_scaled, axis=1)

# Common callback for early stopping
early_stopping = EarlyStopping(patience=10, restore_best_weights=True)

# Function to build and evaluate deep learning models
def evaluate_dl_model(model, X_train, X_val, y_train, y_val, epochs=50, batch_size=32):
    model.compile(optimizer='adam', loss='mse')
    history = model.fit(X_train, y_train, 
                        validation_data=(X_val, y_val),
                        epochs=epochs, batch_size=batch_size, 
                        callbacks=[early_stopping], verbose=0)
    
    # Predict on validation set
    y_pred = model.predict(X_val)
    
    # Calculate metrics
    mae = mean_absolute_error(y_val, y_pred)
    mse = mean_squared_error(y_val, y_pred)
    r2 = r2_score(y_val, y_pred)
    
    return mae, mse, r2

# 1. MLP Model
mlp_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

mlp_mae, mlp_mse, mlp_r2 = evaluate_dl_model(mlp_model, X_train_scaled, X_val_scaled, y_train, y_val)
results.append({
    'Model': 'MLP',
    'MAE': mlp_mae,
    'MSE': mlp_mse,
    'R² Score': mlp_r2
})

# 2. LSTM Model
lstm_model = Sequential([
    LSTM(64, return_sequences=False, input_shape=(1, X_train.shape[1])),
    Dropout(0.2),
    Dense(1)  # Output layer for regression
])

lstm_mae, lstm_mse, lstm_r2 = evaluate_dl_model(lstm_model, X_train_dl, X_val_dl, y_train, y_val)
results.append({
    'Model': 'LSTM',
    'MAE': lstm_mae,
    'MSE': lstm_mse,
    'R² Score': lstm_r2
})

# 3. GRU Model
gru_model = Sequential([
    GRU(64, return_sequences=False, input_shape=(1, X_train.shape[1])),
    Dropout(0.2),
    Dense(1)  # Output layer for regression
])

gru_mae, gru_mse, gru_r2 = evaluate_dl_model(gru_model, X_train_dl, X_val_dl, y_train, y_val)
results.append({
    'Model': 'GRU',
    'MAE': gru_mae,
    'MSE': gru_mse,
    'R² Score': gru_r2
})

# Display the results
results_df = pd.DataFrame(results)
print(results_df)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step


  super().__init__(**kwargs)


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


  super().__init__(**kwargs)


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
                       Model          MAE           MSE  R² Score
0          Linear Regression    66.000299  4.356666e+03  0.996944
1              Decision Tree     0.000150  1.500000e-04  1.000000
2              Random Forest     0.003966  2.156400e-04  1.000000
3          Gradient Boosting    17.361534  5.269792e+02  0.999630
4                    XGBoost     4.228534  2.555170e+01  0.999982
5   Support Vector Regressor    63.516701  1.012272e+04  0.992899
6       Neural Network (MLP)     0.003628  1.613551e-05  1.000000
7           Ridge Regression    65.997601  4.356231e+03  0.996944
8           Lasso Regression    65.913722  4.362909e+03  0.996940
9      ElasticNet Regression   853.708030  9.879477e+05  0.307000
10                       KNN    39.010250  2.454211e+03  0.998278
11                  CatBoost     3.997031  2.182557e+01  0.999985
12                  LightGBM     4.947838  3.692831e+01  0.999974
1

In [12]:

# Function to evaluate models
def evaluate_model(model, X_train, X_val, y_train, y_val):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    
    mae = mean_absolute_error(y_val, y_pred)
    mse = mean_squared_error(y_val, y_pred)
    r2 = r2_score(y_val, y_pred)
    print("mae",mae)
    print("Mse",mse)
    print("r2_score",r2)
    return mae, mse, r2,model
mlp_model = MLPRegressor(random_state=42)
mlp_mae, mlp_mse, mlp_r2,mlp_model = evaluate_model(mlp_model, X_train_scaled, X_val_scaled, y_train, y_val)


mae 1.3923856108918364
Mse 3.278912841819848
r2_score 0.9999976999934949




In [13]:
from sklearn.model_selection import GridSearchCV

# Define the hyperparameter grid
param_grid = {
    'hidden_layer_sizes': [(50, 50), (100,), (100, 50)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'learning_rate': ['constant', 'adaptive'],
    'max_iter': [500, 1000]
}

# Create the MLP model
mlp_model = MLPRegressor(random_state=42)

# Perform GridSearchCV
grid_search = GridSearchCV(estimator=mlp_model, param_grid=param_grid, 
                           scoring='neg_mean_absolute_error', cv=3, verbose=2)

# Fit the model on the training data
grid_search.fit(X_train_scaled, y_train)

# Get the best parameters
best_params = grid_search.best_params_

# Display the best parameters
print(f"Best parameters found: {best_params}")

# Use the best model for predictions
best_mlp_model = grid_search.best_estimator_
mlp_results = evaluate_model(best_mlp_model, X_train_scaled, X_val_scaled, y_train, y_val)


Fitting 3 folds for each of 48 candidates, totalling 144 fits
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=adam; total time= 6.2min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=adam; total time= 4.1min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=adam; total time= 3.8min


  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=sgd; total time= 4.4min




[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=sgd; total time= 3.8min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=sgd; total time=   6.4s
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=1000, solver=adam; total time= 6.3min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=1000, solver=adam; total time= 4.3min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=1000, solver=adam; total time= 3.8min


  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time= 8.9min




[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time= 7.7min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=   6.4s
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=adam; total time= 6.1min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=adam; total time= 4.0min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=adam; total time= 3.7min


  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 4.3min




[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 3.8min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time=  38.3s
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 6.0min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 3.9min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 3.7min


  ret = a @ b
  ret = a @ b
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time= 8.7min




[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time= 7.4min
[CV] END activation=relu, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=  34.8s




[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=adam; total time= 6.6min




[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=adam; total time= 6.2min


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=adam; total time= 6.7min


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time= 2.7min


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time= 2.8min




[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time= 3.0min
[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=adam; total time=11.2min
[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=adam; total time=10.2min
[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=adam; total time= 9.3min


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=sgd; total time= 5.4min


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=sgd; total time= 5.4min




[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=1000, solver=sgd; total time= 5.6min




[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=adam; total time= 6.7min




[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=adam; total time= 6.2min


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=adam; total time= 6.7min


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 2.7min


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 2.7min




[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 2.8min
[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=11.2min
[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=adam; total time=10.2min
[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 9.1min


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b
  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time= 5.4min


  return ((y_true - y_pred) ** 2).mean() / 2
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time= 5.5min




[CV] END activation=relu, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time= 5.8min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=adam; total time=11.0min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=adam; total time=11.5min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=adam; total time=10.9min


  ret = a @ b
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=sgd; total time= 5.2min


  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=sgd; total time= 5.5min


  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=sgd; total time= 5.3min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=10.8min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=11.1min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time=11.0min


  ret = a @ b
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=10.4min


  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=11.1min


  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=sgd; total time=10.4min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=500, solver=adam; total time=10.8min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=500, solver=adam; total time=11.2min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=500, solver=adam; total time=11.0min


  ret = a @ b
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 5.3min


  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 5.6min


  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 5.2min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=10.7min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=11.2min
[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time=11.0min


  ret = a @ b
  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=10.5min


  ret = a @ b
  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=11.0min


  ret = a @ b


[CV] END activation=relu, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=10.4min
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=adam; total time= 3.3min
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=adam; total time= 3.5min
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=adam; total time= 3.5min
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=sgd; total time=  31.8s
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=sgd; total time=  12.3s
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=500, solver=sgd; total time=  20.6s
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=constant, max_iter=1000, solver=adam; total time= 3.4min
[



[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 5.0min
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 4.5min
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 3.3min
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 3.3min
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 3.4min
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time= 4.4min




[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=10.1min
[CV] END activation=tanh, hidden_layer_sizes=(50, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time= 4.5min
[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=adam; total time= 3.3min
[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=adam; total time= 3.5min
[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=adam; total time= 3.0min
[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time= 2.6min
[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time= 1.3min
[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=constant, max_iter=500, solver=sgd; total time= 2.1min
[CV] END activa



[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 4.1min




[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 4.0min




[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 4.4min
[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 4.4min
[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 3.5min
[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 3.1min




[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time= 8.1min




[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time= 8.1min




[CV] END activation=tanh, hidden_layer_sizes=(100,), learning_rate=adaptive, max_iter=1000, solver=sgd; total time= 8.0min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=adam; total time= 4.1min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=adam; total time= 4.2min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=adam; total time= 4.0min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=sgd; total time= 1.1min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=sgd; total time= 1.3min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=500, solver=sgd; total time= 1.1min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=constant, max_iter=1000, solver=adam; total time= 4.1m



[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 6.7min




[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 6.6min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=500, solver=sgd; total time= 4.4min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 4.1min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 4.2min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=adam; total time= 4.1min




[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=13.4min




[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time=13.2min
[CV] END activation=tanh, hidden_layer_sizes=(100, 50), learning_rate=adaptive, max_iter=1000, solver=sgd; total time= 4.6min


16 fits failed out of a total of 144.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
16 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\khali\anaconda3\envs\F_pi\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\khali\anaconda3\envs\F_pi\Lib\site-packages\sklearn\base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\khali\anaconda3\envs\F_pi\Lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py", line 751, in fit
    return self._fit(X, y, incremental=False)
           ^^^^^^^^^^^^^^^^^^

Best parameters found: {'activation': 'tanh', 'hidden_layer_sizes': (100,), 'learning_rate': 'constant', 'max_iter': 500, 'solver': 'adam'}
mae 0.2575953266595813
Mse 0.1169574201806161
r2_score 0.9999999179597506


In [14]:
mlp_results


(0.2575953266595813,
 0.1169574201806161,
 0.9999999179597506,
 MLPRegressor(activation='tanh', max_iter=500, random_state=42))

In [15]:
model =MLPRegressor(activation='tanh',max_iter=1000,random_state=42)
model.fit(X_train_scaled,y_train)

In [18]:
rslt = evaluate_model(model,X_train_scaled,X_val_scaled,y_train,y_val)

mae 0.2575953266595813
Mse 0.1169574201806161
r2_score 0.9999999179597506


In [21]:
import joblib 
joblib.dump(model, 'models_saved/mlp_regressor_model.pkl')

['models_saved/mlp_regressor_model.pkl']