In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from lazypredict.Supervised import LazyRegressor
from sklearn.preprocessing import OrdinalEncoder, StandardScaler, OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.metrics import make_scorer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
knn = KNeighborsRegressor()

In [2]:
df = pd.read_csv("data/cleaned_train.csv")
df.head()

Unnamed: 0,id,Place Code,Promotion Name,Store Kind,Store Sales,Store Cost,Is Recyclable?,Store Area,Grocery Area,Frozen Area,...,Department,Bar For Salad,Florist,Coffee Bar,Ready Food,Video Store,Gross Weight,Net Weight,Package Weight,Min. Person Yearly Income
0,mc_ID_0,H11go_ZA,Dimes Off,Deluxe,8760000.0,4292400.0,yes,2842.23,2037.64,481.98,...,Household,1,1,1,1,1,28.2,26.6,1.6,10000.0
1,mc_ID_1,S04ne_WA,Budget Bargains,Supermarket,6360000.0,1971600.0,no,2814.95,2049.72,457.36,...,Snack Foods,0,0,0,0,0,16.57,14.97,1.6,50000.0
2,mc_ID_2,L05es_CA,Shelf Emptiers,Supermarket,10860000.0,4452600.0,yes,2192.32,1322.21,523.32,...,Periodicals,0,1,0,0,0,28.64,27.18,1.45,30000.0
3,mc_ID_4,M10da_YU,Sale Winners,Deluxe,11560000.0,4970800.0,no,2862.3,1872.19,593.93,...,Produce,1,1,1,1,1,12.62,9.71,2.91,50000.0
4,mc_ID_5,S03le_WA,Weekend Discount,Supermarket,5220000.0,1618200.0,yes,1970.17,1236.07,440.92,...,Household,0,0,1,0,0,15.41,13.95,1.45,30000.0


In [3]:
target = "Cost"
X = df.drop(columns=target)
y = df[target]

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
ord_categorical_columns = ['Place Code', 'Promotion Name', 'Store Kind', 'Is Recyclable?',
                            'Children', 'Degree', 'Work', 'Oreder Brand',
                            'Product', 'Department']

hot_categorical_columns = ['Marriage', 'Gender']

all_categorical_columns = X.select_dtypes("object").columns

numeric_columns = X.select_dtypes("float").columns

In [6]:
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OrdinalEncoder(), ord_categorical_columns),
        ('ohe', OneHotEncoder(), hot_categorical_columns),
        ('num', StandardScaler(), numeric_columns)
    ]
)

In [7]:
model = make_pipeline(
    preprocessor,
    DecisionTreeRegressor(max_depth=20, random_state=42)
)
model.fit(X_train, y_train)

In [8]:
# Check if the model fitted well or not
predictions = model.predict(X_train)

mse = mean_squared_error(y_train, predictions)
rmse = np.sqrt(mse)
rmse

16.24279568376655

In [9]:
predictions = model.predict(X_test)

mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
rmse

66.13992962598071

In [10]:
rfmodel = make_pipeline(
    preprocessor,
    RandomForestRegressor(n_estimators=100, random_state=42)
)
rfmodel.fit(X_train, y_train)

In [11]:
# Check if the model fitted well or not
predictions = rfmodel.predict(X_train)

mse = mean_squared_error(y_train, predictions)
rmse = np.sqrt(mse)
rmse

19.280206350134836

In [12]:
predictions = rfmodel.predict(X_test)

mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
rmse

53.03544924733378

In [13]:
def rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred)**2))

rmse_scorer = make_scorer(rmse, greater_is_better=False)

scores = cross_val_score(rfmodel, X_train, y_train, cv=5, scoring=rmse_scorer)

positive_scores = -scores

print("Cross-validation RMSE scores:", positive_scores)

Cross-validation RMSE scores: [53.65738053 56.10354203 52.34148074 52.18038972 50.51044931]


In [14]:
preprocessor.fit(X_train)

X_train_transformed = preprocessor.transform(X_train)
X_test_transformed = preprocessor.transform(X_test)

## Try Lazypredict

In [15]:
reg = LazyRegressor(verbose=1,ignore_warnings=False, custom_metric=None)
models,predictions = reg.fit(X_train_transformed, X_test_transformed, y_train, y_test)

  2%|▏         | 1/42 [00:00<00:30,  1.35it/s]

{'Model': 'AdaBoostRegressor', 'R-Squared': 0.1229879692040099, 'Adjusted R-Squared': 0.11924272209740783, 'RMSE': 147.04110042057997, 'Time taken': 0.7375996112823486}


  5%|▍         | 2/42 [00:05<01:57,  2.93s/it]

{'Model': 'BaggingRegressor', 'R-Squared': 0.8722005560563311, 'Adjusted R-Squared': 0.8716547933063937, 'RMSE': 56.130699031326074, 'Time taken': 4.461700439453125}


  7%|▋         | 3/42 [00:06<01:17,  1.99s/it]

{'Model': 'BayesianRidge', 'R-Squared': 0.016696374049479545, 'Adjusted R-Squared': 0.012497212657520018, 'RMSE': 155.69683667700198, 'Time taken': 0.8770828247070312}


 14%|█▍        | 6/42 [00:06<00:25,  1.39it/s]

{'Model': 'DecisionTreeRegressor', 'R-Squared': 0.8071444327947982, 'Adjusted R-Squared': 0.8063208503013952, 'RMSE': 68.95282418809465, 'Time taken': 0.6283266544342041}
{'Model': 'DummyRegressor', 'R-Squared': -3.504148029964682e-06, 'Adjusted R-Squared': -0.004273981745815281, 'RMSE': 157.01340419741697, 'Time taken': 0.04151344299316406}
{'Model': 'ElasticNet', 'R-Squared': 0.01438935405967523, 'Adjusted R-Squared': 0.010180340625054574, 'RMSE': 155.87937707768427, 'Time taken': 0.08991837501525879}


 19%|█▉        | 8/42 [00:07<00:16,  2.03it/s]

{'Model': 'ElasticNetCV', 'R-Squared': 0.016740751681517674, 'Adjusted R-Squared': 0.012541779802577535, 'RMSE': 155.69332324803412, 'Time taken': 0.3743155002593994}
{'Model': 'ExtraTreeRegressor', 'R-Squared': 0.7436033515424032, 'Adjusted R-Squared': 0.7425084192358227, 'RMSE': 79.50457871545507, 'Time taken': 0.15826964378356934}


 21%|██▏       | 9/42 [00:20<02:14,  4.08s/it]

{'Model': 'ExtraTreesRegressor', 'R-Squared': 0.8952187312382067, 'Adjusted R-Squared': 0.8947712667452739, 'RMSE': 50.8250477268684, 'Time taken': 12.974992275238037}


 24%|██▍       | 10/42 [00:20<01:35,  3.00s/it]

{'Model': 'GammaRegressor', 'R-Squared': 0.012413960599017226, 'Adjusted R-Squared': 0.008196511320436528, 'RMSE': 156.03550818807867, 'Time taken': 0.3683173656463623}


 26%|██▌       | 11/42 [04:21<37:34, 72.71s/it]

{'Model': 'GaussianProcessRegressor', 'R-Squared': -5.9800688849087935, 'Adjusted R-Squared': -6.009877008260718, 'RMSE': 414.82586074184235, 'Time taken': 240.5233428478241}


 29%|██▊       | 12/42 [04:31<27:07, 54.24s/it]

{'Model': 'GradientBoostingRegressor', 'R-Squared': 0.4747478194542283, 'Adjusted R-Squared': 0.4725047496440684, 'RMSE': 113.79414071212821, 'Time taken': 10.068176031112671}


 31%|███       | 13/42 [04:32<18:34, 38.42s/it]

{'Model': 'HistGradientBoostingRegressor', 'R-Squared': 0.8243723247407336, 'Adjusted R-Squared': 0.8236223133161389, 'RMSE': 65.8009926167201, 'Time taken': 0.9436259269714355}


 33%|███▎      | 14/42 [04:32<12:38, 27.09s/it]

{'Model': 'HuberRegressor', 'R-Squared': 0.014051531776564707, 'Adjusted R-Squared': 0.009841075684507317, 'RMSE': 155.906088952056, 'Time taken': 0.348583459854126}


 36%|███▌      | 15/42 [04:33<08:38, 19.20s/it]

{'Model': 'KNeighborsRegressor', 'R-Squared': -0.03713842973321135, 'Adjusted R-Squared': -0.041567490643104055, 'RMSE': 159.9021606324316, 'Time taken': 0.667849063873291}


 38%|███▊      | 16/42 [05:55<16:25, 37.90s/it]

{'Model': 'KernelRidge', 'R-Squared': -10.950200556348053, 'Adjusted R-Squared': -11.001233441286194, 'RMSE': 542.7796612976816, 'Time taken': 81.74884557723999}


 40%|████      | 17/42 [05:55<11:06, 26.64s/it]

{'Model': 'Lars', 'R-Squared': 0.016506461702418318, 'Adjusted R-Squared': 0.012306489296877055, 'RMSE': 155.7118713638147, 'Time taken': 0.27907228469848633}


 45%|████▌     | 19/42 [05:55<05:03, 13.19s/it]

{'Model': 'LarsCV', 'R-Squared': 0.01704923533993974, 'Adjusted R-Squared': 0.012851580828935893, 'RMSE': 155.66889804495918, 'Time taken': 0.3270723819732666}
{'Model': 'Lasso', 'R-Squared': 0.016594826256287054, 'Adjusted R-Squared': 0.012395231208271129, 'RMSE': 155.70487603614566, 'Time taken': 0.15937352180480957}


 48%|████▊     | 20/42 [05:56<03:25,  9.35s/it]

{'Model': 'LassoCV', 'R-Squared': 0.017012684028474556, 'Adjusted R-Squared': 0.012814873426460927, 'RMSE': 155.6717923147885, 'Time taken': 0.36914515495300293}
{'Model': 'LassoLars', 'R-Squared': 0.016594823042923257, 'Adjusted R-Squared': 0.012395227981184864, 'RMSE': 155.7048762905354, 'Time taken': 0.05033278465270996}


 52%|█████▏    | 22/42 [05:56<01:41,  5.10s/it]

{'Model': 'LassoLarsCV', 'R-Squared': 0.017018619847127048, 'Adjusted R-Squared': 0.01282083459380512, 'RMSE': 155.671322298081, 'Time taken': 0.21400809288024902}


 60%|█████▉    | 25/42 [05:57<00:39,  2.33s/it]

{'Model': 'LassoLarsIC', 'R-Squared': 0.01714137829930784, 'Adjusted R-Squared': 0.012944117281368905, 'RMSE': 155.6616015812756, 'Time taken': 0.2951622009277344}
{'Model': 'LinearRegression', 'R-Squared': 0.016506461702418096, 'Adjusted R-Squared': 0.012306489296876832, 'RMSE': 155.7118713638147, 'Time taken': 0.06548881530761719}
{'Model': 'LinearSVR', 'R-Squared': 0.010171962685455394, 'Adjusted R-Squared': 0.005944939038560526, 'RMSE': 156.2125221139118, 'Time taken': 0.13020992279052734}


 62%|██████▏   | 26/42 [06:17<01:42,  6.39s/it]

{'Model': 'MLPRegressor', 'R-Squared': 0.08731012415470962, 'Adjusted R-Squared': 0.08341251614398237, 'RMSE': 150.00218500043573, 'Time taken': 20.107502222061157}


 67%|██████▋   | 28/42 [07:05<02:53, 12.36s/it]

{'Model': 'NuSVR', 'R-Squared': 0.02547180780149738, 'Adjusted R-Squared': 0.021310121571468166, 'RMSE': 155.00052614784155, 'Time taken': 48.05647850036621}
{'Model': 'OrthogonalMatchingPursuit', 'R-Squared': 0.014820330442103402, 'Adjusted R-Squared': 0.010613157476019874, 'RMSE': 155.84529278898992, 'Time taken': 0.16691970825195312}


 71%|███████▏  | 30/42 [07:05<01:18,  6.57s/it]

{'Model': 'OrthogonalMatchingPursuitCV', 'R-Squared': 0.016798998235654672, 'Adjusted R-Squared': 0.012600275096447455, 'RMSE': 155.68871167997472, 'Time taken': 0.12366819381713867}
{'Model': 'PassiveAggressiveRegressor', 'R-Squared': -0.06439319981599412, 'Adjusted R-Squared': -0.06893865120310871, 'RMSE': 161.98955573181004, 'Time taken': 0.16379332542419434}
{'Model': 'PoissonRegressor', 'R-Squared': 0.016442369775476195, 'Adjusted R-Squared': 0.012242123667755789, 'RMSE': 155.71694496669517, 'Time taken': 0.06905078887939453}


 76%|███████▌  | 32/42 [08:15<03:11, 19.10s/it]

QuantileRegressor model failed to execute
Unable to allocate 7.61 GiB for an array with shape (22580, 45210) and data type float64


 79%|███████▊  | 33/42 [08:16<02:13, 14.80s/it]

{'Model': 'RANSACRegressor', 'R-Squared': -1.3665839682302154, 'Adjusted R-Squared': -1.376690376635469, 'RMSE': 241.5443747451886, 'Time taken': 1.138451099395752}


 81%|████████  | 34/42 [08:58<02:54, 21.86s/it]

{'Model': 'RandomForestRegressor', 'R-Squared': 0.8858349484275655, 'Adjusted R-Squared': 0.8853474108407793, 'RMSE': 53.05209727756488, 'Time taken': 42.41703820228577}


 86%|████████▌ | 36/42 [08:59<01:09, 11.66s/it]

{'Model': 'Ridge', 'R-Squared': 0.0165068418332045, 'Adjusted R-Squared': 0.012306871050997481, 'RMSE': 155.71184127165878, 'Time taken': 0.23682856559753418}
{'Model': 'RidgeCV', 'R-Squared': 0.016510236875666662, 'Adjusted R-Squared': 0.012310280591861678, 'RMSE': 155.71157251088255, 'Time taken': 0.10702371597290039}


 88%|████████▊ | 37/42 [08:59<00:42,  8.44s/it]

{'Model': 'SGDRegressor', 'R-Squared': 0.013561621981421412, 'Adjusted R-Squared': 0.009349073747890069, 'RMSE': 155.9448183769587, 'Time taken': 0.2843892574310303}


 95%|█████████▌| 40/42 [09:52<00:23, 11.74s/it]

{'Model': 'SVR', 'R-Squared': 0.0307917171581934, 'Adjusted R-Squared': 0.026652749402285347, 'RMSE': 154.57687643348802, 'Time taken': 53.21755623817444}
{'Model': 'TransformedTargetRegressor', 'R-Squared': 0.016506461702418096, 'Adjusted R-Squared': 0.012306489296876832, 'RMSE': 155.7118713638147, 'Time taken': 0.044991493225097656}
{'Model': 'TweedieRegressor', 'R-Squared': 0.012382814440567103, 'Adjusted R-Squared': 0.008165232153480528, 'RMSE': 156.03796866653374, 'Time taken': 0.1330564022064209}


 98%|█████████▊| 41/42 [09:55<00:09,  9.41s/it]

{'Model': 'XGBRegressor', 'R-Squared': 0.8527332424729093, 'Adjusted R-Squared': 0.8521043452877402, 'RMSE': 60.25433963077798, 'Time taken': 2.1507630348205566}
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2080
[LightGBM] [Info] Number of data points in the train set: 22580, number of used features: 24
[LightGBM] [Info] Start training from score 519.389129


100%|██████████| 42/42 [09:55<00:00, 14.18s/it]

{'Model': 'LGBMRegressor', 'R-Squared': 0.8287743411010376, 'Adjusted R-Squared': 0.8280431283228215, 'RMSE': 64.97112573940814, 'Time taken': 0.6167998313903809}





In [16]:
print(models)

                               Adjusted R-Squared  R-Squared   RMSE  \
Model                                                                 
ExtraTreesRegressor                          0.89       0.90  50.83   
RandomForestRegressor                        0.89       0.89  53.05   
BaggingRegressor                             0.87       0.87  56.13   
XGBRegressor                                 0.85       0.85  60.25   
LGBMRegressor                                0.83       0.83  64.97   
HistGradientBoostingRegressor                0.82       0.82  65.80   
DecisionTreeRegressor                        0.81       0.81  68.95   
ExtraTreeRegressor                           0.74       0.74  79.50   
GradientBoostingRegressor                    0.47       0.47 113.79   
AdaBoostRegressor                            0.12       0.12 147.04   
MLPRegressor                                 0.08       0.09 150.00   
SVR                                          0.03       0.03 154.58   
NuSVR 

In [17]:
from sklearn.ensemble import ExtraTreesRegressor

In [18]:
exmodel = make_pipeline(
    preprocessor,
    ExtraTreesRegressor(random_state=42)
)
exmodel.fit(X_train, y_train)

In [19]:
# Check if the model fitted well or not
predictions = exmodel.predict(X_train)

mse = mean_squared_error(y_train, predictions)
rmse = np.sqrt(mse)
rmse

8.697849117431592e-13

In [20]:
predictions = exmodel.predict(X_test)

mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
rmse

50.8250477268684

In [75]:
from sklearn.model_selection import KFold, cross_val_score


In [76]:
kf=KFold(n_splits=5,shuffle=True,random_state=42)

In [85]:
scores=cross_val_score(exmodel,X,y,cv=kf,scoring='neg_mean_squared_error')

In [78]:
print(scores)

[-2550.529925   -2551.33726009 -2446.87376554 -2239.40367173
 -2379.45890615]


In [96]:
X_train, X_test, y_train, y_test = train_test_split(df.drop('Cost', axis=1), df['Cost'], test_size=0.2, random_state=30)

# Define the hyperparameters you want to tune
param_grid = {'n_estimators': [50, 100, 150], 'max_depth': [5, 10, 15], 'min_samples_split': [2, 5, 10]}

etr = ExtraTreesRegressor()

grid_search = GridSearchCV(etr, param_grid, cv=5, scoring='neg_mean_squared_error')

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_

# Train your model with the best hyperparameters
etr = ExtraTreesRegressor(**best_params)
etr.fit(X_train, y_train)

# Get RMSE from training data
y_train_pred = etr.predict(X_train)
rmse = mean_squared_error(y_train, y_train_pred, squared=False)
print(rmse)

ValueError: 
All the 135 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
27 fits failed with the following error:
Traceback (most recent call last):
  File "E:\ana\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "E:\ana\Lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\sklearn\ensemble\_forest.py", line 348, in fit
    X, y = self._validate_data(
           ^^^^^^^^^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\sklearn\base.py", line 621, in _validate_data
    X, y = check_X_y(X, y, **check_params)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\sklearn\utils\validation.py", line 1147, in check_X_y
    X = check_array(
        ^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\sklearn\utils\validation.py", line 917, in check_array
    array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\sklearn\utils\_array_api.py", line 380, in _asarray_with_order
    array = numpy.asarray(array, order=order, dtype=dtype)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\pandas\core\generic.py", line 2070, in __array__
    return np.asarray(self._values, dtype=dtype)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: could not convert string to float: 'no'

--------------------------------------------------------------------------------
108 fits failed with the following error:
Traceback (most recent call last):
  File "E:\ana\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "E:\ana\Lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\sklearn\ensemble\_forest.py", line 348, in fit
    X, y = self._validate_data(
           ^^^^^^^^^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\sklearn\base.py", line 621, in _validate_data
    X, y = check_X_y(X, y, **check_params)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\sklearn\utils\validation.py", line 1147, in check_X_y
    X = check_array(
        ^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\sklearn\utils\validation.py", line 917, in check_array
    array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\sklearn\utils\_array_api.py", line 380, in _asarray_with_order
    array = numpy.asarray(array, order=order, dtype=dtype)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\ana\Lib\site-packages\pandas\core\generic.py", line 2070, in __array__
    return np.asarray(self._values, dtype=dtype)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: could not convert string to float: 'yes'


## Extract submissions

In [21]:
samples = pd.read_csv('data/sample_submission.csv')
samples.head()

Unnamed: 0,ID,Cost
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


In [22]:
test = pd.read_csv("data/cleaned_test.csv")
test.head()

Unnamed: 0,id,Place Code,Promotion Name,Store Kind,Store Sales,Store Cost,Is Recyclable?,Store Area,Grocery Area,Frozen Area,...,Department,Bar For Salad,Florist,Coffee Bar,Ready Food,Video Store,Gross Weight,Net Weight,Package Weight,Min. Person Yearly Income
0,0,B06ls_CA,Fantastic Discounts,Gourmet,11760000.0,4704000.0,yes,2201.06,1424.85,465.54,...,Snacks,1,1,1,1,1,31.83,28.78,3.05,50000.0
1,1,S01em_OR,Bag Stuffer,Deluxe,2160000.0,669600.0,no,2577.16,1735.17,505.07,...,Frozen Foods,1,1,1,1,1,29.94,27.04,2.91,70000.0
2,2,H11go_ZA,Pick Your Savings,Deluxe,1830000.0,823500.0,no,2837.58,2038.11,481.98,...,Dairy,1,1,1,1,1,29.22,26.31,2.91,130000.0
3,3,M10da_YU,Price Winners,Deluxe,8820000.0,4410000.0,no,2859.04,1871.16,593.93,...,Frozen Foods,1,1,1,1,1,28.05,25.0,3.05,10000.0
4,4,L05es_CA,Dollar Days,Supermarket,4320000.0,1987200.0,no,2193.97,1320.15,523.32,...,Beverages,0,1,0,0,0,23.55,20.64,2.91,30000.0


In [23]:
test.isna().sum()

id                           0
Place Code                   0
Promotion Name               0
Store Kind                   0
Store Sales                  0
Store Cost                   0
Is Recyclable?               0
Store Area                   0
Grocery Area                 0
Frozen Area                  0
Meat Area                    0
Marriage                     0
Gender                       0
Children                     0
Degree                       0
Work                         0
Oreder Brand                 0
Product                      0
Department                   0
Bar For Salad                0
Florist                      0
Coffee Bar                   0
Ready Food                   0
Video Store                  0
Gross Weight                 0
Net Weight                   0
Package Weight               0
Min. Person Yearly Income    0
dtype: int64

In [24]:
y_sub_pred = exmodel.predict(test)

In [25]:
samples["Cost"] = y_sub_pred

In [26]:
samples.to_csv("data/submission.csv", index=False)