In [7]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder,StandardScaler,MinMaxScaler,OneHotEncoder
from sklearn.ensemble import VotingRegressor
from sklearn.svm import SVC
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split,KFold,GridSearchCV,RandomizedSearchCV
from sklearn.metrics import accuracy_score,log_loss
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import make_column_selector,make_column_transformer

In [2]:
# Load the Big Mart training set and preview its first ten rows.
bigmart = pd.read_csv("train_bigmartsales.csv")
bigmart.head(n=10)

Unnamed: 0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Outlet_Sales
0,FDA15,9.3,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,Supermarket Type1,3735.138
1,DRC01,5.92,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,Supermarket Type2,443.4228
2,FDN15,17.5,Low Fat,0.01676,Meat,141.618,OUT049,1999,Medium,Tier 1,Supermarket Type1,2097.27
3,FDX07,19.2,Regular,0.0,Fruits and Vegetables,182.095,OUT010,1998,,Tier 3,Grocery Store,732.38
4,NCD19,8.93,Low Fat,0.0,Household,53.8614,OUT013,1987,High,Tier 3,Supermarket Type1,994.7052
5,FDP36,10.395,Regular,0.0,Baking Goods,51.4008,OUT018,2009,Medium,Tier 3,Supermarket Type2,556.6088
6,FDO10,13.65,Regular,0.012741,Snack Foods,57.6588,OUT013,1987,High,Tier 3,Supermarket Type1,343.5528
7,FDP10,,Low Fat,0.12747,Snack Foods,107.7622,OUT027,1985,Medium,Tier 3,Supermarket Type3,4022.7636
8,FDH17,16.2,Regular,0.016687,Frozen Foods,96.9726,OUT045,2002,,Tier 2,Supermarket Type1,1076.5986
9,FDU28,19.2,Regular,0.09445,Frozen Foods,187.8214,OUT017,2007,,Tier 2,Supermarket Type1,4710.535


In [4]:
# Feature matrix: everything except the two identifier columns and the target.
X_train = bigmart.drop(
    columns=['Item_Identifier', 'Outlet_Identifier', 'Item_Outlet_Sales']
)

In [5]:
# Regression target: the sales column of the training frame.
y_train = bigmart.loc[:, 'Item_Outlet_Sales']

In [6]:
# Two imputers, later routed by column dtype in `ct_imp`:
#   imp_med  -> fills missing values with the column median
#   imp_mode -> fills missing values with the most frequent value
imp_med = SimpleImputer(strategy='median')
imp_mode = SimpleImputer(strategy='most_frequent')

In [8]:
# Dense one-hot encoder for the categorical (object-dtype) columns.
# handle_unknown='ignore' encodes a category that was not present during fit
# as an all-zero row instead of raising at transform/predict time; the default
# ('error') would abort `pipe.predict` on any unseen category in the test file.
ohc = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

In [9]:
# Stage 1 - imputation, routed by dtype:
#   object columns        -> imp_mode
#   int64/float64 columns -> imp_med
# Output is requested as a pandas DataFrame with the original (un-prefixed)
# column names, so the dtype-based selectors of the next stage have a frame
# to inspect.
ct_imp = make_column_transformer(
    (imp_mode, make_column_selector(dtype_include=object)),
    (imp_med, make_column_selector(dtype_include=['int64', 'float64'])),
    verbose_feature_names_out=False,
).set_output(transform='pandas')

# Stage 2 - encoding:
#   object columns        -> one-hot encoded via `ohc`
#   int64/float64 columns -> passed through unchanged
ct_enc = make_column_transformer(
    (ohc, make_column_selector(dtype_include=object)),
    ("passthrough", make_column_selector(dtype_include=['int64', 'float64'])),
    verbose_feature_names_out=False,
).set_output(transform="pandas")

In [10]:
# Baseline regressor with default hyperparameters; random_state is fixed so
# repeated runs build the same tree.
dtr = DecisionTreeRegressor(random_state=23)

In [11]:
# End-to-end model: impute -> encode -> decision tree.
pipe = Pipeline(
    steps=[
        ('IMPUTE', ct_imp),
        ("ENC", ct_enc),
        ("TREE", dtr),
    ]
)

In [12]:
# Fit every pipeline step (imputation, encoding, tree) on the training data.
pipe.fit(X_train,y_train)

In [13]:
# Load the test set; it is scored below and its identifiers go into the submission.
test = pd.read_csv("test_bigmartsales.csv")

In [14]:
# Test features: drop the same identifier columns that were removed for training.
X_test = test.drop(columns=['Item_Identifier', 'Outlet_Identifier'])

In [15]:
# Predict sales for the test rows with the fitted pipeline.
prediction = pipe.predict(X_test)

In [16]:
# Show the predicted values as the cell output.
prediction

array([1856.2504, 1230.3984,  759.012 , ..., 1540.6612, 6471.576 ,
       2157.192 ])

In [17]:
# Submission frame: the two identifier columns from the test file plus the
# predicted sales for each row.
submit = pd.DataFrame(
    {
        'Item_Identifier': test['Item_Identifier'],
        'Outlet_Identifier': test['Outlet_Identifier'],
        'Item_Outlet_Sales': prediction,
    }
)

In [18]:
# Show the submission frame as the cell output.
submit

Unnamed: 0,Item_Identifier,Outlet_Identifier,Item_Outlet_Sales
0,FDW58,OUT049,1856.2504
1,FDW14,OUT017,1230.3984
2,NCN55,OUT010,759.0120
3,FDQ58,OUT017,4287.7520
4,FDY38,OUT027,7170.6660
...,...,...,...
5676,FDB58,OUT046,3829.0158
5677,FDD47,OUT018,4512.1266
5678,NCO17,OUT045,1540.6612
5679,FDJ26,OUT017,6471.5760


In [20]:
# Write the untuned-model submission to disk; index=False omits the row index column.
submit.to_csv("Big_Mart_WO_Tuning.csv",index=False)

## Hyperparameter tuning

In [21]:
# Rebuild the same impute -> encode -> tree pipeline for the tuning section.
pipe = Pipeline(
    steps=[
        ('IMPUTE', ct_imp),
        ("ENC", ct_enc),
        ("TREE", dtr),
    ]
)

In [None]:
# Search space for tuning the pipeline's decision tree.
# Keys follow scikit-learn's "<step name>__<parameter>" convention, so each
# entry addresses the "TREE" step of `pipe`. A param grid must be a dict
# mapping parameter names to lists of candidate values; the previous value
# (a set holding the unfinished string 'TREE__') could not be used as one.
# NOTE(review): the candidate values below are a starting grid, not tuned
# choices; adjust them to the available compute budget.
params = {
    'TREE__max_depth': [None, 3, 5, 7, 10],
    'TREE__min_samples_split': [2, 5, 10, 20],
    'TREE__min_samples_leaf': [1, 5, 10, 20],
}