In [2268]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline



In [2269]:
df = pd.read_csv('train.csv')

In [2270]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 188533 entries, 0 to 188532
Data columns (total 13 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   id            188533 non-null  int64 
 1   brand         188533 non-null  object
 2   model         188533 non-null  object
 3   model_year    188533 non-null  int64 
 4   milage        188533 non-null  int64 
 5   fuel_type     183450 non-null  object
 6   engine        188533 non-null  object
 7   transmission  188533 non-null  object
 8   ext_col       188533 non-null  object
 9   int_col       188533 non-null  object
 10  accident      186081 non-null  object
 11  clean_title   167114 non-null  object
 12  price         188533 non-null  int64 
dtypes: int64(4), object(9)
memory usage: 18.7+ MB


In [2271]:
X = df.drop(columns=['id'])
y = df['price']

In [2272]:
from datetime import datetime

current_year = datetime.now().year
# Assuming 'model_year' is in the format 'YYYY' and you want to extract just the year
df['car_age'] = current_year - df['model_year'].astype(int)

In [2273]:
df.head()

Unnamed: 0,id,brand,model,model_year,milage,fuel_type,engine,transmission,ext_col,int_col,accident,clean_title,price,car_age
0,0,MINI,Cooper S Base,2007,213000,Gasoline,172.0HP 1.6L 4 Cylinder Engine Gasoline Fuel,A/T,Yellow,Gray,None reported,Yes,4200,17
1,1,Lincoln,LS V8,2002,143250,Gasoline,252.0HP 3.9L 8 Cylinder Engine Gasoline Fuel,A/T,Silver,Beige,At least 1 accident or damage reported,Yes,4999,22
2,2,Chevrolet,Silverado 2500 LT,2002,136731,E85 Flex Fuel,320.0HP 5.3L 8 Cylinder Engine Flex Fuel Capab...,A/T,Blue,Gray,None reported,Yes,13900,22
3,3,Genesis,G90 5.0 Ultimate,2017,19500,Gasoline,420.0HP 5.0L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode,Black,Black,None reported,Yes,45000,7
4,4,Mercedes-Benz,Metris Base,2021,7388,Gasoline,208.0HP 2.0L 4 Cylinder Engine Gasoline Fuel,7-Speed A/T,Black,Beige,None reported,Yes,97500,3


In [2274]:
df.isnull().sum()

Unnamed: 0,0
id,0
brand,0
model,0
model_year,0
milage,0
fuel_type,5083
engine,0
transmission,0
ext_col,0
int_col,0


In [2275]:
drop_col=['ext_col','int_col', 'accident', 'clean_title']
df.drop(drop_col,inplace=True,axis=1)
object_columns = df.select_dtypes(include=['object']).columns
df[object_columns]

Unnamed: 0,brand,model,fuel_type,engine,transmission
0,MINI,Cooper S Base,Gasoline,172.0HP 1.6L 4 Cylinder Engine Gasoline Fuel,A/T
1,Lincoln,LS V8,Gasoline,252.0HP 3.9L 8 Cylinder Engine Gasoline Fuel,A/T
2,Chevrolet,Silverado 2500 LT,E85 Flex Fuel,320.0HP 5.3L 8 Cylinder Engine Flex Fuel Capab...,A/T
3,Genesis,G90 5.0 Ultimate,Gasoline,420.0HP 5.0L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode
4,Mercedes-Benz,Metris Base,Gasoline,208.0HP 2.0L 4 Cylinder Engine Gasoline Fuel,7-Speed A/T
...,...,...,...,...,...
188528,Cadillac,Escalade ESV Platinum,Gasoline,420.0HP 6.2L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode
188529,Mercedes-Benz,AMG C 43 AMG C 43 4MATIC,Gasoline,385.0HP 3.0L V6 Cylinder Engine Gasoline Fuel,8-Speed A/T
188530,Mercedes-Benz,AMG GLC 63 Base 4MATIC,Gasoline,469.0HP 4.0L 8 Cylinder Engine Gasoline Fuel,7-Speed A/T
188531,Audi,S5 3.0T Prestige,Gasoline,3.0L,1-Speed Automatic


In [2276]:
df[object_columns]

Unnamed: 0,brand,model,fuel_type,engine,transmission
0,MINI,Cooper S Base,Gasoline,172.0HP 1.6L 4 Cylinder Engine Gasoline Fuel,A/T
1,Lincoln,LS V8,Gasoline,252.0HP 3.9L 8 Cylinder Engine Gasoline Fuel,A/T
2,Chevrolet,Silverado 2500 LT,E85 Flex Fuel,320.0HP 5.3L 8 Cylinder Engine Flex Fuel Capab...,A/T
3,Genesis,G90 5.0 Ultimate,Gasoline,420.0HP 5.0L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode
4,Mercedes-Benz,Metris Base,Gasoline,208.0HP 2.0L 4 Cylinder Engine Gasoline Fuel,7-Speed A/T
...,...,...,...,...,...
188528,Cadillac,Escalade ESV Platinum,Gasoline,420.0HP 6.2L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode
188529,Mercedes-Benz,AMG C 43 AMG C 43 4MATIC,Gasoline,385.0HP 3.0L V6 Cylinder Engine Gasoline Fuel,8-Speed A/T
188530,Mercedes-Benz,AMG GLC 63 Base 4MATIC,Gasoline,469.0HP 4.0L 8 Cylinder Engine Gasoline Fuel,7-Speed A/T
188531,Audi,S5 3.0T Prestige,Gasoline,3.0L,1-Speed Automatic


In [2277]:
df

Unnamed: 0,id,brand,model,model_year,milage,fuel_type,engine,transmission,price,car_age
0,0,MINI,Cooper S Base,2007,213000,Gasoline,172.0HP 1.6L 4 Cylinder Engine Gasoline Fuel,A/T,4200,17
1,1,Lincoln,LS V8,2002,143250,Gasoline,252.0HP 3.9L 8 Cylinder Engine Gasoline Fuel,A/T,4999,22
2,2,Chevrolet,Silverado 2500 LT,2002,136731,E85 Flex Fuel,320.0HP 5.3L 8 Cylinder Engine Flex Fuel Capab...,A/T,13900,22
3,3,Genesis,G90 5.0 Ultimate,2017,19500,Gasoline,420.0HP 5.0L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode,45000,7
4,4,Mercedes-Benz,Metris Base,2021,7388,Gasoline,208.0HP 2.0L 4 Cylinder Engine Gasoline Fuel,7-Speed A/T,97500,3
...,...,...,...,...,...,...,...,...,...,...
188528,188528,Cadillac,Escalade ESV Platinum,2017,49000,Gasoline,420.0HP 6.2L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode,27500,7
188529,188529,Mercedes-Benz,AMG C 43 AMG C 43 4MATIC,2018,28600,Gasoline,385.0HP 3.0L V6 Cylinder Engine Gasoline Fuel,8-Speed A/T,30000,6
188530,188530,Mercedes-Benz,AMG GLC 63 Base 4MATIC,2021,13650,Gasoline,469.0HP 4.0L 8 Cylinder Engine Gasoline Fuel,7-Speed A/T,86900,3
188531,188531,Audi,S5 3.0T Prestige,2022,13895,Gasoline,3.0L,1-Speed Automatic,84900,2


In [2278]:
for column in object_columns:
    unique_values = df[column].unique()
    print(f"Unique values in '{column}':")
    print(unique_values)
    print()

Unique values in 'brand':
['MINI' 'Lincoln' 'Chevrolet' 'Genesis' 'Mercedes-Benz' 'Audi' 'Ford'
 'BMW' 'Tesla' 'Cadillac' 'Land' 'GMC' 'Toyota' 'Hyundai' 'Volvo'
 'Volkswagen' 'Buick' 'Rivian' 'RAM' 'Hummer' 'Alfa' 'INFINITI' 'Jeep'
 'Porsche' 'McLaren' 'Honda' 'Lexus' 'Dodge' 'Nissan' 'Jaguar' 'Acura'
 'Kia' 'Mitsubishi' 'Rolls-Royce' 'Maserati' 'Pontiac' 'Saturn' 'Bentley'
 'Mazda' 'Subaru' 'Ferrari' 'Aston' 'Lamborghini' 'Chrysler' 'Lucid'
 'Lotus' 'Scion' 'smart' 'Karma' 'Plymouth' 'Suzuki' 'FIAT' 'Saab'
 'Bugatti' 'Mercury' 'Polestar' 'Maybach']

Unique values in 'model':
['Cooper S Base' 'LS V8' 'Silverado 2500 LT' ... 'e-Golf SE'
 'Integra w/A-Spec Tech Package' 'IONIQ Plug-In Hybrid SEL']

Unique values in 'fuel_type':
['Gasoline' 'E85 Flex Fuel' nan 'Hybrid' 'Diesel' 'Plug-In Hybrid' '–'
 'not supported']

Unique values in 'engine':
['172.0HP 1.6L 4 Cylinder Engine Gasoline Fuel'
 '252.0HP 3.9L 8 Cylinder Engine Gasoline Fuel'
 '320.0HP 5.3L 8 Cylinder Engine Flex Fuel Capabil

In [2279]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Sample DataFrame
data = {
    'brand': ['MINI', 'Lincoln', 'Chevrolet', 'Genesis', 'Mercedes-Benz', 'Audi',
              'Ford', 'BMW', 'Tesla', 'Cadillac', 'Land', 'GMC', 'Toyota',
              'Hyundai', 'Volvo', 'Volkswagen', 'Buick', 'Rivian', 'RAM',
              'Hummer', 'Alfa', 'INFINITI', 'Jeep', 'Porsche', 'McLaren',
              'Honda', 'Lexus', 'Dodge', 'Nissan', 'Jaguar', 'Acura',
              'Kia', 'Mitsubishi', 'Rolls-Royce', 'Maserati', 'Pontiac',
              'Saturn', 'Bentley', 'Mazda', 'Subaru', 'Ferrari', 'Aston',
              'Lamborghini', 'Chrysler', 'Lucid', 'Lotus', 'Scion',
              'smart', 'Karma', 'Plymouth', 'Suzuki', 'FIAT', 'Saab',
              'Bugatti', 'Mercury', 'Polestar', 'Maybach']
}

df1 = pd.DataFrame(data)
df1


Unnamed: 0,brand
0,MINI
1,Lincoln
2,Chevrolet
3,Genesis
4,Mercedes-Benz
5,Audi
6,Ford
7,BMW
8,Tesla
9,Cadillac


In [2280]:
label_encoder = LabelEncoder()
df1['brand_encoder'] = label_encoder.fit_transform(df1['brand'])
print(df1)

            brand  brand_encoder
0            MINI             31
1         Lincoln             28
2       Chevrolet              9
3         Genesis             16
4   Mercedes-Benz             36
5            Audi              3
6            Ford             14
7             BMW              4
8           Tesla             52
9        Cadillac              8
10           Land             26
11            GMC             15
12         Toyota             53
13        Hyundai             19
14          Volvo             55
15     Volkswagen             54
16          Buick              7
17         Rivian             45
18            RAM             44
19         Hummer             18
20           Alfa              1
21       INFINITI             20
22           Jeep             22
23        Porsche             43
24        McLaren             35
25          Honda             17
26          Lexus             27
27          Dodge             11
28         Nissan             39
29        

In [2281]:
df

Unnamed: 0,id,brand,model,model_year,milage,fuel_type,engine,transmission,price,car_age
0,0,MINI,Cooper S Base,2007,213000,Gasoline,172.0HP 1.6L 4 Cylinder Engine Gasoline Fuel,A/T,4200,17
1,1,Lincoln,LS V8,2002,143250,Gasoline,252.0HP 3.9L 8 Cylinder Engine Gasoline Fuel,A/T,4999,22
2,2,Chevrolet,Silverado 2500 LT,2002,136731,E85 Flex Fuel,320.0HP 5.3L 8 Cylinder Engine Flex Fuel Capab...,A/T,13900,22
3,3,Genesis,G90 5.0 Ultimate,2017,19500,Gasoline,420.0HP 5.0L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode,45000,7
4,4,Mercedes-Benz,Metris Base,2021,7388,Gasoline,208.0HP 2.0L 4 Cylinder Engine Gasoline Fuel,7-Speed A/T,97500,3
...,...,...,...,...,...,...,...,...,...,...
188528,188528,Cadillac,Escalade ESV Platinum,2017,49000,Gasoline,420.0HP 6.2L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode,27500,7
188529,188529,Mercedes-Benz,AMG C 43 AMG C 43 4MATIC,2018,28600,Gasoline,385.0HP 3.0L V6 Cylinder Engine Gasoline Fuel,8-Speed A/T,30000,6
188530,188530,Mercedes-Benz,AMG GLC 63 Base 4MATIC,2021,13650,Gasoline,469.0HP 4.0L 8 Cylinder Engine Gasoline Fuel,7-Speed A/T,86900,3
188531,188531,Audi,S5 3.0T Prestige,2022,13895,Gasoline,3.0L,1-Speed Automatic,84900,2


In [2282]:
from sklearn.preprocessing import MinMaxScaler
non_boolean_numerical_features = ['price','model_year', 'milage']
scaler = MinMaxScaler()
df[non_boolean_numerical_features] = scaler.fit_transform(df[non_boolean_numerical_features])

In [2283]:
df

Unnamed: 0,id,brand,model,model_year,milage,fuel_type,engine,transmission,price,car_age
0,0,MINI,Cooper S Base,0.66,0.525809,Gasoline,172.0HP 1.6L 4 Cylinder Engine Gasoline Fuel,A/T,0.000745,17
1,1,Lincoln,LS V8,0.56,0.353544,Gasoline,252.0HP 3.9L 8 Cylinder Engine Gasoline Fuel,A/T,0.001016,22
2,2,Chevrolet,Silverado 2500 LT,0.56,0.337444,E85 Flex Fuel,320.0HP 5.3L 8 Cylinder Engine Flex Fuel Capab...,A/T,0.004031,22
3,3,Genesis,G90 5.0 Ultimate,0.86,0.047913,Gasoline,420.0HP 5.0L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode,0.014566,7
4,4,Mercedes-Benz,Metris Base,0.94,0.018000,Gasoline,208.0HP 2.0L 4 Cylinder Engine Gasoline Fuel,7-Speed A/T,0.032350,3
...,...,...,...,...,...,...,...,...,...,...
188528,188528,Cadillac,Escalade ESV Platinum,0.86,0.120771,Gasoline,420.0HP 6.2L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode,0.008638,7
188529,188529,Mercedes-Benz,AMG C 43 AMG C 43 4MATIC,0.88,0.070388,Gasoline,385.0HP 3.0L V6 Cylinder Engine Gasoline Fuel,8-Speed A/T,0.009485,6
188530,188530,Mercedes-Benz,AMG GLC 63 Base 4MATIC,0.94,0.033465,Gasoline,469.0HP 4.0L 8 Cylinder Engine Gasoline Fuel,7-Speed A/T,0.028759,3
188531,188531,Audi,S5 3.0T Prestige,0.96,0.034070,Gasoline,3.0L,1-Speed Automatic,0.028082,2


In [2284]:
df

Unnamed: 0,id,brand,model,model_year,milage,fuel_type,engine,transmission,price,car_age
0,0,MINI,Cooper S Base,0.66,0.525809,Gasoline,172.0HP 1.6L 4 Cylinder Engine Gasoline Fuel,A/T,0.000745,17
1,1,Lincoln,LS V8,0.56,0.353544,Gasoline,252.0HP 3.9L 8 Cylinder Engine Gasoline Fuel,A/T,0.001016,22
2,2,Chevrolet,Silverado 2500 LT,0.56,0.337444,E85 Flex Fuel,320.0HP 5.3L 8 Cylinder Engine Flex Fuel Capab...,A/T,0.004031,22
3,3,Genesis,G90 5.0 Ultimate,0.86,0.047913,Gasoline,420.0HP 5.0L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode,0.014566,7
4,4,Mercedes-Benz,Metris Base,0.94,0.018000,Gasoline,208.0HP 2.0L 4 Cylinder Engine Gasoline Fuel,7-Speed A/T,0.032350,3
...,...,...,...,...,...,...,...,...,...,...
188528,188528,Cadillac,Escalade ESV Platinum,0.86,0.120771,Gasoline,420.0HP 6.2L 8 Cylinder Engine Gasoline Fuel,Transmission w/Dual Shift Mode,0.008638,7
188529,188529,Mercedes-Benz,AMG C 43 AMG C 43 4MATIC,0.88,0.070388,Gasoline,385.0HP 3.0L V6 Cylinder Engine Gasoline Fuel,8-Speed A/T,0.009485,6
188530,188530,Mercedes-Benz,AMG GLC 63 Base 4MATIC,0.94,0.033465,Gasoline,469.0HP 4.0L 8 Cylinder Engine Gasoline Fuel,7-Speed A/T,0.028759,3
188531,188531,Audi,S5 3.0T Prestige,0.96,0.034070,Gasoline,3.0L,1-Speed Automatic,0.028082,2


In [2285]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 188533 entries, 0 to 188532
Data columns (total 10 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   id            188533 non-null  int64  
 1   brand         188533 non-null  object 
 2   model         188533 non-null  object 
 3   model_year    188533 non-null  float64
 4   milage        188533 non-null  float64
 5   fuel_type     183450 non-null  object 
 6   engine        188533 non-null  object 
 7   transmission  188533 non-null  object 
 8   price         188533 non-null  float64
 9   car_age       188533 non-null  int64  
dtypes: float64(3), int64(2), object(5)
memory usage: 14.4+ MB


In [2286]:
X_train.head()

Unnamed: 0,id,brand,model_year,milage,fuel_type,engine,transmission,car_age
184031,184031,36,0.86,0.152075,1,258,49,7
173831,173831,39,0.58,0.456656,2,323,38,21
183819,183819,14,0.92,0.22697,2,651,2,4
85525,85525,19,0.98,0.013295,2,441,20,1
41872,41872,11,0.98,0.012102,2,1093,16,1


In [2287]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

label_encoder_brand = LabelEncoder()
label_encoder_fuel_type = LabelEncoder()
label_encoder_transmission = LabelEncoder()
label_encoder_engine = LabelEncoder()

df['brand'] = label_encoder.fit_transform(df['brand'])
df['fuel_type'] = label_encoder.fit_transform(df['fuel_type'])
df['transmission'] = label_encoder.fit_transform(df['transmission'])
df['engine'] = label_encoder.fit_transform(df['engine'])

In [2288]:
x = df.drop(['price', 'model'],axis=1)
x

Unnamed: 0,id,brand,model_year,milage,fuel_type,engine,transmission,car_age
0,0,31,0.66,0.525809,2,116,38,17
1,1,28,0.56,0.353544,2,366,38,22
2,2,9,0.56,0.337444,1,640,38,22
3,3,16,0.86,0.047913,2,863,49,7
4,4,36,0.94,0.018000,2,259,23,3
...,...,...,...,...,...,...,...,...
188528,188528,8,0.86,0.120771,2,866,49,7
188529,188529,36,0.88,0.070388,2,770,31,6
188530,188530,36,0.94,0.033465,2,921,23,3
188531,188531,3,0.96,0.034070,2,512,1,2


In [2289]:
y_scaled = df['price']
y_scaled

Unnamed: 0,price
0,0.000745
1,0.001016
2,0.004031
3,0.014566
4,0.032350
...,...
188528,0.008638
188529,0.009485
188530,0.028759
188531,0.028082


In [2290]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(x, y_scaled, test_size=0.2, random_state=42)

In [2291]:
from sklearn.linear_model import LinearRegression

linear_reg = LinearRegression()
linear_reg.fit(X_train, y_train)

In [2292]:
y_pred_train = linear_reg.predict(X_train)
y_pred_test = linear_reg.predict(X_test)

In [2293]:
# Evaluate the model
train_mse = mean_squared_error(y_train, y_pred_train, squared=False)
test_mse = mean_squared_error(y_test, y_pred_test, squared=False)

print(f"Training MSE: {train_mse}")
print(f"Testing MSE: {test_mse}")


Training MSE: 0.025718351316140215
Testing MSE: 0.023834362508076853


In [2294]:
test_url = 'https://github.com/robitussin/CCMACLRL_EXAM/blob/a46a4e2a001dedaefc9b431d480b508ce86c2d96/datasets/test.csv?raw=true'
dt=pd.read_csv(test_url)

In [2295]:
sample_submission_url = 'https://github.com/robitussin/CCMACLRL_EXAM/blob/a46a4e2a001dedaefc9b431d480b508ce86c2d96/datasets/sample_submission.csv?raw=true'

sf=pd.read_csv(sample_submission_url)


In [2296]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 188533 entries, 0 to 188532
Data columns (total 10 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   id            188533 non-null  int64  
 1   brand         188533 non-null  int64  
 2   model         188533 non-null  object 
 3   model_year    188533 non-null  float64
 4   milage        188533 non-null  float64
 5   fuel_type     188533 non-null  int64  
 6   engine        188533 non-null  int64  
 7   transmission  188533 non-null  int64  
 8   price         188533 non-null  float64
 9   car_age       188533 non-null  int64  
dtypes: float64(3), int64(6), object(1)
memory usage: 14.4+ MB


In [2297]:
#dt.insert(8, 'price', 8)

In [2298]:
dt.drop(columns=['accident', 'clean_title', 'ext_col','int_col', 'model'], axis=1, inplace=True)

In [2299]:
dt['car_age'] = current_year - dt['model_year'].astype(int)

In [2300]:
dt['brand'] = label_encoder.fit_transform(dt['brand'])
dt['fuel_type'] = label_encoder.fit_transform(dt['fuel_type'])
dt['transmission'] = label_encoder.fit_transform(dt['transmission'])
dt['engine'] = label_encoder.fit_transform(dt['engine'])

In [2301]:
dt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125690 entries, 0 to 125689
Data columns (total 8 columns):
 #   Column        Non-Null Count   Dtype
---  ------        --------------   -----
 0   id            125690 non-null  int64
 1   brand         125690 non-null  int64
 2   model_year    125690 non-null  int64
 3   milage        125690 non-null  int64
 4   fuel_type     125690 non-null  int64
 5   engine        125690 non-null  int64
 6   transmission  125690 non-null  int64
 7   car_age       125690 non-null  int64
dtypes: int64(8)
memory usage: 7.7 MB


In [2302]:
dt.head()

Unnamed: 0,id,brand,model_year,milage,fuel_type,engine,transmission,car_age
0,188533,26,2015,98000,2,326,16,9
1,188534,26,2020,9142,3,787,31,4
2,188535,14,2022,28121,2,541,3,2
3,188536,3,2016,61258,2,193,39,8
4,188537,3,2018,59000,2,365,38,6


In [2303]:
id = sf.pop('id')
y_pred = linear_reg.predict(dt)

# Create a submission DataFrame
submission_df = pd.DataFrame({
    'id': id,
    'class': y_pred
})

# Save the submission DataFrame to a CSV file
submission_df.to_csv('submission_file.csv', index=False)
print("Submission file created: submission_file.csv")

Submission file created: submission_file.csv
