In [1]:
#basic imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
df = pd.read_csv('StudentPerformanceFactors.csv')

In [3]:
df.head()

Unnamed: 0,Hours_Studied,Attendance,Parental_Involvement,Access_to_Resources,Extracurricular_Activities,Sleep_Hours,Previous_Scores,Motivation_Level,Internet_Access,Tutoring_Sessions,Family_Income,Teacher_Quality,School_Type,Peer_Influence,Physical_Activity,Learning_Disabilities,Parental_Education_Level,Distance_from_Home,Gender,Exam_Score
0,23,84,Low,High,No,7,73,Low,Yes,0,Low,Medium,Public,Positive,3,No,High School,Near,Male,67
1,19,64,Low,Medium,No,8,59,Low,Yes,2,Medium,Medium,Public,Negative,4,No,College,Moderate,Female,61
2,24,98,Medium,Medium,Yes,7,91,Medium,Yes,2,Medium,Medium,Public,Neutral,4,No,Postgraduate,Near,Male,74
3,29,89,Low,Medium,Yes,8,98,Medium,Yes,1,Medium,Medium,Public,Negative,4,No,High School,Moderate,Male,71
4,19,92,Medium,Medium,Yes,6,65,Medium,Yes,3,Medium,High,Public,Neutral,4,No,College,Near,Female,70


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6607 entries, 0 to 6606
Data columns (total 20 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   Hours_Studied               6607 non-null   int64 
 1   Attendance                  6607 non-null   int64 
 2   Parental_Involvement        6607 non-null   object
 3   Access_to_Resources         6607 non-null   object
 4   Extracurricular_Activities  6607 non-null   object
 5   Sleep_Hours                 6607 non-null   int64 
 6   Previous_Scores             6607 non-null   int64 
 7   Motivation_Level            6607 non-null   object
 8   Internet_Access             6607 non-null   object
 9   Tutoring_Sessions           6607 non-null   int64 
 10  Family_Income               6607 non-null   object
 11  Teacher_Quality             6529 non-null   object
 12  School_Type                 6607 non-null   object
 13  Peer_Influence              6607 non-null   obje

In [5]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Hours_Studied,6607.0,19.975329,5.990594,1.0,16.0,20.0,24.0,44.0
Attendance,6607.0,79.977448,11.547475,60.0,70.0,80.0,90.0,100.0
Sleep_Hours,6607.0,7.02906,1.46812,4.0,6.0,7.0,8.0,10.0
Previous_Scores,6607.0,75.070531,14.399784,50.0,63.0,75.0,88.0,100.0
Tutoring_Sessions,6607.0,1.493719,1.23057,0.0,1.0,1.0,2.0,8.0
Physical_Activity,6607.0,2.96761,1.031231,0.0,2.0,3.0,4.0,6.0
Exam_Score,6607.0,67.235659,3.890456,55.0,65.0,67.0,69.0,101.0


In [6]:
#dropping duplicates
df.drop_duplicates(inplace=True)

In [7]:
#null values
df.isna().sum().sort_values(ascending=False).head(5)

Parental_Education_Level    90
Teacher_Quality             78
Distance_from_Home          67
Hours_Studied                0
Attendance                   0
dtype: int64

In [8]:
#imputing missing values
print(df['Parental_Education_Level'].value_counts(),"\n")
print(df['Teacher_Quality'].value_counts(),"\n")
print(df['Distance_from_Home'].value_counts(),"\n")

Parental_Education_Level
High School     3223
College         1989
Postgraduate    1305
Name: count, dtype: int64 

Teacher_Quality
Medium    3925
High      1947
Low        657
Name: count, dtype: int64 

Distance_from_Home
Near        3884
Moderate    1998
Far          658
Name: count, dtype: int64 



In [9]:
#filling with mode of each variable
df['Parental_Education_Level'].fillna(df['Parental_Education_Level'].mode()[0], inplace=True)
df['Teacher_Quality'].fillna(df['Teacher_Quality'].mode()[0], inplace=True)
df['Distance_from_Home'].fillna(df['Distance_from_Home'].mode()[0], inplace=True)

In [10]:
df.isna().sum().sort_values(ascending=False).head(5)

Hours_Studied               0
Attendance                  0
Gender                      0
Distance_from_Home          0
Parental_Education_Level    0
dtype: int64

### dividing features into categorical and numerical

In [11]:
cat_cols = ['Parental_Involvement','Access_to_Resources','Extracurricular_Activities','Motivation_Level','Internet_Access','Family_Income','Teacher_Quality','School_Type','Peer_Influence','Learning_Disabilities','Parental_Education_Level','Distance_from_Home','Gender']
num_cols = ['Hours_Studied', 'Attendance','Sleep_Hours','Previous_Scores','Tutoring_Sessions','Physical_Activity']
target_col = ['Exam_Score']

### Coverting categorical variables

In [12]:
df[cat_cols].head()

Unnamed: 0,Parental_Involvement,Access_to_Resources,Extracurricular_Activities,Motivation_Level,Internet_Access,Family_Income,Teacher_Quality,School_Type,Peer_Influence,Learning_Disabilities,Parental_Education_Level,Distance_from_Home,Gender
0,Low,High,No,Low,Yes,Low,Medium,Public,Positive,No,High School,Near,Male
1,Low,Medium,No,Low,Yes,Medium,Medium,Public,Negative,No,College,Moderate,Female
2,Medium,Medium,Yes,Medium,Yes,Medium,Medium,Public,Neutral,No,Postgraduate,Near,Male
3,Low,Medium,Yes,Medium,Yes,Medium,Medium,Public,Negative,No,High School,Moderate,Male
4,Medium,Medium,Yes,Medium,Yes,Medium,High,Public,Neutral,No,College,Near,Female


In [13]:
#creating a map for low,medim,high
cat_map_lmh = {'Low':0,'Medium':1,'High':2}
cols_to_map = ['Parental_Involvement','Access_to_Resources','Motivation_Level','Family_Income','Teacher_Quality']
df[cols_to_map] = df[cols_to_map].replace(cat_map_lmh)

In [14]:
df['Parental_Education_Level'] = df['Parental_Education_Level'].replace({'High School':1,'College':2,'Postgraduate':3})
df['Distance_from_Home'] = df['Distance_from_Home'].replace({'Near':1,'Moderate':2,'Far':3})

In [15]:
df[cat_cols].head()

Unnamed: 0,Parental_Involvement,Access_to_Resources,Extracurricular_Activities,Motivation_Level,Internet_Access,Family_Income,Teacher_Quality,School_Type,Peer_Influence,Learning_Disabilities,Parental_Education_Level,Distance_from_Home,Gender
0,0,2,No,0,Yes,0,1,Public,Positive,No,1,1,Male
1,0,1,No,0,Yes,1,1,Public,Negative,No,2,2,Female
2,1,1,Yes,1,Yes,1,1,Public,Neutral,No,3,1,Male
3,0,1,Yes,1,Yes,1,1,Public,Negative,No,1,2,Male
4,1,1,Yes,1,Yes,1,2,Public,Neutral,No,2,1,Female


### Rest for one hot encoding

In [16]:
one_hot_cols = ['Extracurricular_Activities','Internet_Access','School_Type','Peer_Influence','Learning_Disabilities','Gender']
df = pd.get_dummies(df,columns=one_hot_cols,drop_first=True)

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6607 entries, 0 to 6606
Data columns (total 21 columns):
 #   Column                          Non-Null Count  Dtype
---  ------                          --------------  -----
 0   Hours_Studied                   6607 non-null   int64
 1   Attendance                      6607 non-null   int64
 2   Parental_Involvement            6607 non-null   int64
 3   Access_to_Resources             6607 non-null   int64
 4   Sleep_Hours                     6607 non-null   int64
 5   Previous_Scores                 6607 non-null   int64
 6   Motivation_Level                6607 non-null   int64
 7   Tutoring_Sessions               6607 non-null   int64
 8   Family_Income                   6607 non-null   int64
 9   Teacher_Quality                 6607 non-null   int64
 10  Physical_Activity               6607 non-null   int64
 11  Parental_Education_Level        6607 non-null   int64
 12  Distance_from_Home              6607 non-null   int64
 13  Exa

In [18]:
#max value for any variable is less than 256, thus we can convert to int8 without losing any info
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Hours_Studied,6607.0,19.975329,5.990594,1.0,16.0,20.0,24.0,44.0
Attendance,6607.0,79.977448,11.547475,60.0,70.0,80.0,90.0,100.0
Parental_Involvement,6607.0,1.086423,0.695521,0.0,1.0,1.0,2.0,2.0
Access_to_Resources,6607.0,1.100197,0.698347,0.0,1.0,1.0,2.0,2.0
Sleep_Hours,6607.0,7.02906,1.46812,4.0,6.0,7.0,8.0,10.0
Previous_Scores,6607.0,75.070531,14.399784,50.0,63.0,75.0,88.0,100.0
Motivation_Level,6607.0,0.906463,0.695798,0.0,0.0,1.0,1.0,2.0
Tutoring_Sessions,6607.0,1.493719,1.23057,0.0,1.0,1.0,2.0,8.0
Family_Income,6607.0,0.787649,0.742617,0.0,0.0,1.0,1.0,2.0
Teacher_Quality,6607.0,1.195247,0.596707,0.0,1.0,1.0,2.0,2.0


In [19]:
for col in list(df.columns):
    if df[col].dtype == 'int64':
        df[col] = df[col].astype('int8')

In [20]:
df.head()

Unnamed: 0,Hours_Studied,Attendance,Parental_Involvement,Access_to_Resources,Sleep_Hours,Previous_Scores,Motivation_Level,Tutoring_Sessions,Family_Income,Teacher_Quality,...,Parental_Education_Level,Distance_from_Home,Exam_Score,Extracurricular_Activities_Yes,Internet_Access_Yes,School_Type_Public,Peer_Influence_Neutral,Peer_Influence_Positive,Learning_Disabilities_Yes,Gender_Male
0,23,84,0,2,7,73,0,0,0,1,...,1,1,67,False,True,True,False,True,False,True
1,19,64,0,1,8,59,0,2,1,1,...,2,2,61,False,True,True,False,False,False,False
2,24,98,1,1,7,91,1,2,1,1,...,3,1,74,True,True,True,True,False,False,True
3,29,89,0,1,8,98,1,1,1,1,...,1,2,71,True,True,True,False,False,False,True
4,19,92,1,1,6,65,1,3,1,2,...,2,1,70,True,True,True,True,False,False,False


### Splitting into train and test

In [21]:
X = df.drop('Exam_Score',axis=1)
y = df['Exam_Score']

In [22]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
X_train,X_val,y_train,y_val = train_test_split(X_train,y_train,test_size=0.2,random_state=42)

In [63]:
from catboost import CatBoostRegressor
cat_parameters = {
    'iterations': 1500,                
    'learning_rate': 0.01,               
    'depth': 13,                         
    'l2_leaf_reg': 6,                   
    'bagging_temperature': 0.6,         # Controls intensity of Bayesian bagging
    'border_count': 128,                # Number of splits for numerical features         # Indices of categorical features in the input data
    'thread_count': 8,                  # Number of parallel threads used for training
    'random_seed': 42,                  # Seed for random number generator for reproducibility
    'loss_function': 'RMSE',         # Loss function to optimize during training
    'eval_metric': 'RMSE',               # Metric used for evaluation
    'custom_metric': 'RMSE', # Additional metrics to be evaluated during training
    'use_best_model': True,            
    'od_type': 'Iter',                  
    'verbose': False              
}

In [65]:
model = CatBoostRegressor(
    **cat_parameters
    
)
model.fit(
    X_train, y_train,
    eval_set=(X_val, y_val),
)

<catboost.core.CatBoostRegressor at 0x314ac4550>

In [66]:
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Root Mean Squared Error: {rmse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R2 Score: {r2:.4f}')

Root Mean Squared Error: 2.0586
Mean Absolute Error: 0.9136
R2 Score: 0.7002


## Catboost with optuna

In [67]:
import optuna
def objective(trial,train_x=X_train, test_x=X_val, train_y=y_train, test_y=y_val):
    param = {
        'loss_function': 'RMSE',
        'l2_leaf_reg': trial.suggest_loguniform('l2_leaf_reg', 1e-2, 1.0),
        'max_bin': trial.suggest_int('max_bin', 200, 300),
        'subsample': trial.suggest_uniform('subsample', 0.6, 0.9),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.1),
        'n_estimators': trial.suggest_int('n_estimators', 1000, 5000),
        'max_depth': trial.suggest_int('max_depth', 5, 10),
        'random_state': 2020,
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
    }
    model = CatBoostRegressor(**param)  
    
    model.fit(train_x,train_y,eval_set=[(test_x,test_y)],early_stopping_rounds=200,verbose=False)
    
    preds = model.predict(test_x)
    
    rmse = mean_squared_error(test_y, preds,squared=False)
    
    return rmse

In [None]:
#I have cleared output for better visibility
study = optuna.create_study(direction='minimize',study_name='Catboost_student_performance')
study.optimize(objective, n_trials=200)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)

In [73]:
best_cat_model = CatBoostRegressor(**study.best_trial.params)
best_cat_model.fit(
    X_train, y_train,
    eval_set=(X_val, y_val),
    verbose=False)

<catboost.core.CatBoostRegressor at 0x317b48fd0>

In [74]:
y_pred = best_cat_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Root Mean Squared Error: {rmse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R2 Score: {r2:.4f}')

Root Mean Squared Error: 1.8493
Mean Absolute Error: 0.6005
R2 Score: 0.7581


## XGBoost

In [94]:
params_xgb = {
    "tree_method": "auto",
    "n_estimators": 2000,
    "learning_rate": 0.001,
    "max_depth": 15,
    "min_child_weight": 3,
    "subsample": 1.0,
    "colsample_bytree": 1.0,
    "gamma": 2,
    "reg_lambda": 1,
    "eval_metric": "rmse",
    "random_state": 0,
}
from xgboost import XGBRegressor,XGBRFRegressor
xg_reg = XGBRegressor(**params_xgb,objective='reg:squarederror')
xg_reg.fit(X_train, y_train)
pred_xgb = xg_reg.predict(X_test)

In [95]:
mse = mean_squared_error(y_test, pred_xgb)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, pred_xgb)
r2 = r2_score(y_test, pred_xgb)
print(f'Root Mean Squared Error: {rmse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R2 Score: {r2:.4f}')

Root Mean Squared Error: 2.2529
Mean Absolute Error: 1.2209
R2 Score: 0.6409


In [96]:
xg_reg_rf = XGBRFRegressor(**params_xgb,objective='reg:squarederror')
xg_reg_rf.fit(X_train, y_train)
pred_xgb_rf = xg_reg_rf.predict(X_test)
mse = mean_squared_error(y_test, pred_xgb_rf)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, pred_xgb_rf)
r2 = r2_score(y_test, pred_xgb_rf)
print(f'Root Mean Squared Error: {rmse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R2 Score: {r2:.4f}')

Root Mean Squared Error: 3.7607
Mean Absolute Error: 2.8242
R2 Score: -0.0005


### LGBM

In [137]:
from lightgbm import LGBMRegressor
lg = LGBMRegressor()
lg.fit(X_train,y_train,eval_set=(X_val,y_val))

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000604 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 196
[LightGBM] [Info] Number of data points in the train set: 4228, number of used features: 20
[LightGBM] [Info] Start training from score 67.226112


In [138]:
pred_lg = lg.predict(X_test)
mse = mean_squared_error(y_test, pred_lg)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, pred_lg)
r2 = r2_score(y_test, pred_lg)
print(f'Root Mean Squared Error: {rmse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R2 Score: {r2:.4f}')

Root Mean Squared Error: 1.9746
Mean Absolute Error: 0.8174
R2 Score: 0.7242


### Best model - LGBM

### CNN !?

In [152]:
class TabularCNN(nn.Module):
    def __init__(self, input_size):
        super(TabularCNN, self).__init__()
        
        self.input_size = input_size
        self.reshape_size = int(np.sqrt(input_size)) + 1
        self.pad_size = self.reshape_size ** 2 - input_size
        
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2)
        )
        self.conv_output_size = 64 * (self.reshape_size // 2) * (self.reshape_size // 2)
        
        self.fc_layers = nn.Sequential(
            nn.Linear(self.conv_output_size, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1)
        )
        
    def forward(self, x):
        x = torch.cat([x, torch.zeros(x.size(0), self.pad_size)], dim=1)
        x = x.view(-1, 1, self.reshape_size, self.reshape_size)
        x = self.conv_layers(x)
        x = x.view(-1, self.conv_output_size)
        x = self.fc_layers(x)
        return x

In [153]:
def preprocess_data(df):
    df_processed = df.copy()
    for column in df_processed.columns:
        if df_processed[column].dtype == bool:
            df_processed[column] = df_processed[column].astype(int)
        elif df_processed[column].dtype == 'object':
            le = LabelEncoder()
            df_processed[column] = le.fit_transform(df_processed[column])
    
    # Convert to numpy arrays
    X = df_processed.iloc[:, :-1].values  # all columns except the last one
    y = df_processed.iloc[:, -1].values   # last column
    
    # Scale the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    return X_scaled, y

In [154]:
def train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs=100):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            
            outputs = model(batch_X)
            loss = criterion(outputs.squeeze(), batch_y)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
        model.eval()
        valid_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in valid_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                loss = criterion(outputs.squeeze(), batch_y)
                valid_loss += loss.item()
        
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], '
                  f'Train Loss: {train_loss/len(train_loader):.4f}, '
                  f'Valid Loss: {valid_loss/len(valid_loader):.4f}')

In [155]:
def main(df, batch_size=32):
    X_scaled, y = preprocess_data(df)
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=42)
    train_dataset = TabularDataset(X_train, y_train)
    test_dataset = TabularDataset(X_test, y_test)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    model = TabularCNN(input_size=X_scaled.shape[1])
    
    # Define loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Train the model
    train_model(model, train_loader, test_loader, criterion, optimizer)
    
    return model

# To use the code:
model = main(df)

Epoch [10/100], Train Loss: 0.2250, Valid Loss: 0.2648
Epoch [20/100], Train Loss: 0.1412, Valid Loss: 0.3054
Epoch [30/100], Train Loss: 0.0682, Valid Loss: 0.3432
Epoch [40/100], Train Loss: 0.0492, Valid Loss: 0.3572
Epoch [50/100], Train Loss: 0.0348, Valid Loss: 0.3743
Epoch [60/100], Train Loss: 0.0333, Valid Loss: 0.3713
Epoch [70/100], Train Loss: 0.0282, Valid Loss: 0.3844
Epoch [80/100], Train Loss: 0.0257, Valid Loss: 0.3700
Epoch [90/100], Train Loss: 0.0275, Valid Loss: 0.3742
Epoch [100/100], Train Loss: 0.0258, Valid Loss: 0.3790


In [158]:
class TabularCNN_2(nn.Module):
    def __init__(self, input_size):
        super(TabularCNN_2, self).__init__()
        
        self.input_size = input_size
        self.reshape_size = int(np.sqrt(input_size)) + 1
        self.pad_size = self.reshape_size ** 2 - input_size
        
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(8),
            nn.Conv2d(8, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(2)
        )
        self.conv_output_size = 64 * (self.reshape_size // 2) * (self.reshape_size // 2)
        
        self.fc_layers = nn.Sequential(
            nn.Linear(self.conv_output_size, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1)
        )
        
    def forward(self, x):
        x = torch.cat([x, torch.zeros(x.size(0), self.pad_size)], dim=1)
        x = x.view(-1, 1, self.reshape_size, self.reshape_size)
        x = self.conv_layers(x)
        x = x.view(-1, self.conv_output_size)
        x = self.fc_layers(x)
        return x

In [159]:
def main(df, batch_size=32):
    X_scaled, y = preprocess_data(df)
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=42)
    train_dataset = TabularDataset(X_train, y_train)
    test_dataset = TabularDataset(X_test, y_test)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    model = TabularCNN_2(input_size=X_scaled.shape[1])
    
    # Define loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    
    # Train the model
    train_model(model, train_loader, test_loader, criterion, optimizer)
    
    return model

# To use the code:
model = main(df)

Epoch [10/100], Train Loss: 0.1647, Valid Loss: 0.3186
Epoch [20/100], Train Loss: 0.0463, Valid Loss: 0.3542
Epoch [30/100], Train Loss: 0.0296, Valid Loss: 0.3765
Epoch [40/100], Train Loss: 0.0205, Valid Loss: 0.3761
Epoch [50/100], Train Loss: 0.0189, Valid Loss: 0.3948
Epoch [60/100], Train Loss: 0.0154, Valid Loss: 0.4118
Epoch [70/100], Train Loss: 0.0144, Valid Loss: 0.4209
Epoch [80/100], Train Loss: 0.0151, Valid Loss: 0.4183
Epoch [90/100], Train Loss: 0.0112, Valid Loss: 0.4121
Epoch [100/100], Train Loss: 0.0101, Valid Loss: 0.4212
