Auto ML

In [None]:
pip install tpot h2o pandas scikit-learn


Collecting tpot
  Downloading TPOT-0.12.2-py3-none-any.whl.metadata (2.0 kB)
Collecting h2o
  Downloading h2o-3.46.0.5.tar.gz (265.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.6/265.6 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting deap>=1.2 (from tpot)
  Downloading deap-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting update-checker>=0.16 (from tpot)
  Downloading update_checker-0.18.0-py3-none-any.whl.metadata (2.3 kB)
Collecting stopit>=1.1.1 (from tpot)
  Downloading stopit-1.1.2.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading TPOT-0.12.2-py3-none-any.whl (87 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.4/87.4 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading deap-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_

Loading The dataset

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset (you can replace the URL with the path to your dataset)
data = pd.read_csv("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv")

# Preprocess the dataset (handle missing values, encode categorical data)
data = data.dropna()  # Simple preprocessing, more can be added
X = data.drop("median_house_value", axis=1)
y = data["median_house_value"]

# Encode categorical variables
X = pd.get_dummies(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


H2O.ai

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import h2o
from h2o.automl import H2OAutoML

# Step 1: Load and Preprocess the Dataset
data = pd.read_csv("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv")
data = data.dropna()
X = data.drop("median_house_value", axis=1)
y = data["median_house_value"]

# Convert categorical data using one-hot encoding
X = pd.get_dummies(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2: Initialize H2O and Convert Data
h2o.init()

# Convert data to H2O frames
train = h2o.H2OFrame(pd.concat([X_train, y_train], axis=1))
test = h2o.H2OFrame(pd.concat([X_test, y_test], axis=1))

# Define response and predictor columns
response = "median_house_value"
predictors = [col for col in train.columns if col != response]

# Step 3: Run H2O AutoML
aml = H2OAutoML(max_models=20, seed=42)
aml.train(x=predictors, y=response, training_frame=train)

# Step 4: Get the Best Model and Evaluate Performance
best_model = aml.leader
performance = best_model.model_performance(test)

print("Best Model Performance:")
print(f"RMSE: {performance.rmse()}")
print(f"R²: {performance.r2()}")

# Step 5: Compare Other Models from AutoML Leaderboard
leaderboard = aml.leaderboard.as_data_frame()
print("\nAutoML Leaderboard:")
print(leaderboard)

# Step 6: Save the Best Model
h2o.save_model(best_model, path="best_model_h2o", force=True)

# Shutdown H2O
h2o.shutdown()


Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.24" 2024-07-16; OpenJDK Runtime Environment (build 11.0.24+8-post-Ubuntu-1ubuntu322.04); OpenJDK 64-Bit Server VM (build 11.0.24+8-post-Ubuntu-1ubuntu322.04, mixed mode, sharing)
  Starting server from /usr/local/lib/python3.10/dist-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmp2z2qvbaq
  JVM stdout: /tmp/tmp2z2qvbaq/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmp2z2qvbaq/h2o_unknownUser_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,07 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.5
H2O_cluster_version_age:,1 month and 16 days
H2O_cluster_name:,H2O_from_python_unknownUser_dcqzyg
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.170 Gb
H2O_cluster_total_cores:,2
H2O_cluster_allowed_cores:,2


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
Best Model Performance:
RMSE: 45614.5622336612
R²: 0.8478493030314267

AutoML Leaderboard:
                                             model_id           rmse  \
0   StackedEnsemble_AllModels_1_AutoML_1_20241016_...   45518.277077   
1   StackedEnsemble_BestOfFamily_1_AutoML_1_202410...   46047.067123   
2                      GBM_4_AutoML_1_20241016_100602   46385.181054   
3                      GBM_3_AutoML_1_20241016_100602   46581.911480   
4                      GBM_1_AutoML_1_20241016_100602   46946.258931   
5                      GBM_2_AutoML_1_20241016_100602   47041.056760   
6         GBM_grid_1_AutoML_1_20241016_100602_model_2   47463.498986   
7                      GBM_5_AutoML_1_20241016_1




H2O session _sid_8bcd closed.


  h2o.shutdown()


In [None]:
import h2o
from h2o.automl import H2OAutoML

# Initialize H2O server
h2o.init()

# Convert data to H2O frame
train = h2o.H2OFrame(pd.concat([X_train, y_train], axis=1))
test = h2o.H2OFrame(pd.concat([X_test, y_test], axis=1))

# Define the response and predictor columns
response = "median_house_value"
predictors = [col for col in train.columns if col != response]

# Run AutoML
aml = H2OAutoML(max_models=20, seed=42)
aml.train(x=predictors, y=response, training_frame=train)

# Get the best model and evaluate
best_model = aml.leader
performance = best_model.model_performance(test)
print(performance)

# Save the model for future use
h2o.save_model(best_model, path="best_model_h2o", force=True)

# Shutdown H2O
h2o.shutdown()


Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.24" 2024-07-16; OpenJDK Runtime Environment (build 11.0.24+8-post-Ubuntu-1ubuntu322.04); OpenJDK 64-Bit Server VM (build 11.0.24+8-post-Ubuntu-1ubuntu322.04, mixed mode, sharing)
  Starting server from /usr/local/lib/python3.10/dist-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmpjqro8_43
  JVM stdout: /tmp/tmpjqro8_43/h2o_unknownUser_started_from_python.out
  JVM stderr: /tmp/tmpjqro8_43/h2o_unknownUser_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,06 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.5
H2O_cluster_version_age:,1 month and 16 days
H2O_cluster_name:,H2O_from_python_unknownUser_657pji
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.170 Gb
H2O_cluster_total_cores:,2
H2O_cluster_allowed_cores:,2


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
AutoML progress: |███████████████████████████████████████████████████████████████| (done) 100%
ModelMetricsRegressionGLM: stackedensemble
** Reported on test data. **

MSE: 2074112182.0582428
RMSE: 45542.42178516908
MAE: 29544.644468483875
RMSLE: 0.2209084201894307
Mean Residual Deviance: 2074112182.0582428
R^2: 0.8483301819180165
Null degrees of freedom: 4086
Residual degrees of freedom: 4074
Null deviance: 55895408761548.32
Residual deviance: 8476896488072.038
AIC: 99303.9932264633
H2O session _sid_8fc5 closed.


  h2o.shutdown()


TPOT - Tree-based Pipeline Optimization Tool

In [None]:
from tpot import TPOTRegressor

# Create and train the TPOT regressor
tpot = TPOTRegressor(generations=5, population_size=50, verbosity=2, random_state=42)
tpot.fit(X_train, y_train)

# Evaluate the model
print("Best pipeline:", tpot.fitted_pipeline_)
print("Test Score:", tpot.score(X_test, y_test))

# Export the best pipeline as Python code
tpot.export('best_pipeline.py')


Optimization Progress:   0%|          | 0/300 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: -2501965699.4831247

Generation 2 - Current best internal CV score: -2501965699.4831247

Generation 3 - Current best internal CV score: -2495752984.350515

Generation 4 - Current best internal CV score: -2436706502.728958

Generation 5 - Current best internal CV score: -2290760214.474598

Best pipeline: GradientBoostingRegressor(input_matrix, alpha=0.9, learning_rate=0.1, loss=huber, max_depth=7, max_features=0.6000000000000001, min_samples_leaf=8, min_samples_split=9, n_estimators=100, subsample=0.8)
Best pipeline: Pipeline(steps=[('gradientboostingregressor',
                 GradientBoostingRegressor(loss='huber', max_depth=7,
                                           max_features=0.6000000000000001,
                                           min_samples_leaf=8,
                                           min_samples_split=9, random_state=42,
                                           subsample=0.8))])
Test Score: -2354376615.921508


Meta Learning MAML(Model-Agnostic Meta-Learning)
Code for Few-Shot Learning with MAML

In [6]:
pip install learn2learn


Collecting learn2learn
  Downloading learn2learn-0.2.0.tar.gz (7.0 MB)
     ---------------------------------------- 0.0/7.0 MB ? eta -:--:--
     - -------------------------------------- 0.2/7.0 MB 6.7 MB/s eta 0:00:02
     --- ------------------------------------ 0.6/7.0 MB 7.2 MB/s eta 0:00:01
     ---- ----------------------------------- 0.8/7.0 MB 7.0 MB/s eta 0:00:01
     ------- -------------------------------- 1.4/7.0 MB 7.9 MB/s eta 0:00:01
     ----------- ---------------------------- 1.9/7.0 MB 8.7 MB/s eta 0:00:01
     -------------- ------------------------- 2.5/7.0 MB 9.2 MB/s eta 0:00:01
     --------------- ------------------------ 2.7/7.0 MB 9.0 MB/s eta 0:00:01
     ------------------ --------------------- 3.3/7.0 MB 9.0 MB/s eta 0:00:01
     --------------------- ------------------ 3.8/7.0 MB 9.2 MB/s eta 0:00:01
     ------------------------ --------------- 4.3/7.0 MB 9.4 MB/s eta 0:00:01
     --------------------------- ------------ 4.8/7.0 MB 9.5 MB/s eta 0:00:01


  error: subprocess-exited-with-error
  
  × python setup.py bdist_wheel did not run successfully.
  │ exit code: 1
  ╰─> [163 lines of output]
      c:\Python312\Lib\site-packages\setuptools\__init__.py:80: _DeprecatedInstaller: setuptools.installer and fetch_build_eggs are deprecated.
      !!
      
              ********************************************************************************
              Requirements should be satisfied by a PEP 517 installer.
              If you are using pip, you can try `pip install --use-pep517`.
              ********************************************************************************
      
      !!
        dist.fetch_build_eggs(dist.setup_requires)
      running bdist_wheel
      running build
      running build_py
      creating build
      creating build\lib.win-amd64-cpython-312
      creating build\lib.win-amd64-cpython-312\learn2learn
      copying learn2learn\_version.py -> build\lib.win-amd64-cpython-312\learn2learn
      copyi

In [4]:
pip install --user learn2learn

Collecting learn2learn
  Using cached learn2learn-0.2.0.tar.gz (7.0 MB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting gym>=0.14.0 (from learn2learn)
  Using cached gym-0.26.2-py3-none-any.whl
Collecting gsutil (from learn2learn)
  Using cached gsutil-5.31-py3-none-any.whl
Collecting qpth>=0.0.15 (from learn2learn)
  Using cached qpth-0.0.18-py3-none-any.whl
Collecting cloudpickle>=1.2.0 (from gym>=0.14.0->learn2learn)
  Using cached cloudpickle-3.1.0-py3-none-any.whl.metadata (7.0 kB)
Collecting gym-notices>=0.0.4 (from gym>=0.14.0->learn2learn)
  Using cached gym_notices-0.0.8-py3-none-any.whl.metadata (1.0 kB)
Collecting cvxpy>=1.1.0 (from qpth>=0.0.15->learn2learn)
  Using cached cvxpy-1.5.3-cp312-cp312-win_amd64.whl.metadata (9.0 kB)
Collecting argcomplete>=1.9.4 (from gsutil->learn2learn)
  Using cached argcomplete-3.5.1-py3-none-any.whl.metadata (16 kB)
Collecting crcmod>=1.7 (from gsutil->learn2learn)
  Using cac

  error: subprocess-exited-with-error
  
  × python setup.py bdist_wheel did not run successfully.
  │ exit code: 1
  ╰─> [161 lines of output]
      C:\Users\Abhishek P\AppData\Roaming\Python\Python312\site-packages\setuptools\__init__.py:94: _DeprecatedInstaller: setuptools.installer and fetch_build_eggs are deprecated.
      !!
      
              ********************************************************************************
              Requirements should be satisfied by a PEP 517 installer.
              If you are using pip, you can try `pip install --use-pep517`.
              ********************************************************************************
      
      !!
        dist.fetch_build_eggs(dist.setup_requires)
      running bdist_wheel
      running build
      running build_py
      creating build\lib.win-amd64-cpython-312\learn2learn
      copying learn2learn\_version.py -> build\lib.win-amd64-cpython-312\learn2learn
      copying learn2learn\__init__.py -> bu

In [3]:
   !pip install --user --upgrade pip setuptools wheel

Collecting setuptools
  Using cached setuptools-75.2.0-py3-none-any.whl.metadata (6.9 kB)
Collecting wheel
  Downloading wheel-0.44.0-py3-none-any.whl.metadata (2.3 kB)
Using cached setuptools-75.2.0-py3-none-any.whl (1.2 MB)
Downloading wheel-0.44.0-py3-none-any.whl (67 kB)
Installing collected packages: wheel, setuptools
Successfully installed setuptools-75.2.0 wheel-0.44.0




In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from learn2learn.algorithms import MAML
from torch.optim import Adam
from sklearn.model_selection import train_test_split

# Step 1: Load and Preprocess the Dataset
data = pd.read_csv("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv")
data = data.dropna()  # Handle missing values
X = data.drop("median_house_value", axis=1)
y = data["median_house_value"]

# Binning the target into 5 classes
y_bins = pd.qcut(y, q=5, labels=False)
X = pd.get_dummies(X)  # One-hot encode categorical variables

# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_bins, test_size=0.2, random_state=42)

# Step 3: Define the Few-Shot Task Loader
class TaskLoader:
    def __init__(self, X, y, n_classes=5, n_samples=5):
        self.X = X
        self.y = y
        self.n_classes = n_classes
        self.n_samples = n_samples

    def __iter__(self):
        for _ in range(10):  # Create 10 tasks
            tasks_X, tasks_y = [], []
            for class_id in range(self.n_classes):
                # Check if there are enough samples for the class
                if len(self.X[self.y == class_id]) < self.n_samples:
                    raise ValueError(f"Not enough samples for class {class_id}. Needed {self.n_samples}.")

                class_samples = self.X.loc[self.y == class_id].sample(n=self.n_samples)
                tasks_X.append(class_samples)
                tasks_y.append(np.full((self.n_samples,), class_id))
            yield pd.concat(tasks_X), np.concatenate(tasks_y)

# Step 4: Define the Model
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc = nn.Linear(X_train.shape[1], 5)  # 5 classes based on binning

    def forward(self, x):
        return self.fc(x)

# Step 5: Training Loop with MAML
model = SimpleModel()
maml = MAML(model, lr=0.01, first_order=True)
optimizer = Adam(maml.parameters(), lr=0.001)

task_loader = TaskLoader(X_train, y_train)

# Loop through each task
for task_X, task_y in task_loader:
    learner = maml.clone()  # Create a task-specific copy of the model

    # Convert DataFrame to NumPy arrays and ensure all data is numeric
    task_X_tensor = torch.tensor(task_X.values.astype(np.float32), dtype=torch.float32)
    task_y_tensor = torch.tensor(task_y, dtype=torch.long)

    task_dataset = TensorDataset(task_X_tensor, task_y_tensor)
    task_data_loader = DataLoader(task_dataset, batch_size=2)

    # Loop through batches of data
    for batch_X, batch_y in task_data_loader:
        pred = learner(batch_X)  # Get predictions
        loss = nn.CrossEntropyLoss()(pred, batch_y)  # Calculate loss
        learner.adapt(loss)  # Adapt to the task

    optimizer.step()  # Update the meta-learner

# After this point, you can evaluate the learner on unseen tasks or continue training.


ModuleNotFoundError: No module named 'learn2learn'

Multi-Modal Learning


In [None]:
import torch
import pandas as pd
import numpy as np
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from learn2learn.algorithms import MAML
from torch.optim import Adam
import gc  # For garbage collection

# Load and Preprocess the Dataset (keep this section as is if you're using it)
data = pd.read_csv("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv")
data = data.dropna()
X = data.drop("median_house_value", axis=1)
y = data["median_house_value"]
y_bins = pd.qcut(y, q=5, labels=False)
X = pd.get_dummies(X)

# Subset the data if necessary for RAM efficiency (e.g., 10% of the dataset)
X = X.sample(frac=0.1, random_state=42)
y_bins = y_bins.loc[X.index]

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y_bins, test_size=0.2, random_state=42)

# Define the Few-Shot Task Loader
class TaskLoader:
    def __init__(self, X, y, n_classes=5, n_samples=5):
        self.X = X
        self.y = y
        self.n_classes = n_classes
        self.n_samples = n_samples

    def __iter__(self):
        for _ in range(10):  # Create 10 tasks
            tasks_X, tasks_y = [], []
            for class_id in range(self.n_classes):
                class_samples = self.X.loc[self.y == class_id].sample(n=self.n_samples)
                tasks_X.append(class_samples)
                tasks_y.append(np.full((self.n_samples,), class_id))
            yield pd.concat(tasks_X), np.concatenate(tasks_y)

# Define the Model
class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        self.fc = nn.Linear(X_train.shape[1], 5)  # 5 classes

    def forward(self, x):
        return self.fc(x)

# Training Loop with MAML
model = SimpleModel()
maml = MAML(model, lr=0.01, first_order=True)
optimizer = Adam(maml.parameters(), lr=0.001)

task_loader = TaskLoader(X_train, y_train)

for task_X, task_y in task_loader:
    learner = maml.clone()  # Create task-specific copy of the model

    # Convert DataFrame to NumPy arrays and ensure they are numeric
    task_X_values = task_X.values.astype(np.float32)  # Ensure float32
    task_y_values = task_y.astype(np.int64)  # Ensure int64 for classification

    # Create TensorDataset
    task_dataset = TensorDataset(torch.tensor(task_X_values), torch.tensor(task_y_values))

    task_loader = DataLoader(task_dataset, batch_size=2)

    for batch_X, batch_y in task_loader:
        pred = learner(batch_X)
        loss = nn.CrossEntropyLoss()(pred, batch_y)
        learner.adapt(loss)  # Adapt to the task

    optimizer.step()  # Update the meta-learner

    # Clear unnecessary variables and run garbage collection
    del task_X, task_y, task_dataset
    gc.collect()

# After this point, you can evaluate the learner on unseen tasks or continue training.


In [None]:
import os
import pandas as pd
import numpy as np
import torch
from torch import nn
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from PIL import Image

# Load and Preprocess the Tabular Data
data = pd.read_csv("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv")
data = data.dropna()
X = data.drop("median_house_value", axis=1)
y = pd.qcut(data["median_house_value"], q=5, labels=False)  # Binning target into classes
X = pd.get_dummies(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the dataset class
class MultiModalDataset(Dataset):
    def __init__(self, tabular_data, image_dir, labels, transform=None):
        self.tabular_data = tabular_data
        self.image_dir = image_dir
        self.labels = labels
        self.transform = transform

        # Recursively collect all image files from the directory
        self.image_files = []
        for root, dirs, files in os.walk(image_dir):
            for file in files:
                if file.endswith(('.jpg', '.png', '.jpeg')):
                    self.image_files.append(os.path.join(root, file))

        # Check if images were found
        if len(self.image_files) == 0:
            raise ValueError(f"No images found in directory: {image_dir}. Please check the path and file extensions.")

    def __len__(self):
        return min(len(self.labels), len(self.image_files))  # Ensure the dataset length matches

    def __getitem__(self, idx):
        tabular_item = self.tabular_data.iloc[idx].values.astype(np.float32)
        label = self.labels[idx]

        # Load an image by index
        image_file = self.image_files[idx % len(self.image_files)]
        image = Image.open(image_file).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return tabular_item, image, label

# Define Transforms
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Update the path to your local dataset folder
image_dataset_path = r"C:\Users\Abhishek P\Downloads\archive\seg_train"

# Print the directory contents to debug
print("Files in dataset directory:", os.listdir(image_dataset_path))

train_dataset = MultiModalDataset(X_train, image_dataset_path, y_train.values, transform=image_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Define Multi-Modal Model
class MultiModalModel(nn.Module):
    def __init__(self):
        super(MultiModalModel, self).__init__()
        self.image_model = models.resnet18(weights='DEFAULT')  # Use a pretrained ResNet
        self.image_model.fc = nn.Linear(self.image_model.fc.in_features, 128)  # Adjust final layer
        self.tabular_model = nn.Sequential(
            nn.Linear(X_train.shape[1], 64),
            nn.ReLU(),
            nn.Linear(64, 32)
        )
        self.classifier = nn.Linear(128 + 32, 5)  # Combine features and classify into 5 classes

    def forward(self, tabular_data, images):
        image_features = self.image_model(images)
        tabular_features = self.tabular_model(tabular_data)
        combined_features = torch.cat((image_features, tabular_features), dim=1)
        return self.classifier(combined_features)

# Instantiate and Train the Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MultiModalModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training Loop
model.train()
for epoch in range(10):  # Change the number of epochs as needed
    for tabular_data, images, labels in train_loader:
        tabular_data, images, labels = tabular_data.to(device), images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(tabular_data, images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/10], Loss: {loss.item():.4f}")

# Save the model
torch.save(model.state_dict(), 'multimodal_model.pth')

# Evaluation can be added here to assess model performance on a test set


In [2]:
import os
import pandas as pd
import numpy as np
import torch
from torch import nn
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from PIL import Image

# Load and Preprocess the Tabular Data
data = pd.read_csv("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv")
data = data.dropna()
X = data.drop("median_house_value", axis=1)
y = pd.qcut(data["median_house_value"], q=5, labels=False)  # Binning target into classes
X = pd.get_dummies(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the dataset class
class MultiModalDataset(Dataset):
    def __init__(self, tabular_data, image_dir, labels, transform=None):
        self.tabular_data = tabular_data
        self.image_dir = image_dir
        self.labels = labels
        self.transform = transform

        # Recursively collect all image files from the directory
        self.image_files = []
        for root, dirs, files in os.walk(image_dir):
            for file in files:
                if file.endswith(('.jpg', '.png', '.jpeg')):
                    self.image_files.append(os.path.join(root, file))

        # Check if images were found
        if len(self.image_files) == 0:
            raise ValueError(f"No images found in directory: {image_dir}. Please check the path and file extensions.")

    def __len__(self):
        return min(len(self.labels), len(self.image_files))  # Ensure the dataset length matches

    def __getitem__(self, idx):
        tabular_item = self.tabular_data.iloc[idx].values.astype(np.float32)
        label = self.labels[idx]

        # Load an image by index
        image_file = self.image_files[idx % len(self.image_files)]
        image = Image.open(image_file).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return tabular_item, image, label

# Define Transforms
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Update the path to your local dataset folder
image_dataset_path = r"C:\Users\Abhishek P\Downloads\archive\seg_train"

# Print the directory contents to debug
print("Files in dataset directory:", os.listdir(image_dataset_path))

train_dataset = MultiModalDataset(X_train, image_dataset_path, y_train.values, transform=image_transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)  # Reduced batch size

# Define Multi-Modal Model
class MultiModalModel(nn.Module):
    def __init__(self):
        super(MultiModalModel, self).__init__()
        self.image_model = models.resnet18(pretrained=True)  # Use a pretrained ResNet
        self.image_model.fc = nn.Linear(self.image_model.fc.in_features, 128)  # Adjust final layer
        self.tabular_model = nn.Sequential(
            nn.Linear(X_train.shape[1], 64),
            nn.ReLU(),
            nn.Linear(64, 32)
        )
        self.classifier = nn.Linear(128 + 32, 5)  # Combine features and classify into 5 classes

    def forward(self, tabular_data, images):
        image_features = self.image_model(images)
        tabular_features = self.tabular_model(tabular_data)
        combined_features = torch.cat((image_features, tabular_features), dim=1)
        return self.classifier(combined_features)

# Instantiate the Model
model = MultiModalModel()

# Training Parameters
device = torch.device("cpu")  # Use CPU for training
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training Loop
model.train()
epochs = 5  # Reduce epochs to 5 for faster training
for epoch in range(epochs):
    epoch_loss = 0.0
    for tabular_data, images, labels in train_loader:
        tabular_data, images, labels = tabular_data.to(device), images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(tabular_data, images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss / len(train_loader):.4f}")

# Save the model
torch.save(model.state_dict(), 'multimodal_model.pth')

# Evaluation on Test Set (Example)
model.eval()
test_losses = []
for tabular_data, images, labels in train_loader:  # Example: using train_loader for demonstration
    tabular_data, images, labels = tabular_data.to(device), images.to(device), labels.to(device)
    with torch.no_grad():
        outputs = model(tabular_data, images)
        test_loss = criterion(outputs, labels)
        test_losses.append(test_loss.item())

average_test_loss = np.mean(test_losses)
print(f"Average Test Loss: {average_test_loss:.4f}")


Files in dataset directory: ['seg_train']




Epoch [1/5], Loss: 3.6423
Epoch [2/5], Loss: 1.9112
Epoch [3/5], Loss: 1.5659
Epoch [4/5], Loss: 1.4868
Epoch [5/5], Loss: 1.3757
Average Test Loss: 2.0429


In [6]:
   !pip install torchvision --user

Collecting torchvision
  Using cached torchvision-0.20.0-cp312-cp312-win_amd64.whl.metadata (6.2 kB)
Collecting torch==2.5.0 (from torchvision)
  Using cached torch-2.5.0-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting sympy==1.13.1 (from torch==2.5.0->torchvision)
  Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Using cached torchvision-0.20.0-cp312-cp312-win_amd64.whl (1.6 MB)
Using cached torch-2.5.0-cp312-cp312-win_amd64.whl (203.1 MB)
Using cached sympy-1.13.1-py3-none-any.whl (6.2 MB)
Installing collected packages: sympy, torch, torchvision
  Attempting uninstall: sympy
    Found existing installation: sympy 1.12.1
    Uninstalling sympy-1.12.1:
      Successfully uninstalled sympy-1.12.1
Successfully installed sympy-1.13.1 torch-2.5.0 torchvision-0.20.0



[notice] A new release of pip is available: 24.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
   !python -m pip install --upgrade pip --user

Collecting pip
  Using cached pip-24.2-py3-none-any.whl.metadata (3.6 kB)
Using cached pip-24.2-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1
    Uninstalling pip-24.1:
      Successfully uninstalled pip-24.1
Successfully installed pip-24.2


In [4]:
# Function to Evaluate Model Performance
def evaluate_model(model, dataloader):
    model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for tabular_data, images, labels in dataloader:
            tabular_data, images, labels = tabular_data.to(device), images.to(device), labels.to(device)
            outputs = model(tabular_data, images)
            _, preds = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    print(f"Model Accuracy: {accuracy * 100:.2f}%")
    return accuracy

# Prepare Test Dataset and Dataloader
test_dataset = MultiModalDataset(X_test, r"C:\Users\Abhishek P\Downloads\archive\seg_train", y_test.values, transform=image_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Evaluate After Training
accuracy = evaluate_model(model, test_loader)


Model Accuracy: 29.58%


In [6]:
import random
from collections import deque
from sklearn.metrics import accuracy_score

# Self-Improvement Class
class SelfImprovement:
    def __init__(self, model, learning_rate=0.01, discount_factor=0.95):
        self.model = model
        self.q_table = {}  # To store (state, action) => reward mappings
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor

    def select_action(self, state):
        if state in self.q_table and random.random() > 0.2:  # 80% chance of exploiting
            return max(self.q_table[state], key=self.q_table[state].get)
        else:
            return random.choice(['adjust_lr', 'adjust_batch_size', 'retrain'])

    def update_q_table(self, state, action, reward, next_state):
        old_value = self.q_table.get(state, {}).get(action, 0)
        future_rewards = max(self.q_table.get(next_state, {}).values()) if next_state in self.q_table else 0
        self.q_table.setdefault(state, {})[action] = old_value + self.learning_rate * (reward + self.discount_factor * future_rewards - old_value)

    def feedback_loop(self, performance, history):
        state = tuple(history[-2:])  # Use last two performances as state
        reward = performance - history[-1]  # Reward is improvement over previous performance
        action = self.select_action(state)
        next_state = (history[-1], performance)

        # Adjust model based on action
        if action == 'adjust_lr':
            for g in optimizer.param_groups:
                g['lr'] = max(0.0001, g['lr'] * (1.1 if reward > 0 else 0.9))
        elif action == 'adjust_batch_size':
            train_loader.batch_size = min(64, train_loader.batch_size + (8 if reward > 0 else -8))
        elif action == 'retrain':
            model.train()  # Optional re-training step

        self.update_q_table(state, action, reward, next_state)

# Instantiate the Self-Improvement Module
improvement = SelfImprovement(model)

# Evaluate Model Performance Function
def evaluate_model(model, dataloader):
    model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for tabular_data, images, labels in dataloader:
            tabular_data, images, labels = tabular_data.to(device), images.to(device), labels.to(device)
            outputs = model(tabular_data, images)
            _, preds = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    print(f"Model Accuracy: {accuracy * 100:.2f}%")
    return accuracy


In [20]:
import os
import torch
from torch import nn
from torchvision import models, transforms
from torch.utils.data import DataLoader, DistributedSampler
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.strategies import DDPStrategy
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.model_selection import train_test_split
from PIL import Image
import pandas as pd
import numpy as np
from ray import tune
from ray.tune.integration.pytorch_lightning import TuneReportCallback

class MultiModalDataset(torch.utils.data.Dataset):
    def __init__(self, tabular_data, image_dir, labels, transform=None):
        self.tabular_data = tabular_data
        self.image_dir = image_dir
        self.labels = labels
        self.transform = transform
        self.image_files = [f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png', '.jpeg'))]

    def __len__(self):
        return min(len(self.labels), len(self.image_files))

    def __getitem__(self, idx):
        tabular_item = torch.tensor(self.tabular_data.iloc[idx].values, dtype=torch.float32)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        image_file = os.path.join(self.image_dir, self.image_files[idx % len(self.image_files)])
        image = Image.open(image_file).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return tabular_item, image, label

class MultiModalModel(LightningModule):
    def __init__(self, tabular_features, learning_rate=0.001):
        super().__init__()
        self.save_hyperparameters()
        self.image_model = models.resnet18(weights='DEFAULT')
        self.image_model.fc = nn.Linear(self.image_model.fc.in_features, 128)
        self.tabular_model = nn.Sequential(
            nn.Linear(tabular_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32)
        )
        self.classifier = nn.Linear(128 + 32, 5)  # 5 classes for binned house values

    def forward(self, tabular_data, images):
        image_features = self.image_model(images)
        tabular_features = self.tabular_model(tabular_data)
        combined_features = torch.cat((image_features, tabular_features), dim=1)
        return self.classifier(combined_features)

    def training_step(self, batch, batch_idx):
        tabular_data, images, labels = batch
        outputs = self(tabular_data, images)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        self.log('train_loss', loss, on_step=True, on_epoch=True, sync_dist=True)
        return loss

    def validation_step(self, batch, batch_idx):
        tabular_data, images, labels = batch
        outputs = self(tabular_data, images)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        self.log('val_loss', loss, on_step=True, on_epoch=True, sync_dist=True)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)

def train_model(config, num_epochs=10):
    # Load and preprocess the data
    tabular_data = pd.read_csv("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv")
    labels = pd.cut(tabular_data.pop('median_house_value'), bins=5, labels=False)
    image_dataset_path = r"C:\Users\Abhishek P\Downloads\archive\seg_train"  # Update with your actual path

    # Setup dataset
    image_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    dataset = MultiModalDataset(tabular_data, image_dataset_path, labels, transform=image_transform)
    train_set, val_set = train_test_split(dataset, test_size=0.2, random_state=42)

    train_loader = DataLoader(train_set, batch_size=config["batch_size"], num_workers=4, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=config["batch_size"], num_workers=4)

   # Initialize model
    model = MultiModalModel(tabular_features=tabular_data.shape[1], learning_rate=config["learning_rate"])

    # Setup callbacks
    checkpoint_callback = ModelCheckpoint(monitor='val_loss')
    tune_report_callback = TuneReportCallback({"val_loss": "val_loss"}, on="validation_end")

    # Initialize trainer
    trainer = Trainer(
        max_epochs=num_epochs,
        accelerator="auto",
        devices="auto",
        strategy=DDPStrategy(find_unused_parameters=False),
        callbacks=[checkpoint_callback, tune_report_callback],
        logger=TensorBoardLogger(save_dir=tune.get_trial_dir(), name="", version="."),
    )

    # Train the model
    trainer.fit(model, train_loader, val_loader)

def tune_hyperparameters(num_samples=10, num_epochs=10):
    config = {
        "batch_size": tune.choice([16, 32, 64]),
        "learning_rate": tune.loguniform(1e-4, 1e-1),
    }

    scheduler = tune.schedulers.ASHAScheduler(
        max_t=num_epochs,
        grace_period=1,
        reduction_factor=2,
        metric="val_loss",  # Specify the metric to optimize
        mode="min"  # We want to minimize the validation loss
    )
    reporter = tune.CLIReporter(
        parameter_columns=["batch_size", "learning_rate"],
        metric_columns=["val_loss", "training_iteration"])

    result = tune.run(
        tune.with_parameters(train_model, num_epochs=num_epochs),
        resources_per_trial={"cpu": 1, "gpu": 0},  # Reduce CPU requirement and remove GPU requirement
        config=config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        name="tune_multimodal_model"
    )

    best_trial = result.get_best_trial("val_loss", "min", "last")
    print(f"Best trial config: {best_trial.config}")
    print(f"Best trial final validation loss: {best_trial.last_result['val_loss']}")

    # Load the best model
    best_model = MultiModalModel.load_from_checkpoint(best_trial.checkpoint.value)
    
    # Compress the model
    compressed_model = torch.quantization.quantize_dynamic(
        best_model, {nn.Linear}, dtype=torch.qint8
    )
    
    # Save the compressed model
    torch.save(compressed_model.state_dict(), 'best_compressed_multimodal_model.pth')

if __name__ == "__main__":
    tune_hyperparameters()

2024-10-18 22:20:40,558	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


== Status ==
Current time: 2024-10-18 22:20:41 (running for 00:00:00.40)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Logical resource usage: 0/4 CPUs, 0/0 GPUs
Result logdir: C:/Users/ABHISH~1/AppData/Local/Temp/ray/session_2024-10-18_22-01-19_729957_27424/artifacts/2024-10-18_22-20-40/tune_multimodal_model/driver_artifacts
Number of trials: 10/10 (10 PENDING)


== Status ==
Current time: 2024-10-18 22:20:46 (running for 00:00:05.46)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Logical resource usage: 4.0/4 CPUs, 0/0 GPUs
Result logdir: C:/Users/ABHISH~1/AppData/Local/Temp/ray/session_2024-10-18_22-01-19_729957_27424/artifacts/2024-10-18_22-20-40/tune_multimodal_model/driver_artifacts
Number of trials: 10/10 (10 PENDING)


== Status ==
Current time: 2024-10-18 22:20:51 (running for 00:00:10.51)
Using AsyncHyperBand: num_stopped=0
Bracket: 

2024-10-18 22:21:12,936	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to 'C:/Users/Abhishek P/ray_results/tune_multimodal_model' in 2.1611s.


== Status ==
Current time: 2024-10-18 22:21:12 (running for 00:00:32.33)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Logical resource usage: 4.0/4 CPUs, 0/0 GPUs
Result logdir: C:/Users/ABHISH~1/AppData/Local/Temp/ray/session_2024-10-18_22-01-19_729957_27424/artifacts/2024-10-18_22-20-40/tune_multimodal_model/driver_artifacts
Number of trials: 10/10 (10 PENDING)
+-------------------------+----------+-------+--------------+-----------------+
| Trial name              | status   | loc   |   batch_size |   learning_rate |
|-------------------------+----------+-------+--------------+-----------------|
| train_model_1b9cb_00000 | PENDING  |       |           16 |     0.0274583   |
| train_model_1b9cb_00001 | PENDING  |       |           64 |     0.0574695   |
| train_model_1b9cb_00002 | PENDING  |       |           64 |     0.000311619 |
| train_model_1b9cb_00003 | PENDING  |       |           16 |     0.000557732 |


2024-10-18 22:21:22,109	INFO tune.py:1041 -- Total run time: 41.55 seconds (30.17 seconds for the tuning loop).
Resume experiment with: tune.run(..., resume=True)
- train_model_1b9cb_00000: FileNotFoundError('Could not fetch metrics for train_model_1b9cb_00000: both result.json and progress.csv were not found at C:/Users/Abhishek P/ray_results/tune_multimodal_model/train_model_1b9cb_00000_0_batch_size=16,learning_rate=0.0275_2024-10-18_22-20-40')
- train_model_1b9cb_00001: FileNotFoundError('Could not fetch metrics for train_model_1b9cb_00001: both result.json and progress.csv were not found at C:/Users/Abhishek P/ray_results/tune_multimodal_model/train_model_1b9cb_00001_1_batch_size=64,learning_rate=0.0575_2024-10-18_22-20-40')
- train_model_1b9cb_00002: FileNotFoundError('Could not fetch metrics for train_model_1b9cb_00002: both result.json and progress.csv were not found at C:/Users/Abhishek P/ray_results/tune_multimodal_model/train_model_1b9cb_00002_2_batch_size=64,learning_rate=0.

AttributeError: 'NoneType' object has no attribute 'config'

In [15]:
import os
import torch
from torch import nn
from torchvision import models, transforms
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler  # Corrected import
from pytorch_lightning import LightningModule, Trainer
from sklearn.model_selection import train_test_split
from PIL import Image
import pandas as pd
import numpy as np

# Load and preprocess the data
tabular_data = pd.read_csv("https://raw.githubusercontent.com/ageron/handson-ml2/master/datasets/housing/housing.csv")
labels = tabular_data.pop('median_house_value').values  # Use 'median_house_value' as the target
image_dataset_path = r"C:\Users\Abhishek P\Downloads\archive\seg_train"  # Update with your actual path

# Bin the labels into 5 classes
labels_binned = pd.cut(labels, bins=5, labels=False)

class MultiModalDataset(torch.utils.data.Dataset):
    def __init__(self, tabular_data, image_dir, labels, transform=None):
        self.tabular_data = tabular_data
        self.image_dir = image_dir
        self.labels = labels
        self.transform = transform
        self.image_files = [f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png', '.jpeg'))]

    def __len__(self):
        return min(len(self.labels), len(self.image_files))

    def __getitem__(self, idx):
        tabular_item = torch.tensor(self.tabular_data.iloc[idx].values, dtype=torch.float32)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        image_file = os.path.join(self.image_dir, self.image_files[idx % len(self.image_files)])
        image = Image.open(image_file).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return tabular_item, image, label

class MultiModalModel(LightningModule):
    def __init__(self, tabular_features):
        super().__init__()
        self.image_model = models.resnet18(pretrained=True)
        self.image_model.fc = nn.Linear(self.image_model.fc.in_features, 128)
        self.tabular_model = nn.Sequential(
            nn.Linear(tabular_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32)
        )
        self.classifier = nn.Linear(128 + 32, 5)  # 5 classes for binned house values

    def forward(self, tabular_data, images):
        image_features = self.image_model(images)
        tabular_features = self.tabular_model(tabular_data)
        combined_features = torch.cat((image_features, tabular_features), dim=1)
        return self.classifier(combined_features)

    def training_step(self, batch, batch_idx):
        tabular_data, images, labels = batch
        outputs = self(tabular_data, images)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        self.log('train_loss', loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=0.001)

def main():
    # Setup dataset
    image_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

    dataset = MultiModalDataset(tabular_data, image_dataset_path, labels_binned, transform=image_transform)
    train_loader = DataLoader(dataset, batch_size=32, num_workers=0)  # Set num_workers to 0 for debugging

    # Initialize model
    model = MultiModalModel(tabular_features=tabular_data.shape[1])

    # Initialize trainer
    trainer = Trainer(
        max_epochs=5,
        accelerator="auto",
        devices=1,  # Use a single device
    )

    # Train the model
    trainer.fit(model, train_loader)

if __name__ == "__main__":
    main()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name          | Type       | Params | Mode 
-----------------------------------------------------
0 | image_model   | ResNet     | 11.2 M | train
1 | tabular_model | Sequential | 2.7 K  | train
2 | classifier    | Linear     | 805    | train
-----------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.983    Total estimated model params size (MB)
73        Modules in train mode
0         Modules in eval mode
C:\Users\Abhishek P\AppData\Roaming\Python\Python312\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.
C:\Users\Abhishek P\AppData\Roaming\Python\Python312\site-packages\