In [7]:
import os

In [8]:
%pwd

'/Users/suyash/Desktop/projects/Intent-classification-'

In [6]:
# Move the working directory one level up from wherever the kernel currently is.
# The path is relative, so each additional run of this cell climbs one more
# level (the cell is not idempotent).
os.chdir('../')

In [9]:
%pwd

'/Users/suyash/Desktop/projects/Intent-classification-'

In [10]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainerConfig:
    """Settings for the model-training stage.

    Instances are built by ``ConfigurationManager`` from the ``model_trainer``
    section of the project configuration.
    """
    root_dir: Path  # stage directory; created before the config object is built
    data_path: Path  # populated from the ``train_path`` configuration entry
    model_ckpt: Path  # populated from the ``model_ckpt`` configuration entry
    

In [11]:
from src.ic.constants import *
from src.ic.utils.common import read_yaml,create_directories

In [12]:
class ConfigurationManager:
    """Load the project YAML files and build per-stage configuration objects.

    Args:
        config_path: Location of the main configuration YAML.
        params_filepath: Location of the parameters YAML.

    Side effects:
        Creates the ``artifacts_root`` directory named in the configuration.
    """

    def __init__(self,
                 config_path=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        self.config = read_yaml(config_path)
        self.params = read_yaml(params_filepath)
        # Backward-compatible alias: the attribute was originally misspelled,
        # so existing readers of ``paramss`` keep working.
        self.paramss = self.params

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the ``ModelTrainerConfig`` from the ``model_trainer`` section.

        Creates the stage's ``root_dir`` before returning.

        Returns:
            ModelTrainerConfig: ``root_dir``, ``data_path`` (taken from the
            ``train_path`` entry) and ``model_ckpt``.
        """
        section = self.config.model_trainer

        create_directories([section.root_dir])

        return ModelTrainerConfig(
            root_dir=section.root_dir,
            data_path=section.train_path,
            model_ckpt=section.model_ckpt,
        )

    def get_data_transformation_config(self) -> ModelTrainerConfig:
        """Legacy name for :meth:`get_model_trainer_config`.

        The original name did not match what the method returns; it is kept
        so existing callers continue to work.
        """
        return self.get_model_trainer_config()



In [None]:
from transformers import TFAutoModelForSequenceClassification
from tensorflow.keras.optimizers import Adam

class ModelTrainer:
    """Fine-tune a sequence-classification model from a single dataset.

    The dataset is split 80/20, in iteration order, into training and
    validation parts before fitting.

    Args:
        config: Object whose attributes ``model_ckpt``, ``num_labels``,
            ``learning_rate``, ``batch_size``, ``epochs`` and
            ``model_save_path`` are read by :meth:`train`.
            NOTE(review): ``ModelTrainerConfig`` as declared in this notebook
            only has ``root_dir``, ``data_path`` and ``model_ckpt`` — confirm
            the remaining fields are supplied.
        dataset: Iterable object providing ``take``, ``skip`` and ``batch``
            (presumably a ``tf.data.Dataset`` — confirm against the caller).
    """

    def __init__(self, config: ModelTrainerConfig, dataset):
        self.config = config
        self.dataset = dataset

    def train(self):
        """Load, compile, fit and save the model.

        Returns:
            The fitted model, after it has been written to
            ``config.model_save_path`` with ``save_pretrained``.
        """
        # This cell imports only ``Adam`` from TensorFlow, so the bare ``tf``
        # name used for the loss would be undefined on a fresh kernel.
        import tensorflow as tf

        # Load model
        model = TFAutoModelForSequenceClassification.from_pretrained(
            self.config.model_ckpt, num_labels=self.config.num_labels
        )

        # Compile model
        optimizer = Adam(learning_rate=self.config.learning_rate)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        metrics = ["accuracy"]
        model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

        # Prepare dataset: count elements by streaming instead of building a
        # full in-memory list of every example just to take its length.
        num_examples = sum(1 for _ in self.dataset)
        train_size = int(0.8 * num_examples)  # 80% for training
        train_dataset = self.dataset.take(train_size).batch(self.config.batch_size)
        val_dataset = self.dataset.skip(train_size).batch(self.config.batch_size)

        # Train model
        model.fit(train_dataset, validation_data=val_dataset, epochs=self.config.epochs)

        # Save model
        model.save_pretrained(self.config.model_save_path)
        print(f"Model saved at {self.config.model_save_path}")

        return model


In [13]:
import os
import yaml
import tensorflow as tf
from transformers import TFAutoModelForSequenceClassification
from tensorflow.keras.optimizers import Adam

class ModelTrainer:
    """Train and save a sequence-classification model configured from YAML.

    Args:
        config_path: Path to a YAML file containing a ``model_trainer``
            mapping with the keys ``model_ckpt``, ``epochs``, ``batch_size``,
            ``learning_rate`` and ``model_save_path``.
        train_dataset: Training data; must provide a ``batch`` method
            (presumably a ``tf.data.Dataset`` — confirm against the caller).
        val_dataset: Validation data with the same requirement.
        num_labels: Number of output classes passed to the model.

    Raises:
        KeyError: If the ``model_trainer`` section or one of its keys is missing.
    """

    def __init__(self, config_path, train_dataset, val_dataset, num_labels):
        # Load configuration
        with open(config_path, "r") as file:
            self.config = yaml.safe_load(file)["model_trainer"]

        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.num_labels = num_labels  # Set dynamically
        self.model_ckpt = self.config["model_ckpt"]
        self.epochs = self.config["epochs"]
        self.batch_size = self.config["batch_size"]
        self.learning_rate = self.config["learning_rate"]
        self.model_save_path = self.config["model_save_path"]

        # Ensure the parent of the model save location exists
        self._ensure_parent_dir(self.model_save_path)

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the parent directory of ``path`` when it has one."""
        parent = os.path.dirname(path)
        # A bare name such as "model" has an empty dirname, and
        # os.makedirs("") raises FileNotFoundError, so skip that case.
        if parent:
            os.makedirs(parent, exist_ok=True)

    @staticmethod
    def _require_batchable(dataset, name):
        """Raise ``TypeError`` unless ``dataset`` exposes a callable ``batch``."""
        if not callable(getattr(dataset, "batch", None)):
            raise TypeError(
                f"{name} must provide a .batch() method (e.g. a tf.data.Dataset), "
                f"got {type(dataset).__name__}"
            )

    def train(self):
        """Initialize, compile, train, and save the model.

        Returns:
            The fitted model, after it has been written to
            ``model_save_path`` with ``save_pretrained``.

        Raises:
            TypeError: If either dataset does not provide ``batch``.
        """
        # Validate inputs first so a wrong argument (for example a CSV path
        # string) fails immediately rather than after the model is loaded.
        self._require_batchable(self.train_dataset, "train_dataset")
        self._require_batchable(self.val_dataset, "val_dataset")

        # Load the pretrained checkpoint with a classification head
        model = TFAutoModelForSequenceClassification.from_pretrained(
            self.model_ckpt, num_labels=self.num_labels
        )

        # Compile model
        optimizer = Adam(learning_rate=self.learning_rate)
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        metrics = ["accuracy"]
        model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

        # Train model
        model.fit(self.train_dataset.batch(self.batch_size),
                  validation_data=self.val_dataset.batch(self.batch_size),
                  epochs=self.epochs)

        # Save trained model
        model.save_pretrained(self.model_save_path)
        print(f"✅ Model saved at {self.model_save_path}")

        return model


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# File paths (plain strings) for the training and validation data.
# NOTE(review): val_dataset points into artifacts/data_ingestions/ while
# train_dataset points into artifacts/data_transformation/ — confirm the
# validation path is intended.
train_dataset='artifacts/data_transformation/transformed_data.csv'
val_dataset='artifacts/data_ingestions/transformed_data.csv'

In [17]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Locations of the input table and of the two files produced by the split
TRANSFORMED_CSV = "artifacts/data_transformation/transformed_data.csv"
TRAIN_CSV = "artifacts/data_transformation/train.csv"
VAL_CSV = "artifacts/data_transformation/val.csv"

# Read the transformed table
df = pd.read_csv(TRANSFORMED_CSV)

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable
split_options = {"test_size": 0.2, "random_state": 42}
train_df, val_df = train_test_split(df, **split_options)

# Persist both parts without the index column
train_df.to_csv(TRAIN_CSV, index=False)
val_df.to_csv(VAL_CSV, index=False)

print(f"✅ Data split complete: Train size = {len(train_df)}, Validation size = {len(val_df)}")


✅ Data split complete: Train size = 5232, Validation size = 1308


In [None]:

# Build the trainer from config/config.yaml and run training.
# NOTE(review): ModelTrainer.train() calls .batch() on both datasets, but the
# train_dataset / val_dataset values assigned in this notebook are CSV path
# strings — presumably they must be turned into dataset objects first; confirm.
num_labels = 27  # number of classes given to the model — TODO confirm against the data
trainer = ModelTrainer("config/config.yaml", train_dataset, val_dataset, num_labels)
trained_model = trainer.train()


In [22]:
# Point both names at the train/validation CSV files written under
# artifacts/data_transformation/ by the split cell.
train_dataset='artifacts/data_transformation/train.csv'
val_dataset='artifacts/data_transformation/val.csv'

In [24]:
%pwd

'/Users/suyash/Desktop/projects/Intent-classification-'