In [44]:
import pandas as pd
import os
import sys

In [45]:
%pwd

'E:\\Resume_Screening\\resume_screening_ai'

In [46]:
# Switch to the project root before importing project modules or loading files.
# The location can be overridden with the RESUME_SCREENING_ROOT environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the original hard-coded path is used, so existing behavior is unchanged.
os.chdir(os.environ.get('RESUME_SCREENING_ROOT', 'E:/Resume_Screening/resume_screening_ai'))

In [47]:
from dataclasses import dataclass
from pathlib import Path

In [48]:
@dataclass(frozen=True)
class ModelTrainingConfig:
    """Read-only set of filesystem locations consumed by the model-training step.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory that Configuration.get_model_training_config creates up front.
    root_dir: Path
    # Target handed to joblib.dump when the fitted model is saved.
    model_dir: Path
    # CSV file read with pandas to obtain the training data.
    train_data_path: Path
    # Location of the held-out data; not read anywhere in this notebook section.
    test_data_path: Path

In [49]:
from src.Resume_Screening.constants import *
from src.Resume_Screening import logger
from src.Resume_Screening.utils.common import create_directory,read_yaml

In [50]:
class Configuration:
    """Loads the project's YAML files and builds per-stage configuration objects."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """Read the three YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main config YAML.
            schema_filepath: Path of the schema YAML.
            params_filepath: Path of the params YAML.
        """
        self.config = read_yaml(config_filepath)
        self.schema = read_yaml(schema_filepath)
        self.params = read_yaml(params_filepath)

        # Create the root directory for artifacts
        create_directory([self.config.artifacts_root])

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Build the model-training config from the ``models`` section.

        The stage's ``root_dir`` is created as a side effect.

        Returns:
            ModelTrainingConfig whose fields are ``pathlib.Path`` objects.
        """
        models_section = self.config.models
        create_directory([models_section.root_dir])

        # ModelTrainingConfig annotates every field as Path, so convert the raw
        # YAML values here instead of passing them through unchanged.
        return ModelTrainingConfig(
            root_dir=Path(models_section.root_dir),
            model_dir=Path(models_section.model_dir),
            train_data_path=Path(models_section.train_data_path),
            test_data_path=Path(models_section.test_data_path),
        )
        
        

In [51]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import joblib
class ModelTraining:
    """Fits a LinearRegression on the training CSV and saves it with joblib."""

    def __init__(self, config : ModelTrainingConfig):
        """Keep the stage configuration (paths for the training data and the model)."""
        self.config = config

    def train(self):
        """Train on ``config.train_data_path`` and dump the model to ``config.model_dir``.

        The 'shortlisted' column is the target; every other column is a feature.

        Returns:
            The fitted LinearRegression, so callers that bind the result get a
            usable object instead of None.

        Raises:
            KeyError: If the training data has no 'shortlisted' column.
        """
        train_data = pd.read_csv(self.config.train_data_path)

        X = train_data.drop('shortlisted', axis=1)
        y = train_data['shortlisted']

        # NOTE(review): 'shortlisted' reads like a yes/no label; confirm that a
        # regression model (rather than a classifier) is what is intended here.
        model = LinearRegression()
        model.fit(X, y)

        # joblib.dump does not create missing directories, so make sure the
        # destination's parent exists before writing.
        Path(self.config.model_dir).parent.mkdir(parents=True, exist_ok=True)
        joblib.dump(model, self.config.model_dir)

        logger.info("Model training completed and saved successfully.")
        return model

In [60]:
# Run the model-training stage end to end: load configuration, build the
# stage config, then train and save the model.
try:
    config = Configuration()
    model_training_config = config.get_model_training_config()
    model_training = ModelTraining(config=model_training_config)
    data = model_training.train()
except Exception:
    # Record the failure with its traceback, then re-raise the same exception
    # untouched (a bare `raise` keeps the original traceback as-is).
    logger.exception("Model training stage failed.")
    raise

[2025-08-10 00:21:20,744] - INFO : common  : yaml file : config\config.yaml loaded successfully
[2025-08-10 00:21:20,761] - INFO : common  : yaml file : schema.yaml loaded successfully
[2025-08-10 00:21:20,764] - INFO : common  : yaml file : params.yaml loaded successfully
[2025-08-10 00:21:20,766] - INFO : common  : directory created at artifacts
['artifacts']
[2025-08-10 00:21:20,768] - INFO : common  : directory created at artifacts/models
[2025-08-10 00:21:20,841] - INFO : 1842617632  : Model training completed and saved successfully.


In [None]:
def compute_resume_score(self, row, role_skill_map, max_exp_months=140, skill_weight=0.7, exp_weight=0.3):
    """Score one resume row from its skill overlap and its experience.

    Args:
        self: Not used by the body; kept so the existing signature is unchanged.
        row: Mapping-like record supporting ``.get`` (dict or pandas Series).
        role_skill_map: Mapping from a role to the collection of skills it needs.
        max_exp_months: Experience (in months) that earns the full 100 points.
        skill_weight: Weight applied to the skill-match percentage.
        exp_weight: Weight applied to the experience score.

    Returns:
        pandas.Series of four values, in order: number of matched skills,
        skill-match percentage, experience score, weighted final score
        (the last three rounded to 2 decimals).
    """
    # The body previously read the bare names `role` and `matched_skills`,
    # which are neither parameters nor locals, so every call raised NameError.
    # NOTE(review): assumes the row carries them under the keys 'role' and
    # 'matched_skills' — confirm against the frame this is applied to.
    role = row.get('role')
    matched_skills = row.get('matched_skills')
    # A missing value (None / NaN) means "no skills matched", not an error.
    if pd.api.types.is_scalar(matched_skills) and pd.isna(matched_skills):
        matched_skills = []
    # NOTE(review): a plain string here would be split into characters by set();
    # verify that 'matched_skills' holds a list-like of skill names.

    role_skills = set(role_skill_map.get(role) or [])
    resume_skills = set(matched_skills)

    match_count = len(role_skills & resume_skills)
    total_role_skills = len(role_skills)
    skill_match_percent = (match_count / total_role_skills * 100) if total_role_skills > 0 else 0

    exp_months = row.get('experience_months', 0)
    # Treat missing experience as zero instead of propagating NaN / TypeError.
    if pd.isna(exp_months):
        exp_months = 0
    # Keep the experience score inside 0..100 (negative months no longer go below 0).
    exp_score = max(0.0, min(exp_months / max_exp_months * 100, 100))

    final_score = (skill_match_percent * skill_weight) + (exp_score * exp_weight)

    return pd.Series([
        match_count,
        round(skill_match_percent, 2),
        round(exp_score, 2),
        round(final_score, 2)
    ])