In [1]:
from src.utils.main_utils import read_yaml_file
from src.constants import SCHEMA_FILE_PATH

[ 2025-08-11 12:11:30,047 ] root - INFO - ✅ This should appear in both console and log file.


In [2]:
# Read the schema file at SCHEMA_FILE_PATH with the project's read_yaml_file
# helper. The returned object exposes a `columns` attribute (displayed in the
# next cell).
data = read_yaml_file(SCHEMA_FILE_PATH)

In [3]:
# Display the schema's column-name -> dtype-string mapping.
data.columns

ConfigBox({'months_as_customer': 'int', 'age': 'int', 'policy_state': 'object', 'policy_csl': 'object', 'policy_deductable': 'int', 'policy_annual_premium': 'float', 'umbrella_limit': 'int', 'insured_sex': 'object', 'insured_education_level': 'object', 'insured_occupation': 'object', 'insured_hobbies': 'object', 'insured_relationship': 'object', 'capital-gains': 'int', 'capital-loss': 'int', 'incident_type': 'object', 'collision_type': 'object', 'incident_severity': 'object', 'authorities_contacted': 'object', 'incident_state': 'object', 'incident_hour_of_the_day': 'int', 'number_of_vehicles_involved': 'int', 'property_damage': 'object', 'bodily_injuries': 'int', 'witnesses': 'int', 'police_report_available': 'object', 'total_claim_amount': 'int', 'injury_claim': 'int', 'property_claim': 'int', 'vehicle_claim': 'int', 'auto_make': 'object', 'auto_year': 'int', 'fraud_reported': 'object'})

In [4]:
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# Single seed shared by every estimator that accepts one, so that repeated runs
# (including Restart & Run All) train comparable models.
RANDOM_STATE = 42

# Candidate classifiers keyed by display name. The keys are the same names used
# by the `model_params` search grids.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=RANDOM_STATE),
    "Ridge Classifier": RidgeClassifier(random_state=RANDOM_STATE),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(random_state=RANDOM_STATE),
    "Extra Trees": ExtraTreesClassifier(random_state=RANDOM_STATE),
    "Gradient Boosting": GradientBoostingClassifier(random_state=RANDOM_STATE),
    # `use_label_encoder` is intentionally not passed: it is deprecated in
    # XGBoost and newer releases ignore it with a warning.
    "XGBoost": XGBClassifier(eval_metric='logloss', random_state=RANDOM_STATE),
    "LightGBM": LGBMClassifier(random_state=RANDOM_STATE),
    "CatBoost": CatBoostClassifier(verbose=0, random_seed=RANDOM_STATE),
    # KNN and the naive Bayes variants take no seed.
    "KNN": KNeighborsClassifier(),
    "SVC": SVC(),
    "Linear SVC": LinearSVC(random_state=RANDOM_STATE),
    "Gaussian NB": GaussianNB(),
    "Multinomial NB": MultinomialNB(),
    "Bernoulli NB": BernoulliNB(),
    "MLP": MLPClassifier(max_iter=1000, random_state=RANDOM_STATE)
}


In [5]:
# Hyperparameter search grids keyed by the same display names as `models`.
# Each value is a flat {parameter name: list of candidate values} mapping.
model_params = {
    "Logistic Regression": {
        "C": [0.01, 0.1, 1, 10],
        # Only penalty/solver pairs that scikit-learn accepts are listed:
        # both "liblinear" and "saga" support "l1" and "l2". The previous grid
        # also crossed in "lbfgs", "elasticnet" and None, which produces
        # invalid combinations (lbfgs has no l1/elasticnet support, liblinear
        # has no elasticnet/None support, and elasticnet requires l1_ratio).
        "penalty": ["l1", "l2"],
        "solver": ["liblinear", "saga"],
        "max_iter": [1000]
    },
    "Ridge Classifier": {
        "alpha": [0.1, 1.0, 10.0]
    },
    "Decision Tree": {
        "max_depth": [None, 5, 10, 20],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
        "criterion": ["gini", "entropy"]
    },
    "Random Forest": {
        "n_estimators": [100, 200, 500],
        "max_depth": [None, 5, 10, 20],
        "min_samples_split": [2, 5],
        "min_samples_leaf": [1, 2],
        "bootstrap": [True, False]
    },
    "Extra Trees": {
        "n_estimators": [100, 200],
        "max_depth": [None, 5, 10],
        "min_samples_split": [2, 5],
        "min_samples_leaf": [1, 2]
    },
    "Gradient Boosting": {
        "n_estimators": [100, 200],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [3, 5, 10]
    },
    "XGBoost": {
        "n_estimators": [100, 200],
        "learning_rate": [0.01, 0.1, 0.2],
        "max_depth": [3, 5, 7],
        "subsample": [0.8, 1.0],
        "colsample_bytree": [0.8, 1.0]
    },
    "LightGBM": {
        "n_estimators": [100, 200],
        "learning_rate": [0.01, 0.1, 0.2],
        "num_leaves": [31, 50, 100],
        "boosting_type": ["gbdt", "dart"]
    },
    "CatBoost": {
        "iterations": [100, 200],
        "depth": [4, 6, 10],
        "learning_rate": [0.01, 0.1, 0.2]
    },
    "KNN": {
        "n_neighbors": [3, 5, 7, 11],
        "weights": ["uniform", "distance"],
        # "minkowski" is omitted: with the default p=2 it is the same distance
        # as "euclidean", so it would only duplicate fits.
        "metric": ["euclidean", "manhattan"]
    },
    "SVC": {
        "C": [0.1, 1, 10],
        "kernel": ["linear", "rbf", "poly"],
        "gamma": ["scale", "auto"]
    },
    "Linear SVC": {
        "C": [0.1, 1, 10],
        "max_iter": [1000]
    },
    "Gaussian NB": {},  # no main hyperparameters
    "Multinomial NB": {
        "alpha": [0.1, 0.5, 1.0]
    },
    "Bernoulli NB": {
        "alpha": [0.1, 0.5, 1.0]
    },
    "MLP": {
        "hidden_layer_sizes": [(50,), (100,), (100, 50)],
        "activation": ["tanh", "relu"],
        "solver": ["adam", "sgd"],
        "alpha": [0.0001, 0.001],
        "learning_rate": ["constant", "adaptive"],
        "max_iter": [1000]
    }
}


In [6]:
from src.target_mapping import InsuranceModel
import pandas as pd
import numpy as np
import sys
import os
import pickle

In [7]:
# Location of the pickled model opened by the loading cell below. The path is
# relative, so it resolves against the kernel's current working directory.
model_path = "artifact/model_trainer/trained_model/model.pkl"

In [8]:
# Deserialize the trained model from `model_path`.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts produced by a trusted training run.
with open(model_path, "rb") as model_file:
    model: InsuranceModel = pickle.load(model_file)

print("✅ Model loaded successfully!")

✅ Model loaded successfully!


In [9]:
import pandas as pd

# One hand-written claim record, keyed by column name. Key order is the column
# order of the resulting DataFrame.
sample_input = {
    # customer / policy
    "months_as_customer": 328,
    "age": 48,
    "policy_number": 521585,
    "policy_bind_date": "10/17/2014",
    "policy_state": "OH",
    "policy_csl": "250/500",
    "policy_deductable": 1000,
    "policy_annual_premium": 1406.91,
    "umbrella_limit": 0,
    # insured person
    "insured_zip": 466132,
    "insured_sex": "MALE",
    "insured_education_level": "MD",
    "insured_occupation": "craft-repair",
    "insured_hobbies": "sleeping",
    "insured_relationship": "husband",
    "capital-gains": 53300,
    "capital-loss": 0,
    # incident
    "incident_date": "1/25/2015",
    "incident_type": "Single Vehicle Collision",
    "collision_type": "Side Collision",
    "incident_severity": "Major Damage",
    "authorities_contacted": "Police",
    "incident_state": "SC",
    "incident_city": "Columbus",
    "incident_location": "9935 4th Drive",
    "incident_hour_of_the_day": 5,
    "number_of_vehicles_involved": 1,
    "property_damage": "YES",
    "bodily_injuries": 1,
    "witnesses": 2,
    "police_report_available": "YES",
    # claim amounts
    "total_claim_amount": 71610,
    "injury_claim": 6510,
    "property_claim": 13020,
    "vehicle_claim": 52080,
    # vehicle
    "auto_make": "Saab",
    "auto_model": "92x",
    "auto_year": 2004,
}

# A list holding the single dict yields a one-row DataFrame.
single_row_records = [sample_input]
input_df = pd.DataFrame(single_row_records)

# Show the DataFrame
input_df.head()

Unnamed: 0,months_as_customer,age,policy_number,policy_bind_date,policy_state,policy_csl,policy_deductable,policy_annual_premium,umbrella_limit,insured_zip,...,bodily_injuries,witnesses,police_report_available,total_claim_amount,injury_claim,property_claim,vehicle_claim,auto_make,auto_model,auto_year
0,328,48,521585,10/17/2014,OH,250/500,1000,1406.91,0,466132,...,1,2,YES,71610,6510,13020,52080,Saab,92x,2004


In [10]:
# Pull one row labelled non-fraud ("N") from the ingested training split so the
# loaded model can be sanity-checked on it.
df = pd.read_csv("artifact/data_ingestion/ingested/train.csv")

# NOTE: `data` was bound to the schema object earlier in the notebook; from
# here on it refers to the non-fraud rows of the training frame.
data = df[df["fraud_reported"] == "N"]

# A fixed seed makes the sampled row the same on every run. drop() returns a
# new frame, so nothing derived from the filtered slice is mutated in place.
dataa = data.sample(1, random_state=42).drop(columns=["fraud_reported"])

In [11]:
# Score the single sampled row with the loaded model and show the raw result.
prediction = model.predict(dataa)
print(f"✅ Prediction for sample data: {prediction}")
# The first element is the label for the one input row.
predicted_label = prediction[0]
print(f"✅ Predicted label: {predicted_label}")

[ 2025-08-11 12:11:32,499 ] root - INFO - Entered predict method of USvisaModel class
[ 2025-08-11 12:11:32,508 ] root - INFO - Transforming input features
[ 2025-08-11 12:11:32,515 ] root - INFO - Starting feature engineering transformation.
[ 2025-08-11 12:11:32,559 ] root - INFO - Feature engineering transformation completed.


[ 2025-08-11 12:11:32,632 ] root - INFO - Performing predictions


✅ Prediction for sample data: [0]
✅ Predicted label: 0


In [32]:
# Inspect the police_report_available value of the hand-built sample row.
input_df['police_report_available']

0    YES
Name: police_report_available, dtype: object

In [19]:
# List the column names of the hand-built sample row.
input_df.columns

Index(['months_as_customer', 'age', 'policy_number', 'policy_bind_date',
       'policy_state', 'policy_csl', 'policy_deductable',
       'policy_annual_premium', 'umbrella_limit', 'insured_zip', 'insured_sex',
       'insured_education_level', 'insured_occupation', 'insured_hobbies',
       'insured_relationship', 'capital-gains', 'capital-loss',
       'incident_date', 'incident_type', 'collision_type', 'incident_severity',
       'authorities_contacted', 'incident_state', 'incident_city',
       'incident_location', 'incident_hour_of_the_day',
       'number_of_vehicles_involved', 'property_damage', 'bodily_injuries',
       'witnesses', 'police_report_available', 'total_claim_amount',
       'injury_claim', 'property_claim', 'vehicle_claim', 'auto_make',
       'auto_model', 'auto_year'],
      dtype='object')