# Import dependencies

In [None]:

import pandas as pd  # For data manipulation and analysis (e.g., reading CSV, creating DataFrames)
import numpy as np   # For numerical operations, especially with arrays
import warnings      # To manage and filter warning messages for a cleaner output
import os            # For interacting with the operating system (e.g., file paths)
import matplotlib.pyplot as plt # The standard library for creating static, animated, and interactive visualizations
import seaborn as sns           # Built on top of matplotlib, provides a high-level interface for attractive statistical graphics
import plotly.express as px     # For creating interactive plots and dashboards

from sklearn.impute import SimpleImputer              # For handling missing values
from sklearn.preprocessing import (
    StandardScaler,       # For scaling numerical features to have zero mean and unit variance
    MinMaxScaler,         # For scaling numerical features to a specific range (e.g., 0-1)
    OneHotEncoder,        # For converting categorical variables into a one-hot encoded format
    LabelEncoder,         # For encoding target labels with value between 0 and n_classes-1
    OrdinalEncoder        # For converting categorical features into ordinal integers
)
from sklearn.compose import ColumnTransformer         # To apply different transformations to different columns
from sklearn.pipeline import Pipeline                 # To chain multiple processing and modeling steps together
from sklearn.linear_model import LogisticRegression   # For a simple, interpretable baseline model
from sklearn.tree import DecisionTreeClassifier       # For a single decision tree baseline
from sklearn.ensemble import RandomForestClassifier   # Bagging: Random Forest
from xgboost import XGBClassifier                     # Boosting: XGBoost
from catboost import CatBoostClassifier               # Advanced Boosting: CatBoost

from sklearn.metrics import (
    accuracy_score,        # Basic accuracy metric (use with caution on imbalanced data)
    precision_score,       # Metric for campaign efficiency (TP / (TP + FP))
    recall_score,          # Metric for revenue protection (TP / (TP + FN))
    f1_score,              # Harmonic mean of precision and recall
    confusion_matrix,      # To visualize model performance (TP, FP, TN, FN)
    classification_report, # Text report showing the main classification metrics
    roc_auc_score,         # Area Under the Receiver Operating Characteristic Curve
    precision_recall_curve,# To compute precision-recall pairs for different probability thresholds
    auc                    # To compute the Area Under the Curve from points (used with precision_recall_curve)
)

from sklearn.model_selection import (
    train_test_split,      # To split data into training and testing sets
    StratifiedKFold,       # Cross-validation that maintains class distribution
    GridSearchCV,          # Exhaustive search over specified parameter values
    RandomizedSearchCV     # Randomized search over specified parameter values
)

# =============================================================================
# 7. Statistical Analysis
# =============================================================================
from scipy.stats import chi2_contingency, ttest_ind  # For statistical significance testing (Chi-square, t-tests)

# =============================================================================
# 8. (Bonus) Advanced Class Imbalance Handling
# =============================================================================
from imblearn.over_sampling import SMOTE            # Synthetic Minority Over-sampling Technique

# =============================================================================
# 9. Utilities for Pipeline Persistence
# =============================================================================
import joblib  # For saving and loading your trained models and pipelines

# -----------------------------------------------------------------------------
# Optional: notebook-wide configuration applied once, right after the imports
# -----------------------------------------------------------------------------
# Select seaborn's 'whitegrid' style for plots.
sns.set_style(style='whitegrid')

# Suppress FutureWarning messages so that cell output stays readable.
warnings.filterwarnings(action='ignore', category=FutureWarning)

# Lift the cap on displayed columns so pandas shows every column.
pd.set_option('display.max_columns', None)

# Confirmation message printed once the cell has run to completion.
print("All necessary libraries have been imported successfully.")

: 