<div style="display: flex; background-color: RGB(51,165,182);" >
<h1 style="margin: auto; padding: 30px; color:#fff; "> Project 12: Counterfeit bills prediction - Functional app for end-user
 </h1>
</div>

In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import pickle
import joblib
from pathlib import Path

def load_data(file_path):
    """Read the CSV file at ``file_path`` into a pandas DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame or None
        The parsed data, or ``None`` when the file does not exist (a message
        is printed in that case). Any other read/parse error propagates.
    """
    try:
        frame = pd.read_csv(file_path)
    except FileNotFoundError:
        # A missing file is reported and signalled with None so the caller
        # can stop the pipeline without a traceback.
        print(f"❌ File not found: {file_path}")
        return None

    print(f"✅ Data loaded successfully from {file_path}.")
    return frame

def scale_data(df, scaler_path="scaler.pkl"):
    """Scale the feature columns with a previously fitted scaler loaded from disk.

    The ``'id'`` column is dropped and the remaining columns are passed, as a
    plain array, to the ``transform`` method of the object stored at
    ``scaler_path``. The scaler is only applied, never re-fitted.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data; must contain an ``'id'`` column.
    scaler_path : str or path-like, optional
        File holding the fitted scaler, loaded with ``joblib.load``.
        Defaults to ``"scaler.pkl"`` (resolved against the current working
        directory).

    Returns
    -------
    object or None
        Whatever ``scaler.transform`` returns (presumably a NumPy array —
        depends on the saved scaler), or ``None`` if any step failed; the
        error is printed rather than raised.
    """
    try:
        # Everything except the identifier column is treated as a feature.
        X = df.drop(columns="id").values
        scaler = joblib.load(scaler_path)
        X_scaled = scaler.transform(X)  # Use transform, NOT fit_transform
        print("✅ Data scaled successfully.")
        return X_scaled
    except Exception as e:
        # Best-effort: report the problem and let the caller decide to stop.
        print(f"❌ Error in scaling data: {e}")
        return None

def apply_pca(X_scaled, df, n_components=4, pca_path="pca.pkl"):
    """Project scaled data with a previously fitted PCA model loaded from disk.

    Parameters
    ----------
    X_scaled : array-like
        Scaled feature matrix passed to ``pca.transform``.
    df : pandas.DataFrame
        Original data; its column names (minus ``'id'``) label the loadings.
    n_components : int, optional
        Currently unused and kept only for backward compatibility: the number
        of components is taken from the loaded model (``pca.n_components_``).
    pca_path : str or path-like, optional
        File holding the fitted PCA model, loaded with ``joblib.load``.
        Defaults to ``"pca.pkl"`` (resolved against the current working
        directory).

    Returns
    -------
    tuple
        ``(X_proj, pca_df)`` where ``X_proj`` is the output of
        ``pca.transform`` and ``pca_df`` is a DataFrame of ``pca.components_``
        with one row per component (``PC1``, ``PC2``, ...) and one column per
        feature. ``(None, None)`` if any step failed; the error is printed
        rather than raised.
    """
    try:
        pca = joblib.load(pca_path)
        X_proj = pca.transform(X_scaled)  # Use transform, NOT fit_transform

        # Label the model's eigenvectors (principal component loadings) with
        # the feature names so they can be read as a table.
        feature_names = df.columns.drop('id')
        component_labels = [f"PC{i + 1}" for i in range(pca.n_components_)]
        pca_df = pd.DataFrame(
            pca.components_, columns=feature_names, index=component_labels
        )

        print("✅ PCA applied successfully.")
        return X_proj, pca_df
    except Exception as e:
        # Best-effort: report the problem and let the caller decide to stop.
        print(f"❌ Error in PCA transformation: {e}")
        return None, None

def predict_with_model(X_proj, model_path="logistic_model.pkl"):
    """Load a pickled model from ``model_path`` and return its predictions.

    Parameters
    ----------
    X_proj : array-like
        Feature matrix passed unchanged to ``model.predict``.
    model_path : str or path-like, optional
        Pickle file containing the trained model. Defaults to
        ``"logistic_model.pkl"`` (resolved against the current working
        directory).

    Returns
    -------
    object or None
        Whatever ``model.predict(X_proj)`` returns, or ``None`` if the model
        could not be loaded or the prediction failed; the error is printed
        rather than raised.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point model_path at files produced by a trusted training run.
    try:
        # The context manager guarantees the file handle is closed, even if
        # unpickling fails.
        with open(model_path, "rb") as model_file:
            model = pickle.load(model_file)
    except Exception as e:
        print(f"❌ Error loading model from {model_path}: {e}")
        return None

    print(f"✅ Model loaded successfully from {model_path}!")

    # Prediction failures are reported separately so they are not mistaken
    # for a model-loading problem.
    try:
        return model.predict(X_proj)
    except Exception as e:
        print(f"❌ Error predicting with model from {model_path}: {e}")
        return None

def main(file_path, output_folder):
    """Run the full pipeline: load, scale, project with PCA, predict, save.

    Each stage reports its own errors; as soon as one stage fails (returns
    ``None``) the pipeline stops and this function returns ``None``.

    Parameters
    ----------
    file_path : str or path-like
        CSV file with the input data to classify.
    output_folder : str or path-like
        Directory where ``output_with_predictions.csv`` is written. It is
        created (including parents) if it does not exist.

    Returns
    -------
    pandas.DataFrame or None
        The loaded data with an added ``'Predicted'`` column, or ``None`` if
        any stage failed.
    """
    df = load_data(file_path)
    if df is None:
        return None

    X_scaled = scale_data(df)
    if X_scaled is None:
        return None

    X_proj, pca_df = apply_pca(X_scaled, df)
    if X_proj is None:
        return None

    # Display principal components (eigenvectors)
    print("✅ PCA Components (Eigenvectors):\n", pca_df)

    # Predict using the loaded logistic regression model
    predictions = predict_with_model(X_proj)
    if predictions is None:
        return None

    # Add predictions to original data. Only the predicted values are stored;
    # no probability column is produced by this pipeline.
    df['Predicted'] = predictions
    print("✅ Predictions added to the dataset.")

    # Ensure output folder exists
    output_folder_path = Path(output_folder)
    output_folder_path.mkdir(parents=True, exist_ok=True)

    # Save results to the specified output folder
    output_file_path = output_folder_path / "output_with_predictions.csv"
    df.to_csv(output_file_path, index=False)
    print(f"✅ Results saved to {output_file_path}.")

    return df

# Define paths: the input CSV to classify and the folder that receives the
# results file.
# NOTE(review): these are absolute paths on one specific Windows machine, so
# the cell will not run elsewhere as-is — consider deriving them from a
# configurable base directory.
file_path = r"c:\Users\chimy\Documents\_myWorks\2024_Openclassrooms_data\P12\Data_in_P12\billets_test.csv"
output_folder = r"c:\Users\chimy\Documents\_myWorks\2024_Openclassrooms_data\P12\Data_out_P12"
# Run the pipeline; df_result is the DataFrame returned by main (including the
# 'Predicted' column), or None if any stage failed.
df_result = main(file_path, output_folder)

✅ Data loaded successfully from c:\Users\chimy\Documents\_myWorks\2024_Openclassrooms_data\P12\Data_in_P12\billets_test.csv.
✅ Data scaled successfully.
✅ PCA applied successfully.
✅ PCA Components (Eigenvectors):
      diagonal  height_left  height_right  margin_low  margin_up    length
PC1 -0.084636     0.330588      0.393558    0.507658   0.439501 -0.527051
PC2  0.941317     0.307531      0.108268   -0.072309  -0.005095  0.048684
PC3 -0.288049     0.885110     -0.162796   -0.109807  -0.269869  0.149064
PC4 -0.101856    -0.049737      0.866823   -0.089715  -0.443476  0.176209
✅ Model loaded successfully from logistic_model.pkl!
✅ Predictions and probabilities added to the dataset.
✅ Results saved to c:\Users\chimy\Documents\_myWorks\2024_Openclassrooms_data\P12\Data_out_P12\output_with_predictions.csv.
