In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# End-to-end demo: train a random-forest classifier on scikit-learn's built-in
# wine dataset, evaluate it on a held-out split, then classify one hand-written
# sample and print the per-class probabilities.
try:
    # --- Step 1: Load Data and Frame the Problem ---
    # Load the built-in wine dataset.
    # We will predict the 'target' (wine class 0, 1, or 2) based on its chemical features.
    wine = load_wine()
    X, y = wine.data, wine.target
    feature_names = wine.feature_names

    print("--- Step 1: Data Loaded Successfully ---")
    print(f"Dataset has {X.shape[0]} samples and {X.shape[1]} features.")
    print("Target classes (customer segments):", wine.target_names)
    print("\n")

    # --- Preprocessing and Splitting ---
    # Split first so the scaler below never sees the test rows.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

    # Fit the scaler on the training split only, then apply the same
    # transformation to the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # --- Step 2: Train the Random Forest Model ---
    # We'll use a RandomForestClassifier as we're predicting a class.
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train_scaled, y_train)
    print("--- Step 2: Model Training Complete ---\n")

    # --- Evaluation (to ensure the model is working) ---
    y_pred = model.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print("--- Model Evaluation ---")
    print(f"Model Accuracy on Test Set: {accuracy:.4f}")
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=wine.target_names))

    # --- Step 3: Make a "Recommendation" for a New Product ---
    # Imagine a new wine with the following chemical properties.
    # Values are listed in the same order as `feature_names`.
    new_wine = np.array([[
        13.5, 2.5, 2.8, 18.0, 100, 2.5, 2.6, 0.3, 1.5, 6.0, 1.05, 3.0, 1000
    ]])

    # Scale the new wine's features using the scaler fitted on the training data
    new_wine_scaled = scaler.transform(new_wine)

    # Predict the class and the probabilities for the new wine
    predicted_class = model.predict(new_wine_scaled)
    predicted_proba = model.predict_proba(new_wine_scaled)

    print("--- Step 3: Generating Recommendation ---")
    print("\nAnalyzing a new wine with the following features:")
    print(pd.DataFrame(new_wine, columns=feature_names))

    recommended_class_name = wine.target_names[predicted_class[0]]
    # predict_proba columns follow model.classes_, not the raw label values,
    # so look up the column that belongs to the predicted label instead of
    # indexing the probabilities with the label itself.
    proba_column = list(model.classes_).index(predicted_class[0])
    confidence = predicted_proba[0][proba_column] * 100

    print(f"\n✅ Recommendation: This wine is best suited for the '{recommended_class_name}' category.")
    print(f"   Confidence: {confidence:.2f}%")
    print("\nFull Probability Distribution:")
    # Pair each probability with the label of its own column so names and
    # values cannot drift apart if a class were missing from the training split.
    for label, probability in zip(model.classes_, predicted_proba[0]):
        print(f"   - P(class '{wine.target_names[label]}') = {probability:.4f}")

    print("\n--- Analysis Complete ---")

except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise: the one-line message alone hides the traceback, and anything
    # that runs after this block would otherwise continue with undefined names.
    raise

--- Step 1: Data Loaded Successfully ---
Dataset has 178 samples and 13 features.
Target classes (customer segments): ['class_0' 'class_1' 'class_2']


--- Step 2: Model Training Complete ---

--- Model Evaluation ---
Model Accuracy on Test Set: 1.0000

Classification Report:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00        15
     class_1       1.00      1.00      1.00        18
     class_2       1.00      1.00      1.00        12

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

--- Step 3: Generating Recommendation ---

Analyzing a new wine with the following features:
   alcohol  malic_acid  ash  alcalinity_of_ash  magnesium  total_phenols  \
0     13.5         2.5  2.8               18.0      100.0            2.5   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0         2.6    