In [1]:
# Notebook outline: the seven stages the cells below walk through.
# This is a bare string expression rather than a comment, so the notebook
# echoes its repr as this cell's output.
'''
Step 1: Import Libraries
Step 2: Load and Explore the Dataset
Step 3: Data Preprocessing
Step 4: Split Data into Training and Testing Sets
Step 5: Train a Model
Step 6: Evaluate the Model
Step 7: Make Predictions with Custom Input (Optional)
'''

'\nStep 1: Import Libraries\nStep 2: Load and Explore the Dataset\nStep 3: Data Preprocessing\nStep 4: Split Data into Training and Testing Sets\nStep 5: Train a Model\nStep 6: Evaluate the Model\nStep 7: Make Predictions with Custom Input (Optional)\n'

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
from sklearn.datasets import load_wine

# Fetch the bundled Wine dataset and pull out the feature matrix and labels
wine = load_wine()
X, y = wine.data, wine.target

# Assemble a labelled DataFrame (features plus a trailing 'target' column)
# purely for inspection; X and y remain the arrays used by the later cells
data = pd.DataFrame(X, columns=wine.feature_names).assign(target=y)

# Preview the first rows, then count how many samples fall in each class
print(data.head())
print(data['target'].value_counts())

   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_wines  proline  target  
0          

In [4]:
# Standardize each feature (StandardScaler's default: zero mean, unit variance).
# Learning the statistics and applying them are written as two explicit steps.
# NOTE(review): the statistics are learned from all rows of X, before any
# train/test split has been made.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [5]:
# Split the data into training and testing sets.
# The split is taken on the raw features X (same test_size and random_state as
# before, so the same rows land in each partition) to keep the test rows out of
# the scaler's statistics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Refit the scaler on the training rows only, then apply that one transform to
# both partitions. X_scaled from the earlier cell was standardized with the
# mean/std of every row, test rows included, so it is deliberately not used
# here: preprocessing statistics must not see the held-out data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [6]:
# Random forest with default hyperparameters; the fixed seed pins the
# estimator's randomness so re-running this cell reproduces the same model
model = RandomForestClassifier(random_state=42)

# Fit on the training partition (the fitted estimator stays the cell's last
# expression, so the cell's display is unchanged)
model.fit(X=X_train, y=y_train)


In [7]:
# Score the held-out partition: predict once, then derive every metric from
# the same prediction vector
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Headline number: fraction of correct predictions, shown as a percentage
print(f"Accuracy: {accuracy * 100:.2f}%")

# Per-class precision / recall / F1, labelled with the dataset's class names
print(
    "Classification Report:",
    classification_report(y_test, y_pred, target_names=wine.target_names),
    sep="\n",
)

# Confusion matrix computed from the true labels and the predictions
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")


Accuracy: 100.00%
Classification Report:
              precision    recall  f1-score   support

     class_0       1.00      1.00      1.00        14
     class_1       1.00      1.00      1.00        14
     class_2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36

Confusion Matrix:
[[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]


In [8]:
# Sample custom input data (replace with actual values for testing).
# Written as a nested list so the array is already a single 2-D row, the
# shape the scaler and model are given below.
# NOTE(review): values are assumed to follow the column order of
# wine.feature_names -- confirm when substituting real measurements.
custom_input = np.array(
    [[13.0, 2.0, 2.4, 20.0, 100.0, 2.5, 2.8, 0.29, 1.5, 4.0, 1.0, 3.0, 800.0]]
)

# Apply the already-fitted scaler (transform only, no refit)
custom_input_scaled = scaler.transform(custom_input)

# Classify the scaled row and report the class name for the predicted index
prediction = model.predict(custom_input_scaled)
print(f"The model predicts this wine belongs to class: {wine.target_names[prediction[0]]}")


The model predicts this wine belongs to class: class_0
