In [2]:
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import warnings

# Suppress ConvergenceWarning from LogisticRegression
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

# 1. Load the Wine dataset
# scikit-learn provides a loader for this dataset, so no manual download is required.
wine = load_wine()

# Wrap the raw arrays in pandas objects so they are easier to inspect and manipulate.
X = pd.DataFrame(data=wine.data, columns=wine.feature_names)  # one column per feature
y = pd.Series(data=wine.target)  # target variable: the wine class of each sample

# Preview the first rows of the features and of the target.
print("Wine Dataset Features (X.head()):", X.head(), sep="\n")
print("\nWine Dataset Target (y.head()):", y.head(), sep="\n")

# Report the overall size of the dataset and how many distinct classes it has.
print(
    f"\nNumber of samples: {X.shape[0]}",
    f"Number of features: {X.shape[1]}",
    f"Number of classes: {len(y.unique())}",
    sep="\n",
)


# 2. Hold out a test set
# 20% of the samples are reserved for testing; the remaining 80% are used for training.
# stratify=y keeps the proportion of each target class the same in both subsets, and
# the fixed random_state makes the split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Confirm the size of each piece of the split.
print(
    f"\nShape of X_train: {X_train.shape}",
    f"Shape of X_test: {X_test.shape}",
    f"Shape of y_train: {y_train.shape}",
    f"Shape of y_test: {y_test.shape}",
    sep="\n",
)

# 3. Fit a logistic regression classifier on the training split
# max_iter is raised to 1000 to give the solver more iterations to converge.
# fit() returns the fitted estimator itself, so construction and training are chained.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

print("\nLogistic Regression Model trained successfully.")

# 4. Predict the class of every sample in the held-out test set
y_pred = model.predict(X_test)

# 5. Score the predictions with the accuracy metric
accuracy = accuracy_score(y_test, y_pred)

# Optional: side-by-side table of true and predicted labels. It takes its index from
# y_test, so each row can be traced back to the original sample.
comparison_df = pd.DataFrame(data={'Actual': y_test, 'Predicted': y_pred})

print(f"\nModel Accuracy Score: {accuracy:.4f}")
print("\nSome Predictions vs Actual values:", comparison_df.head(10), sep="\n")

Wine Dataset Features (X.head()):
   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_wi