# Simple Classification

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
# Banner marking the start of the Iris classification walkthrough.
print("--- Iris Classification: Logistic Regression ---")


--- Iris Classification: Logistic Regression ---


In [2]:
# Step 1: Load the Iris dataset
print("\nStep 1: Loading Iris dataset...")
# Iris holds four measurements per flower (sepal length/width and petal
# length/width) for three species: Setosa, Versicolor and Virginica.
# The task is to predict the species from those measurements.
iris = load_iris()
# Feature matrix (measurements) and integer species labels (0, 1, 2).
X, y = iris.data, iris.target
print(" Dataset loaded.")
# Shapes are reported as (samples, features) for X and (samples,) for y.
print(f" Features (X) shape: {X.shape}, Target (y) shape: {y.shape}")


Step 1: Loading Iris dataset...
 Dataset loaded.
 Features (X) shape: (150, 4), Target (y) shape: (150,)


In [4]:
# Step 2: Hold out a test set
print("\nStep 2: Splitting data into Training and Testing sets...")
# The model is fitted on the training portion only; the held-out portion
# stands in for unseen data so we can judge how well the model generalizes.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,     # 30% of the samples for testing, 70% for training
    random_state=42,   # fixed seed so the split is identical on every run
    stratify=y,        # keep the species proportions equal in both subsets
)
print(" Data successfully split.")
print(f" Training set size: {X_train.shape[0]} samples")
print(f" Testing set size: {X_test.shape[0]} samples")





Step 2: Splitting data into Training and Testing sets...
 Data successfully split.
 Training set size: 105 samples
 Testing set size: 45 samples


In [14]:
# Step 3: Initializing and Training the Logistic Regression model
print("\nStep 3: Initializing and Training the Logistic Regression model...")
# Upper bound on solver iterations. It is set above scikit-learn's default
# of 100 to give the solver room to converge on these unscaled features,
# while remaining a real, finite limit: if the solver ever fails to
# converge, fitting stops at the cap and scikit-learn emits a
# ConvergenceWarning instead of iterating without end.
MAX_ITER = 1000
model = LogisticRegression(max_iter=MAX_ITER)
# .fit() learns the mapping from the training features (X_train) to the
# species labels (y_train).
model.fit(X_train, y_train)
print(" Model training complete.")



Step 3: Initializing and Training the Logistic Regression model...
 Model training complete.


In [15]:

# Step 4: Predict on the held-out samples
print("\nStep 4: Making predictions on the test set...")
# The fitted classifier assigns a species label to every row of X_test;
# none of these rows were used during training.
y_pred = model.predict(X_test)
print(" Predictions made.")



Step 4: Making predictions on the test set...
 Predictions made.


In [16]:
# Step 5: Score the predictions
print("\nStep 5: Evaluating the model...")
# Accuracy is the fraction of test samples whose predicted label (y_pred)
# matches the true label (y_test).
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f" Model Accuracy: {accuracy:.4f}")  # shown with four decimal places



Step 5: Evaluating the model...
 Model Accuracy: 0.9333


In [20]:
# Optional: show the first few predictions next to the true labels
print("\nSample Predictions vs Actual Labels:")
# Preview at most five test samples (fewer if the test set is smaller).
preview_count = min(5, len(y_test))
# Translate the integer class ids into species names in one indexing step.
predicted_names = iris.target_names[y_pred[:preview_count]]
actual_names = iris.target_names[y_test[:preview_count]]
for i, (predicted_species, actual_species) in enumerate(zip(predicted_names, actual_names)):
    print(f" Sample {i+1}: Predicted='{predicted_species}', Actual='{actual_species}'")


Sample Predictions vs Actual Labels:
 Sample 1: Predicted='virginica', Actual='virginica'
 Sample 2: Predicted='versicolor', Actual='versicolor'
 Sample 3: Predicted='versicolor', Actual='virginica'
 Sample 4: Predicted='versicolor', Actual='versicolor'
 Sample 5: Predicted='virginica', Actual='virginica'


# Simple Regression

In [22]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Banner marking the start of the Diabetes regression walkthrough.
print("\n\n--- Diabetes Progression Prediction: Linear Regression ---")




--- Diabetes Progression Prediction: Linear Regression ---


In [24]:
# Step 1: Load the Diabetes dataset
print("\nStep 1: Loading Diabetes dataset...")
# Each row holds a patient's baseline physiological measurements (age, sex,
# bmi, blood pressure, etc.); the target is a quantitative measure of
# disease progression one year later.
# The task is to predict that progression score from the baseline data.
diabetes = load_diabetes()
# Feature matrix (baseline measurements) and continuous target (progression
# score). NOTE: this rebinds X and y, replacing the Iris arrays.
X, y = diabetes.data, diabetes.target
print(" Dataset loaded.")
print(f" Features (X) shape: {X.shape}, Target (y) shape: {y.shape}")



Step 1: Loading Diabetes dataset...
 Dataset loaded.
 Features (X) shape: (442, 10), Target (y) shape: (442,)


In [25]:
# Step 2: Hold out a test set
print("\nStep 2: Splitting data into Training and Testing sets...")
# As in the classification example, the model is trained on one portion and
# evaluated on a separate, unseen portion. No stratification here: it is
# normally not applied to a continuous target unless the target is binned.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,     # 30% of the samples for testing, 70% for training
    random_state=42,   # fixed seed so the split is identical on every run
)
print(" Data successfully split.")
print(f" Training set size: {X_train.shape[0]} samples")
print(f" Testing set size: {X_test.shape[0]} samples")



Step 2: Splitting data into Training and Testing sets...
 Data successfully split.
 Training set size: 309 samples
 Testing set size: 133 samples


In [26]:
# Step 3: Create and fit the Linear Regression model
print("\nStep 3: Initializing and Training the Linear Regression model...")
# .fit() estimates one coefficient per feature plus an intercept, choosing
# the linear equation that best fits the training data. It returns the
# fitted estimator itself, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)
print(" Model training complete.")



Step 3: Initializing and Training the Linear Regression model...
 Model training complete.


In [27]:
# Step 4: Predict on the held-out samples
print("\nStep 4: Making predictions on the test set...")
# The fitted regressor produces a disease progression score for every row
# of X_test; none of these rows were used during training.
y_pred = model.predict(X_test)
print(" Predictions made.")



Step 4: Making predictions on the test set...
 Predictions made.


In [28]:
# Step 5: Score the predictions
print("\nStep 5: Evaluating the model...")
# Both metrics compare the predicted scores (y_pred) with the true scores
# (y_test).
# Mean Squared Error (MSE): mean of the squared prediction errors. Lower is
# better; squaring makes it sensitive to outliers.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
# R-squared (R2): share of the target's variance explained by the model.
# 1.0 is a perfect fit and 0.0 matches always predicting the mean; on
# held-out data it can be negative when the model does worse than that.
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f" Mean Squared Error (MSE): {mse:.2f}")  # two decimal places
print(f" R-squared (R2): {r2:.4f}")  # four decimal places



Step 5: Evaluating the model...
 Mean Squared Error (MSE): 2821.75
 R-squared (R2): 0.4773


In [29]:
# Optional: show the first few predictions next to the true scores
print("\nSample Predictions vs Actual Scores:")
# Slicing caps the preview at five samples (fewer if the test set is smaller).
preview_count = len(y_test[:5])
for i in range(preview_count):
    print(f" Sample {i+1}: Predicted={y_pred[i]:.2f}, Actual={y_test[i]:.2f}")



Sample Predictions vs Actual Scores:
 Sample 1: Predicted=138.47, Actual=219.00
 Sample 2: Predicted=181.10, Actual=70.00
 Sample 3: Predicted=125.34, Actual=202.00
 Sample 4: Predicted=292.76, Actual=230.00
 Sample 5: Predicted=123.88, Actual=111.00
