**Question 5**: Write a Python program that loads a CSV file into a pandas DataFrame,
splits it into train/test sets, trains a Logistic Regression model, and prints its accuracy.
(Use a dataset from the sklearn package.)

In [2]:
# Import libraries
from io import StringIO

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load dataset (Iris dataset) from the sklearn package
data = load_iris()

# The task asks for the DataFrame to be loaded from CSV. sklearn hands the data
# over as arrays, so serialise it to CSV text first (in memory, no file is
# left behind on disk) ...
iris_frame = pd.DataFrame(data.data, columns=data.feature_names)
iris_frame['target'] = data.target
csv_buffer = StringIO(iris_frame.to_csv(index=False))

# ... and then load that CSV into the DataFrame used for the rest of the cell.
# 'round_trip' parsing guarantees the floats read back are exactly the floats
# that were written, so the model sees the same numbers as the raw dataset.
df = pd.read_csv(csv_buffer, float_precision='round_trip')

# Display first few rows
print("Sample Data:\n", df.head())

# Split data into features (X) and target (y)
X = df.drop(columns='target')
y = df['target']

# Split into train and test sets: 30% held out, fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create and train Logistic Regression model
# (max_iter is raised to 200 to give the solver more iterations to converge)
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Calculate accuracy (fraction of correct predictions) and report it as a percentage
accuracy = accuracy_score(y_test, y_pred)
print("\nModel Accuracy:", round(accuracy * 100, 2), "%")

Sample Data:
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  

Model Accuracy: 100.0 %


**Question 6**: Write a Python program to train a Logistic Regression model using L2
regularization (Ridge) and print the model coefficients and accuracy.
(Use a dataset from the sklearn package.)
(Include your Python code and output in the code box below.)

In [3]:
# Import libraries
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the Iris dataset and gather features plus label in a single DataFrame
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Separate the label column from the feature columns
y = df['target']
X = df.drop(columns='target')

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit Logistic Regression with an explicit L2 (ridge) penalty.
# No C is passed, so the regularization strength stays at the library default.
model = LogisticRegression(penalty='l2', solver='lbfgs', max_iter=200).fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)
accuracy_pct = round(accuracy_score(y_test, y_pred) * 100, 2)

# Report the fitted parameters (one row of coefficients / one intercept per class)
# followed by the test-set accuracy
print("Model Coefficients:\n", model.coef_)
print("\nIntercepts:\n", model.intercept_)
print("\nModel Accuracy:", accuracy_pct, "%")

Model Coefficients:
 [[-0.40538546  0.86892246 -2.2778749  -0.95680114]
 [ 0.46642685 -0.37487888 -0.18745257 -0.72127133]
 [-0.06104139 -0.49404358  2.46532746  1.67807247]]

Intercepts:
 [  8.86383271   2.20981479 -11.0736475 ]

Model Accuracy: 100.0 %


**Question 7**: Write a Python program to train a Logistic Regression model for multiclass
classification using multi_class='ovr' and print the classification report.
(Use a dataset from the sklearn package.)
(Include your Python code and output in the code box below.)


In [5]:
# Import libraries
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Load the Iris dataset and gather features plus label in a single DataFrame
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Separate the label column from the feature columns
y = df['target']
X = df.drop(columns='target')

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Multiclass Logistic Regression using the One-vs-Rest strategy.
# NOTE(review): `multi_class` is reported as deprecated in recent scikit-learn
# releases (1.5+) in favour of OneVsRestClassifier(LogisticRegression(...));
# it is kept because the question asks for it explicitly - confirm against the
# installed version.
model = LogisticRegression(multi_class='ovr', solver='lbfgs', max_iter=200).fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Per-class precision / recall / F1, labelled with the species names
report = classification_report(y_test, y_pred, target_names=data.target_names)
print("Classification Report:\n")
print(report)

Classification Report:

              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      0.85      0.92        13
   virginica       0.87      1.00      0.93        13

    accuracy                           0.96        45
   macro avg       0.96      0.95      0.95        45
weighted avg       0.96      0.96      0.96        45





**Question 8**: Write a Python program to apply GridSearchCV to tune C and penalty
hyperparameters for Logistic Regression and print the best parameters and validation
accuracy.
(Use a dataset from the sklearn package.)
(Include your Python code and output in the code box below.)


In [6]:
# Import libraries
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split

# Load the Iris dataset and gather features plus label in a single DataFrame
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Separate the label column from the feature columns
y = df['target']
X = df.drop(columns='target')

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Base estimator. liblinear is chosen because the grid below includes an L1
# penalty, which the default lbfgs solver does not support.
model = LogisticRegression(max_iter=200, solver='liblinear')

# Hyperparameter grid: every combination of C and penalty is evaluated
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2'],
}

# 5-fold cross-validated grid search on the training split only,
# ranking candidates by accuracy
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='accuracy', cv=5)
grid.fit(X_train, y_train)

# Winning combination and its mean cross-validation accuracy
best_params, best_score = grid.best_params_, grid.best_score_

# Score the refitted best estimator on the held-out test split.
# NOTE: this is test accuracy; the cross-validation score above is the
# validation metric, despite the wording of the last printed label.
y_pred = grid.best_estimator_.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)

print("Best Parameters Found:", best_params)
print("Best Cross-Validation Accuracy:", round(best_score * 100, 2), "%")
print("Validation Accuracy on Test Data:", round(test_accuracy * 100, 2), "%")

Best Parameters Found: {'C': 10, 'penalty': 'l2'}
Best Cross-Validation Accuracy: 95.24 %
Validation Accuracy on Test Data: 100.0 %


**Question 9**: Write a Python program to standardize the features before training Logistic
Regression and compare the model's accuracy with and without scaling.
(Use a dataset from the sklearn package.)
(Include your Python code and output in the code box below.)


In [7]:
# Import libraries
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load Iris dataset: features as a labelled DataFrame, target as the raw label array
data = load_iris()
y = data.target
X = pd.DataFrame(data.data, columns=data.feature_names)

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# ----- Baseline: Logistic Regression on the raw (unscaled) features -----
model_no_scaling = LogisticRegression(max_iter=200).fit(X_train, y_train)
y_pred_no_scaling = model_no_scaling.predict(X_test)
accuracy_no_scaling = accuracy_score(y_test, y_pred_no_scaling)

# ----- Logistic Regression on standardized features -----
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test set leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

model_scaled = LogisticRegression(max_iter=200).fit(X_train_scaled, y_train)
y_pred_scaled = model_scaled.predict(X_test_scaled)
accuracy_scaled = accuracy_score(y_test, y_pred_scaled)

# Report both accuracies side by side for comparison
print("Accuracy without scaling:", round(accuracy_no_scaling * 100, 2), "%")
print("Accuracy with scaling:", round(accuracy_scaled * 100, 2), "%")


Accuracy without scaling: 100.0 %
Accuracy with scaling: 100.0 %
