In [1]:
from sklearn.datasets import make_classification

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import confusion_matrix, classification_report


# Generate a synthetic binary classification dataset (1000 samples, 20 features).
# The seed makes the generated data identical on every run.
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Hold out 30% of the samples for testing; the seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train a classifier (Random Forest in this case).
# The forest is seeded too: it draws random bootstrap samples and random feature
# subsets, so without random_state the metrics printed by this cell would change
# on every run even though the data and the split are fixed.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict on the test data
y_pred = clf.predict(X_test)

# Confusion Matrix (rows = true class, columns = predicted class)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Classification Report (includes precision, recall, F1-score)
report = classification_report(y_test, y_pred)
print("\nClassification Report:")
print(report)



Confusion Matrix:
[[129  16]
 [ 25 130]]

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.89      0.86       145
           1       0.89      0.84      0.86       155

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.86      0.86      0.86       300



In [2]:
#Assignment 4.4.1

from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# Load Iris and pull out the feature matrix and the class labels
iris = load_iris()
X = iris.data
y = iris.target

# 70% of the rows go to training, 30% to testing (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create the seeded decision tree and fit it in one step
# (fit() returns the fitted estimator itself)
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Class predictions for the held-out rows
y_pred = clf.predict(X_test)

# Compute every metric up front, then report them together
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=iris.target_names)

# 1. Accuracy
print("Accuracy:", accuracy)

# 2. Confusion Matrix
print("\nConfusion Matrix:", cm, sep="\n")

# 3. Classification Report (Precision, Recall, F1-score), labelled by species name
print("\nClassification Report:", report, sep="\n")



Accuracy: 1.0

Confusion Matrix:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      1.00      1.00        13
   virginica       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [4]:
#Assignment 4.4.2

from sklearn.datasets import fetch_openml

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# Fetch the 'boston' dataset (version 1) through fetch_openml as pandas objects,
# then separate the feature table from the target column
boston = fetch_openml(name='boston', version=1, as_frame=True)
X = boston.data
y = boston.target

# 70% train / 30% test, seeded so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Seeded 100-tree Random Forest regressor, fitted on the training split
# (fit() returns the fitted estimator itself)
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = rf_regressor.predict(X_test)

# Compute the three regression metrics, then print them in the same order
mse = mean_squared_error(y_test, y_pred)    # 1. Mean Squared Error
mae = mean_absolute_error(y_test, y_pred)   # 2. Mean Absolute Error
r2 = r2_score(y_test, y_pred)               # 3. R-squared Score

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R-squared:", r2)

Mean Squared Error: 9.619662013157892
Mean Absolute Error: 2.083605263157894
R-squared: 0.8708997131330258


In [5]:
#Assignment 4.2.1

from sklearn.datasets import load_iris

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


# Step 1: Load Iris; unpack features and labels together
iris = load_iris()
X, y = iris.data, iris.target

# Step 2: Seeded 70/30 train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Step 3: Build the logistic regression model and fit it in one step.
# max_iter is raised to 200 to give the solver room to converge.
model = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Step 4: Predict classes for the held-out rows
y_pred = model.predict(X_test)

# Step 5: Report confusion matrix, per-class report and overall accuracy
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")
print(f"Accuracy Score: {accuracy_score(y_test, y_pred):.2f}")

Confusion Matrix:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Accuracy Score: 1.00


In [6]:
#Assignment 4.2.2

from sklearn.datasets import fetch_california_housing

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error, r2_score


# Step 1: Load the California housing dataset.
# scikit-learn provides this dataset through `fetch_california_housing`
# (there is no `load_california` loader), so that fetcher is what is called below.

# NOTE: neither of the next two imports is used anywhere in this cell. They are
# kept only because cells outside this one may rely on `pd` or `fetch_openml`
# already being defined.
import pandas as pd
from sklearn.datasets import fetch_openml

# as_frame=True returns pandas objects: `.data` holds the feature columns and
# `.target` holds the value to predict.
california = fetch_california_housing(as_frame=True)
X = california.data  # Features
y = california.target  # Target (house values)

# Step 2: Split the data into training and testing sets (70% train, 30% test, seeded)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Create and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Step 4: Make predictions on the test data
y_pred = model.predict(X_test)

# Step 5: Evaluate the model's performance on the held-out rows
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R2): {r2:.2f}")

# Print the fitted coefficients (one per feature column of X, in column order)
# and the intercept
print("\nModel Coefficients:")
print(model.coef_)
print(f"Intercept: {model.intercept_:.2f}")

Mean Squared Error (MSE): 0.53
R-squared (R2): 0.60

Model Coefficients:
[ 4.45822565e-01  9.68186799e-03 -1.22095112e-01  7.78599557e-01
 -7.75740400e-07 -3.37002667e-03 -4.18536747e-01 -4.33687976e-01]
Intercept: -37.06
