In [7]:
# task2
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
# ... other imports ...

def get_colab_driver():
    """Sets up a headless Chrome driver configured for Google Colab."""
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(
        service=ChromeService(ChromeDriverManager().install()),
        options=chrome_options
    )
    return driver

class AutomatedLoginTest:
    def __init__(self):

        self.driver = get_colab_driver()
        self.driver.implicitly_wait(10)


# ... main() function to execute the tests ...

In [3]:
# --- Cell 1: Import Libraries and Load Data ---
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report

print("Libraries loaded successfully.")

# Load the Breast Cancer dataset (used as proxy for 'GitHub Issues' data)
data = load_breast_cancer(as_frame=True)
df = data.frame
X = df.drop(columns=['target']) # Features (e.g., code metrics, past issue history)
y = df['target']             # Original target (0=Benign, 1=Malignant)

print(f"Original dataset shape: {df.shape}")
print(f"Features (X) shape: {X.shape}")
print(f"Target (y) shape: {y.shape}")
print("First 5 rows of data:")
print(df.head())

Libraries loaded successfully.
Original dataset shape: (569, 31)
Features (X) shape: (569, 30)
Target (y) shape: (569,)
First 5 rows of data:
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280       

In [4]:
# --- Cell 2: Preprocessing, Feature Engineering (Custom Labeling) ---

# 1. Create a "High Priority" based on the original malignant target (1)
df['Priority'] = df['target'].map({0: 'Low', 1: 'High'})

# 2. Introduce a "Medium Priority" based on a feature split (e.g., "mean texture")
# We assume that issues with intermediate complexity (mean texture) are "Medium"
texture_median = df['mean texture'].median()

def assign_priority(row):
    # If it's already "High" (Malignant), keep it High
    if row['Priority'] == 'High':
        return 'High'
    # If it's "Low" (Benign) but has a high 'mean texture' (complex characteristics),
    # upgrade it to Medium Priority for resource allocation.
    elif row['mean texture'] >= texture_median:
        return 'Medium'
    else:
        return 'Low'

# Apply the new logic
df['Priority'] = df.apply(assign_priority, axis=1)

# Display the distribution of the new custom priority labels
priority_counts = df['Priority'].value_counts()
print("Distribution of new custom 'Priority' labels:")
print(priority_counts)

# Define the new features (X_new) and the target (y_new)
X_new = df.drop(columns=['target', 'Priority'])
y_new = df['Priority']

# Split the data into Training and Testing sets (70% Train, 30% Test)
X_train, X_test, y_train, y_test = train_test_split(
    X_new, y_new, test_size=0.3, random_state=42, stratify=y_new
)

print(f"\nTraining set size: {X_train.shape[0]} samples")
print(f"Testing set size: {X_test.shape[0]} samples")

Distribution of new custom 'Priority' labels:
Priority
High      357
Medium    166
Low        46
Name: count, dtype: int64

Training set size: 398 samples
Testing set size: 171 samples


In [5]:
# --- Cell 3: Model Training (Random Forest) ---

# Initialize the Random Forest Classifier
# n_estimators is the number of trees in the forest. A higher number generally improves performance.
# random_state ensures reproducibility of the results.
model = RandomForestClassifier(n_estimators=100, random_state=42)

print("Starting Random Forest Model Training...")

# Train the model using the training data
model.fit(X_train, y_train)

print("Model training complete!")

Starting Random Forest Model Training...
Model training complete!


In [6]:
# --- Cell 4: Evaluation and Performance Metrics ---

# Make predictions on the test set
y_pred = model.predict(X_test)

# --- Calculate Required Metrics ---

# 1. Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy Score: {accuracy:.4f}")

# 2. F1-Score (using 'weighted' for multi-class problem)
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"F1-Score (Weighted): {f1:.4f}")

# --- Detailed Classification Report ---

# Provides precision, recall, and F1-score for each category (Low, Medium, High)
print("\n--- Detailed Classification Report ---")
print(classification_report(y_test, y_pred))

# Deliverable: Jupyter Notebook + performance metrics is complete.

Accuracy Score: 0.9649
F1-Score (Weighted): 0.9632

--- Detailed Classification Report ---
              precision    recall  f1-score   support

        High       0.96      1.00      0.98       107
         Low       1.00      0.71      0.83        14
      Medium       0.96      0.96      0.96        50

    accuracy                           0.96       171
   macro avg       0.97      0.89      0.92       171
weighted avg       0.97      0.96      0.96       171

