In [5]:
import nbformat
from nbformat.v4 import new_notebook, new_code_cell, new_markdown_cell

# Build an in-memory v4 notebook; cells are listed in the order they should run.
nb = new_notebook()

# Add all cells
# Each cell source is an ordinary (non-raw) triple-quoted string, so a backslash
# that must survive into the generated notebook is doubled here ("\\n").
nb.cells = [

    # Cell 1 - every import the generated notebook needs, in a single cell.
    new_code_cell("""\
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns"""),

    # Cell 2 - load the dataset and preview it.
    # NOTE(review): the source is a OneDrive sharing link; confirm it serves raw
    # CSV (and not an HTML landing page) or point it at a local copy of the file.
    new_code_cell("""\
df = pd.read_csv("https://1drv.ms/x/c/0ea060ea7167efff/EVRfhpSJLWRCrGzAxNJTwpkBWJxLzHG-qTA_rsIlGBTmEg?e=ADrwSw")  # Replace with your local Kaggle file if needed
df.head()"""),

    # Cell 3 - derive the 3-level priority label, encode it, and split the data.
    # The column drop rebinds `df` with errors='ignore' instead of mutating in
    # place, so re-running the cell no longer raises KeyError once the columns
    # are gone.
    # NOTE(review): 'radius_mean' decides medium vs. low in map_priority and is
    # also left in X, so part of the label can be read directly off a feature;
    # the reported scores are likely optimistic.
    new_code_cell("""\
df = df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')

def map_priority(row):
    if row['diagnosis'] == 'M':
        return 'high'
    elif row['radius_mean'] > 12:
        return 'medium'
    else:
        return 'low'

df['issue_priority'] = df.apply(map_priority, axis=1)

le = LabelEncoder()
df['priority_encoded'] = le.fit_transform(df['issue_priority'])

X = df.drop(['diagnosis', 'issue_priority', 'priority_encoded'], axis=1)
y = df['priority_encoded']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)"""),

    # Cell 4 - fit a seeded random forest and predict on the held-out split.
    new_code_cell("""\
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)"""),

    # Cell 5 - metrics plus a confusion-matrix heatmap.
    # The matrix is built with an explicit label order (0..n_classes-1, i.e. the
    # LabelEncoder codes) and the heatmap ticks show the matching class names,
    # so the axes read high/low/medium instead of bare integers.
    new_code_cell("""\
print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred, average='weighted'))

print("\\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=le.classes_))

cm = confusion_matrix(y_test, y_pred, labels=np.arange(len(le.classes_)))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=le.classes_, yticklabels=le.classes_)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()"""),

    # Cell 6 - written reflection on bias and fairness tooling.
    new_markdown_cell("""\
### Ethical Reflection

This model may carry **bias** from the original dataset. For example, if certain patterns in the dataset dominate (like one group having more "high priority" cases), the model could learn to unfairly prioritize or ignore issues from smaller or underrepresented teams.

In a real company, this might result in some teams’ issues being systematically treated as less important.

**IBM AI Fairness 360** can help by checking for bias using fairness metrics like disparate impact and applying correction techniques like reweighting or adversarial debiasing. This ensures fair and transparent AI use.
"""),

    # Cell 7 - bonus proposal write-up.
    new_markdown_cell("""\
### Bonus Task: Innovation Proposal

**Tool Name**: CodeBuddy AI – Smart Documentation Assistant

**Purpose**: Automatically generate docstrings and markdown documentation for code to save time and improve code quality.

**Workflow**:
1. Developer writes code and pushes to GitHub.
2. CodeBuddy AI analyzes the code structure and logic.
3. It generates docstring suggestions using AI (e.g., GPT-based model).
4. Suggestions are posted as pull request comments.
5. Developer approves and merges documentation.

**Impact**:
- Improves code readability.
- Reduces manual effort in writing docs.
- Speeds up onboarding for new developers.
- Encourages consistent, up-to-date documentation.
""")
]

# Save notebook
# Serialize before opening the output file: opening with "w" truncates it, so
# doing the encoding first means a serialization failure cannot leave an empty
# file where a previous notebook used to be.
notebook_json = nbformat.writes(nb)
with open("task3_predictive_analytics.ipynb", "w", encoding="utf-8") as f:
    f.write(notebook_json)
    # nbformat.write() always ends the file with a newline; keep that behavior.
    if not notebook_json.endswith("\n"):
        f.write("\n")
