In [None]:
!pip install --upgrade pip jupyter ipywidgets
!pip install scikit-learn torch torchvision transformers numpy matplotlib

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from sklearn.datasets import make_blobs
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import transforms
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# Binary Classification

In [None]:
# 1. Generate 2D Synthetic Defect Prediction Data for Visualization
def generate_data(n_samples=1000, n_features=2, random_state=42):
    """Build a standardized synthetic binary-classification dataset.

    Samples are drawn with ``make_classification`` (two informative features,
    no redundant features, one cluster per class) and then passed through a
    freshly fitted ``StandardScaler``.

    Args:
        n_samples: Number of samples to generate.
        n_features: Total number of feature columns.
        random_state: Seed forwarded to ``make_classification``.

    Returns:
        Tuple ``(X, y)``: the scaled features cast to ``float32`` and the
        labels cast to ``int64``.
    """
    sampler_options = dict(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=random_state,
    )
    features, labels = make_classification(**sampler_options)
    features = StandardScaler().fit_transform(features)
    return features.astype(np.float32), labels.astype(np.int64)


X, y = generate_data()

# Positional 80/20 split: the first 80% of the rows train, the remainder tests.
train_size = int(0.8 * len(X))
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]


# Create a PyTorch Dataset
class DefectDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label."""

    def __init__(self, X, y):
        """Store the features and labels as tensors.

        Args:
            X: Array-like of features, one row per sample.
            y: Array-like of labels aligned with ``X``.
        """
        self.X, self.y = torch.tensor(X), torch.tensor(y)

    def __len__(self):
        """Return the number of samples (rows of the feature tensor)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        sample = (self.X[idx], self.y[idx])
        return sample


# Wrap each split in a dataset, then batch it; only the training batches are reshuffled.
train_dataset, test_dataset = DefectDataset(X_train, y_train), DefectDataset(X_test, y_test)

train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


# 2. Define the Neural Network for Binary Classification
class DefectPredictor(nn.Module):
    """One-hidden-layer network that outputs two class logits per sample."""

    def __init__(self, input_size):
        """Create the layers.

        Args:
            input_size: Number of input features per sample.
        """
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=10)
        self.relu = nn.ReLU()
        # Two output units: one logit for each of the two classes.
        self.output = nn.Linear(in_features=10, out_features=2)

    def forward(self, x):
        """Return the class logits for the batch ``x``."""
        hidden = self.fc1(x)
        activated = self.relu(hidden)
        logits = self.output(activated)
        return logits


# Seed PyTorch's global RNG so weight initialization and batch shuffling
# start from a fixed state on every Restart & Run All.
torch.manual_seed(42)

model = DefectPredictor(input_size=X.shape[1])

# 3. Training the Model
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

num_epochs = 20
train_losses = []  # mean batch loss of each epoch

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # Report the same per-batch average that is stored, not the raw sum over
    # batches (the sum scales with the number of batches and is not comparable).
    mean_loss = epoch_loss / len(train_loader)
    train_losses.append(mean_loss)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss:.4f}")

# 4. Evaluate the Model
model.eval()
all_preds, all_labels = [], []

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        # The predicted class is the index of the largest logit in each row.
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

accuracy = accuracy_score(all_labels, all_preds)
print(f"Test Accuracy: {accuracy:.4f}")


# 5. Plot the Decision Boundary
def plot_decision_boundary(model, X, y):
    """Plot the class regions predicted by ``model`` on a 2D grid, with the data on top.

    The model is put into eval mode and evaluated on a dense mesh that spans
    the range of ``X`` plus a margin of 1 on every side.

    Args:
        model: ``nn.Module`` that maps a float32 tensor of 2D points to
            per-class scores (one row of scores per point).
        X: 2D array of points; column 0 is drawn on the x-axis, column 1 on the y-axis.
        y: Per-point labels used to color the scatter plot.
    """
    # Create a mesh grid of points
    h = 0.02  # Step size for mesh
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Build the grid on the device that holds the model's parameters so the
    # function also works for a model that was moved to the GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    grid = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32, device=device)

    model.eval()
    with torch.no_grad():
        scores = model(grid)
    # Hand matplotlib a NumPy array on the CPU instead of relying on implicit
    # tensor-to-array conversion.
    Z = scores.argmax(dim=1).reshape(xx.shape).cpu().numpy()

    # Plot the decision boundary
    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, alpha=0.8, cmap=plt.cm.coolwarm)
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap=plt.cm.coolwarm)
    plt.title("Decision Boundary of the Neural Network")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.show()


# Draw the learned decision boundary together with the held-out test points
plot_decision_boundary(model=model, X=X_test, y=y_test)

# Image Recognition

In [None]:
# 1. Load and Preprocess the CIFAR-10 Dataset
# Convert images to tensors, then normalize every channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

batch_size = 32

# Training split (reshuffled when iterated)
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)

# Test split (fixed order)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(dataset=testset, batch_size=batch_size, shuffle=False)

# Class names, indexed by the integer label
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Set device: use CUDA when it is available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")


# Function to display images
def imshow(img):
    """Display a normalized, channels-first image tensor with matplotlib.

    Args:
        img: Image tensor with the channel axis first. The values are mapped
            back with ``img / 2 + 0.5``, i.e. the inverse of a normalization
            with mean 0.5 and std 0.5.
    """
    img = img / 2 + 0.5  # Denormalize
    # detach() and cpu() make this safe for tensors that track gradients or
    # live on the GPU; a plain .numpy() raises for both.
    npimg = img.detach().cpu().numpy()
    # Move the channel axis last, which is the layout plt.imshow expects.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# Preview the first four images of one training batch together with their class names
dataiter = iter(trainloader)
images, labels = next(dataiter)

preview_grid = torchvision.utils.make_grid(images[:4])
imshow(preview_grid)
print(' '.join(classes[labels[j]] for j in range(4)))


# 2. Define the CNN Model
class SimpleCNN(nn.Module):
    """Two conv + max-pool stages followed by two fully connected layers (10 outputs)."""

    def __init__(self):
        """Create the layers; the network takes 3-channel images."""
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # 64 channels * 8 * 8 positions: the size left after two 2x2 poolings of a 32x32 input.
        self.fc1 = nn.Linear(in_features=64 * 8 * 8, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the 10 class logits for each image in the batch ``x``."""
        # Both stages have the same shape: convolution -> ReLU -> 2x2 max pooling.
        for conv in (self.conv1, self.conv2):
            x = self.pool(self.relu(conv(x)))
        flat = x.view(-1, 64 * 8 * 8)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)


# Seed PyTorch's RNG so weight initialization and batch shuffling start from a fixed state.
torch.manual_seed(42)

model = SimpleCNN().to(device)  # Move the model's parameters to the selected device

# 3. Define Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 4. Train the Model
num_epochs = 5
train_losses = []  # mean batch loss of each epoch (plotted later)

for epoch in range(num_epochs):
    running_loss = 0.0  # loss summed over the current 100-step logging window
    epoch_loss = 0.0  # loss summed over the whole epoch
    model.train()
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)  # Move data to the model's device

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss += batch_loss
        if (i + 1) % 100 == 0:
            print(
                f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(trainloader)}], Loss: {running_loss / 100:.4f}')
            running_loss = 0.0

    # Record the epoch average. Storing only the last batch's loss (as before)
    # makes the loss curve reflect a single noisy mini-batch per epoch.
    train_losses.append(epoch_loss / len(trainloader))

print('Finished Training')

# 5. Evaluate the Model
correct, total = 0, 0

model.eval()
with torch.no_grad():
    for inputs, labels in testloader:
        # Put the batch on the same device as the model
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        # The index of the largest logit along the class dimension is the prediction
        _, predicted = outputs.max(dim=1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)

print(f'Accuracy on the 10000 test images: {100 * correct / total:.2f}%')

# 6. Visualize Training Loss (one point per epoch)
fig, ax = plt.subplots(figsize=(8, 5))
epochs = range(1, num_epochs + 1)
ax.plot(epochs, train_losses, marker='o')
ax.set_title("Training Loss Curve")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.grid()
plt.show()

# 7. Visualize Predictions
dataiter = iter(testloader)
images, labels = next(dataiter)

# Inference only: keep the model in eval mode and skip autograd bookkeeping.
model.eval()
with torch.no_grad():
    outputs = model(images.to(device))  # only the model input has to be on the device
_, predicted = torch.max(outputs, 1)
predicted = predicted.cpu()

# images and labels stay on the CPU, so they can be plotted and used as indices directly.
imshow(torchvision.utils.make_grid(images[:4]))
print('Predicted: ', ' '.join(f'{classes[predicted[j]]}' for j in range(4)))
print('GroundTruth: ', ' '.join(f'{classes[labels[j]]}' for j in range(4)))


# Named Entity Recognition

In [None]:
# 1. Load Pretrained Model and Tokenizer
# Checkpoint: cased BERT-large fine-tuned on CoNLL-2003 for English NER
model_name = "dbmdz/bert-large-cased-finetuned-conll03-english"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Define NER pipeline; with aggregation_strategy="simple" each result is a
# grouped entity (hence the 'entity_group' key read below).
ner_pipeline = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)

# 2. Sample Text for NER
text = """
Apple was founded by Steve Jobs and Steve Wozniak in Cupertino, California.
They launched the Apple I in 1976, and today Apple is one of the most valuable companies in the world.
Microsoft, led by Bill Gates, is also a major player in the tech industry.
"""

# Perform NER
print("Performing Named Entity Recognition on the text...")
ner_results = ner_pipeline(text)

# 3. Display Results: one line per entity with its type and confidence score
print("\nNamed Entities Found:")
for entity in ner_results:
    word, group, score = entity['word'], entity['entity_group'], entity['score']
    print(f"{word} --> {group} (score: {score:.2f})")


# 4. Visualize Named Entities
# Highlight entities in the text
def highlight_entities(text, entities):
    """Print ``text`` with every entity span wrapped in ANSI green.

    Args:
        text: The string the entities were extracted from.
        entities: Iterable of mappings with integer ``"start"`` and ``"end"``
            character offsets into ``text``. They may be given in any order;
            a span that overlaps one already printed is skipped.
    """
    current_idx = 0
    # Walk the spans left to right so the plain text between them is printed exactly once.
    for entity in sorted(entities, key=lambda item: item["start"]):
        start, end = entity["start"], entity["end"]
        if start < current_idx:
            # Overlaps a span that was already emitted; printing it would duplicate text.
            continue
        # Highlight the original characters text[start:end]. entity["word"] is the
        # pipeline's own rendering of the span and may differ from the source text.
        print(text[current_idx:start] + f"\033[92m{text[start:end]}\033[0m", end="")
        current_idx = end
    print(text[current_idx:])


print("\nText with highlighted entities (visual in terminal):")
highlight_entities(text, ner_results)

# Optional: Create a simple bar chart for entity types
entity_labels = [entity['entity_group'] for entity in ner_results]
# Sort the distinct labels: iterating a set of strings directly can give a
# different order on every run, which would reorder the bars between runs.
unique_labels = sorted(set(entity_labels))
label_counts = [entity_labels.count(label) for label in unique_labels]

plt.figure(figsize=(8, 5))
plt.bar(unique_labels, label_counts)
plt.title("Entity Type Frequency")
plt.xlabel("Entity Type")
plt.ylabel("Frequency")
plt.show()


# Defect Prediction via Anomaly Detection (Autoencoder)

In [None]:
# 1. Generate Synthetic Data (Normal and Anomalies)
def generate_data(n_normal=500, n_anomalies=50, random_state=42):
    """Create a standardized 2D dataset: a normal cluster plus a distant anomaly cluster.

    NOTE: this definition shadows the ``generate_data`` defined earlier in the
    notebook (binary-classification section); once this cell has run, the name
    refers to this function.

    Args:
        n_normal: Number of normal points, drawn around ``(0, 0)``.
        n_anomalies: Number of anomalous points, drawn around ``(5, 5)``.
        random_state: Seed passed to both ``make_blobs`` calls.

    Returns:
        Tuple ``(X, y)``: ``X`` holds the normal rows followed by the anomaly
        rows, scaled with ``StandardScaler`` fitted on the combined data; ``y``
        holds ``0.0`` for each normal row and ``1.0`` for each anomaly row.
    """
    # Normal data: Clustered around center
    X_normal, _ = make_blobs(n_samples=n_normal, centers=[(0, 0)], cluster_std=0.5, random_state=random_state)

    # Anomalies: Far from center
    X_anomalies, _ = make_blobs(n_samples=n_anomalies, centers=[(5, 5)], cluster_std=0.5, random_state=random_state)

    # Combine datasets (cast to float32 before scaling)
    X = np.vstack([X_normal, X_anomalies]).astype(np.float32)
    y = np.hstack([np.zeros(n_normal), np.ones(n_anomalies)])  # 0 = normal, 1 = anomaly

    # Scale data
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    return X, y


# Generate Data
X, y = generate_data()

# Visualize the Data: points are colored by label (0 = normal, 1 = anomaly)
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolors='k', alpha=0.7)
ax.set_title("Synthetic Data: Normal vs Anomalous Points")
ax.set_xlabel("Feature 1")
ax.set_ylabel("Feature 2")
plt.show()


# 2. Create PyTorch Dataset and DataLoader
class AnomalyDataset(Dataset):
    """Map-style dataset of unlabeled feature rows."""

    def __init__(self, X):
        """Store the features as a tensor.

        Args:
            X: Array-like of features, one row per sample.
        """
        features = torch.tensor(X)
        self.X = features

    def __len__(self):
        """Return the number of samples (rows of the feature tensor)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the feature row stored at position ``idx``."""
        row = self.X[idx]
        return row


# Train on the normal samples only; evaluate on the full set (normal + anomalies)
X_train = X[y == 0]
X_test, y_test = X, y

train_dataset, test_dataset = AnomalyDataset(X_train), AnomalyDataset(X_test)

# Only the training batches are reshuffled; the test order stays aligned with y_test.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


# 3. Define the Autoencoder
class Autoencoder(nn.Module):
    """Fully connected autoencoder: input_dim -> 16 -> 8 -> 16 -> input_dim."""

    def __init__(self, input_dim):
        """Create the encoder and decoder stacks.

        Args:
            input_dim: Number of features per sample (also the output width).
        """
        super().__init__()
        # The encoder ends with a ReLU; the decoder's last layer is linear.
        encoder_layers = [nn.Linear(input_dim, 16), nn.ReLU(), nn.Linear(16, 8), nn.ReLU()]
        decoder_layers = [nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, input_dim)]
        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the 8-unit code and return its reconstruction."""
        return self.decoder(self.encoder(x))


# Instantiate the Model
# Seed PyTorch's global RNG so weight initialization and batch shuffling
# start from a fixed state on every Restart & Run All.
torch.manual_seed(42)

input_dim = X.shape[1]
model = Autoencoder(input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 4. Train the Autoencoder
num_epochs = 50
train_losses = []  # mean batch loss of each epoch (plotted later)

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    for data in train_loader:
        optimizer.zero_grad()
        recon = model(data)
        loss = criterion(recon, data)  # the reconstruction target is the input itself
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # Report the same per-batch average that is stored, not the raw sum over batches.
    mean_loss = epoch_loss / len(train_loader)
    train_losses.append(mean_loss)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss:.6f}")

# Plot Training Loss (one point per epoch)
fig, ax = plt.subplots(figsize=(8, 5))
epochs = range(1, num_epochs + 1)
ax.plot(epochs, train_losses, marker='o')
ax.set_title("Training Loss Curve")
ax.set_xlabel("Epoch")
ax.set_ylabel("MSE Loss")
ax.grid()
plt.show()

# 5. Detect Anomalies on Test Data
model.eval()
reconstruction_errors = []

with torch.no_grad():
    for data in test_loader:
        recon = model(data)
        loss = torch.mean((data - recon) ** 2, dim=1)  # one MSE value per sample
        reconstruction_errors.extend(loss.cpu().numpy())

    # Reconstruction errors on the normal-only training set, used to calibrate the threshold.
    train_inputs = train_dataset.X
    train_errors = torch.mean((train_inputs - model(train_inputs)) ** 2, dim=1).cpu().numpy()

reconstruction_errors = np.asarray(reconstruction_errors)

# Set Threshold for Anomalies
# Calibrate on normal data only: the 95th percentile of the training errors
# targets roughly a 5% false-positive rate on normal points. Taking the
# percentile of the test errors instead ties the number of flagged points to a
# fixed 5% of the test set, regardless of how many anomalies it contains.
threshold = np.percentile(train_errors, 95)
print(f"Anomaly Detection Threshold: {threshold:.4f}")

# Classify Data Points: 1 = anomaly (error above the threshold), 0 = normal
predictions = (reconstruction_errors > threshold).astype(int).tolist()

# 6. Visualize the Results: points are colored by the predicted class (0 = normal, 1 = anomaly)
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 0], X[:, 1], c=predictions, cmap='coolwarm', edgecolors='k', alpha=0.7)
plt.title("Anomaly Detection Results")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

# Print Results
# classification_report is imported in the notebook's import cell with the other
# sklearn names, so this cell no longer carries its own import.
print("Classification Report:")
print(classification_report(y_test, predictions, target_names=["Normal", "Anomaly"]))
