# Data Scientist – Demonstration Notebook

This notebook demonstrates advanced analytics, machine learning, NLP, federated learning, and data visualization, reflecting the responsibilities and requirements of a Data Scientist role.

In [1]:
# Deep Learning Example (PyTorch): a single linear layer trained with a logistic (BCE-with-logits) loss
import torch
import torch.nn as nn
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Read the demo patient table and cast the listed columns to pandas'
# category dtype in a single astype call.
df = pd.read_csv('../data/demo_patients.csv')
categorical_cols = ['gender', 'ethnicity', 'smoking_status', 'diabetes_status', 'physical_activity', 'family_history']
df = df.astype(dict.fromkeys(categorical_cols, 'category'))

# Target is the cardio_risk column; the features are every remaining column
# except patient_id, dummy-encoded with the first level of each category dropped.
y = df['cardio_risk']
X = pd.get_dummies(df.drop(columns=['patient_id', 'cardio_risk']), drop_first=True)

# Fit the scaler on the feature matrix, then apply it to the same matrix
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Build float32 tensors; the target gains a trailing axis so it is a column
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y.values, dtype=torch.float32)[:, None]

class SimpleNN(nn.Module):
    """Single fully connected layer mapping input_dim features to output_dim outputs.

    No activation is applied in forward(); the raw output of the linear
    layer is returned.
    """

    def __init__(self, input_dim: int, output_dim: int) -> None:
        super().__init__()
        # The only trainable component: one affine (weight + bias) layer
        self.fc = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to x and return its output unchanged."""
        scores = self.fc(x)
        return scores

# Instantiate and train the model
# Seed torch's global RNG first: nn.Linear draws its initial weights from it,
# so without a fixed seed every Restart & Run All produces different losses
# and predictions.
SEED = 42
N_EPOCHS = 100  # few epochs for demo
LOG_EVERY = 20  # print the loss once every LOG_EVERY epochs
torch.manual_seed(SEED)

model = SimpleNN(input_dim=X_tensor.shape[1], output_dim=1)
# BCEWithLogitsLoss applies the sigmoid internally, so the model's raw
# outputs are passed to it directly.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training loop: every step runs the forward pass on the whole of X_tensor
final_loss = None  # remains None only if N_EPOCHS == 0
for epoch in range(N_EPOCHS):
    optimizer.zero_grad()
    outputs = model(X_tensor)
    loss = criterion(outputs, y_tensor)
    loss.backward()
    optimizer.step()
    # Loss of this epoch's forward pass, i.e. computed before the weight
    # update above; read it once and reuse it for logging.
    final_loss = loss.item()
    if (epoch+1) % LOG_EVERY == 0:
        print(f'Epoch {epoch+1}, Loss: {final_loss:.4f}')

# Predict and show results
with torch.no_grad():
    # Map the raw model outputs to the (0, 1) range with a sigmoid
    preds = torch.sigmoid(model(X_tensor))
# squeeze(1) drops only the output axis, so a single-row input still
# yields a 1-D array rather than a 0-d scalar.
final_loss, preds.squeeze(1).numpy()

Epoch 20, Loss: 0.3824
Epoch 40, Loss: 0.2053
Epoch 60, Loss: 0.1299
Epoch 80, Loss: 0.0920
Epoch 100, Loss: 0.0699


(0.06986521184444427,
 array([0.9385098 , 0.09660181, 0.95386505, 0.8822593 , 0.0203329 ,
        0.9292036 , 0.93861324, 0.06872148, 0.9522089 , 0.07222842],
       dtype=float32))