# ANN with PyTorch

In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Read the salaries dataset from the local CSV file into a pandas DataFrame
df = pd.read_csv('ds_salaries.csv')

# Sanity check: print the (rows, columns) shape, then the column index
for overview in (df.shape, df.columns):
    print(overview)

(607, 12)
Index(['Unnamed: 0', 'work_year', 'experience_level', 'employment_type',
       'job_title', 'salary', 'salary_currency', 'salary_in_usd',
       'employee_residence', 'remote_ratio', 'company_location',
       'company_size'],
      dtype='object')


In [None]:
# Keep only the four model inputs plus the regression target. The .copy()
# detaches the result from the loaded frame so that the column assignments
# made later operate on an independent DataFrame.
df = df.loc[
    :,
    ['experience_level', 'employment_type', 'remote_ratio', 'company_size', 'salary_in_usd'],
].copy()

In [None]:
# Preview the first five rows of the reduced frame
df.head(n=5)

Unnamed: 0,experience_level,employment_type,remote_ratio,company_size,salary_in_usd
0,MI,FT,0,L,79833
1,SE,FT,0,S,260000
2,SE,FT,50,M,109024
3,MI,FT,0,S,20000
4,SE,FT,50,L,150000


In [None]:
# Label-encode the categorical columns.
# One fitted LabelEncoder per column is kept in `label_encoders` (keyed by
# column name) so the integer codes can be mapped back to the original labels.
# NOTE(review): LabelEncoder numbers classes in sorted label order, so the
# codes are arbitrary identifiers rather than a meaningful ranking - confirm
# this is acceptable for experience_level / company_size.
categorical_cols = ['experience_level', 'employment_type', 'company_size']
label_encoders = {col: LabelEncoder().fit(df[col]) for col in categorical_cols}

# Replace each categorical column with its integer codes
for col, le in label_encoders.items():
    df[col] = le.transform(df[col])

In [None]:
# Preview the first five rows to check the encoded categorical columns
df.head(n=5)

Unnamed: 0,experience_level,employment_type,remote_ratio,company_size,salary_in_usd
0,2,2,0,0,79833
1,3,2,0,2,260000
2,3,2,50,1,109024
3,2,2,0,2,20000
4,3,2,50,0,150000


In [None]:
# Min-max scalers for normalization: one for the input features and a separate
# one for the target, so each keeps its own fitted min/max statistics.
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()

In [None]:
# Model inputs: the three label-encoded categoricals plus remote_ratio
features = df.loc[:, ['experience_level', 'employment_type', 'remote_ratio', 'company_size']]

# Regression target; selecting with a list keeps it a one-column DataFrame
# (2-D), which is the layout the scaler is given in the next step.
target = df.loc[:, ['salary_in_usd']]

In [None]:
# Fit each scaler on its data, then apply the fitted scaling to that same data
features_scaled = scaler_X.fit(features).transform(features)
target_scaled = scaler_y.fit(target).transform(target)

In [None]:
# Wrap the scaled arrays as float32 PyTorch tensors
X = torch.as_tensor(features_scaled, dtype=torch.float32)
y = torch.as_tensor(target_scaled, dtype=torch.float32)

# Pair every feature row with its target so they are indexed together
dataset = TensorDataset(X, y)

# Quick check of the dataset size and the shape of one sample's features
print('Dataset length:', len(dataset))
print('Sample features shape:', dataset[0][0].shape)

Dataset length: 607
Sample features shape: torch.Size([4])


In [None]:
# Mini-batches of 4 samples, reshuffled every epoch.
# A dedicated, seeded generator makes the shuffle order reproducible under
# Restart & Run All without altering the global RNG state.
shuffle_rng = torch.Generator().manual_seed(42)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, generator=shuffle_rng)

In [None]:
# Define the model (input: 4 features -> 2 hidden units -> 1 output).
# Seed the global RNG first so the initial weights are reproducible.
torch.manual_seed(42)

model = nn.Sequential(
    nn.Linear(4, 2),
    # Non-linearity between the layers: without it the two Linear layers
    # compose into a single affine map and the hidden layer adds nothing.
    nn.ReLU(),
    nn.Linear(2, 1)
)

In [None]:
# Step 3: Loss function and optimizer
# Mean squared error between the predictions and the targets (regression)
criterion = nn.MSELoss()
# Stochastic gradient descent over every parameter of the model
optimizer = optim.SGD(params=model.parameters(), lr=1e-3)

In [None]:
# Step 4: Training loop
num_epochs = 10

# Make the intended mode explicit before optimizing
model.train()

for epoch in range(num_epochs):
    # Sum of (batch mean loss * batch size); divided by the dataset size below
    # to get the mean loss per sample over the whole epoch.
    running_loss = 0.0

    # The batch variables are named batch_X / batch_y so they do not overwrite
    # the `features` / `target` DataFrames created earlier in the notebook.
    for batch_X, batch_y in dataloader:
        # Step 4.1: Zero the gradients from the previous step
        optimizer.zero_grad()

        # Step 4.2: Forward pass - compute predictions
        pred = model(batch_X)

        # Step 4.3: Compute the loss (target reshaped to (batch, 1) to match pred)
        loss = criterion(pred, batch_y.view(-1, 1))

        # Step 4.4: Backward pass - compute gradients
        loss.backward()

        # Step 4.5: Update model parameters
        optimizer.step()

        # Weight by batch size: the last batch may be smaller than the others
        running_loss += loss.item() * batch_X.size(0)

    # Report the epoch-average loss rather than the loss of whichever
    # mini-batch happened to come last.
    epoch_loss = running_loss / len(dataloader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")


Epoch 1/10, Loss: 0.0704
Epoch 2/10, Loss: 0.0208
Epoch 3/10, Loss: 0.0029
Epoch 4/10, Loss: 0.0103
Epoch 5/10, Loss: 0.0062
Epoch 6/10, Loss: 0.0046
Epoch 7/10, Loss: 0.0013
Epoch 8/10, Loss: 0.0048
Epoch 9/10, Loss: 0.0009
Epoch 10/10, Loss: 0.0124
