<a href="https://colab.research.google.com/github/rajgit-123/MyProject/blob/master/Regression_with_ANN_PM2_5_kaggleSubmitted.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [None]:
# Step 1: Data Preprocessing
# Load data: read the training table first, then the test table, both from
# CSV files in the current working directory.
train_data, test_data = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [None]:
# Give the unnamed leading column of the test table an explicit name.
# rename() silently ignores the mapping if 'Unnamed: 0' is not present.
# Rebinding (instead of inplace=True) keeps the cell free of in-place mutation.
test_data = test_data.rename(columns={'Unnamed: 0': 'ID'})

# Preview the first rows with the rich notebook display instead of printing
# the whole frame as plain text.
test_data.head()

In [None]:
# Handle missing or anomalous values
# Discard every training row that has at least one missing field
# (the defaults of dropna, spelled out explicitly).
train_data = train_data.dropna(axis=0, how='any')

In [None]:
# Normalize numerical features
numerical_cols = ['SO2', 'NO2', 'CO', 'O3', 'Temp', 'Press', 'DewP', 'Rain', 'WinSpeed']  # Assuming these are numerical
scaler = StandardScaler()

# Learn the per-column scaling statistics from the training table only ...
# NOTE(review): this happens before the train/validation split performed in a
# later cell, so the validation rows contribute to the fitted statistics.
scaler.fit(train_data[numerical_cols])

# ... then apply that same fitted scaler to both tables.
train_data[numerical_cols] = scaler.transform(train_data[numerical_cols])
test_data[numerical_cols] = scaler.transform(test_data[numerical_cols])  # Use the same scaler for test data


In [None]:
# Encode categorical features if present (e.g., 'Station' and 'WinDir')
# Both tables are cast to one categorical dtype built from the training
# levels, so get_dummies emits the same indicator columns in the same order
# for train and test even when a level is absent from one of them. A level
# that only occurs in the test table becomes missing and encodes as all zeros.
categorical_cols = ['Station', 'WinDir']
shared_dtypes = {
    col: pd.CategoricalDtype(categories=sorted(train_data[col].dropna().unique()))
    for col in categorical_cols
}
train_data = train_data.astype(shared_dtypes)
test_data = test_data.astype(shared_dtypes)

# dtype=float keeps the indicators numeric. pandas >= 2.0 defaults to bool,
# which turns the mixed frame into an object array that torch.tensor rejects.
train_data = pd.get_dummies(train_data, columns=categorical_cols, dtype=float)
test_data = pd.get_dummies(test_data, columns=categorical_cols, dtype=float)

# Split dataset into train, validation, and test sets
features = train_data.drop(columns=['PM2.5'])
target = train_data['PM2.5']
X_train, X_val, y_train, y_val = train_test_split(features, target, test_size=0.2, random_state=42)
X_test = test_data  # No labels for test data

# Features are matched to the model by position, so fail early with a clear
# message if the two tables do not have the same number of feature columns.
if X_test.shape[1] != X_train.shape[1]:
    raise ValueError(
        f'Feature count mismatch: train has {X_train.shape[1]} columns, '
        f'test has {X_test.shape[1]}'
    )

# Convert data to PyTorch tensors
# to_numpy(dtype='float32') forces a numeric array and raises on any column
# that cannot be converted to a float.
X_train = torch.tensor(X_train.to_numpy(dtype='float32'), dtype=torch.float32)
y_train = torch.tensor(y_train.to_numpy(dtype='float32'), dtype=torch.float32)
X_val = torch.tensor(X_val.to_numpy(dtype='float32'), dtype=torch.float32)
y_val = torch.tensor(y_val.to_numpy(dtype='float32'), dtype=torch.float32)
X_test = torch.tensor(X_test.to_numpy(dtype='float32'), dtype=torch.float32)

In [None]:
# Step 2: Model Architecture
class ANN(nn.Module):
    """Fully connected regressor: input_dim -> 64 -> 32 -> 1 with ReLU between layers."""

    def __init__(self, input_dim):
        """Create the three linear layers and the shared ReLU activation.

        Args:
            input_dim: number of input features per sample.
        """
        super().__init__()
        # Layers are created in forward order.
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the network output for ``x``; the last layer has no activation."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [None]:
# Step 3: Training the Model
def train(model, criterion, optimizer, X_train, y_train, X_val, y_val, epochs=100, batch_size=64):
    """Fit ``model`` on sequential mini-batches and print the losses after every epoch.

    Each epoch walks over ``X_train`` / ``y_train`` in order (no shuffling) in
    slices of ``batch_size``, taking one optimizer step per slice. It then
    evaluates the whole validation set with gradients disabled.

    Args:
        model: module being trained; called as ``model(batch)``.
        criterion: loss callable invoked as ``criterion(predictions, targets)``.
        optimizer: optimizer that updates the model's parameters.
        X_train, y_train: training inputs and targets, sliced along dim 0.
        X_val, y_val: validation inputs and targets, evaluated in one pass.
        epochs: number of passes over the training data.
        batch_size: number of samples per optimizer step.

    Returns:
        None. One line is printed per epoch with the mean of the per-batch
        training losses and the validation loss.
    """
    n_samples = len(X_train)
    for epoch in range(epochs):
        model.train()
        batch_losses = []
        for start in range(0, n_samples, batch_size):
            batch_x = X_train[start:start + batch_size]
            batch_y = y_train[start:start + batch_size]

            optimizer.zero_grad()
            # Reshape to the target's shape rather than a bare squeeze(): a
            # one-sample batch would otherwise collapse to a 0-d tensor and
            # make the criterion broadcast against a 1-element target.
            predictions = model(batch_x).reshape(batch_y.shape)
            loss = criterion(predictions, batch_y)
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())

        # Report the epoch average instead of the last batch only; NaN when
        # there was nothing to train on.
        train_loss = sum(batch_losses) / len(batch_losses) if batch_losses else float('nan')

        # Validation
        model.eval()
        with torch.no_grad():
            val_predictions = model(X_val).reshape(y_val.shape)
            val_loss = criterion(val_predictions, y_val)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {train_loss}, Val Loss: {val_loss.item()}')

In [None]:
# Seed PyTorch's random number generator before the layers are created so the
# initial weights are repeatable across runs.
torch.manual_seed(42)

# Initialize model
input_dim = X_train.shape[1]  # one input unit per feature column
model = ANN(input_dim)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model (uses train()'s default epochs and batch size)
train(model, criterion, optimizer, X_train, y_train, X_val, y_val)

Epoch 1/100, Loss: 9894.9287109375, Val Loss: 7791.39501953125
Epoch 2/100, Loss: 8451.46484375, Val Loss: 6926.37646484375
Epoch 3/100, Loss: 6125.14013671875, Val Loss: 5736.28662109375
Epoch 4/100, Loss: 8476.4033203125, Val Loss: 6108.65966796875
Epoch 5/100, Loss: 3937.06103515625, Val Loss: 4807.16845703125
Epoch 6/100, Loss: 4819.40673828125, Val Loss: 3287.059326171875
Epoch 7/100, Loss: 2141.216796875, Val Loss: 3918.288818359375
Epoch 8/100, Loss: 2085.848876953125, Val Loss: 2494.140380859375
Epoch 9/100, Loss: 2214.25390625, Val Loss: 2509.012451171875
Epoch 10/100, Loss: 2103.810302734375, Val Loss: 2521.751708984375
Epoch 11/100, Loss: 1861.76904296875, Val Loss: 2607.4833984375
Epoch 12/100, Loss: 1664.718017578125, Val Loss: 2891.29052734375
Epoch 13/100, Loss: 1804.4295654296875, Val Loss: 2733.58740234375
Epoch 14/100, Loss: 1742.0772705078125, Val Loss: 2272.525634765625
Epoch 15/100, Loss: 1483.044677734375, Val Loss: 2312.58203125
Epoch 16/100, Loss: 1457.208740234

In [None]:
# Step 4: Model Evaluation
# Since we don't have ground truth for the test set, we cannot calculate RMSE
# or other metrics here; this cell only produces the raw predictions.
model.eval()
with torch.set_grad_enabled(False):
    test_outputs = model(X_test)

In [None]:
# Step 5: Generate predictions for submission
# Use the identifiers carried by the test table when it has an 'ID' column
# with one entry per prediction; otherwise fall back to positional IDs.
if 'ID' in test_data.columns and len(test_data) == len(X_test):
    test_ids = test_data['ID'].to_numpy()
else:
    test_ids = range(len(X_test))

# reshape(-1) always yields a 1-D array; a bare squeeze() would produce a 0-d
# value when the test set contains a single row.
predictions = test_outputs.reshape(-1).numpy()

submission_df = pd.DataFrame({'ID': test_ids, 'PM2.5': predictions})
submission_df.to_csv('submission.csv', index=False)