<a href="https://colab.research.google.com/github/sonakshisharan/Machine_Learning/blob/main/connectionist_learning_models_Classifiers_built_using_Artificial_Neural_Networks_Backpropagation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV


In [None]:
# Mount Google Drive at /content/drive so files stored under MyDrive can be read
# by the cells that follow.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Load the abalone CSV from the mounted Drive into a DataFrame.
df = pd.read_csv('/content/drive/MyDrive/ml dataset/abalone.data.csv')


In [None]:
# Quick sanity check: first five rows, then summary statistics of the numeric columns.
print(df.head())
print(df.describe())


  Sex  Length  Diameter  Height  Whole_weight  Shucked_weight  Viscera_weight  \
0   M   0.455     0.365   0.095        0.5140          0.2245          0.1010   
1   M   0.350     0.265   0.090        0.2255          0.0995          0.0485   
2   F   0.530     0.420   0.135        0.6770          0.2565          0.1415   
3   M   0.440     0.365   0.125        0.5160          0.2155          0.1140   
4   I   0.330     0.255   0.080        0.2050          0.0895          0.0395   

   Shell_weight  Rings  
0         0.150     15  
1         0.070      7  
2         0.210      9  
3         0.155     10  
4         0.055      7  
            Length     Diameter       Height  Whole_weight  Shucked_weight  \
count  4177.000000  4177.000000  4177.000000   4177.000000     4177.000000   
mean      0.523992     0.407881     0.139516      0.828742        0.359367   
std       0.120093     0.099240     0.041827      0.490389        0.221963   
min       0.075000     0.055000     0.000000      0

In [None]:
# 'Sex' is the categorical column of the dataset; one-hot encode it.
# drop_first=True drops one dummy level so the remaining indicator columns are
# not perfectly collinear.
# The encoded frame gets its own name instead of overwriting `df`, so this cell
# can be re-run safely: overwriting `df` would remove the 'Sex' column and make
# a second execution fail.
df_encoded = pd.get_dummies(df, columns=['Sex'], drop_first=True)

# Features are every column except the regression target 'Rings'.
X = df_encoded.drop('Rings', axis=1)
y = df_encoded['Rings']

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features: statistics are fitted on the training split only and then
# reused for the test split so no test information leaks into the scaler.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Baseline 1 - ordinary least-squares regression on the standardized features.
lin_reg = LinearRegression().fit(X_train_scaled, y_train)
y_pred = lin_reg.predict(X_test_scaled)

lin_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
lin_r2 = r2_score(y_test, y_pred)
print('Linear Regression RMSE:', lin_rmse)
print('Linear Regression R^2:', lin_r2)

# Baseline 2 - random forest with 100 trees and a fixed seed, trained and scored
# on the same scaled split as the linear model.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)

rf_rmse = np.sqrt(mean_squared_error(y_test, y_pred_rf))
rf_r2 = r2_score(y_test, y_pred_rf)
print('Random Forest RMSE:', rf_rmse)
print('Random Forest R^2:', rf_r2)


Linear Regression RMSE: 2.2116130871218367
Linear Regression R^2: 0.5481628137889262
Random Forest RMSE: 2.2599865562542094
Random Forest R^2: 0.5281810502563149


In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# If you have specific environment variables to load
# api_key = os.getenv('API_KEY')


In [None]:
# Reload the raw CSV so the PyTorch pipeline that follows starts from the original
# columns, including the categorical 'Sex' column that it one-hot encodes itself.
df = pd.read_csv('/content/drive/MyDrive/ml dataset/abalone.data.csv')


In [None]:
# Column roles: 'Sex' is the only categorical input, 'Rings' is the target, and
# every other column is treated as a continuous input.
categorical_features = ['Sex']
continuous_features = [name for name in df.columns if name != 'Sex' and name != 'Rings']

# Continuous inputs are standardized (kept as a pipeline so more steps can be added).
continuous_transformer = Pipeline([('scaler', StandardScaler())])

# Route each column group through its own transformer and concatenate the results.
preprocessor = ColumnTransformer([
    ('num', continuous_transformer, continuous_features),
    ('cat', OneHotEncoder(), categorical_features),
])

# Split first, transform afterwards, so the preprocessor is fitted on training rows only.
X = df.drop(columns=['Rings'])
y = df['Rings']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit on the training split, then apply the fitted transformation to the test split.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# The integer ring counts are cast to float because the network regresses a real value.
y_train, y_test = y_train.astype(float), y_test.astype(float)


In [None]:
# Convert the preprocessed arrays to float32 tensors, the dtype the network's
# Linear layers use by default.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Targets get an explicit trailing dimension, (n,) -> (n, 1), so their shape
# matches the network's single-column output when the loss is computed.
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32).unsqueeze(1)


In [None]:
import torch.nn as nn
import torch.nn.functional as F

class AbaloneAgePredictor(nn.Module):
    """Small feed-forward regressor that maps abalone features to a ring estimate.

    Architecture:
        Linear(input_dim -> hidden1_dim) -> ReLU ->
        Linear(hidden1_dim -> hidden2_dim) -> ReLU ->
        Linear(hidden2_dim -> 1)

    Args:
        input_dim: Number of input features per sample. The default of 10
            matches this notebook's preprocessed matrix (7 standardized numeric
            columns plus 3 one-hot 'Sex' columns).
        hidden1_dim: Width of the first hidden representation (default 64).
        hidden2_dim: Width of the second hidden representation (default 32).

    Calling the class with no arguments builds exactly the original
    10 -> 64 -> 32 -> 1 network.
    """

    def __init__(self, input_dim=10, hidden1_dim=64, hidden2_dim=32):
        super().__init__()
        # Attribute names are kept unchanged so existing state_dicts still load.
        self.input_layer = nn.Linear(input_dim, hidden1_dim)      # features -> first hidden width
        self.hidden_layer1 = nn.Linear(hidden1_dim, hidden2_dim)  # first -> second hidden width
        self.output_layer = nn.Linear(hidden2_dim, 1)             # single regression output

    def forward(self, x):
        """Return predictions of shape (batch, 1) for inputs of shape (batch, input_dim)."""
        x = torch.relu(self.input_layer(x))
        x = torch.relu(self.hidden_layer1(x))
        # No activation on the output: the regression target is unbounded.
        return self.output_layer(x)


In [None]:
# Seed PyTorch's global RNG before building the network so the randomly
# initialized weights (and later torch-driven batch shuffling) are the same on
# every "Restart & Run All". 42 matches the random_state used for the data split.
torch.manual_seed(42)
model = AbaloneAgePredictor()


In [None]:
import torch.optim as optim


In [None]:
# Mean-squared-error loss for the regression target.
criterion = nn.MSELoss()
# Plain stochastic gradient descent over all parameters of `model`.
# NOTE(review): the recorded runs with lr=0.1 show strongly oscillating losses and,
# in the later comparison cells, NaN - confirm this learning rate is intended.
optimizer = optim.SGD(model.parameters(), lr=0.1)


In [None]:
from torch.utils.data import TensorDataset, DataLoader

# Mini-batch size shared by both loaders; adjust as needed.
batch_size = 64

# Pair every feature row with its target so the two are always batched together.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Training batches are reshuffled each epoch; test batches keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# Mini-batch training of `model` over the batches produced by `train_loader`.
num_epochs = 100  # number of full passes over the training data

for epoch in range(num_epochs):
    for inputs, targets in train_loader:
        # Drop gradients left over from the previous step before computing new ones.
        optimizer.zero_grad()

        # Forward pass and loss for this mini-batch.
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backpropagate, then apply the parameter update.
        loss.backward()
        optimizer.step()

    # Report every 10th epoch; the value shown is the loss of that epoch's
    # final mini-batch, not an average over the epoch.
    epoch_number = epoch + 1
    if epoch_number % 10 == 0:
        print(f'Epoch [{epoch_number}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/100], Loss: 11.1239
Epoch [20/100], Loss: 9.2500
Epoch [30/100], Loss: 5.0650
Epoch [40/100], Loss: 18.3985
Epoch [50/100], Loss: 4.4483
Epoch [60/100], Loss: 7.3621
Epoch [70/100], Loss: 8.1814
Epoch [80/100], Loss: 29.2319
Epoch [90/100], Loss: 14.1876
Epoch [100/100], Loss: 6.3535


In [None]:
def evaluate(model, X_test, y_test):
    """Return the model's mean-squared error on (X_test, y_test) as a Python float.

    The model is switched to evaluation mode (and left there), and the whole
    test tensor is pushed through in one forward pass with gradient tracking
    disabled.
    """
    model.eval()
    with torch.no_grad():
        predictions = model(X_test)
        return nn.functional.mse_loss(predictions, y_test).item()

# Score the trained network on the held-out tensors (X_test_tensor / y_test_tensor).
# The printed value is a mean-squared error, i.e. it is in squared-ring units.
test_loss = evaluate(model, X_test_tensor, y_test_tensor)
print(f'Test MSE Loss: {test_loss}')


Test MSE Loss: 10.833641052246094


In [None]:
# Rebind `optimizer` to Adagrad (lr=0.01) over the current model's parameters.
# NOTE(review): no training call in the cells shown here receives this `optimizer`;
# the comparison cells further down build their own - confirm this cell is still needed.
optimizer = optim.Adagrad(model.parameters(), lr=0.01)


In [None]:
# Display the optimizer's configuration (its repr lists the parameter-group hyperparameters).
optimizer

Adagrad (
Parameter Group 0
    differentiable: False
    eps: 1e-10
    foreach: None
    initial_accumulator_value: 0
    lr: 0.01
    lr_decay: 0
    maximize: False
    weight_decay: 0
)

In [None]:
def train_and_evaluate(model, optimizer, criterion, train_loader, test_loader, num_epochs=100):
    """Train `model` for `num_epochs` epochs and log progress every 10th epoch.

    Args:
        model: Network to train; updated in place.
        optimizer: Optimizer built over `model`'s parameters.
        criterion: Loss callable `criterion(outputs, targets)`. The train-loss
            average assumes it returns a per-batch mean (as `nn.MSELoss()` does
            by default).
        train_loader: Iterable of `(inputs, targets)` training batches.
        test_loader: Iterable of test batches, forwarded to `evaluate`.
        num_epochs: Number of passes over `train_loader`.

    Returns:
        None. Progress is printed.

    The logged "Train Loss" is the sample-weighted mean of the batch losses seen
    during the epoch. It was previously the loss of the final mini-batch only,
    which is noisy and not comparable with the test loss printed next to it.
    """
    for epoch in range(num_epochs):
        # evaluate() leaves the model in eval mode, so re-enter train mode each epoch.
        model.train()
        running_loss = 0.0
        samples_seen = 0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            # Weight each batch by its size so a short final batch does not skew the mean.
            batch_count = len(targets)
            running_loss += loss.item() * batch_count
            samples_seen += batch_count

        if (epoch+1) % 10 == 0:
            # An empty loader yields NaN here instead of a NameError on `loss`.
            train_loss = running_loss / samples_seen if samples_seen else float('nan')
            test_loss = evaluate(model, criterion, test_loader)
            print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

def evaluate(model, criterion, test_loader):
    """Return the average per-sample loss of `model` over `test_loader`.

    Args:
        model: Network to score; it is switched to eval mode and left there.
        criterion: Loss callable `criterion(outputs, targets)`, assumed to return
            a per-batch mean (as `nn.MSELoss()` does by default).
        test_loader: Iterable of `(inputs, targets)` batches.

    Returns:
        float: Sample-weighted mean loss over every sample in `test_loader`.

    Raises:
        ValueError: If `test_loader` yields no samples.

    Each batch loss is weighted by its batch size. A plain mean of batch means
    gives a short final batch the same weight as a full one and so misstates
    the dataset-level loss.
    """
    model.eval()
    total_loss = 0.0
    total_samples = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            batch_count = len(targets)
            loss = criterion(model(inputs), targets)
            total_loss += loss.item() * batch_count
            total_samples += batch_count

    if total_samples == 0:
        raise ValueError("evaluate() received no samples: test_loader is empty")
    return total_loss / total_samples

# Optimizer comparison: each run builds a fresh network and trains it for the
# same number of epochs on the same loaders. `model` is rebound for the second
# run, so the SGD-trained network is not kept.

# Using SGD (lr=0.1)
# NOTE(review): in the recorded output below, this run logs NaN for every reported epoch.
model = AbaloneAgePredictor()
optimizer_sgd = optim.SGD(model.parameters(), lr=0.1)
train_and_evaluate(model, optimizer_sgd, criterion, train_loader, test_loader, num_epochs=100)

# Using Adagrad (lr=0.01)
model = AbaloneAgePredictor()
optimizer_adagrad = optim.Adagrad(model.parameters(), lr=0.01)
train_and_evaluate(model, optimizer_adagrad, criterion, train_loader, test_loader, num_epochs=100)


Epoch [10/100], Train Loss: nan, Test Loss: nan
Epoch [20/100], Train Loss: nan, Test Loss: nan
Epoch [30/100], Train Loss: nan, Test Loss: nan
Epoch [40/100], Train Loss: nan, Test Loss: nan
Epoch [50/100], Train Loss: nan, Test Loss: nan
Epoch [60/100], Train Loss: nan, Test Loss: nan
Epoch [70/100], Train Loss: nan, Test Loss: nan
Epoch [80/100], Train Loss: nan, Test Loss: nan
Epoch [90/100], Train Loss: nan, Test Loss: nan
Epoch [100/100], Train Loss: nan, Test Loss: nan
Epoch [10/100], Train Loss: 1.8452, Test Loss: 4.8317
Epoch [20/100], Train Loss: 3.7053, Test Loss: 4.6516
Epoch [30/100], Train Loss: 8.8154, Test Loss: 4.5895
Epoch [40/100], Train Loss: 2.7989, Test Loss: 4.5261
Epoch [50/100], Train Loss: 6.3701, Test Loss: 4.5020
Epoch [60/100], Train Loss: 2.7168, Test Loss: 4.4593
Epoch [70/100], Train Loss: 1.7931, Test Loss: 4.4379
Epoch [80/100], Train Loss: 3.0237, Test Loss: 4.4126
Epoch [90/100], Train Loss: 2.6437, Test Loss: 4.4098
Epoch [100/100], Train Loss: 3.83

In [None]:
def train_and_evaluate(model, optimizer, criterion, train_loader, test_loader, num_epochs=100):
    """Train `model` with a NaN guard and log progress every 10th epoch.

    Args:
        model: Network to train; updated in place.
        optimizer: Optimizer built over `model`'s parameters.
        criterion: Loss callable `criterion(outputs, targets)`. The train-loss
            average assumes it returns a per-batch mean (as `nn.MSELoss()` does
            by default).
        train_loader: Iterable of `(inputs, targets)` training batches.
        test_loader: Iterable of test batches, forwarded to `evaluate`.
        num_epochs: Maximum number of passes over `train_loader`.

    Returns:
        None. Progress is printed. Training stops early, before the update for
        the offending batch is applied, as soon as a batch loss is NaN.

    The logged "Train Loss" is the sample-weighted mean of the batch losses seen
    during the epoch. It was previously the loss of the final mini-batch only,
    which is noisy and not comparable with the test loss printed next to it.
    """
    for epoch in range(num_epochs):
        # evaluate() leaves the model in eval mode, so re-enter train mode each epoch.
        model.train()
        running_loss = 0.0
        samples_seen = 0
        for inputs, targets in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Check for NaN loss: once the loss is NaN further updates are meaningless.
            if torch.isnan(loss):
                print("NaN loss encountered during training. Exiting...")
                return

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight each batch by its size so a short final batch does not skew the mean.
            batch_count = len(targets)
            running_loss += loss.item() * batch_count
            samples_seen += batch_count

        if (epoch+1) % 10 == 0:
            # An empty loader yields NaN here instead of a NameError on `loss`.
            train_loss = running_loss / samples_seen if samples_seen else float('nan')
            test_loss = evaluate(model, criterion, test_loader)
            print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

def evaluate(model, criterion, test_loader):
    """Return the average per-sample loss of `model` over `test_loader`.

    Args:
        model: Network to score; it is switched to eval mode and left there.
        criterion: Loss callable `criterion(outputs, targets)`, assumed to return
            a per-batch mean (as `nn.MSELoss()` does by default).
        test_loader: Iterable of `(inputs, targets)` batches.

    Returns:
        float: Sample-weighted mean loss over every sample in `test_loader`, or
        `float('inf')` as soon as any batch loss is NaN (a message is printed).

    Raises:
        ValueError: If `test_loader` yields no samples.

    Each batch loss is weighted by its batch size. A plain mean of batch means
    gives a short final batch the same weight as a full one and so misstates
    the dataset-level loss.
    """
    model.eval()
    total_loss = 0.0
    total_samples = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            loss = criterion(model(inputs), targets)

            # Check for NaN loss before it can poison the running total.
            if torch.isnan(loss):
                print("NaN loss encountered during evaluation. Exiting...")
                return float('inf')  # Return infinity to indicate invalid loss

            batch_count = len(targets)
            total_loss += loss.item() * batch_count
            total_samples += batch_count

    if total_samples == 0:
        raise ValueError("evaluate() received no samples: test_loader is empty")
    return total_loss / total_samples

# Optimizer comparison. PyTorch's global RNG is re-seeded before each run so
# both optimizers start from identical initial weights and draw the same
# shuffled batch order. Differences in the logs then come from the optimizer
# rather than from a lucky or unlucky initialization.
COMPARISON_SEED = 42

# Using SGD (lr=0.1)
torch.manual_seed(COMPARISON_SEED)
model = AbaloneAgePredictor()
optimizer_sgd = optim.SGD(model.parameters(), lr=0.1)
train_and_evaluate(model, optimizer_sgd, criterion, train_loader, test_loader, num_epochs=100)

# Using Adagrad (lr=0.01); `model` is rebound, so the SGD-trained network is not kept.
torch.manual_seed(COMPARISON_SEED)
model = AbaloneAgePredictor()
optimizer_adagrad = optim.Adagrad(model.parameters(), lr=0.01)
train_and_evaluate(model, optimizer_adagrad, criterion, train_loader, test_loader, num_epochs=100)


NaN loss encountered during training. Exiting...
Epoch [10/100], Train Loss: 5.1802, Test Loss: 4.8475
Epoch [20/100], Train Loss: 1.6012, Test Loss: 4.6639
Epoch [30/100], Train Loss: 4.0529, Test Loss: 4.5903
Epoch [40/100], Train Loss: 2.7831, Test Loss: 4.5569
Epoch [50/100], Train Loss: 2.6741, Test Loss: 4.5240
Epoch [60/100], Train Loss: 4.4402, Test Loss: 4.5125
Epoch [70/100], Train Loss: 4.7123, Test Loss: 4.4962
Epoch [80/100], Train Loss: 2.5208, Test Loss: 4.4615
Epoch [90/100], Train Loss: 4.2704, Test Loss: 4.4424
Epoch [100/100], Train Loss: 1.7379, Test Loss: 4.4351
