In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

# Load and preprocess Heart Disease dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
columns = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target']
data = pd.read_csv(url, header=None, names=columns, na_values="?")
data = data.dropna()  # Remove rows with missing values
data['target'] = (data['target'] > 0).astype(int)  # Convert to binary classification

X = data.drop(columns=['target']).values
y = data['target'].values

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [3]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Create DataLoader for training
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)



In [4]:
# Define MLP model in PyTorch
class MLPModel(nn.Module):
    def __init__(self, input_dim):
        super(MLPModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)



In [5]:
# Initialize model, loss function, and optimizer
input_dim = X.shape[1]
model = MLPModel(input_dim)
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Train the model
epochs = 10
for epoch in range(epochs):
    model.train()
    epoch_loss = 0.0
    correct = 0
    total = 0

    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        predictions = (outputs > 0.5).float()
        correct += (predictions == batch_y).sum().item()
        total += batch_y.size(0)

    epoch_accuracy = correct / total
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(train_loader):.4f}, Accuracy: {epoch_accuracy:.4f}")


Epoch 1/10, Loss: 0.5136, Accuracy: 0.7595
Epoch 2/10, Loss: 0.3715, Accuracy: 0.8439
Epoch 3/10, Loss: 0.3316, Accuracy: 0.8734
Epoch 4/10, Loss: 0.3020, Accuracy: 0.8776
Epoch 5/10, Loss: 0.2919, Accuracy: 0.8608
Epoch 6/10, Loss: 0.2579, Accuracy: 0.8903
Epoch 7/10, Loss: 0.2323, Accuracy: 0.8945
Epoch 8/10, Loss: 0.2040, Accuracy: 0.9198
Epoch 9/10, Loss: 0.1776, Accuracy: 0.9156
Epoch 10/10, Loss: 0.1532, Accuracy: 0.9367


In [6]:
# Evaluate the model
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = criterion(test_outputs, y_test_tensor).item()
    test_predictions = (test_outputs > 0.5).float()
    test_accuracy = (test_predictions == y_test_tensor).float().mean().item()

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Test Loss: 0.5046, Test Accuracy: 0.9000


## Question 1

In [7]:
# Load and preprocess dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt"
columns = ['f1', 'f2', 'f3', 'f4', 'target']
data = pd.read_csv(url, header=None, names=columns, na_values="?")


In [8]:
data.head()

Unnamed: 0,f1,f2,f3,f4,target
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [9]:
data.isna().sum()

f1        0
f2        0
f3        0
f4        0
target    0
dtype: int64

In [10]:
data.dtypes

f1        float64
f2        float64
f3        float64
f4        float64
target      int64
dtype: object

In [11]:
x=data.drop(columns='target',axis=1).values
y=data['target'].values

In [12]:
scaler = StandardScaler()
X = scaler.fit_transform(x)

In [13]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [14]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Create DataLoader for training
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)



In [15]:
# Define MLP model in PyTorch
class MLPModel(nn.Module):
    def __init__(self, input_dim):
        super(MLPModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)



In [16]:
# Initialize model, loss function, and optimizer
input_dim = X.shape[1]
model = MLPModel(input_dim)
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Train the model
epochs = 10
for epoch in range(epochs):
    model.train()
    epoch_loss = 0.0
    correct = 0
    total = 0

    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        predictions = (outputs > 0.5).float()
        correct += (predictions == batch_y).sum().item()
        total += batch_y.size(0)

    epoch_accuracy = correct / total
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(train_loader):.4f}, Accuracy: {epoch_accuracy:.4f}")


Epoch 1/10, Loss: 0.0771, Accuracy: 0.9717
Epoch 2/10, Loss: 0.0022, Accuracy: 1.0000
Epoch 3/10, Loss: 0.0004, Accuracy: 1.0000
Epoch 4/10, Loss: 0.0002, Accuracy: 1.0000
Epoch 5/10, Loss: 0.0001, Accuracy: 1.0000
Epoch 6/10, Loss: 0.0001, Accuracy: 1.0000
Epoch 7/10, Loss: 0.0001, Accuracy: 1.0000
Epoch 8/10, Loss: 0.0001, Accuracy: 1.0000
Epoch 9/10, Loss: 0.0000, Accuracy: 1.0000
Epoch 10/10, Loss: 0.0000, Accuracy: 1.0000


In [17]:
# Evaluate the model
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = criterion(test_outputs, y_test_tensor).item()
    test_predictions = (test_outputs > 0.5).float()
    test_accuracy = (test_predictions == y_test_tensor).float().mean().item()

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Test Loss: 0.0000, Test Accuracy: 1.0000


## Question 2

In [18]:

url="https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
df=pd.read_csv(url)
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [19]:
df.shape

(891, 12)

In [20]:
df.drop(columns=['PassengerId','Name','Ticket','Cabin'],axis=1,inplace=True)

In [21]:
df.isna().sum()

Survived      0
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
Embarked      2
dtype: int64

In [22]:
df['Age']=df['Age'].fillna(df['Age'].mean())

In [23]:
df['Embarked'].value_counts()

Embarked
S    644
C    168
Q     77
Name: count, dtype: int64

In [24]:
df['Embarked']=df['Embarked'].fillna('S')

In [25]:
df.isna().sum()

Survived    0
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64

In [26]:
df=pd.get_dummies(df,columns=['Sex','Embarked'])

In [27]:
df.head()


Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,0,3,22.0,1,0,7.25,False,True,False,False,True
1,1,1,38.0,1,0,71.2833,True,False,True,False,False
2,1,3,26.0,0,0,7.925,True,False,False,False,True
3,1,1,35.0,1,0,53.1,True,False,False,False,True
4,0,3,35.0,0,0,8.05,False,True,False,False,True


In [28]:
x=df.drop(columns=['Survived']).values
y=df['Survived'].values
scaler = StandardScaler()
X = scaler.fit_transform(x)
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

In [29]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Create DataLoader for training
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)



In [30]:
# Define MLP model in PyTorch
class MLPModel(nn.Module):
    def __init__(self, input_dim):
        super(MLPModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)



In [31]:
# Initialize model, loss function, and optimizer
input_dim = X.shape[1]
model = MLPModel(input_dim)
criterion = nn.BCELoss()  
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Train the model
epochs = 30
for epoch in range(epochs):
    model.train()
    epoch_loss = 0.0
    correct = 0
    total = 0

    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        predictions = (outputs > 0.5).float()
        correct += (predictions == batch_y).sum().item()
        total += batch_y.size(0)

    epoch_accuracy = correct / total
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(train_loader):.4f}, Accuracy: {epoch_accuracy:.4f}")


Epoch 1/30, Loss: 0.4743, Accuracy: 0.7809
Epoch 2/30, Loss: 0.4493, Accuracy: 0.8301
Epoch 3/30, Loss: 0.4208, Accuracy: 0.8230
Epoch 4/30, Loss: 0.4116, Accuracy: 0.8371
Epoch 5/30, Loss: 0.4165, Accuracy: 0.8357
Epoch 6/30, Loss: 0.4093, Accuracy: 0.8413
Epoch 7/30, Loss: 0.3995, Accuracy: 0.8329
Epoch 8/30, Loss: 0.4018, Accuracy: 0.8357
Epoch 9/30, Loss: 0.3878, Accuracy: 0.8413
Epoch 10/30, Loss: 0.3997, Accuracy: 0.8329
Epoch 11/30, Loss: 0.3869, Accuracy: 0.8385
Epoch 12/30, Loss: 0.3809, Accuracy: 0.8455
Epoch 13/30, Loss: 0.3832, Accuracy: 0.8413
Epoch 14/30, Loss: 0.3779, Accuracy: 0.8455
Epoch 15/30, Loss: 0.3794, Accuracy: 0.8441
Epoch 16/30, Loss: 0.3833, Accuracy: 0.8497
Epoch 17/30, Loss: 0.3819, Accuracy: 0.8455
Epoch 18/30, Loss: 0.3599, Accuracy: 0.8483
Epoch 19/30, Loss: 0.3705, Accuracy: 0.8539
Epoch 20/30, Loss: 0.3647, Accuracy: 0.8525
Epoch 21/30, Loss: 0.3662, Accuracy: 0.8441
Epoch 22/30, Loss: 0.3635, Accuracy: 0.8469
Epoch 23/30, Loss: 0.3719, Accuracy: 0.84

In [32]:
# Evaluate the model
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = criterion(test_outputs, y_test_tensor).item()
    test_predictions = (test_outputs > 0.5).float()
    test_accuracy = (test_predictions == y_test_tensor).float().mean().item()

print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


Test Loss: 0.5051, Test Accuracy: 0.8045
