In [10]:
import pandas as pd
from sklearn.impute import SimpleImputer

# Load the dataset; the first CSV row supplies the column names.
# NOTE(review): this is an absolute, machine-specific path — consider a path
# relative to a configurable data directory so the notebook runs elsewhere.
dataset_path = r'C:\Users\lenovo\Desktop\ML Model Deployment\dataset.csv'
df = pd.read_csv(dataset_path, header=0)
df

Unnamed: 0,Id,Age,Age.Group,Gender,Locality,Marital.Status,Life.Style,Sleep,Category,Depression,...,oldpeak,slope,ca,thal,num,SK,SK.React,Reaction,Mortality,Follow.Up
0,1,45,41-50,Female,RURAL,MARRIED,NO,NO,FREE,YES,...,3.0,2,0,7,2,1,NO,0,0,60
1,2,51,51-60,Female,URBAN,MARRIED,NO,NO,FREE,YES,...,1.2,2,0,7,2,1,NO,0,0,15
2,3,55,51-60,Female,RURAL,MARRIED,YES,YES,FREE,YES,...,3.4,2,0,3,2,1,NO,0,0,6
3,4,55,51-60,Female,RURAL,MARRIED,YES,YES,FREE,YES,...,2.0,2,1,7,3,1,NO,0,0,52
4,5,56,51-60,Female,RURAL,MARRIED,YES,NO,FREE,YES,...,4.0,3,2,7,3,1,NO,0,0,34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
363,364,55,51-60,Male,URBAN,MARRIED,NO,NO,FREE,YES,...,0.5,2,0,7,3,0,BODY.PAIN,1,0,60
364,365,55,51-60,Male,RURAL,MARRIED,YES,NO,FREE,YES,...,2.0,2,3,7,3,1,STOMACH.BLEEDING,1,0,36
365,366,58,51-60,Male,URBAN,MARRIED,NO,NO,FREE,YES,...,0.8,1,3,3,1,1,COUGH.BLEEDING,1,0,32
366,367,58,51-60,Male,URBAN,MARRIED,NO,NO,FREE,YES,...,2.6,2,0,7,4,1,COUGH.BLEEDING,1,0,32


In [16]:
#importing necessary libraries
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Basic data cleaning
# For numeric columns, fill missing values with the median
# For categorical columns, fill missing values with the most frequent value
for column in df.columns:
    if df[column].dtype == 'object':  # Categorical data
        modes = df[column].mode()
        if modes.empty:
            # Every value is missing, so there is no most-frequent value to use.
            continue
        fill_value = modes.iloc[0]
    else:  # Numeric data
        fill_value = df[column].median()
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # df[column]: the in-place form operates on a temporary selection, is
    # deprecated in pandas 2.x and does not update df under Copy-on-Write.
    df[column] = df[column].fillna(fill_value)

# Preprocessing
# Split the columns by dtype so each group gets its own preprocessing:
# object-dtype columns are treated as categorical, everything else as numeric.
# NOTE(review): this selects every column of df, including identifier-like
# columns such as 'Id' and whichever column is later used as the target —
# confirm those should really be part of the feature matrix.
categorical_cols = df.select_dtypes(include=['object']).columns  # All text columns
numeric_cols = df.select_dtypes(exclude=['object']).columns  # All numeric columns

# Preprocessing for numerical data: scaling
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())  # Scale data to have mean 0 and variance 1
])

# Preprocessing for categorical data: one-hot encoding.
# handle_unknown='ignore' keeps transform() from raising on categories that
# were not present when the encoder was fitted.
onehot_step = ('onehot', OneHotEncoder(handle_unknown='ignore'))
categorical_transformer = Pipeline(steps=[onehot_step])

# Route each column group to its own pipeline.
column_pipelines = [
    ('num', numeric_transformer, numeric_cols),
    ('cat', categorical_transformer, categorical_cols),
]
preprocessor = ColumnTransformer(transformers=column_pipelines)

# Fit the preprocessing on the dataframe and transform it in one step.
df_processed = preprocessor.fit_transform(df)
df_processed

array([[-1.06744235, -0.49319696,  1.0792154 , ...,  1.        ,
         0.        ,  0.        ],
       [-0.37828659, -0.49319696, -0.92659908, ...,  1.        ,
         0.        ,  0.        ],
       [ 0.08115059, -0.49319696,  1.0792154 , ...,  1.        ,
         0.        ,  0.        ],
       ...,
       [ 0.42572847, -0.49319696,  1.0792154 , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.42572847, -0.49319696, -0.92659908, ...,  0.        ,
         0.        ,  0.        ],
       [-0.49314588, -0.49319696, -0.92659908, ...,  0.        ,
         0.        ,  0.        ]])

In [1]:
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

# Assuming 'df_processed' is the preprocessed dataset from earlier.
# ColumnTransformer can return a SciPy sparse matrix when the one-hot block is
# sparse enough; torch.tensor() cannot consume that, so densify it first.
features = df_processed.toarray() if hasattr(df_processed, 'toarray') else df_processed
# Assuming 'Diagnosis' is the target variable and is already integer-coded.
# NOTE(review): df_processed was built from every column of df, so if
# 'Diagnosis' is one of them the target is also present in the features
# (label leakage) — confirm and exclude it from preprocessing if so.
labels = df['Diagnosis'].to_numpy()

# Full-dataset tensors (y is used later to size the output layer)
X = torch.tensor(features, dtype=torch.float)
y = torch.tensor(labels, dtype=torch.long)

# Split the NumPy arrays directly, then convert each part to a tensor once;
# this avoids a tensor -> numpy -> tensor round trip and yields the same split.
X_train_np, X_test_np, y_train_np, y_test_np = train_test_split(
    features, labels, test_size=0.2, random_state=42
)
X_train = torch.tensor(X_train_np, dtype=torch.float)
X_test = torch.tensor(X_test_np, dtype=torch.float)
y_train = torch.tensor(y_train_np, dtype=torch.long)
y_test = torch.tensor(y_test_np, dtype=torch.long)

# Create DataLoader for training and testing
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)   # reshuffled every epoch
test_loader = DataLoader(test_dataset, batch_size=10, shuffle=False)    # fixed order for evaluation

# Define the model combining GNN and RNN
class DrugCombinationModel(nn.Module):
    """Graph convolution followed by an LSTM and a linear classifier.

    Args:
        num_features: size of each input feature vector.
        hidden_dim: output size of the graph convolution (and LSTM input size).
        rnn_hidden: hidden size of the LSTM.
        num_classes: number of output logits per row.
    """

    def __init__(self, num_features, hidden_dim, rnn_hidden, num_classes):
        super().__init__()
        self.gcn = GCNConv(num_features, hidden_dim)
        self.rnn = nn.LSTM(hidden_dim, rnn_hidden, batch_first=True)
        self.fc = nn.Linear(rnn_hidden, num_classes)

    def forward(self, data, edge_index=None):
        """Return one row of class logits per input row.

        Args:
            data: either a graph object exposing ``.x`` and ``.edge_index``
                (the original calling convention) or a plain feature tensor of
                shape (rows, num_features), e.g. a batch from a TensorDataset.
            edge_index: optional edge list overriding ``data.edge_index``; for
                a plain tensor it defaults to an empty edge list.

        Returns:
            Tensor of shape (rows, num_classes).
        """
        if isinstance(data, torch.Tensor):
            x = data
            if edge_index is None:
                # No graph supplied: use an empty edge list so plain tensor
                # batches work instead of failing on `data.x`.
                # NOTE(review): with no edges (GCNConv adds self-loops by
                # default) each row is transformed independently — pass a real
                # edge_index to actually use graph structure.
                edge_index = torch.empty((2, 0), dtype=torch.long, device=x.device)
        else:
            x = data.x
            if edge_index is None:
                edge_index = data.edge_index
        x = self.gcn(x, edge_index)              # Graph convolution
        # The rows are fed to the LSTM as a single sequence (batch size 1).
        x, _ = self.rnn(x.unsqueeze(0))
        x = self.fc(x.squeeze(0))                # Drop the batch dimension and classify
        return x

# Hyperparameters and setup
# Seed torch so weight initialisation and DataLoader shuffling are reproducible
# (same seed value as the train/test split's random_state).
torch.manual_seed(42)

num_features = X_train.shape[1]  # Number of features should match the input size
hidden_dim = 64
rnn_hidden = 128
# CrossEntropyLoss needs every label to lie in [0, num_classes). Sizing by the
# largest label (rather than the count of distinct labels) also covers label
# sets that are not contiguous from 0; for 0..K-1 labels the value is unchanged.
num_classes = int(y.max().item()) + 1

model = DrugCombinationModel(num_features, hidden_dim, rnn_hidden, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training loop
def train_model(model, train_loader, num_epochs=10, optimizer=None, criterion=None):
    """Train ``model`` on ``train_loader`` and print the mean loss of each epoch.

    Args:
        model: module called as ``model(batch)`` that returns class logits.
        train_loader: iterable yielding ``(data, labels)`` batches.
        num_epochs: number of passes over ``train_loader`` (default 10).
        optimizer: optimizer to step; defaults to the notebook-level ``optimizer``.
        criterion: loss function; defaults to the notebook-level ``criterion``.

    Returns:
        None. Progress is reported with one printed line per epoch.
    """
    # Fall back to the notebook-level objects so existing two-argument calls
    # keep working; passing them explicitly avoids the hidden global dependency.
    if optimizer is None:
        optimizer = globals()['optimizer']
    if criterion is None:
        criterion = globals()['criterion']

    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0
        for data, labels in train_loader:
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            # Nothing to average; avoid dividing by zero on an empty loader.
            print(f'Epoch {epoch}, no batches to train on')
            continue
        # One line per epoch (mean batch loss) instead of one line per batch.
        print(f'Epoch {epoch}, Loss {total_loss / num_batches}')

# Example usage: run the training loop on the training batches.
train_model(model, train_loader)

# REMINDERS: install these packages into the kernel's environment before running this notebook
# (inside a notebook cell, use %pip instead of pip).
#pip install torch torchvision torchaudio -f https://download.pytorch.org/whl/cu111/torch_stable.html
#pip install torch-geometric
#pip install pandas
#pip install matplotlib
#pip install -U scikit-learn
#pip install networkx

ModuleNotFoundError: No module named 'torch'