In [1]:
import joblib

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

In [19]:
df=pd.read_csv('./melanoma_cancer_dataset/HAM10000_metadata.csv')
df

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear
...,...,...,...,...,...,...,...
10010,HAM_0002867,ISIC_0033084,akiec,histo,40.0,male,abdomen
10011,HAM_0002867,ISIC_0033550,akiec,histo,40.0,male,abdomen
10012,HAM_0002867,ISIC_0033536,akiec,histo,40.0,male,abdomen
10013,HAM_0000239,ISIC_0032854,akiec,histo,80.0,male,face


In [20]:
df = df[df.dx != 'akiec']

In [21]:
df['dx']=[1 if i in ['mel','bcc'] else 0 for i in df['dx']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['dx']=[1 if i in ['mel','bcc'] else 0 for i in df['dx']]


In [22]:
loc_encoder = LabelEncoder()

df['localization'] = loc_encoder.fit_transform(df['localization'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['localization'] = loc_encoder.fit_transform(df['localization'])


In [23]:
encoded_loc=list(df['localization'])

In [24]:
loc_original=list(loc_encoder.inverse_transform(df['localization']))

In [25]:
loc_dict = {category: encoded for category, encoded in zip(loc_original, encoded_loc)}
loc_dict

{'scalp': 11,
 'ear': 4,
 'face': 5,
 'back': 2,
 'trunk': 12,
 'chest': 3,
 'upper extremity': 14,
 'abdomen': 0,
 'unknown': 13,
 'lower extremity': 9,
 'genital': 7,
 'neck': 10,
 'hand': 8,
 'foot': 6,
 'acral': 1}

In [26]:
sex_encoder = LabelEncoder()
df['sex'] = sex_encoder.fit_transform(df['sex'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sex'] = sex_encoder.fit_transform(df['sex'])


In [30]:
result_dict={0:'benign',1:'malign'}

In [27]:
sex_original=list(sex_encoder.inverse_transform(df['sex']))
sex_encode=list(df['sex'])
sex_dict={category: encoded for category, encoded in zip(sex_original, sex_encode)}
sex_dict

{'male': 1, 'female': 0, 'unknown': 2}

In [31]:
import json
sex_data = json.dumps(sex_dict)
loc_data = json.dumps(loc_dict)
result_data=json.dumps(result_dict)
# write the JSON string to a file
with open('sex.json', 'w') as f:
    f.write(sex_data)
with open('loc.json', 'w') as f:
    f.write(loc_data)
with open('result.json', 'w') as f:
    f.write(result_data)

In [28]:
df['sex']

0        1
1        1
2        1
3        1
4        1
        ..
9683     1
9684     1
9685     1
9686     0
10014    0
Name: sex, Length: 9688, dtype: int32

In [None]:
sex_dict = {category: encoded for category, encoded in zip(loc_original, encoded_loc)}
mapping_dict

In [10]:
df=df.dropna()

In [11]:
X=df[['age','sex','localization']]
y=df['dx']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
lr_model = LogisticRegression()
lr_model.fit(X_train, y_train)

# Predict on the test data
y_pred = lr_model.predict(X_test)

# Evaluate the model (you might need different evaluation metrics)
accuracy = np.mean(y_pred == y_test)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.83


In [13]:
joblib.dump(lr_model, 'logistic.pkl')

['logistic.pkl']

In [48]:
y_test

7579    0
5652    0
5044    0
2778    1
2123    1
       ..
455     0
8511    0
1207    0
6802    0
8767    0
Name: dx, Length: 1927, dtype: int64

In [55]:
lr_model.predict_proba([[80.0,1,2]])



array([[0.52602028, 0.47397972]])

In [21]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
torch.seed(42)
# Define the CNN architecture
class SkinCancerCNN(nn.Module):
    def __init__(self, num_classes=2):
        super(SkinCancerCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=16*16*32, out_features=128)  # Update in_features
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)
        self.lr=joblib.load(filename='logistic.pkl')
    def forward(self, x, genetics=0,age='empty',sex='empty',location='empty'):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 16*16*32)  # Update the size here
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        x = nn.functional.softmax(x)
        if (age != 'empty' and sex!='empty' and location!='empty'):
            lr=lr_model.predict_proba([[age,sex,location]])
            if genetics == 1:
                lr[0][0]-=0.10
                lr[0][1]+=0.10
            if lr[0][1] > lr[0][0]:
                x[0][0]-=0.10
                x[0][1]+=0.10
        else:
            if genetics == 1:
                x[0][0]-=0.05
                x[0][1]+=0.05   
        return x

# Usage


# Data preprocessing and loading
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

train_dataset = torchvision.datasets.ImageFolder(root='./melanoma_cancer_dataset/train', transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Initialize the model, loss function, and optimizer
c_model = SkinCancerCNN(num_classes=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(c_model.parameters(), lr=0.001)


In [22]:
num_epochs = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


for epoch in range(num_epochs):
    total_correct = 0
    total_samples = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Check the shapes of images and labels here
        #print(images.shape, labels.shape)

        optimizer.zero_grad()
        outputs = c_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        _, predicted = torch.max(outputs, 1)
        total_samples += labels.size(0)
        total_correct += (predicted == labels).sum().item()

    accuracy = 100 * total_correct / total_samples
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.2f}%')

print('Training finished.')

# Save the trained model
torch.save(c_model.state_dict(), 'skin_cancer_model.pth')

  x = nn.functional.softmax(x)


Epoch [1/10], Loss: 0.4841, Accuracy: 79.79%
Epoch [2/10], Loss: 0.3242, Accuracy: 87.29%
Epoch [3/10], Loss: 0.5913, Accuracy: 88.31%
Epoch [4/10], Loss: 0.5147, Accuracy: 89.12%
Epoch [5/10], Loss: 0.3350, Accuracy: 89.14%
Epoch [6/10], Loss: 0.7098, Accuracy: 89.43%
Epoch [7/10], Loss: 0.3133, Accuracy: 90.11%
Epoch [8/10], Loss: 0.3838, Accuracy: 90.11%
Epoch [9/10], Loss: 0.3546, Accuracy: 89.43%
Epoch [10/10], Loss: 0.5201, Accuracy: 90.57%
Training finished.


In [9]:
model.eval()

SkinCancerCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=8192, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)

In [43]:
from PIL import Image


img = Image.open('/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/test/benign/melanoma_10021.jpg')
print(model(transform(img).to(device)))
torch.max(model(transform(img).to(device)), 1)[1]

tensor([[1.6131, 0.0035]], device='cuda:0', grad_fn=<AddmmBackward0>)


tensor([0], device='cuda:0')

In [51]:
img1 = Image.open('/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/test/malignant/melanoma_10118.jpg')
print(model(transform(img1).to(device)))
torch.max(model(transform(img1).to(device)), 1)[1]

tensor([[-0.2117, -0.1857]], device='cuda:0', grad_fn=<AddmmBackward0>)


tensor([1], device='cuda:0')

In [50]:
nn.functional.softmax(model(transform(img1).to(device)))

  nn.functional.softmax(model(transform(img1).to(device)))


tensor([[0.4935, 0.5065]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [23]:
d_model = SkinCancerCNN(num_classes=2).to(device)

In [25]:
d_model.load_state_dict(torch.load('skin_cancer_model.pth'))

<All keys matched successfully>

In [29]:
from PIL import Image

In [36]:

img = Image.open('./melanoma_cancer_dataset/test/malignant/melanoma_10105.jpg')

print(d_model(transform(img).to(device),genetics=1,age=80,sex=1,location=11))
print(d_model(transform(img).to(device)))
print(d_model(transform(img).to(device),genetics=1))

tensor([[-0.0951,  1.0951]], grad_fn=<CopySlices>)
tensor([[0.0049, 0.9951]], grad_fn=<SoftmaxBackward0>)
tensor([[-0.0451,  1.0451]], grad_fn=<CopySlices>)


  x = nn.functional.softmax(x)
  x = nn.functional.softmax(x)
