<a href="https://colab.research.google.com/github/rajantripathi/Machine-Learning-projects/blob/master/mammography_image_efficientNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import os

# Define the path to the directory containing the MIAS mammography images
image_dir = '/content/drive/MyDrive/mias'

# Create an empty list to store the file names and labels
data = []

# Loop over each image in the directory
for filename in os.listdir(image_dir):
    # Get the label for the current image
    if filename.startswith('mdb'):
        label = 'benign'
    elif filename.startswith('malignant'):
        label = 'malignant'
    else:
        label = 'unknown'
        
    # Append the file name and label to the list
    data.append({'filename': filename, 'label': label})
    
# Convert the list to a Pandas DataFrame
df = pd.DataFrame(data)

# Save the DataFrame to a CSV file
df.to_csv('/content/drive/MyDrive/mias_metadata.csv', index=False)


FileNotFoundError: ignored

In [2]:
df0 = pd.read_csv('/content/drive/MyDrive/mias_metadata.csv')
df0.head()
df0.to_excel('/content/drive/MyDrive/BC_metadata/original.xlsx')

In [3]:
df1= pd.read_csv('/content/drive/MyDrive/BC_metadata/mias_metadata.csv')
df1.head()
df1.to_excel('/content/drive/MyDrive/BC_metadata/original_data1.xlsx')

In [4]:
df3 = df1.drop(['Images path'  ] , axis = 'columns')
df1.head()



Unnamed: 0.1,Unnamed: 0,Images path,Labels
0,0,/kaggle/input/mias-png/mdb001.png,1
1,1,/kaggle/input/mias-png/mdb002.png,1
2,2,/kaggle/input/mias-png/mdb003.png,0
3,3,/kaggle/input/mias-png/mdb004.png,0
4,4,/kaggle/input/mias-png/mdb005.png,1


In [5]:
df3['filename'] = df0.filename
df3.head()

Unnamed: 0.1,Unnamed: 0,Labels,filename
0,0,1,mdb123.png
1,1,1,mdb099.png
2,2,0,mdb001.png
3,3,0,mdb144.png
4,4,1,mdb211.png


In [6]:
frame = [df0 , df1]

df_new = pd.concat(frame)
df_new

Unnamed: 0.1,filename,label,Unnamed: 0,Images path,Labels
0,mdb123.png,benign,,,
1,mdb099.png,benign,,,
2,mdb001.png,benign,,,
3,mdb144.png,benign,,,
4,mdb211.png,benign,,,
...,...,...,...,...,...
317,,,317.0,/kaggle/input/mias-png/mdb318.png,0.0
318,,,318.0,/kaggle/input/mias-png/mdb319.png,0.0
319,,,319.0,/kaggle/input/mias-png/mdb320.png,0.0
320,,,320.0,/kaggle/input/mias-png/mdb321.png,0.0


In [7]:
#To perform breast cancer classification on the MIAS mammography dataset using a pre-trained network, you can follow these steps:

# 1. Import necessary libraries
# 2. Download and preprocess the MIAS dataset
# 3. Load the pre-trained network
# 4. Fine-tune the network and train the model
# 5. Evaluate the model

# I'll provide an example using the TensorFlow library in Python. You can run this code in a Google Colab notebook.

# **Step 1: Import necessary libraries**

# ```python
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
# ```

# **Step 2: Download and preprocess the MIAS dataset**

# First, download the MIAS dataset from the official website and upload it to your Google Colab environment. You can use the following code to upload files:

# ```python


In [8]:
# from google.colab import files
# uploaded = files.upload()
# ```

# Next, you can preprocess the dataset. The format of the dataset should be a CSV file containing the file names and labels. Split the dataset into training, validation, and testing sets:

# ```python
# Read the CSV file
data = pd.read_excel('/content/drive/MyDrive/breast_data.xlsx')
data.head()
data = data.drop('Unnamed: 0' , axis = 'columns')
data.head()

Unnamed: 0,filename,label
0,mdb123.png,1
1,mdb099.png,1
2,mdb001.png,0
3,mdb144.png,0
4,mdb211.png,1


In [9]:
# Split the dataset
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42)

train_data.shape , val_data.shape

((192, 2), (65, 2))

In [10]:
# Create ImageDataGenerators for training, validation, and testing
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)



In [11]:
type(train_datagen)

keras.preprocessing.image.ImageDataGenerator

In [12]:
train_generator = train_datagen.flow_from_dataframe(train_data, 
                                                    directory='/content/drive/MyDrive/mias_images', 
                                                    x_col='filename', 
                                                    y_col='label', 
                                                    target_size=(224, 224), 
                                                    class_mode='raw', 
                                                    batch_size=32)

val_generator = val_datagen.flow_from_dataframe(val_data, 
                                                directory='/content/drive/MyDrive/mias_images', 
                                                x_col='filename', 
                                                y_col='label', 
                                                target_size=(224, 224), 
                                                class_mode='raw', 
                                                batch_size=32)

test_generator = test_datagen.flow_from_dataframe(test_data, 
                                                  directory='/content/drive/MyDrive/mias_images', 
                                                  x_col='filename', 
                                                  y_col='label', 
                                                  target_size=(224, 224), 
                                                  class_mode='raw', 
                                                  batch_size=32)


Found 192 validated image filenames.
Found 65 validated image filenames.
Found 65 validated image filenames.


In [13]:
# ```

# **Step 3: Load the pre-trained network**

# Here, we use the EfficientNetB0 pre-trained model as the base model:

# ```python
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# ```



Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5


In [14]:
# **Step 4: Fine-tune the network and train the model**

# Add custom layers for classification and create the final model:

# ```python
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)



In [15]:
# Freeze the base model
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(lr=1e-3), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(train_generator, 
                    epochs=10, 
                    validation_data=val_generator)




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [17]:


# **Step 5: Evaluate the model**

# Evaluate the model on the test dataset:

# ```python
loss, accuracy = model.evaluate(test_generator)
print('Test loss:', loss)
print('Test accuracy:', accuracy)
# ```

# This code will perform breast cancer classification on the MIAS mammography dataset using the pre-trained EfficientNetB0 model. You can modify the code to use other pre-trained networks and adjust hyperparameters as needed.

Test loss: 0.6594410538673401
Test accuracy: 0.6307692527770996
