<a href="https://colab.research.google.com/github/mithun-martin/MACHINE-LEARNING/blob/main/ANN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed
#➡️ These are from TensorFlow Keras, a deep learning library.
#Sequential means the ANN will have layers stacked one after another.
#Dense is a fully connected neural network layer — meaning every neuron in one layer connects to every neuron in the next.
#Input declares the shape of one input sample; set_random_seed seeds the random number generators for repeatable runs.


In [4]:
#load data
#split data
#normalizing data
#building ann model
#1.initialize model
#2.add 1st hidden layer
#3.add 2nd hidden layer
#4.add output layer
#5.compile ann

In [5]:
# Load the breast-cancer dataset that ships with scikit-learn.
# The returned object exposes the feature matrix as `.data` and the labels as `.target`.
data = load_breast_cancer()

In [6]:
#data.data: the input features (like tumor radius, texture etc.)
#data.target: the labels (0 = malignant, 1 = benign)

In [7]:
# Features go into X, class labels into y.
X, y = data.data, data.target

In [8]:
# Hold out 20% of the samples for testing; the remaining 80% are used for training.
# random_state=0 fixes the shuffle, so the split is identical on every run (reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [10]:
# Scaler that standardizes each feature using a mean and standard deviation
# learned when it is fitted (done on the training split in the cells below).
sc = StandardScaler()

In [11]:
#✅ Rule of Thumb:
#fit_transform() on training data
#transform() on test and future data

In [12]:
# Fit the scaler on the training data only (computes per-feature mean & std dev),
# then transform (scale) that same training data.
# NOTE: X_train is overwritten here, so re-running this cell refits `sc` on
# already-scaled data; restart the kernel and run all cells instead of re-running it.
X_train = sc.fit_transform(X_train)

In [13]:
# Apply the scaling learned from the training data to the test data.
# The scaler is NOT refitted here, so no information from the test set leaks into it.
# NOTE: X_test is overwritten, so re-running this cell would scale the test data a second time.
X_test = sc.transform(X_test)

In [14]:
# Initializing the ANN model.
# Seed the Python, NumPy and TensorFlow random number generators first, so that
# weight initialization and batch shuffling repeat from run to run — the same
# reproducibility the fixed random_state gives the train/test split.
# (Some GPU ops can still introduce small run-to-run differences.)
set_random_seed(0)
model = Sequential()

# ➡️ Creates a blank sequential neural network model where you can now start adding layers.

In [16]:
# Adding the input specification and the 1st hidden layer.
# Passing `input_dim` to a layer is the legacy form and triggers a warning in Keras 3;
# the recommended form for a Sequential model is an explicit Input(shape=...) first.
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(units=16, activation="relu"))

# ➡️ Input(shape=(X_train.shape[1],)) — tells the model how many input features (columns)
#    each sample has (30 in this case); in other words, how many input neurons are needed.
#    It carries the same information as the older `input_dim=X_train.shape[1]` argument.

# ➡️ The Dense call adds a layer with:
# 16 neurons
# activation='relu' — introduces non-linearity (ReLU = Rectified Linear Unit).


In [17]:
# 📌 Why do we need to specify `input_dim`?
# -----------------------------------------

# Because when you create your **first Dense (hidden) layer**, Keras needs to know:

# *   "How many inputs are coming in per data point?"

# *   So it can create the correct number of **weights (one for every input–neuron pair, i.e. inputs × neurons, plus one bias per neuron)**

# 📌 What about other layers?
# ---------------------------

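# 👉 You only need to specify `input_dim` for the **first hidden layer**.
# (`input_dim` is the legacy spelling; Keras 3 prefers an explicit `Input(shape=(n_features,))` as the model's first entry, which conveys the same information.)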

# After that:

# *   Keras knows how many neurons were in the previous layer.

# *   So it automatically connects the next layer to those outputs.

# That’s why subsequent layers don’t need `input_dim`.


In [18]:
# 📌 What is `X_train.shape[1]`?
# ------------------------------

# In Python (NumPy, Pandas), when you call `.shape` on a 2D array (like a table of data):

#print(X_train.shape)  # returns a tuple (number of rows, number of columns)

# Example:

# (455, 30)

# 455 → number of samples (rows)
# 30 → number of features (columns)

# 📌 Then what is `X_train.shape[1]`?
# -----------------------------------

# 👉 `shape[1]` means take the second value of that shape tuple → which is number of columns (features)

# So if:

#X_train.shape = (455, 30)

# Then:

#X_train.shape[1] = 30

# And this is exactly what you need for `input_dim` — because:

# input_dim = number of input features per sample


In [19]:
# 🔲 Second hidden layer: 8 neurons with ReLU activation.
# No input size is given here — it is taken from the previous layer's output.
model.add(
    Dense(units=8, activation="relu")
)

In [20]:
# Output layer — the final layer of the network:
#   units=1              → a single neuron, because this is binary classification.
#   activation="sigmoid" → squashes the output between 0 and 1 (like a probability).
model.add(
    Dense(units=1, activation="sigmoid")
)

In [22]:
# Final step before training: compile the model, i.e. configure how it learns.
#   optimizer="adam"            → efficient method for adjusting the weights.
#   loss="binary_crossentropy"  → loss function suited to binary classification.
#   metrics=["accuracy"]        → reports how often predictions match the labels.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)



In [24]:
#📈 Train the ANN model

# Keep the History object that fit() returns: it records the loss/accuracy of every
# epoch for later inspection, and assigning it stops the cell from echoing its repr.
history = model.fit(X_train, y_train, batch_size=32, epochs=100)

# ➡️ Starts training the model:

# *   **batch_size=32** — model updates its weights after every 32 samples.

# *   **epochs=100** — number of complete passes over the entire training data.

# The weights are adjusted after every batch (not once per epoch); each epoch is one
# full round of those batch updates over the training data, which gradually reduces the loss.


Epoch 1/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8177 - loss: 0.5575 
Epoch 2/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8787 - loss: 0.4820 
Epoch 3/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9470 - loss: 0.4120 
Epoch 4/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9305 - loss: 0.3367 
Epoch 5/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9541 - loss: 0.2472 
Epoch 6/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9541 - loss: 0.1866 
Epoch 7/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9642 - loss: 0.1304 
Epoch 8/100
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9749 - loss: 0.0994 
Epoch 9/100
[1m15/15[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x7c7fa2d3ac10>

In [25]:
# 📌 What is an **epoch**?
# ----------------------------

# ➡️ One **epoch** means the model has seen **the entire training dataset once**.
# It is **one complete pass through all the training samples**.

# Example:
# If you have 1000 training samples and set epochs=10,
# it means the model will see the entire 1000 samples **10 times** (one time per epoch)
# while updating weights each time to improve predictions.

# More epochs → model gets more chances to learn patterns from the data.
# Too few epochs → model may underfit (not learn enough)
# Too many epochs → model may overfit (learn too much, including noise)




In [26]:
# 📌 What is **batch_size**?
# ------------------------------

# ➡️ When training a model, it doesn't update its weights after seeing the entire dataset at once.
# Instead, it breaks the dataset into smaller groups called **batches**.

# ➡️ **batch_size** defines how many samples the model should look at before updating its weights once.

# Example:
# If you have 1000 training samples and batch_size=32:
# - The model will take the first 32 samples, make predictions, calculate error, and update weights.
# - Then move to the next 32 samples, and so on.
# - It will complete this for all samples in one epoch.

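# Smaller batch size → more weight updates per epoch (slower per epoch) and noisier updates, which can sometimes help the model generalize.
# Larger batch size → fewer, smoother updates (faster per epoch), but may sometimes miss finer patterns in the data.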


In [27]:
# So if the sample size is 1000, epochs=10 and batch_size=30: the entire dataset will be seen by the model 10 times, and the weights are updated after every 30 samples. One epoch therefore has ceil(1000/30) = 34 weight updates (33 full batches plus a last batch of 10 samples), so over 10 epochs the weights are updated about 340 times.