<a href="https://colab.research.google.com/github/madhura2024/cancer_prediction_using_ml/blob/main/cancer_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
# Step 1: Install dependencies
!pip install -q pandas scikit-learn tensorflow




# Step 2: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report






# deep learning tensor flow imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder




model = pd.read_csv("https://raw.githubusercontent.com/selva86/datasets/master/BreastCancer.csv")
# print(model.columns)
model[model.select_dtypes(include='object').columns] = model.select_dtypes(include='object').apply(pd.Series.str.strip)
model = model.drop(columns=['Id'])
model = model.dropna().drop_duplicates()





#  check names of columns ,
# if strip used axis = 1 has to be used
#  error=ignore if column name doesnt exist
# fit_tranform to just feed data to x train and only transform to find data for y train
# drop columns that are irrelevant , if doesn't exist error=ignore

x = model.drop('Class', axis=1)
y = model['Class']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=11)





# scale to set scale , remove outliers , differences
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)





# The model may still run, but the results can be inaccurate or unstable.
# It’s like stopping a test before finishing — the model didn’t "learn" enough
logreg = LogisticRegression(max_iter=10000, random_state=11)
logreg.fit(x_train_scaled, y_train)
y_pred = logreg.predict(x_test_scaled)




print("logical regression ", accuracy_score(y_test, y_pred))




forest = RandomForestClassifier(random_state=11)
forest.fit(x_train_scaled, y_train)
y_pred_forest = forest.predict(x_test_scaled)




print("Random Forest regression", accuracy_score(y_test, y_pred))




svm = SVC(kernel='rbf', probability=True, random_state=11)
svm.fit(x_train_scaled, y_train)
y_pred_svm = svm.predict(x_test_scaled)




print("svm regression ", accuracy_score(y_test, y_pred))





# encode all to be safe
# le = LabelEncoder()
# y_encode= le.fit_transform(y)     but unnecessary here since already integer value no string so all good .







# tensorflow: The main library we’re using to make the smart model.
# Sequential: Lets us build a model step by step, layer by layer.      (this is the order)
# Dense: A layer where every "neuron" connects to every input (a basic building block of the model).             (Make the first layer with 32 thinking units)
# Finally, give me 1 output — either 0 or 1."
# sigmoid squishes the result between 0 and 1, like a probability.




model = Sequential([
    Dense(32, activation='relu', input_shape=(x_train_scaled.shape[1],)),
                                                                                                  # (relu helps the model learn better — like saying "ignore negatives, keep positives.")
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])






# Use a smart way to improve yourself.” (adam is like a smart tutor)
# binary because it’s a yes/no problem)
# accuracy']: “While you're learning, show me how often you’re right.”








model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)








#  in logical regression max_iter determines model to read data 1000 ,, to learn properly , here we have epoch = 50 (learn 50 times)   batch size=32
model.fit(
    x_train_scaled, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    verbose=2
)



# accuracy
print("deep learning ", accuracy_score(y_test, y_pred))

logical regression  0.9629629629629629
Random Forest regression 0.9629629629629629
svm regression  0.9629629629629629
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


8/8 - 1s - 180ms/step - accuracy: 0.0996 - loss: 1.0294 - val_accuracy: 0.1587 - val_loss: 0.8711
Epoch 2/50
8/8 - 0s - 18ms/step - accuracy: 0.1315 - loss: 0.8586 - val_accuracy: 0.2857 - val_loss: 0.7587
Epoch 3/50
8/8 - 0s - 17ms/step - accuracy: 0.3227 - loss: 0.7287 - val_accuracy: 0.4603 - val_loss: 0.6725
Epoch 4/50
8/8 - 0s - 38ms/step - accuracy: 0.5578 - loss: 0.6333 - val_accuracy: 0.6667 - val_loss: 0.6039
Epoch 5/50
8/8 - 0s - 26ms/step - accuracy: 0.8526 - loss: 0.5575 - val_accuracy: 0.9365 - val_loss: 0.5430
Epoch 6/50
8/8 - 0s - 38ms/step - accuracy: 0.9243 - loss: 0.4917 - val_accuracy: 0.9524 - val_loss: 0.4851
Epoch 7/50
8/8 - 0s - 36ms/step - accuracy: 0.9482 - loss: 0.4293 - val_accuracy: 0.9524 - val_loss: 0.4279
Epoch 8/50
8/8 - 0s - 24ms/step - accuracy: 0.9562 - loss: 0.3699 - val_accuracy: 0.9524 - val_loss: 0.3738
Epoch 9/50
8/8 - 0s - 28ms/step - accuracy: 0.9522 - loss: 0.3171 - val_accuracy: 0.9524 - val_loss: 0.3272
Epoch 10/50
8/8 - 0s - 13ms/step - acc

take input

In [28]:
def predict_user_input():

    raw = input("Paste all 9 numbers (separated by space or comma):\n")


    values = [float(x) for x in raw.replace(",", " ").split()]


    if len(values) != 9:
     print("You must enter exactly 9 values.")
     return


    values_scaled = scaler.transform([values])


    pred1 = logreg.predict(values_scaled)[0]
    pred2 = forest.predict(values_scaled)[0]
    pred3 = svm.predict(values_scaled)[0]

    import numpy as np
    values_tf = np.array([values_scaled[0]])
    pred4 = 1 if model.predict(values_tf)[0][0] > 0.5 else 0

    print("\n--- Predictions ---")
    if pred1 == 1:
        print("Logistic Regression: Malignant")
    else:
        print("Logistic Regression: Benign")

    if pred2 == 1:
        print("Random Forest: Malignant")
    else:
        print("Random Forest: Benign")

    if pred3 == 1:
        print("SVM: Malignant")
    else:
        print("SVM: Benign")

    if pred4 == 1:
        print("TensorFlow DL Model: Malignant")
    else:
        print("TensorFlow DL Model: Benign")

predict_user_input()

Paste all 9 numbers (separated by space or comma):
1,2,3,4,5,6,7,8,9
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step





--- Predictions ---
Logistic Regression: Malignant
Random Forest: Malignant
SVM: Malignant
TensorFlow DL Model: Malignant
