1. Virtual environment

In [1]:
pip install numpy pandas scikit-learn streamlit


Collecting streamlit
  Downloading streamlit-1.37.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<5,>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.1-py3-none-manylinux2014_x86_64.whl.metadata (37 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading streamlit-1.37.0-py2.py3-none-any.whl (8.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m61.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K   [90m━━━━━━━━━━

2. # Upload the dataset

In [2]:
from sklearn.datasets import load_breast_cancer
import pandas as pd

# Load the dataset
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

# Save to CSV
df.to_csv('breast_cancer_data.csv', index=False)


3. Data preparation

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def load_and_preprocess_data():
    # Load the dataset
    df = pd.read_csv('breast_cancer_data.csv')

    # Separate features and target
    X = df.drop(columns=['target'])
    y = df['target']

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    return X_train_scaled, X_test_scaled, y_train, y_test

if __name__ == "__main__":
    X_train, X_test, y_train, y_test = load_and_preprocess_data()


4. Feature selection

In [4]:
from sklearn.feature_selection import SelectKBest, f_classif

def select_features(X_train, y_train, X_test, k=10):
    # Apply SelectKBest with f_classif as the scoring function
    selector = SelectKBest(score_func=f_classif, k=k)
    X_train_selected = selector.fit_transform(X_train, y_train)
    X_test_selected = selector.transform(X_test)

    return X_train_selected, X_test_selected

if __name__ == "__main__":
    X_train, X_test, y_train, y_test = load_and_preprocess_data()
    X_train_selected, X_test_selected = select_features(X_train, y_train, X_test)


5. Model Tuning

In [5]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

def grid_search_cv(X_train, y_train):
    # Define the model
    model = MLPClassifier(max_iter=1000)

    # Define the parameter grid
    param_grid = {
        'hidden_layer_sizes': [(50, 50), (100,)],
        'activation': ['tanh', 'relu'],
        'solver': ['sgd', 'adam'],
        'alpha': [0.0001, 0.05],
        'learning_rate': ['constant','adaptive'],
    }

    # Set up GridSearchCV
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')

    # Fit the model
    grid_search.fit(X_train, y_train)

    # Get the best parameters and score
    best_params = grid_search.best_params_
    best_score = grid_search.best_score_

    return best_params, best_score

if __name__ == "__main__":
    X_train, X_test, y_train, y_test = load_and_preprocess_data()
    X_train_selected, X_test_selected = select_features(X_train, y_train, X_test)
    best_params, best_score = grid_search_cv(X_train_selected, y_train)
    print(f"Best Parameters: {best_params}")
    print(f"Best Score: {best_score}")




Best Parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant', 'solver': 'adam'}
Best Score: 0.9604395604395604


 Implementing an Artificial Neural Network (ANN) Model
1. ANN Model Creation

In [6]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

def create_and_evaluate_ann(X_train, X_test, y_train, y_test, best_params):
    # Create the model with best parameters from Grid Search
    model = MLPClassifier(**best_params, max_iter=1000)

    # Train the model
    model.fit(X_train, y_train)

    # Make predictions
    y_pred = model.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    return accuracy, report

if __name__ == "__main__":
    X_train, X_test, y_train, y_test = load_and_preprocess_data()
    X_train_selected, X_test_selected = select_features(X_train, y_train, X_test)
    best_params, best_score = grid_search_cv(X_train_selected, y_train)
    accuracy, report = create_and_evaluate_ann(X_train_selected, X_test_selected, y_train, y_test, best_params)
    print(f"Accuracy: {accuracy}")
    print(f"Classification Report:\n{report}")




Accuracy: 0.9736842105263158
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.98      0.97        43
           1       0.99      0.97      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



 6:- Building a Streamlit App Locally
1. Streamlit Code

In [7]:
import streamlit as st
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.neural_network import MLPClassifier

# Load and preprocess the data
@st.cache
def load_data():
    data = load_breast_cancer()
    df = pd.DataFrame(data.data, columns=data.feature_names)
    df['target'] = data.target
    return df

# Main Streamlit app
def main():
    st.title("Breast Cancer Prediction App")

    # Load data
    df = load_data()
    st.write(df.head())

    # Feature selection
    X = df.drop(columns=['target'])
    y = df['target']
    selector = SelectKBest(score_func=f_classif, k=10)
    X_selected = selector.fit_transform(X, y)

    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train the model
    model = MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=1000, activation='relu', solver='adam', alpha=0.0001, learning_rate='constant')
    model.fit(X_train_scaled, y_train)

    # Make predictions
    predictions = model.predict(X_test_scaled)

    # Display results
    st.write("Predictions:")
    st.write(predictions)

    # Allow user to input features and predict
    st.sidebar.header("User Input Features")
    input_data = {}
    for col in df.columns[:-1]:
        input_data[col] = st.sidebar.slider(col, float(df[col].min()), float(df[col].max()), float(df[col].mean()))

    input_df = pd.DataFrame(input_data, index=[0])
    input_scaled = scaler.transform(selector.transform(input_df))
    input_prediction = model.predict(input_scaled)

    st.sidebar.write(f"Prediction: {'Malignant' if input_prediction[0] == 1 else 'Benign'}")

if __name__ == "__main__":
    main()


2024-07-27 18:20:03.031 
  command:

    streamlit run /usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2024-07-27 18:20:03.037 
`st.cache` is deprecated and will be removed soon. Please use one of Streamlit's new caching commands, `st.cache_data` or `st.cache_resource`.
More information [in our docs](https://docs.streamlit.io/develop/concepts/architecture/caching).

**Note**: The behavior of `st.cache` was updated in Streamlit 1.36 to the new caching logic used by `st.cache_data` and `st.cache_resource`.
This might lead to some problems or unexpected behavior in certain edge cases.

2024-07-27 18:20:03.038 No runtime found, using MemoryCacheStorageManager
2024-07-27 18:20:03.050 No runtime found, using MemoryCacheStorageManager
