In [None]:
!pip install snowflake-connector-python pandas scikit-learn

In [None]:
import html
import os
from getpass import getpass

import numpy as np
import pandas as pd
import snowflake.connector

# Step 1: connect to Snowflake.
# Credentials come from environment variables, with an interactive prompt as a
# fallback, so that real secrets are never typed into (and saved with) the notebook.
conn = snowflake.connector.connect(
    user=os.environ.get('SNOWFLAKE_USER') or input('Snowflake user: '),
    password=os.environ.get('SNOWFLAKE_PASSWORD') or getpass('Snowflake password: '),
    account=os.environ.get('SNOWFLAKE_ACCOUNT') or input('Snowflake account: '),
    database='BREAST_CANCER_DATASET',  # database name
    schema='PUBLIC',
    warehouse='COMPUTE_WH',  # warehouse name
)

In [None]:
# Step 2: run the query and load the result into a DataFrame.
query = 'SELECT * FROM "MEDIDATA"'  # table name
try:
    df = pd.read_sql(query, conn)
finally:
    # Step 3: always release the connection, even when the query fails.
    conn.close()

## Exploratory Data Analysis (EDA)

In [None]:
# Preview the first rows of the table loaded from Snowflake.
df.head()

In [None]:
# Preview the last rows of the table loaded from Snowflake.
df.tail()

In [None]:
# Print a concise summary of the DataFrame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Descriptive statistics for the DataFrame's columns.
df.describe()

In [None]:
# (rows, columns) of the loaded table.
df.shape

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## Naive Bayes classifier

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
# Encode the DIAGNOSIS labels as integers, overwriting the column in df.
le = LabelEncoder().fit(df['DIAGNOSIS'])
df['DIAGNOSIS'] = le.transform(df['DIAGNOSIS'])

In [None]:
# Target is the encoded DIAGNOSIS column; features are every other column
# except the ID.
y = df['DIAGNOSIS']
X = df.drop(['ID', 'DIAGNOSIS'], axis=1)

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Create the Gaussian Naive Bayes classifier with its default settings.
nb_model = GaussianNB()

In [None]:
# Train the classifier on the scaled training features.
nb_model.fit(X_train_scaled, y_train)

In [None]:
# Predict labels for the scaled test features.
y_pred = nb_model.predict(X_test_scaled)

In [None]:
# Score the test-set predictions against the true labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Naive Bayes Accuracy:", accuracy)

In [None]:
# Confusion matrix of true vs. predicted labels on the test set; kept in
# con_matrix so it can be plotted later.
con_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("Confusion Matrix \n", con_matrix)

In [None]:
# Text report of per-class metrics for the test-set predictions.
print(classification_report(y_test, y_pred))

In [None]:
import pickle

# Persist the fitted model and scaler to disk so the prediction function
# can reload them from these same file names.
for artifact_path, artifact in (('nb_model.pkl', nb_model), ('scaler.pkl', scaler)):
    with open(artifact_path, 'wb') as f:
        pickle.dump(artifact, f)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Bar chart of how many samples fall into each encoded diagnosis class.
fig, ax = plt.subplots(figsize=(8, 5))
df['DIAGNOSIS'].value_counts().plot(kind='bar', color=['skyblue', 'salmon'], ax=ax)
ax.set_title('Distribution of Diagnosis (0=Benign, 1=Malignant)')
ax.set_xlabel('Diagnosis')
ax.set_ylabel('Count')
ax.tick_params(axis='x', labelrotation=0)
plt.show()




In [None]:
# Annotated heatmap of the Naive Bayes confusion matrix computed earlier.
class_names = ['Benign', 'Malignant']
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    con_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix - Naive Bayes')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()

In [None]:
pip install gradio

In [None]:
import gradio as gr

def predict_cancer(radius_mean, texture_mean, perimeter_mean, area_mean,
                   compactness_mean, concavity_mean, concave_points_mean,
                   radius_worst, texture_worst, perimeter_worst, area_worst):
    """Predict a diagnosis from eleven user-supplied nucleus measurements.

    The model input row has thirty values: the eleven arguments plus nineteen
    fixed "average" placeholders for the measurements the UI does not ask for.
    The row is scaled with the saved scaler and classified with the saved
    Naive Bayes model.

    Args:
        radius_mean, texture_mean, perimeter_mean, area_mean,
        compactness_mean, concavity_mean, concave_points_mean:
            "mean" measurements entered by the user.
        radius_worst, texture_worst, perimeter_worst, area_worst:
            "worst" measurements entered by the user.

    Returns:
        str: an HTML fragment showing the predicted class, its confidence and
        both class probabilities, or an error panel if anything goes wrong.
        The function never raises.
    """
    try:
        # An empty number field is assumed to arrive as None; report it
        # clearly instead of failing later inside the scaler.
        user_inputs = {
            "Radius Mean": radius_mean,
            "Texture Mean": texture_mean,
            "Perimeter Mean": perimeter_mean,
            "Area Mean": area_mean,
            "Compactness Mean": compactness_mean,
            "Concavity Mean": concavity_mean,
            "Concave Points Mean": concave_points_mean,
            "Radius Worst": radius_worst,
            "Texture Worst": texture_worst,
            "Perimeter Worst": perimeter_worst,
            "Area Worst": area_worst,
        }
        missing = [label for label, value in user_inputs.items() if value is None]
        if missing:
            raise ValueError("Please enter a value for: " + ", ".join(missing))

        # NOTE(review): the row order is assumed to match the column order of
        # the training features — confirm against X.columns.
        features = np.array([[
            radius_mean,           # User input
            texture_mean,          # User input
            perimeter_mean,        # User input
            area_mean,             # User input
            0.096,                 # smoothness_mean (avg)
            compactness_mean,      # User input
            concavity_mean,        # User input
            concave_points_mean,   # User input
            0.181,                 # symmetry_mean (avg)
            0.063,                 # fractal_dimension_mean (avg)
            0.4,                   # radius_se (avg)
            1.2,                   # texture_se (avg)
            2.9,                   # perimeter_se (avg)
            40.0,                  # area_se (avg)
            0.007,                 # smoothness_se (avg)
            0.025,                 # compactness_se (avg)
            0.032,                 # concavity_se (avg)
            0.012,                 # concave_points_se (avg)
            0.020,                 # symmetry_se (avg)
            0.003,                 # fractal_dimension_se (avg)
            radius_worst,          # User input
            texture_worst,         # User input
            perimeter_worst,       # User input
            area_worst,            # User input
            0.132,                 # smoothness_worst (avg)
            0.254,                 # compactness_worst (avg)
            0.272,                 # concavity_worst (avg)
            0.115,                 # concave_points_worst (avg)
            0.290,                 # symmetry_worst (avg)
            0.084                  # fractal_dimension_worst (avg)
        ]], dtype=float)

        # SECURITY: pickle.load can execute arbitrary code from the file.
        # Only load artifacts that this notebook itself wrote.
        with open('nb_model.pkl', 'rb') as model_file:
            model = pickle.load(model_file)
        with open('scaler.pkl', 'rb') as scaler_file:
            scaler = pickle.load(scaler_file)

        features_scaled = scaler.transform(features)

        prediction = model.predict(features_scaled)[0]
        probability = model.predict_proba(features_scaled)[0]

        if prediction == 0:
            result = "🟢 Benign"
            confidence = f"{probability[0]*100:.2f}%"
            message = "The tumor is predicted to be **benign** (non-cancerous)."
            color = "#10b981"
        else:
            result = "🔴 Malignant"
            confidence = f"{probability[1]*100:.2f}%"
            message = "The tumor is predicted to be **malignant** (cancerous). Please consult with a healthcare professional."
            color = "#ef4444"

        output_html = f"""
        <div style="padding: 30px; border-radius: 15px; background: linear-gradient(135deg, {color}15 0%, {color}05 100%); border: 2px solid {color}40;">
            <h2 style="color: {color}; margin: 0 0 15px 0; font-size: 28px;">{result}</h2>
            <p style="font-size: 16px; color: #374151; margin: 10px 0;">{message}</p>
            <p style="font-size: 20px; font-weight: 600; color: {color}; margin: 15px 0 0 0;">
                Confidence: {confidence}
            </p>
            <div style="margin-top: 15px; padding: 15px; background: white; border-radius: 8px;">
                <p style="margin: 0; color: #6b7280; font-size: 14px;">
                    <strong>Benign probability:</strong> {probability[0]*100:.2f}% |
                    <strong>Malignant probability:</strong> {probability[1]*100:.2f}%
                </p>
            </div>
        </div>
        """

        return output_html

    except Exception as e:
        # Escape the message: exception text may contain characters such as
        # '<' that would otherwise be interpreted as markup.
        return f"""
        <div style="padding: 20px; border-radius: 10px; background: #fee2e2; border: 2px solid #ef4444;">
            <p style="color: #991b1b; margin: 0;">⚠️ Error: {html.escape(str(e))}</p>
        </div>
        """

# Custom CSS handed to gr.Blocks below: constrains and centres the container,
# styles the <h1> title and the .subtitle paragraph, and hides <footer> elements.
custom_css = """
.gradio-container {
    font-family: 'Inter', sans-serif;
    max-width: 900px;
    margin: 0 auto;
}

h1 {
    text-align: center;
    color: #1f2937;
    font-size: 2.5em;
    margin-bottom: 10px;
    font-weight: 700;
}

.subtitle {
    text-align: center;
    color: #6b7280;
    font-size: 1.1em;
    margin-bottom: 30px;
}

footer {
    display: none !important;
}
"""

# Create the Gradio interface: a header, two columns of numeric inputs (mean
# and worst measurements), a predict button wired to predict_cancer, and a
# disclaimer. The emoji and the superscript in the labels were stored as
# mis-decoded bytes; they are written here as the intended characters.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as app:

    gr.HTML("""
        <div style="text-align: center; margin-bottom: 30px;">
            <h1>🔬 Breast Cancer Prediction</h1>
            <p class="subtitle">Enter the most important cell nucleus measurements</p>
        </div>
    """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📊 Mean Measurements")
            radius_mean = gr.Number(label="Radius Mean", value=14.0,
                                   info="Average radius of cell nuclei")
            texture_mean = gr.Number(label="Texture Mean", value=19.0,
                                    info="Standard deviation of gray-scale values")
            perimeter_mean = gr.Number(label="Perimeter Mean", value=92.0,
                                      info="Average perimeter of nuclei")
            area_mean = gr.Number(label="Area Mean", value=655.0,
                                 info="Average area of nuclei")
            compactness_mean = gr.Number(label="Compactness Mean", value=0.104,
                                        info="Perimeter² / area - 1.0")
            concavity_mean = gr.Number(label="Concavity Mean", value=0.089,
                                      info="Severity of concave portions")
            concave_points_mean = gr.Number(label="Concave Points Mean", value=0.049,
                                           info="Number of concave portions")

        with gr.Column():
            gr.Markdown("### 🔺 Worst Measurements")
            radius_worst = gr.Number(label="Radius Worst", value=16.3,
                                    info="Largest radius value")
            texture_worst = gr.Number(label="Texture Worst", value=25.7,
                                     info="Largest texture value")
            perimeter_worst = gr.Number(label="Perimeter Worst", value=107.0,
                                       info="Largest perimeter value")
            area_worst = gr.Number(label="Area Worst", value=880.0,
                                  info="Largest area value")

    predict_btn = gr.Button("🔍 Predict Diagnosis", variant="primary", size="lg")

    output = gr.HTML(label="Prediction Result")

    # The input order here must match predict_cancer's positional parameters.
    predict_btn.click(
        fn=predict_cancer,
        inputs=[
            radius_mean, texture_mean, perimeter_mean, area_mean,
            compactness_mean, concavity_mean, concave_points_mean,
            radius_worst, texture_worst, perimeter_worst, area_worst
        ],
        outputs=output
    )

    gr.HTML("""
        <div style="text-align: center; margin-top: 30px; padding: 20px; background: #f3f4f6; border-radius: 10px;">
            <p style="color: #6b7280; font-size: 0.9em; margin: 0;">
                ⚕️ <strong>Disclaimer:</strong> This is an AI prediction tool for educational purposes only.
                Always consult with qualified healthcare professionals for medical diagnosis and treatment.
            </p>
        </div>
    """)

# Launch the app
# NOTE(review): share=True appears to request a publicly reachable Gradio
# link — confirm that exposing this demo beyond the local machine is intended.
if __name__ == "__main__":
    app.launch(share=True)