<a href="https://colab.research.google.com/github/sarangchisim/projects-readme.txt/blob/main/netguardml_v4_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
!pip install streamlit pandas numpy scikit-learn seaborn matplotlib xgboost
streamlit_code = """
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import plotly.express as px
import xgboost as xgb
import seaborn as sns
import matplotlib.pyplot as plt

# Streamlit app configuration: set the page title and wide layout, then render
# the heading that appears above whichever step is active.
st.set_page_config(page_title="NetGuardML", layout="wide")
st.title("NetGuardML - Intrusion Detection System")

# Initialize session state with default values
def initialize_session_state():
    '''Seed st.session_state with a default for every key the wizard uses.

    Keys that already exist are left untouched, so calling this on each
    script run never overwrites progress made in earlier runs.
    '''
    # Every pipeline artefact starts out empty; the wizard starts at step 1.
    defaults = {
        'data': None,
        'target_column': None,
        'model': None,
        'X_test': None,
        'y_test': None,
        'preprocessor': None,
        'X_train_processed': None,
        'X_test_processed': None,
        'y_train': None,
        'step': 1,
    }
    for key, default_value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default_value

# Called at module level so it runs on each execution of the script; only keys
# that are still missing receive their defaults.
initialize_session_state()

# Reset session state to Step 1
def reset_to_step_1():
    '''Clear every stored pipeline artefact and send the wizard back to step 1.'''
    cleared_keys = (
        'data',
        'target_column',
        'model',
        'X_test',
        'y_test',
        'preprocessor',
        'X_train_processed',
        'X_test_processed',
        'y_train',
    )
    for key in cleared_keys:
        st.session_state[key] = None
    st.session_state['step'] = 1

# Navigation buttons
def show_navigation_buttons():
    '''Render the "back" and "reset" buttons side by side.

    Both buttons are only offered past step 1. Pressing either one updates the
    session state and requests a rerun so the new step is drawn.
    '''
    back_col, reset_col = st.columns(2)

    with back_col:
        if st.session_state.step > 1 and st.button("One Step Back"):
            st.session_state.step -= 1
            st.rerun()

    with reset_col:
        if st.session_state.step > 1 and st.button("Reset to 1st Step"):
            reset_to_step_1()
            st.rerun()

# Step 1: Upload and preprocess data
if st.session_state.step == 1:
    st.header("Step 1: Upload and Preprocess Data")
    uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
    if uploaded_file is not None:
        try:
            uploaded_data = pd.read_csv(uploaded_file)
        except Exception as e:
            st.error(f"Error loading CSV file: {e}")
        else:
            # Later steps need at least one row plus a target and one feature
            # column; reject unusable files here instead of failing in Step 2.
            if uploaded_data.empty or uploaded_data.shape[1] < 2:
                st.error("The CSV file must contain at least one row and at least two columns "
                         "(one target plus one or more features).")
            else:
                # Only store the frame once it has passed validation.
                st.session_state.data = uploaded_data
                st.write("Data Preview:")
                st.dataframe(uploaded_data.head())
                st.session_state.step = 2
                st.rerun()

# Step 2: Select target and features
if st.session_state.step == 2 and st.session_state.data is not None:
    st.header("Step 2: Select Target and Features")

    # Display the dataset
    st.subheader("Dataset Preview")
    st.dataframe(st.session_state.data, height=300)

    # Select target column
    columns = st.session_state.data.columns.tolist()
    st.session_state.target_column = st.selectbox("Select Target Column", columns,
                                                 help="Choose the column to predict (e.g., xAttack)")

    # Select columns to drop (optional)
    columns_to_drop = st.multiselect("Select columns to drop (optional)",
                                    [col for col in columns if col != st.session_state.target_column],
                                    help="Optionally select columns to exclude from analysis")

    # Select columns to encode (optional)
    encode_cols = st.multiselect("Select categorical columns to encode (optional)",
                                [col for col in columns if col != st.session_state.target_column and col not in columns_to_drop],
                                help="Optionally select categorical columns for encoding (e.g., protocol_type, service, flag)")

    # The ColumnTransformer below only keeps numeric columns and the columns
    # picked for encoding, so tell the user which columns would be left out.
    candidate_features = st.session_state.data.drop(
        columns=list(columns_to_drop) + [st.session_state.target_column], errors='ignore')
    ignored_cols = [
        col for col in candidate_features.select_dtypes(exclude='number').columns
        if col not in encode_cols
    ]
    if ignored_cols:
        st.info("Non-numeric columns that are not selected for encoding will be left out of the model: "
                + ", ".join(map(str, ignored_cols)))

    if st.button("Preprocess Data"):
        try:
            # Prepare data
            data = st.session_state.data.drop(columns=columns_to_drop, errors='ignore')
            X = data.drop(columns=[st.session_state.target_column])
            y = data[st.session_state.target_column]

            # Treat every numeric dtype as numeric (int32, float32, uint8, ...),
            # not just int64/float64, so such columns are not silently dropped.
            numeric_features = X.select_dtypes(include='number').columns.tolist()
            categorical_features = [col for col in encode_cols if col in X.columns]

            if not numeric_features and not categorical_features:
                raise ValueError("No usable feature columns: keep at least one numeric column "
                                 "or select at least one column to encode.")

            # sparse_threshold=0 forces a dense result so that every estimator
            # used later (including PCA) receives the same array type.
            preprocessor = ColumnTransformer(
                transformers=[
                    ('num', Pipeline([
                        ('imputer', SimpleImputer(strategy='mean')),
                        ('scaler', StandardScaler())
                    ]), numeric_features),
                    ('cat', Pipeline([
                        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
                        ('onehot', OneHotEncoder(handle_unknown='ignore'))
                    ]), categorical_features)
                ],
                sparse_threshold=0)

            # Split data
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

            # Fit on the training split only, then apply the same transform to the test split
            X_train_processed = preprocessor.fit_transform(X_train)
            X_test_processed = preprocessor.transform(X_test)

            # Store in session state
            st.session_state.preprocessor = preprocessor
            st.session_state.X_test = X_test
            st.session_state.y_test = y_test
            st.session_state.X_train_processed = X_train_processed
            st.session_state.X_test_processed = X_test_processed
            st.session_state.y_train = y_train
            # A model trained against an earlier preprocessing run no longer
            # matches these features, so discard it.
            st.session_state.model = None

            st.success("Data preprocessed successfully!")
            st.session_state.step = 3
            st.rerun()
        except Exception as e:
            st.error(f"Error during preprocessing: {e}")

    show_navigation_buttons()

# Step 3: Train models and visualize
if st.session_state.step == 3 and 'X_train_processed' in st.session_state and st.session_state.X_train_processed is not None:
    st.header("Step 3: Train Models and Visualize")

    # Model selection
    model_choice = st.selectbox("Select Model", ["Random Forest", "XGBoost", "Logistic Regression", "Decision Tree", "SVM", "KNN"])

    # Clustering parameters
    st.subheader("Clustering Analysis")
    n_clusters = st.slider("Select number of clusters", min_value=2, max_value=10, value=4)

    if st.button("Train Model and Perform Clustering"):
        try:
            # Train selected model
            if model_choice == "Random Forest":
                model = RandomForestClassifier(n_estimators=100, random_state=42)
            elif model_choice == "XGBoost":
                # NOTE(review): recent xgboost releases appear to reject non-integer
                # class labels; string targets may need label-encoding first - confirm
                # against the installed version.
                model = xgb.XGBClassifier(random_state=42)
            elif model_choice == "Logistic Regression":
                model = LogisticRegression(max_iter=1000, random_state=42)
            elif model_choice == "Decision Tree":
                model = DecisionTreeClassifier(random_state=42)
            elif model_choice == "SVM":
                model = SVC(kernel='linear', random_state=42)
            else:  # KNN
                model = KNeighborsClassifier(n_neighbors=5)

            model.fit(st.session_state.X_train_processed, st.session_state.y_train)
            st.session_state.model = model

            # Evaluate model. zero_division=0 yields the same scores as the
            # default but without a warning when a class is never predicted.
            y_pred = model.predict(st.session_state.X_test_processed)
            accuracy = accuracy_score(st.session_state.y_test, y_pred)
            precision = precision_score(st.session_state.y_test, y_pred, average='weighted', zero_division=0)
            recall = recall_score(st.session_state.y_test, y_pred, average='weighted', zero_division=0)
            f1 = f1_score(st.session_state.y_test, y_pred, average='weighted', zero_division=0)

            st.write("Model Performance:")
            st.write(f"Accuracy: {accuracy:.4f}")
            st.write(f"Precision: {precision:.4f}")
            st.write(f"Recall: {recall:.4f}")
            st.write(f"F1 Score: {f1:.4f}")

            # Feature importance (for Random Forest and Decision Tree)
            if model_choice in ["Random Forest", "Decision Tree"]:
                # Ask the fitted ColumnTransformer for the output names. This
                # skips transformers that received no columns (e.g. 'cat' when
                # nothing was selected for encoding) and reports real column
                # names instead of positional placeholders.
                feature_names = st.session_state.preprocessor.get_feature_names_out()
                feature_importance = pd.DataFrame({
                    'Feature': feature_names,
                    'Importance': model.feature_importances_,
                }).sort_values('Importance', ascending=False)

                fig, ax = plt.subplots()
                sns.barplot(x='Importance', y='Feature', data=feature_importance.head(10), ax=ax)
                st.pyplot(fig)
                # Release the figure; otherwise figures accumulate across reruns.
                plt.close(fig)

            # Confusion matrix
            cm = confusion_matrix(st.session_state.y_test, y_pred)
            fig, ax = plt.subplots()
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
            ax.set_xlabel('Predicted')
            ax.set_ylabel('Actual')
            st.pyplot(fig)
            plt.close(fig)

            # Clustering
            st.subheader("Clustering Results")
            X_train_matrix = st.session_state.X_train_processed
            kmeans = KMeans(n_clusters=n_clusters, random_state=42)
            cluster_labels = kmeans.fit_predict(X_train_matrix)

            # PCA for visualization. Densify first in case the preprocessor
            # produced a sparse matrix.
            X_train_dense = X_train_matrix.toarray() if hasattr(X_train_matrix, 'toarray') else X_train_matrix
            pca = PCA(n_components=2)
            X_pca = pca.fit_transform(X_train_dense)

            # Create DataFrame for plotting. y_train is converted to a plain
            # array so all four columns line up by position rather than by the
            # shuffled index that train_test_split leaves on the Series.
            plot_data = pd.DataFrame({
                'PC1': X_pca[:, 0],
                'PC2': X_pca[:, 1],
                'Cluster': cluster_labels,
                'Attack': np.asarray(st.session_state.y_train)
            })

            # Plotly scatter plot
            fig = px.scatter(plot_data, x='PC1', y='PC2', color='Attack',
                            symbol='Cluster', title='K-Means Clustering with PCA',
                            labels={'PC1': 'Principal Component 1', 'PC2': 'Principal Component 2'})
            st.plotly_chart(fig)

        except Exception as e:
            st.error(f"Error during model training or clustering: {e}")

    # Next button to advance to Step 4. Step 4 needs a trained model, so the
    # button stays disabled until one has been stored.
    if st.button("Next", disabled=st.session_state.model is None):
        st.session_state.step = 4
        st.rerun()

    show_navigation_buttons()

# Step 4: Predict on new data
if st.session_state.step == 4:
    st.header("Step 4: Predict on New Data")

    model_ready = 'model' in st.session_state and st.session_state.model is not None
    if not model_ready:
        # Without this branch the page would be empty, with no way to navigate back.
        st.warning("No trained model is available. Go back to Step 3 and train a model first.")
    else:
        new_file = st.file_uploader("Upload new CSV for predictions", type=["csv"])

        if new_file is not None:
            try:
                new_data = pd.read_csv(new_file)
                if st.button("Predict"):
                    # Ensure new data has same columns as training data (except target)
                    expected_columns = st.session_state.X_test.columns
                    missing_columns = [col for col in expected_columns if col not in new_data.columns]
                    if not missing_columns:
                        new_data_processed = st.session_state.preprocessor.transform(new_data[expected_columns])
                        predictions = st.session_state.model.predict(new_data_processed)

                        # Add predictions to a copy so the uploaded frame is left unmodified
                        results = new_data.copy()
                        results['Predicted_Attack'] = predictions
                        st.write("Predictions:")
                        st.write(results)

                        # Download predictions
                        csv = results.to_csv(index=False)
                        st.download_button("Download Predictions", csv, "predictions.csv", "text/csv")
                    else:
                        st.error("New data must have the same columns as the training data (excluding target).")
                        # Name the missing columns so the user knows what to fix.
                        missing_text = ", ".join(map(str, missing_columns))
                        st.write(f"Missing columns: {missing_text}")
            except Exception as e:
                st.error(f"Error processing new data: {e}")

    show_navigation_buttons()

# Footer credit, written after the step-specific content.
st.write("____NetGuardML - By Team Z3r0___")
"""

# Write the app source to app.py so it can be launched with `streamlit run`.
# The encoding is set explicitly so the file does not depend on the platform default.
with open('app.py', 'w', encoding='utf-8') as f:
    f.write(streamlit_code)



In [13]:
# Start the Streamlit server in the background (&), then open a localtunnel
# to port 8501 so the app is reachable from outside the notebook host.
!streamlit run app.py & npx localtunnel --port 8501

[1G[0K⠙[1G[0K⠹[1G[0K⠸
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K⠼[1G[0K⠴[1G[0K⠦[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.32.204.204:8501[0m
[0m
[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0Kyour url is: https://yummy-lands-hope.loca.lt
[34m  Stopping...[0m
^C
