<a href="https://colab.research.google.com/github/saivarunvanama/WEB-TECHNOLOGIES/blob/main/deepseek2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install required packages
!pip install pandas scikit-learn matplotlib seaborn pyngrok flask-ngrok gradio

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import gradio as gr
from pyngrok import ngrok

Collecting pyngrok
  Downloading pyngrok-7.2.4-py3-none-any.whl.metadata (8.7 kB)
Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)

In [None]:
def preprocess_data(file_path):
    """Load a connection-log CSV and convert it into a model-ready DataFrame.

    Steps: drop identifier columns, fill missing values (0 for numeric
    columns, 'Unknown' for object columns), one-hot encode the categorical
    columns that are present, and encode ``label`` as 0 (Benign) /
    1 (Malicious) when the column exists.

    Args:
        file_path: Path of the CSV file to load. Earlier versions of this
            function ignored the argument and always read the CTU capture;
            to keep such call sites working, a path that does not exist
            falls back to that legacy file with a warning.

    Returns:
        pandas.DataFrame with the transformed features and, if the input
        had one, the encoded ``label`` column. Label values other than
        'Benign' / 'Malicious' become NaN.
    """
    import os
    import warnings

    # The file the original implementation read unconditionally.
    legacy_path = '/content/CTU-IoT-ramsomware -Capture-1-1conn.log.labeled.csv'

    if isinstance(file_path, (str, os.PathLike)) and os.path.exists(file_path):
        source_path = file_path
    else:
        warnings.warn(
            f"preprocess_data: {file_path!r} not found; "
            f"falling back to {legacy_path!r}"
        )
        source_path = legacy_path

    # Load the dataset
    df = pd.read_csv(source_path)

    # Drop unnecessary columns
    df = df.drop(columns=['Unnamed: 0', 'id.orig_h', 'id.resp_h', 'history'], errors='ignore')

    # Fill missing values
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    df[numeric_cols] = df[numeric_cols].fillna(0)

    categorical_cols = df.select_dtypes(include=['object']).columns
    df[categorical_cols] = df[categorical_cols].fillna('Unknown')

    # Convert categorical features to numerical; only encode the columns
    # that exist so a file missing one of them does not raise KeyError.
    present = [c for c in ('proto', 'service', 'conn_state') if c in df.columns]
    if present:
        df = pd.get_dummies(df, columns=present)

    # Encode the target variable (absent for unlabeled uploads)
    if 'label' in df.columns:
        df['label'] = df['label'].map({'Benign': 0, 'Malicious': 1})

    return df

def train_model(df, test_size=0.2, n_estimators=100, random_state=42, stratify=False):
    """Train a RandomForestClassifier on a preprocessed frame and report metrics.

    Args:
        df: DataFrame containing the feature columns plus a ``label`` column.
        test_size: Fraction of rows held out for evaluation.
        n_estimators: Number of trees in the forest.
        random_state: Seed used for both the split and the forest.
        stratify: When True, the split preserves the label proportions.
            Defaults to False, which reproduces the original unstratified
            split.

    Returns:
        Tuple ``(model, X_test, y_test)``: the fitted classifier and the
        held-out features and labels.

    Side effects:
        Prints the test-set accuracy and the classification report.
    """
    # Split data into features and target
    X = df.drop(columns=['label'])
    y = df['label']

    # Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=test_size,
        random_state=random_state,
        stratify=y if stratify else None,
    )

    # Train Random Forest classifier
    model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)

    # Evaluate model on the held-out rows only
    y_pred = model.predict(X_test)
    print("Model Accuracy:", accuracy_score(y_test, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    return model, X_test, y_test

# Load and preprocess the training data.
# Pass the path of the dataset that is actually used for training (the CTU
# capture) instead of the placeholder name 'cypherstrome.csv'.
DATASET_PATH = '/content/CTU-IoT-ramsomware -Capture-1-1conn.log.labeled.csv'
df = preprocess_data(DATASET_PATH)

# Train the model
model, X_test, y_test = train_model(df)

# Save the model for later use
with open('ransomware_detector.pkl', 'wb') as f:
    pickle.dump(model, f)

Model Accuracy: 1.0

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       388
           1       1.00      1.00      1.00      4241

    accuracy                           1.00      4629
   macro avg       1.00      1.00      1.00      4629
weighted avg       1.00      1.00      1.00      4629



In [None]:
def plot_confusion_matrix(y_true, y_pred):
    """Draw a Benign/Malicious confusion-matrix heatmap.

    Args:
        y_true: Actual class labels (0 = Benign, 1 = Malicious).
        y_pred: Predicted class labels, same length as ``y_true``.

    Returns:
        The matplotlib Figure holding the heatmap. A Figure (rather than the
        ``pyplot`` module) is returned so callers can both ``.savefig(...)``
        it and pass it to ``plt.close(...)``.
    """
    # Pin the label order so the matrix is always 2x2 and lines up with the
    # tick labels, even when one class is missing from the inputs.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

    fig, ax = plt.subplots(figsize=(6, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Benign', 'Malicious'],
                yticklabels=['Benign', 'Malicious'],
                ax=ax)
    ax.set_title('Confusion Matrix')
    ax.set_ylabel('Actual')
    ax.set_xlabel('Predicted')
    return fig

def plot_feature_importance(model, feature_names, top_n=10):
    """Draw a horizontal bar chart of the model's most important features.

    Args:
        model: Fitted estimator exposing ``feature_importances_``.
        feature_names: Sequence of feature names, positionally aligned with
            ``model.feature_importances_``.
        top_n: Maximum number of features to show (default 10).

    Returns:
        The matplotlib Figure holding the chart. A Figure (rather than the
        ``pyplot`` module) is returned so callers can both ``.savefig(...)``
        it and pass it to ``plt.close(...)``.
    """
    importances = np.asarray(model.feature_importances_)
    names = list(feature_names)

    # argsort is ascending, so the tail holds the largest importances; barh
    # then draws the most important feature at the top.
    order = np.argsort(importances)
    indices = order[max(len(order) - top_n, 0):]

    fig, ax = plt.subplots(figsize=(10, 6))
    # Report the number of bars actually drawn (fewer than top_n when the
    # model has fewer features).
    ax.set_title(f'Top {len(indices)} Important Features')
    ax.barh(range(len(indices)), importances[indices], color='b', align='center')
    ax.set_yticks(range(len(indices)))
    ax.set_yticklabels([names[i] for i in indices])
    ax.set_xlabel('Relative Importance')
    return fig

def plot_label_distribution(y_true):
    """Draw a pie chart of the Benign (0) vs Malicious (1) class counts.

    Args:
        y_true: Series (or array-like) of class labels encoded as 0 / 1.

    Returns:
        The matplotlib Figure holding the pie chart. A Figure (rather than
        the ``pyplot`` module) is returned so callers can both
        ``.savefig(...)`` it and pass it to ``plt.close(...)``.
    """
    # value_counts() sorts by frequency, so without a fixed order the
    # 'Benign'/'Malicious' labels end up on the wrong slices whenever class 1
    # is the majority. Reindexing also keeps two entries when a class is absent.
    counts = pd.Series(y_true).value_counts().reindex([0, 1], fill_value=0)

    fig, ax = plt.subplots(figsize=(6, 6))
    counts.plot(kind='pie', autopct='%1.1f%%',
                labels=['Benign', 'Malicious'],
                colors=['lightgreen', 'lightcoral'],
                ax=ax)
    ax.set_title('Class Distribution in Predictions')
    ax.set_ylabel('')
    return fig

In [None]:
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

def detect_ransomware(file_obj):
    """Run the saved classifier on an uploaded CSV and produce report artifacts.

    Args:
        file_obj: Uploaded file; either an object with a ``.name`` attribute
            holding the path, or the path itself.

    Returns:
        Tuple ``(conf_matrix_path, feature_importance_path, label_dist_path,
        results)``. The first three are PNG paths written to the working
        directory (``conf_matrix_path`` is None when no labeled rows are
        available); ``results`` is a dict of summary statistics.
    """
    # Load the model.
    # NOTE(security): pickle.load can execute arbitrary code; only load the
    # file written by this notebook's training cell.
    with open('ransomware_detector.pkl', 'rb') as f:
        model = pickle.load(f)

    # Preprocess the uploaded file (accept both file objects and plain paths)
    upload_path = getattr(file_obj, 'name', file_obj)
    processed_df = preprocess_data(upload_path)

    # Prepare features
    X = processed_df.drop(columns=['label'], errors='ignore')

    # One-hot encoding depends on the categories present in each file, so
    # align the columns with those the model was fitted on: add missing ones
    # as 0, drop unseen ones, and restore the training order.
    expected_features = getattr(model, 'feature_names_in_', None)
    if expected_features is not None:
        X = X.reindex(columns=list(expected_features), fill_value=0)

    # Make predictions
    predictions = model.predict(X)

    # Add predictions to DataFrame
    processed_df['prediction'] = predictions

    # Plot Confusion Matrix, using only rows whose label is known
    conf_matrix_path = None
    if 'label' in processed_df.columns:
        known = processed_df['label'].notna()
        if known.any():
            plot_confusion_matrix(
                processed_df.loc[known, 'label'].astype(int),
                predictions[known.to_numpy()],
            )
            conf_matrix_path = _save_current_figure('confusion_matrix.png')

    # Plot Feature Importance
    plot_feature_importance(model, X.columns)
    feature_importance_path = _save_current_figure('feature_importance.png')

    # Plot Class Distribution (Safe vs Ransomware)
    safe_count = (predictions == 0).sum()
    ransomware_count = (predictions == 1).sum()

    plt.figure(figsize=(4, 4))
    plt.pie([safe_count, ransomware_count], labels=['Safe', 'Ransomware'],
            autopct='%1.1f%%', colors=['#4CAF50', '#F44336'])
    plt.title('Prediction Distribution: Safe vs Ransomware')
    label_dist_path = _save_current_figure('label_distribution.png')

    # Create summary stats
    total_samples = len(predictions)
    malicious_percent = (ransomware_count / total_samples) * 100 if total_samples else 0.0

    results = {
        "Total Samples": total_samples,
        "Safe Connections": int(safe_count),
        "Ransomware Connections": int(ransomware_count),
        "Ransomware Percentage": f"{malicious_percent:.2f}%"
    }

    return (conf_matrix_path, feature_importance_path, label_dist_path, results)


def _save_current_figure(path):
    """Save pyplot's current figure to ``path``, close it, and return ``path``."""
    # Use the current figure instead of a plot helper's return value:
    # plt.close() rejects the pyplot module, which the helpers may return.
    fig = plt.gcf()
    fig.savefig(path)
    plt.close(fig)
    return path


In [None]:
import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def detect_ransomware(file):
    """Demo handler: build placeholder charts and statistics for an uploaded CSV.

    WARNING: nothing here uses a trained model. The 'predicted' and 'actual'
    columns are derived from the row index and the feature importances are
    fixed constants, so the outputs are illustrative only. This definition
    also shadows the earlier model-backed ``detect_ransomware`` in this
    notebook.

    Args:
        file: Uploaded file; either an object with a ``.name`` attribute
            holding the path, or the path itself.

    Returns:
        Tuple ``(confusion_path, feature_path, class_dist_path, results)``:
        three PNG paths written to the working directory and a dict of
        summary statistics.

    Raises:
        ValueError: If the CSV contains no data rows.
    """
    # Read uploaded CSV file
    df = pd.read_csv(getattr(file, "name", file))
    if df.empty:
        # Fail with a clear message instead of an opaque plotting error or
        # a division by zero further down.
        raise ValueError("Uploaded CSV contains no data rows.")

    # Simulated prediction and actual columns for demo
    df['predicted'] = df.index % 2
    df['actual'] = (df.index + 1) % 2

    # Confusion Matrix (always 2x2, even when a class is absent)
    classes = [0, 1]
    cm = pd.crosstab(
        df['actual'], df['predicted'], rownames=['Actual'], colnames=['Predicted']
    ).reindex(index=classes, columns=classes, fill_value=0)
    fig, ax = plt.subplots(figsize=(4, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title("Confusion Matrix")
    confusion_path = "confusion_matrix.png"
    fig.savefig(confusion_path)
    plt.close(fig)

    # Feature Importance (dummy)
    features = ['duration', 'bytes_sent', 'bytes_received']
    importance = [0.4, 0.35, 0.25]
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.barplot(x=importance, y=features, ax=ax)
    ax.set_title("Feature Importance")
    feature_path = "feature_importance.png"
    fig.savefig(feature_path)
    plt.close(fig)

    # Safe vs Ransomware Pie Chart
    safe_count = (df['predicted'] == 0).sum()
    ransomware_count = (df['predicted'] == 1).sum()

    fig, ax = plt.subplots(figsize=(4, 4))
    ax.pie(
        [safe_count, ransomware_count],
        labels=['Safe', 'Ransomware'],
        colors=['#4CAF50', '#F44336'],
        autopct='%1.1f%%',
        startangle=90
    )
    ax.set_title("Prediction Distribution: Safe vs Ransomware")
    class_dist_path = "class_distribution.png"
    fig.savefig(class_dist_path)
    plt.close(fig)

    # Detection Results
    results = {
        "Total Records": len(df),
        "Safe Connections": int(safe_count),
        "Ransomware Connections": int(ransomware_count),
        "Ransomware Percentage": f"{(ransomware_count / len(df)) * 100:.2f}%"
    }

    return confusion_path, feature_path, class_dist_path, results

# Create Gradio interface (Modern syntax)
def create_interface():
    """Assemble the Gradio UI that wires a CSV upload to ``detect_ransomware``.

    Returns:
        A ``gr.Interface`` with one file input and four outputs: three image
        panels (read from file paths) followed by a JSON panel.
    """
    upload_box = gr.File(label="Upload Network Traffic CSV File", file_types=[".csv"])

    # Output order must match the tuple returned by detect_ransomware.
    image_titles = (
        "Confusion Matrix",
        "Feature Importance",
        "Prediction Distribution (Pie Chart)",
    )
    result_panels = [gr.Image(label=caption, type="filepath") for caption in image_titles]
    result_panels.append(gr.JSON(label="Detection Results"))

    blurb = """
        Upload a network traffic CSV file to detect potential ransomware activity.
        The system will analyze the data and provide visualizations of the detection results.
        """

    return gr.Interface(
        fn=detect_ransomware,
        inputs=upload_box,
        outputs=result_panels,
        title="Ransomware Detection System",
        description=blurb,
    )

# Launch the app
# Build the UI once and keep a module-level handle so later cells can reuse it.
interface = create_interface()
# Start serving. Per the captured output of this cell, Gradio enables
# share=True automatically in a hosted notebook and suggests debug=True to
# surface errors in Colab.
interface.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3e1ee09be3612152d0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
