<a href="https://colab.research.google.com/github/prashantmalan/NSE/blob/main/Anomalywithdash1610224_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from jupyter_dash import JupyterDash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
import joblib
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os

# Load and preprocess data
def read_data(transaction_url, classification_url):
    transaction_data = pd.read_csv(transaction_url)
    column_classification = pd.read_csv(classification_url)
    return transaction_data, column_classification

def preprocess_data(transaction_data, column_classification):
    transaction_data = transaction_data[~transaction_data.applymap(lambda x: ';' in str(x)).any(axis=1)]
    transaction_data.reset_index(drop=True, inplace=True)
    transaction_data = transaction_data.applymap(lambda x: str(x).replace(',', '') if isinstance(x, str) else x)
    transaction_data = transaction_data.applymap(lambda x: str(x).replace('+', '') if isinstance(x, str) else x)
    transaction_data = transaction_data[~transaction_data.applymap(lambda x: ';' in str(x)).any(axis=1)]

    feature_categories = column_classification.set_index('Column')['Type'].to_dict()
    numeric_features = [col for col, cat in feature_categories.items() if cat == 'Amt']
    numeric_features = [col for col in numeric_features if col in transaction_data.columns]

    all_nan_columns = transaction_data[numeric_features].columns[transaction_data[numeric_features].isna().all()].tolist()
    transaction_data[all_nan_columns] = 0

    return transaction_data, numeric_features

def create_test_data(transaction_data, numeric_features, multiplier=-1):
    test_data = transaction_data.tail(100).copy()
    for feature in numeric_features:
        test_data[feature] = pd.to_numeric(test_data[feature], errors='coerce')
    test_data[numeric_features] = test_data[numeric_features].fillna(0)
    test_data['Total notional quantity-Leg 1'] = test_data['Total notional quantity-Leg 1'].apply(lambda x: x * multiplier)
    return test_data

# Initialize data
transaction_data, column_classification = read_data(
    'https://raw.githubusercontent.com/prashantmalan/NSE/main/Pm_CFTC_2017649.csv',
    'https://raw.githubusercontent.com/prashantmalan/NSE/main/col_cat_5.csv'
)
transaction_data, numeric_features = preprocess_data(transaction_data, column_classification)
test_data = create_test_data(transaction_data, numeric_features)

# Initialize the Dash app
app = JupyterDash(__name__)

# Layout
app.layout = html.Div([
    html.H1("ML Pipeline Dashboard", style={'text-align': 'center', 'color': '#4CAF50'}),
    html.Div([
        html.Div([
            html.H3("Configure and Run Model", style={'color': '#2196F3'}),
            dcc.Dropdown(
                id='model-dropdown',
                options=[
                    {'label': 'Isolation Forest', 'value': 'isolation_forest'}
                ],
                value='isolation_forest',
                style={'width': '100%', 'margin-bottom': '10px'}
            ),
            dcc.Input(
                id='model-param-input',
                type='text',
                placeholder='Enter contamination level (e.g. 0.05)',
                value='0.05',
                style={'width': '100%', 'margin-bottom': '10px'}
            ),
            html.Div([
                html.Button('Train Model', id='train-button', n_clicks=0, style={'width': '48%', 'margin-right': '4%', 'background-color': '#1f77b4', 'color': 'white', 'border': 'none', 'padding': '10px', 'border-radius': '5px'}),
                html.Button('Run Prediction', id='predict-button', n_clicks=0, style={'width': '48%', 'background-color': '#ff7f0e', 'color': 'white', 'border': 'none', 'padding': '10px', 'border-radius': '5px'})
            ], style={'display': 'flex', 'justify-content': 'space-between', 'margin-bottom': '10px'}),
            dcc.Loading(
                id="loading-1",
                type="circle",
                children=[
                    html.Div(id='training-status', style={'text-align': 'center', 'margin-bottom': '10px', 'color': '#FF5722'})
                ]
            ),
            dcc.Graph(id='train-graph'),
            dcc.Loading(
                id="loading-2",
                type="circle",
                children=[
                    html.Div(id='prediction-status', style={'text-align': 'center', 'margin-bottom': '10px', 'color': '#FF5722'})
                ]
            ),
            dcc.Graph(id='prediction-graph')
        ], style={'width': '100%', 'padding': '20px', 'border': '1px solid #ccc', 'border-radius': '5px', 'box-shadow': '2px 2px 5px lightgrey'})
    ], style={'width': '80%', 'margin': 'auto'})
], style={'width': '100%', 'height': '100vh', 'background-color': '#f0f0f0'})

def plot_anomalies(transaction_data, anomaly_scores_df, numeric_features):
    n_features = len(numeric_features)
    n_cols = 4
    n_rows = int(np.ceil(n_features / n_cols))
    fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=numeric_features, horizontal_spacing=0.05, vertical_spacing=0.05)

    for i, feature in enumerate(numeric_features):
        row = i // n_cols + 1
        col = i % n_cols + 1

        sorted_indices = transaction_data[feature].sort_values().index
        normal_data = pd.to_numeric(transaction_data.loc[sorted_indices, feature][anomaly_scores_df[feature] == 1])
        anomalous_data = pd.to_numeric(transaction_data.loc[sorted_indices, feature][anomaly_scores_df[feature] == -1])

        fig.add_trace(go.Scatter(
            x=sorted_indices[anomaly_scores_df[feature] == 1],
            y=normal_data,
            mode='markers',
            name='Normal',
            customdata=anomaly_scores_df.loc[sorted_indices][anomaly_scores_df[feature] == 1][['Product name', 'Action type']],
            hovertemplate='Product name: %{customdata[0]}<br>Action type: %{customdata[1]}<br>Value: %{y}<extra></extra>',
            marker=dict(size=8, color='green', opacity=0.7)
        ), row=row, col=col)

        fig.add_trace(go.Scatter(
            x=sorted_indices[anomaly_scores_df[feature] == -1],
            y=anomalous_data,
            mode='markers',
            name='Anomalous',
            customdata=anomaly_scores_df.loc[sorted_indices][anomaly_scores_df[feature] == -1][['Product name', 'Action type']],
            hovertemplate='Product name: %{customdata[0]}<br>Action type: %{customdata[1]}<br>Value: %{y}<extra></extra>',
            marker=dict(size=8, color='red', opacity=0.7)
        ), row=row, col=col)

        fig.update_yaxes(range=[normal_data.min(), normal_data.max()], row=row, col=col)

    fig.update_layout(
        title='Anomaly Scores for Each Feature',
        showlegend=False,
        hovermode='closest',
        font=dict(size=10),
        autosize=False,
        width=1500,
        height=1200
    )
    return fig

def load_and_predict(transaction_data, numeric_features, model_dir='models'):
    anomaly_scores_df = pd.DataFrame(index=transaction_data.index)
    for feature in numeric_features:
        model_path = os.path.join(model_dir, f'model_anomaly_{feature}.joblib')
        pipeline = joblib.load(model_path)
        feature_data = transaction_data[[feature]].copy()
        scores = pipeline.predict(feature_data)
        anomaly_scores_df[feature] = scores
    anomaly_scores_df['Product name'] = transaction_data['Product name']
    anomaly_scores_df['Action type'] = transaction_data['Action type']
    return anomaly_scores_df

# Train models and plot
@app.callback(
    [Output('train-graph', 'figure'),
     Output('training-status', 'children')],
    [Input('train-button', 'n_clicks')],
    [State('model-param-input', 'value')]
)
def train_models(n_clicks, contamination_level):
    if n_clicks > 0:
        model_dir = 'models'
        os.makedirs(model_dir, exist_ok=True)
        pipeline_template = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler()),
            ('isolation_forest', IsolationForest(contamination=float(contamination_level), random_state=42))
        ])

        for feature in numeric_features:
            feature_data = transaction_data[[feature]].copy()
            pipeline = pipeline_template.fit(feature_data)
            model_path = os.path.join(model_dir, f'model_anomaly_{feature}.joblib')
            joblib.dump(pipeline, model_path)

        # Predict anomalies on training data for visualization
        anomaly_scores_df = load_and_predict(transaction_data, numeric_features)
        return plot_anomalies(transaction_data, anomaly_scores_df, numeric_features), "Training Complete"

    return go.Figure(), "Awaiting Training"

# Test models and plot
@app.callback(
    [Output('prediction-graph', 'figure'),
     Output('prediction-status', 'children')],
    [Input('predict-button', 'n_clicks')]
)
def test_models(n_clicks):
    if n_clicks > 0:
        # Predict anomalies on test data
        anomaly_scores_df = load_and_predict(test_data, numeric_features)
        return plot_anomalies(test_data, anomaly_scores_df, numeric_features), "Prediction Complete"

    return go.Figure(), "Awaiting Prediction"

# Run the app
app.run_server(mode='inline')

ModuleNotFoundError: No module named 'jupyter_dash'

In [3]:
from jupyter_dash import JupyterDash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
import joblib
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os

# Load and preprocess data
def read_data(transaction_url, classification_url):
    transaction_data = pd.read_csv(transaction_url)
    column_classification = pd.read_csv(classification_url)
    return transaction_data, column_classification

def preprocess_data(transaction_data, column_classification):
    transaction_data = transaction_data[~transaction_data.applymap(lambda x: ';' in str(x)).any(axis=1)]
    transaction_data.reset_index(drop=True, inplace=True)
    transaction_data = transaction_data.applymap(lambda x: str(x).replace(',', '') if isinstance(x, str) else x)
    transaction_data = transaction_data.applymap(lambda x: str(x).replace('+', '') if isinstance(x, str) else x)
    transaction_data = transaction_data[~transaction_data.applymap(lambda x: ';' in str(x)).any(axis=1)]

    feature_categories = column_classification.set_index('Column')['Type'].to_dict()
    numeric_features = [col for col, cat in feature_categories.items() if cat == 'Amt']
    numeric_features = [col for col in numeric_features if col in transaction_data.columns]

    all_nan_columns = transaction_data[numeric_features].columns[transaction_data[numeric_features].isna().all()].tolist()
    transaction_data[all_nan_columns] = 0

    return transaction_data, numeric_features

def create_test_data(transaction_data, numeric_features, multiplier=-1):
    test_data = transaction_data.tail(100).copy()
    for feature in numeric_features:
        test_data[feature] = pd.to_numeric(test_data[feature], errors='coerce')
    test_data[numeric_features] = test_data[numeric_features].fillna(0)
    test_data['Total notional quantity-Leg 1'] = test_data['Total notional quantity-Leg 1'].apply(lambda x: x * multiplier)
    return test_data

# Initialize data
transaction_data, column_classification = read_data(
    'https://raw.githubusercontent.com/prashantmalan/NSE/main/Pm_CFTC_2017649.csv',
    'https://raw.githubusercontent.com/prashantmalan/NSE/main/col_cat_5.csv'
)
transaction_data, numeric_features = preprocess_data(transaction_data, column_classification)
test_data = create_test_data(transaction_data, numeric_features)

# Initialize the Dash app
app = JupyterDash(__name__)

# Layout
app.layout = html.Div([
    html.H1("ML Pipeline Dashboard", style={'text-align': 'center', 'color': '#4CAF50'}),
    html.Div([
        html.Div([
            html.H3("Configure and Run Model", style={'color': '#2196F3'}),
            dcc.Dropdown(
                id='model-dropdown',
                options=[
                    {'label': 'Isolation Forest', 'value': 'isolation_forest'}
                ],
                value='isolation_forest',
                style={'width': '100%', 'margin-bottom': '10px'}
            ),
            dcc.Input(
                id='model-param-input',
                type='text',
                placeholder='Enter contamination level (e.g. 0.05)',
                value='0.05',
                style={'width': '100%', 'margin-bottom': '10px'}
            ),
            html.Div([
                html.Button('Train Model', id='train-button', n_clicks=0, style={'width': '48%', 'margin-right': '4%', 'background-color': '#1f77b4', 'color': 'white', 'border': 'none', 'padding': '10px', 'border-radius': '5px'}),
                html.Button('Run Prediction', id='predict-button', n_clicks=0, style={'width': '48%', 'background-color': '#ff7f0e', 'color': 'white', 'border': 'none', 'padding': '10px', 'border-radius': '5px'})
            ], style={'display': 'flex', 'justify-content': 'space-between', 'margin-bottom': '10px'}),
            dcc.Loading(
                id="loading-1",
                type="circle",
                children=[
                    html.Div(id='training-status', style={'text-align': 'center', 'margin-bottom': '10px', 'color': '#FF5722'})
                ]
            ),
            dcc.Graph(id='train-graph'),
            dcc.Loading(
                id="loading-2",
                type="circle",
                children=[
                    html.Div(id='prediction-status', style={'text-align': 'center', 'margin-bottom': '10px', 'color': '#FF5722'})
                ]
            ),
            dcc.Graph(id='prediction-graph')
        ], style={'width': '100%', 'padding': '20px', 'border': '1px solid #ccc', 'border-radius': '5px', 'box-shadow': '2px 2px 5px lightgrey'})
    ], style={'width': '80%', 'margin': 'auto'})
], style={'width': '100%', 'height': '100vh', 'background-color': '#f0f0f0'})

def plot_anomalies(transaction_data, anomaly_scores_df, numeric_features):
    n_features = len(numeric_features)
    n_cols = 4
    n_rows = int(np.ceil(n_features / n_cols))
    fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=numeric_features, horizontal_spacing=0.05, vertical_spacing=0.05)

    for i, feature in enumerate(numeric_features):
        row = i // n_cols + 1
        col = i % n_cols + 1

        sorted_indices = transaction_data[feature].sort_values().index
        normal_data = pd.to_numeric(transaction_data.loc[sorted_indices, feature][anomaly_scores_df[feature] == 1])
        anomalous_data = pd.to_numeric(transaction_data.loc[sorted_indices, feature][anomaly_scores_df[feature] == -1])

        fig.add_trace(go.Scatter(
            x=sorted_indices[anomaly_scores_df[feature] == 1],
            y=normal_data,
            mode='markers',
            name='Normal',
            customdata=anomaly_scores_df.loc[sorted_indices][anomaly_scores_df[feature] == 1][['Product name', 'Action type']],
            hovertemplate='Product name: %{customdata[0]}<br>Action type: %{customdata[1]}<br>Value: %{y}<extra></extra>',
            marker=dict(size=8, color='green', opacity=0.7)
        ), row=row, col=col)

        fig.add_trace(go.Scatter(
            x=sorted_indices[anomaly_scores_df[feature] == -1],
            y=anomalous_data,
            mode='markers',
            name='Anomalous',
            customdata=anomaly_scores_df.loc[sorted_indices][anomaly_scores_df[feature] == -1][['Product name', 'Action type']],
            hovertemplate='Product name: %{customdata[0]}<br>Action type: %{customdata[1]}<br>Value: %{y}<extra></extra>',
            marker=dict(size=8, color='red', opacity=0.7)
        ), row=row, col=col)

        fig.update_yaxes(range=[normal_data.min(), normal_data.max()], row=row, col=col)

    fig.update_layout(
        title='Anomaly Scores for Each Feature',
        showlegend=False,
        hovermode='closest',
        font=dict(size=10),
        autosize=False,
        width=1500,
        height=1200
    )
    return fig

def load_and_predict(transaction_data, numeric_features, model_dir='models'):
    anomaly_scores_df = pd.DataFrame(index=transaction_data.index)
    for feature in numeric_features:
        model_path = os.path.join(model_dir, f'model_anomaly_{feature}.joblib')
        pipeline = joblib.load(model_path)
        feature_data = transaction_data[[feature]].copy()
        scores = pipeline.predict(feature_data)
        anomaly_scores_df[feature] = scores
    anomaly_scores_df['Product name'] = transaction_data['Product name']
    anomaly_scores_df['Action type'] = transaction_data['Action type']
    return anomaly_scores_df

# Train models and plot
@app.callback(
    [Output('train-graph', 'figure'),
     Output('training-status', 'children')],
    [Input('train-button', 'n_clicks')],
    [State('model-param-input', 'value')]
)
def train_models(n_clicks, contamination_level):
    if n_clicks > 0:
        model_dir = 'models'
        os.makedirs(model_dir, exist_ok=True)
        pipeline_template = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler()),
            ('isolation_forest', IsolationForest(contamination=float(contamination_level), random_state=42))
        ])

        for feature in numeric_features:
            feature_data = transaction_data[[feature]].copy()
            pipeline = pipeline_template.fit(feature_data)
            model_path = os.path.join(model_dir, f'model_anomaly_{feature}.joblib')
            joblib.dump(pipeline, model_path)

        # Predict anomalies on training data for visualization
        anomaly_scores_df = load_and_predict(transaction_data, numeric_features)
        return plot_anomalies(transaction_data, anomaly_scores_df, numeric_features), "Training Complete"

    return go.Figure(), "Awaiting Training"

# Test models and plot
@app.callback(
    [Output('prediction-graph', 'figure'),
     Output('prediction-status', 'children')],
    [Input('predict-button', 'n_clicks')]
)
def test_models(n_clicks):
    if n_clicks > 0:
        # Predict anomalies on test data
        anomaly_scores_df = load_and_predict(test_data, numeric_features)
        return plot_anomalies(test_data, anomaly_scores_df, numeric_features), "Prediction Complete"

    return go.Figure(), "Awaiting Prediction"

# Run the app
app.run_server(mode='inline', debug=True)

  transaction_data = pd.read_csv(transaction_url)
  transaction_data = transaction_data[~transaction_data.applymap(lambda x: ';' in str(x)).any(axis=1)]
  transaction_data = transaction_data.applymap(lambda x: str(x).replace(',', '') if isinstance(x, str) else x)
  transaction_data = transaction_data.applymap(lambda x: str(x).replace('+', '') if isinstance(x, str) else x)
  transaction_data = transaction_data[~transaction_data.applymap(lambda x: ';' in str(x)).any(axis=1)]

JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [2]:
pip install jupyter-dash

Collecting jupyter-dash
  Downloading jupyter_dash-0.4.2-py3-none-any.whl.metadata (3.6 kB)
Collecting dash (from jupyter-dash)
  Downloading dash-2.18.1-py3-none-any.whl.metadata (10 kB)
Collecting retrying (from jupyter-dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Collecting ansi2html (from jupyter-dash)
  Downloading ansi2html-1.9.2-py3-none-any.whl.metadata (3.7 kB)
Collecting dash-html-components==2.0.0 (from dash->jupyter-dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash->jupyter-dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash->jupyter-dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting jedi>=0.16 (from ipython->jupyter-dash)
  Downloading jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)
Downloading jupyter_dash-0.4.2-py3-none-any.whl (23 kB)
Downloading ansi2html-1.9.2-