In [1]:
#pip install dash plotly


In [2]:
import xgboost as xgb
import pandas as pd
import numpy as np

df = pd.read_csv('creditcard_2023.csv')  

df.drop(columns= 'id', inplace= True)


X = df.drop('Class', axis=1)  
y = df['Class']


model = xgb.XGBClassifier()  
model.fit(X, y)

importance = model.feature_importances_

# Extract feature importance and convert to percentages
booster = model.get_booster()
importance_dict = booster.get_score(importance_type='weight')
importance = np.array(list(importance_dict.values()))
importance_percentage = 100 * importance / np.sum(importance) 

# Convert importance to a DataFrame for easier handling
importance_df = pd.DataFrame({
    'Feature': importance_dict.keys(),
    'Importance_Percentage': importance_percentage
})

features_to_keep = importance_df[importance_df['Importance_Percentage'] >= 3.5]['Feature'].tolist()

X_filtered = X[features_to_keep]

# Display the filtered dataset
print(f"Original number of features: {X.shape[1]}")
print(f"Number of features retained: {X_filtered.shape[1]}")
print(f"Features retained: {features_to_keep}")


Original number of features: 29
Number of features retained: 13
Features retained: ['V1', 'V2', 'V3', 'V4', 'V7', 'V10', 'V11', 'V12', 'V14', 'V15', 'V17', 'V19', 'V26']


In [None]:
features_to_keep = importance_df[importance_df['Importance_Percentage'] >= 3.5]['Feature'].tolist()

features_to_drop = [feature for feature in df.columns if feature not in features_to_keep and feature != 'Class'] 

da_filtered = df.drop(columns=features_to_drop)

print(f"Original number of features: {df.shape[1]}")
print(f"Number of features retained: {da_filtered.shape[1]}")
print(f"Retained features: {features_to_keep}")
print(f"Dropped features: {features_to_drop}")


Original number of features: 30
Number of features retained: 14
Retained features: ['V1', 'V2', 'V3', 'V4', 'V7', 'V10', 'V11', 'V12', 'V14', 'V15', 'V17', 'V19', 'V26']
Dropped features: ['V5', 'V6', 'V8', 'V9', 'V13', 'V16', 'V18', 'V20', 'V21', 'V22', 'V23', 'V24', 'V25', 'V27', 'V28', 'Amount']


In [4]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go
import xgboost as xgb
import pandas as pd
import numpy as np

# Load your dataset
df = pd.read_csv('creditcard_2023.csv')  # Replace with the path to your data

df.drop(columns='id', inplace=True)

# Separate features and target
X = df.drop('Class', axis=1)  # Assuming 'Class' is the target
y = df['Class']

# Train XGBoost model
model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model.fit(X, y)

# Get feature importance
booster = model.get_booster()
importance_dict = booster.get_score(importance_type='weight')
importance = np.array(list(importance_dict.values()))
importance_percentage = 100 * importance / np.sum(importance)

# Convert importance to a DataFrame for easier handling
importance_df = pd.DataFrame({
    'Feature': importance_dict.keys(),
    'Importance_Percentage': importance_percentage
})

features_to_keep = importance_df[importance_df['Importance_Percentage'] >= 3.5]['Feature'].tolist()

# Filter dataset based on important features
features_to_drop = [feature for feature in df.columns if feature not in features_to_keep and feature != 'Class']
da_filtered = df.drop(columns=features_to_drop)

# Initialize the Dash app
app = dash.Dash(__name__)

# Create Plotly figures using the filtered DataFrame
fig1 = px.histogram(df, x='Amount', title='Transaction Amount Distribution', template='plotly_dark')
fig2 = px.scatter(da_filtered, x='V1', y='V2', color='Class', title='V1 vs V2 Scatter Plot', template='plotly_dark')
fig3 = px.box(df, y='Amount', title='Transaction Amount Box Plot', template='plotly_dark')

# Heatmap for correlation of the filtered DataFrame
corr = da_filtered.corr()
fig4 = go.Figure(data=go.Heatmap(
    z=corr.values,
    x=corr.columns,
    y=corr.columns,
    colorscale='Viridis'
))
fig4.update_layout(title='Feature Correlation Heatmap (Filtered Features)', template='plotly_dark')

# Pie chart for class distribution
fig5 = px.pie(df, names='Class', title='Class Distribution', template='plotly_dark', 
              color_discrete_sequence=px.colors.sequential.RdBu)

# Feature Importance Plot
fig6 = px.bar(
    importance_df.sort_values(by='Importance_Percentage', ascending=False).head(10),
    x='Importance_Percentage',
    y='Feature',
    orientation='h',
    title='Top 10 Most Important Features',
    template='plotly_dark'
).update_layout(yaxis={'categoryorder': 'total ascending'})

# Layout of the Dash app
app.layout = html.Div([
    html.H1('Credit Card Fraud Detection Dashboard', style={'text-align': 'center', 'padding': '20px', 'color': 'white'}),

    html.Div([
        html.H3('Transaction Amount Distribution'),
        dcc.Graph(figure=fig1)
    ], style={'margin': '20px'}),

    html.Div([
        html.H3('V1 vs V2 Scatter Plot'),
        dcc.Dropdown(
            id='x-axis-feature',
            options=[{'label': feature, 'value': feature} for feature in da_filtered.columns if feature != 'Class'],
            value=da_filtered.columns[0],  # Default x-axis feature
            clearable=False
        ),
        dcc.Dropdown(
            id='y-axis-feature',
            options=[{'label': feature, 'value': feature} for feature in da_filtered.columns if feature != 'Class'],
            value=da_filtered.columns[1],  # Default y-axis feature
            clearable=False
        ),
        dcc.Graph(id='scatter-plot')
    ], style={'margin': '20px'}),

    html.Div([
        html.H3('Transaction Amount Box Plot'),
        dcc.Graph(figure=fig3)
    ], style={'margin': '20px'}),

    html.Div([
        html.H3('Feature Correlation Heatmap (Filtered Features)'),
        dcc.Graph(figure=fig4)
    ], style={'margin': '20px'}),

    html.Div([
        html.H3('Class Distribution'),
        dcc.Graph(figure=fig5)
    ], style={'margin': '20px'}),

    html.Div([
        html.H3('Top 10 Most Important Features'),
        dcc.Graph(figure=fig6)
    ], style={'margin': '20px'}),

    html.Div([
        html.H3('Feature Histogram'),
        dcc.Dropdown(
            id='feature-dropdown',
            options=[{'label': feature, 'value': feature} for feature in da_filtered.columns if feature != 'Class'],
            value=da_filtered.columns[0]
        ),
        dcc.Graph(id='histogram-plot')
    ], style={'margin': '20px'})
], style={'backgroundColor': '#2c3e50', 'padding': '10px'})

# Callback for updating scatter plot based on selected features
@app.callback(
    Output('scatter-plot', 'figure'),
    [Input('x-axis-feature', 'value'),
     Input('y-axis-feature', 'value')]
)
def update_scatter_plot(x_feature, y_feature):
    fig = px.scatter(da_filtered, x=x_feature, y=y_feature, color='Class', 
                     title=f'{x_feature} vs {y_feature} Scatter Plot', 
                     template='plotly_dark')
    return fig

# Callback for updating histogram based on selected feature
@app.callback(
    Output('histogram-plot', 'figure'),
    [Input('feature-dropdown', 'value')]
)
def update_histogram(selected_feature):
    fig = go.Figure()
    for cls in da_filtered['Class'].unique():
        filtered_data = da_filtered[da_filtered['Class'] == cls]
        fig.add_trace(go.Histogram(
            x=filtered_data[selected_feature],
            name=f'Class {cls}',
            opacity=0.7
        ))
    
    # Adding mean and median lines
    mean_value = da_filtered[selected_feature].mean()
    median_value = da_filtered[selected_feature].median()
    
    fig.add_vline(x=mean_value, line=dict(color='red', dash='dash'), name='Mean')
    fig.add_vline(x=median_value, line=dict(color='green', dash='dash'), name='Median')

    fig.update_layout(
        barmode='overlay',
        title=f'Distribution of {selected_feature} with Mean and Median',
        xaxis_title=selected_feature,
        yaxis_title='Count',
        template='plotly_dark'
    )
    return fig

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True)


Parameters: { "use_label_encoder" } are not used.

