<a href="https://colab.research.google.com/github/mdarnowski/airdash/blob/main/airdash.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [None]:
!pip install scikit-learn
!pip install requests
!pip install pandas
!pip install tensorflow
!pip install jupyter-dash
!pip install dash
!pip install plotly
!pip install seaborn
!pip install numpy
!pip install tensorflow

In [None]:
import requests
import zipfile
import io
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from keras.layers import MaxPooling1D, Conv1D, LSTM, Dense, Dropout
from keras.models import Sequential
from jupyter_dash import JupyterDash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output, State
from dash import dash_table
from dash.exceptions import PreventUpdate

# Data Loading

In [None]:
def load_data(url, timeout=30):
    """Download a zipped, semicolon-delimited CSV and return it as a DataFrame.

    Only the first member of the archive is parsed. String cells are
    converted to numbers after swapping the decimal comma for a dot; strings
    that are not numeric become NaN. Non-string cells are left untouched.

    Args:
        url: Address of the zip archive to download.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the caller forever.

    Returns:
        The parsed DataFrame, or an empty DataFrame when the response status
        is not 200 or the archive contains no members.

    Raises:
        requests.RequestException: If the request itself fails.
        zipfile.BadZipFile: If the payload is not a valid zip archive.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Return an *instance* so callers can safely inspect ``.empty``.
        return pd.DataFrame()

    # Context managers make sure the archive and member handles are closed.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        members = archive.namelist()
        if not members:
            return pd.DataFrame()
        with archive.open(members[0]) as data_file:
            df = pd.read_csv(data_file, delimiter=';')

    # Convert columns to numerical if possible
    for col in df.columns:
        # Already-numeric columns contain no strings, so nothing to convert.
        if pd.api.types.is_numeric_dtype(df[col]):
            continue
        df[col] = df[col].apply(
            lambda x: pd.to_numeric(x.replace(',', '.'), errors='coerce')
            if isinstance(x, str) else x)

    return df

def clean_data(df, fill_value):
    """Drop empty columns and impute missing values.

    Steps, in order:
      1. Columns in which every value is NaN are removed.
      2. The value -200 is replaced with NaN.
      3. NaNs in numeric columns are filled with that column's median.
      4. Numeric columns with no median (every value missing after step 2)
         are filled with ``fill_value``.

    Non-numeric columns are left as they are.

    Args:
        df: Input frame; it is not modified.
        fill_value: Fallback for numeric columns whose median is undefined.
            Pass ``None`` to leave such columns as NaN.

    Returns:
        A new, cleaned DataFrame.
    """
    df = df.dropna(axis=1, how='all')
    df = df.replace(-200, np.nan)

    # numeric_only avoids a TypeError on text columns in recent pandas.
    medians = df.median(numeric_only=True)
    if fill_value is not None:
        medians = medians.fillna(fill_value)

    return df.fillna(medians)

# Model

In [None]:
def create_and_compile_model(input_shape):
    """Assemble and compile the convolutional + recurrent regression network.

    The stack is: Conv1D -> MaxPooling1D -> LSTM (sequence output) -> Dropout
    -> LSTM -> Dropout -> Dense(1), compiled with the Adam optimizer and a
    mean-squared-error loss.

    Args:
        input_shape: Shape tuple handed to the first (Conv1D) layer.

    Returns:
        The compiled Sequential model.
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        LSTM(units=50, return_sequences=True),
        Dropout(0.2),
        LSTM(units=50),
        Dropout(0.2),
        Dense(units=1),
    ]

    network = Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(optimizer='adam', loss='mean_squared_error')
    return network


def train_and_predict_model(X_train, y_train, X_test, y_test):
    """Fit a fresh network on the training split and predict the test split.

    The test split is also passed to ``fit`` as validation data. Training
    runs for 10 epochs.

    Args:
        X_train: Training inputs; axes 1 and 2 define the model input shape.
        y_train: Training targets.
        X_test: Inputs to validate on and to predict for.
        y_test: Targets used for validation during training.

    Returns:
        A ``(predictions, model)`` tuple.
    """
    steps = X_train.shape[1]
    channels = X_train.shape[2]
    network = create_and_compile_model((steps, channels))

    network.fit(
        X_train,
        y_train,
        epochs=10,
        validation_data=(X_test, y_test),
    )

    predictions = network.predict(X_test)
    return predictions, network

# Plotting

In [None]:
def get_target_and_features(df, target_prefix='target'):
    """Split a frame into its target column name and its feature columns.

    Every column whose name starts with ``target_prefix`` counts as a target
    column. The first such column is reported as the target, and all of them
    are removed from the feature frame.

    Args:
        df: Frame to split.
        target_prefix: Name prefix that marks target columns.

    Returns:
        ``(target_name, features_frame)``, or ``(None, None)`` when no column
        carries the prefix.
    """
    matches = []
    for name in df.columns:
        if name.startswith(target_prefix):
            matches.append(name)

    if len(matches) == 0:
        return None, None

    return matches[0], df.drop(columns=matches)

def create_data_preview_table(df):
    """Render the first 15 rows of ``df`` as a horizontally scrollable table.

    Args:
        df: Frame to preview.

    Returns:
        A ``dash_table.DataTable`` with one column per frame column.
    """
    preview_rows = df.head(15).to_dict('records')

    column_specs = []
    for name in df.columns:
        column_specs.append({'name': name, 'id': name})

    return dash_table.DataTable(
        data=preview_rows,
        columns=column_specs,
        style_table={'overflowX': 'scroll'},
    )

def create_heatmap_graph(df):
    """Plot the pairwise correlation matrix of the numeric columns.

    Non-numeric columns are excluded first, matching the histogram and
    boxplot views, because correlating text columns raises in recent pandas.

    Args:
        df: Frame whose numeric columns are correlated.

    Returns:
        A ``dcc.Graph`` wrapping the correlation heatmap.
    """
    numeric = df.select_dtypes(include=[np.number])
    correlation = numeric.corr()
    heatmap = px.imshow(correlation)
    return dcc.Graph(figure=heatmap)

def create_histogram(df):
    """Overlay one semi-transparent histogram per numeric column.

    Args:
        df: Frame whose numeric columns are plotted.

    Returns:
        A ``dcc.Graph`` containing the overlaid histograms.
    """
    numeric_names = df.select_dtypes(include=[np.number]).columns

    figure = go.Figure()
    for name in numeric_names:
        trace = go.Histogram(x=df[name], name=name)
        figure.add_trace(trace)

    # Draw bars on top of each other and keep them see-through.
    figure.update_layout(barmode='overlay')
    figure.update_traces(opacity=0.75)

    return dcc.Graph(figure=figure)

def create_boxplot(df):
    """Draw one box plot per numeric column in a single figure.

    Args:
        df: Frame whose numeric columns are plotted.

    Returns:
        A ``dcc.Graph`` containing the box plots.
    """
    numeric_names = df.select_dtypes(include=[np.number]).columns

    figure = go.Figure()
    for name in numeric_names:
        box = go.Box(y=df[name], name=name)
        figure.add_trace(box)

    return dcc.Graph(figure=figure)

def create_feature_importance_plot(df):
    """Bar-chart the random-forest feature importances for the target column.

    A ``RandomForestRegressor`` with 100 trees is fitted on all feature
    columns against the target; importances are shown in descending order.

    Args:
        df: Frame containing a column whose name starts with ``target``.

    Returns:
        A ``dcc.Graph`` with the bar chart, or an ``html.Div`` message when
        there is no target column or the target is not numerical.
    """
    target, features = get_target_and_features(df)
    if target is None or features is None:
        return html.Div("No target column found")

    # dtype kinds: bool, signed int, unsigned int, float, complex.
    target_kind = df[target].dtype.kind
    if target_kind not in 'biufc':
        return html.Div("Target column is not numerical")

    forest = RandomForestRegressor(n_estimators=100)
    forest.fit(features, df[target])

    ranking = pd.DataFrame({
        'feature': features.columns,
        'importance': forest.feature_importances_
    })
    ranking = ranking.sort_values(by='importance', ascending=False)

    axis_labels = {
        'importance': 'Feature Importance',
        'feature': 'Feature'
    }
    figure = px.bar(
        ranking,
        x='feature',
        y='importance',
        labels=axis_labels,
        title=f'Feature Importance for {target}',
    )

    return dcc.Graph(figure=figure)

def create_residuals_plot(df):
    """Plot linear-regression residuals against predicted values.

    The data is split 70/30 (fixed ``random_state=0``), a
    ``LinearRegression`` is fitted on the training part, and the residuals
    of the held-out part are scattered against the predictions together
    with a red zero line.

    Args:
        df: Frame containing a column whose name starts with ``target``.

    Returns:
        A ``dcc.Graph`` with the residual plot, or an ``html.Div`` message
        when there is no target column or the target is not numerical.
    """
    target, features = get_target_and_features(df)
    if target is None or features is None:
        return html.Div("No target column found")

    # dtype kinds: bool, signed int, unsigned int, float, complex.
    if df[target].dtype.kind not in 'biufc':
        return html.Div("Target column is not numerical")

    split = train_test_split(features, df[target], test_size=0.3, random_state=0)
    X_train, X_test, y_train, y_test = split

    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    predicted = regressor.predict(X_test)
    errors = y_test - predicted

    figure = px.scatter(
        x=predicted,
        y=errors,
        labels={'x': 'Predicted', 'y': 'Residuals'},
    )

    # Reference line marking a perfect prediction (residual of zero).
    zero_line = go.Scatter(
        x=predicted,
        y=[0] * len(predicted),
        mode='lines',
        name='Residuals=0',
        line=dict(color='red'),
    )
    figure.add_trace(zero_line)

    figure.update_layout(title=f'Residuals vs Predicted values for {target}')
    return dcc.Graph(figure=figure)

def create_scatter_plots(df):
    """Scatter every numeric feature against the target in one figure.

    Args:
        df: Frame containing a column whose name starts with ``target``.

    Returns:
        A ``dcc.Graph`` with one marker trace per numeric feature, or an
        ``html.Div`` message when there is no target column.
    """
    target, features = get_target_and_features(df)
    if target is None or features is None:
        return html.Div("No target column found")

    figure = go.Figure()
    for name in features.select_dtypes(include=[np.number]).columns:
        markers = go.Scatter(x=df[name], y=df[target], mode='markers', name=name)
        figure.add_trace(markers)

    figure.update_layout(
        title='Scatter Plots',
        xaxis_title='Features',
        yaxis_title='Target',
    )

    return dcc.Graph(figure=figure)

def crnn_prediction(df):
    """Train the Conv1D/LSTM network on the data and plot actual vs. predicted.

    Features and target are min-max scaled to [0, 1], split 65/35 (fixed
    ``random_state=42``), and each sample is reshaped to
    ``(n_features, 1)`` before training. Predictions and held-out targets
    are mapped back to the original target scale before the mean squared
    error is computed and both series are plotted.

    Args:
        df: Frame containing a column whose name starts with ``target``.

    Returns:
        A ``dcc.Graph`` with the two line traces, or an ``html.Div`` message
        when there is no target column.
    """
    target, features = get_target_and_features(df)
    if target is None or features is None:
        return html.Div("No target column found")

    # Separate scalers: one for the inputs, one kept to undo target scaling.
    feature_scaler = MinMaxScaler(feature_range=(0, 1))
    target_scaler = MinMaxScaler(feature_range=(0, 1))
    inputs = feature_scaler.fit_transform(features.values)
    outputs = target_scaler.fit_transform(df[target].values.reshape(-1, 1))

    x_train, x_test, y_train, y_test = train_test_split(
        inputs, outputs, test_size=0.35, random_state=42)

    # Add a trailing channel axis: (samples, features) -> (samples, features, 1).
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

    predicted, _ = train_and_predict_model(x_train, y_train, x_test, y_test)

    predicted = target_scaler.inverse_transform(predicted)
    actual = target_scaler.inverse_transform(y_test)

    mse = mean_squared_error(actual, predicted)

    figure = go.Figure()
    for series, label in ((actual, 'Actual'), (predicted, 'Predicted')):
        figure.add_trace(go.Scatter(
            x=np.arange(len(series)),
            y=series.flatten(),
            mode='lines',
            name=label,
        ))
    figure.update_layout(
        title=f'Actual vs. Predicted {target}, Mean Squared Error: {mse}',
        xaxis=dict(title='Index'),
        yaxis=dict(title='Value'),
    )

    return dcc.Graph(figure=figure)

In [None]:
# Registry of dashboard actions. Each key is the dropdown value; the entry
# holds the human-readable label and the function that renders the output
# for a cleaned DataFrame.
GRAPH_CONFIG = {
    key: {'label': label, 'function': function}
    for key, label, function in (
        ('data-preview', 'Data Preview', create_data_preview_table),
        ('heatmap', 'Heatmap', create_heatmap_graph),
        ('histogram', 'Histogram', create_histogram),
        ('boxplot', 'Boxplot', create_boxplot),
        ('feature-importance', 'Feature Importance', create_feature_importance_plot),
        ('residuals-plot', 'Residuals Plot', create_residuals_plot),
        ('scatter-plot', 'Scatter Plot', create_scatter_plots),
        ('crnn', 'CRNN', crnn_prediction),
    )
}

# App

In [None]:
# Dash application object; the callbacks below are registered on it.
app = JupyterDash(__name__)

# Archive pre-filled in the URL input field.
default_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00360/AirQualityUCI.zip'

# Page layout: a control row (action + column dropdowns, "Go" button),
# a URL input row, and a loading wrapper around the output container.
app.layout = html.Div(
    children=[
        # Row 1: controls.
        html.Div(
            children=[
                # One option per GRAPH_CONFIG entry; nothing selected at start.
                dcc.Dropdown(
                    id="action-dropdown",
                    options=[{'label': cfg['label'], 'value': key}
                             for key, cfg in GRAPH_CONFIG.items()],
                    value=None,
                    placeholder="Select an action",
                    style={'width': '200px', 'marginRight': '0.5em'}
                ),
                # No options are set here; they are an Output of the
                # update_output callback.
                dcc.Dropdown(
                    id="column-dropdown",
                    placeholder="Select a column",
                    style={'width': '200px', 'marginRight': '0.5em'}
                ),
                # Its n_clicks property is the Input of both callbacks.
                html.Button(
                    "Go",
                    id="action-button",
                    n_clicks=0,
                    style={'marginLeft': '0.5em'}
                ),
            ],
            style={'display': 'flex', 'alignItems': 'center', 'marginBottom': '1em'}
        ),
        # Row 2: dataset URL, pre-filled with default_url.
        html.Div(
            children=[
                dcc.Input(
                    id="url-input",
                    type="text",
                    placeholder="Enter URL",
                    value=default_url,
                    style={'width': '100%'}
                )
            ],
            style={'display': 'flex', 'alignItems': 'center', 'marginBottom': '1em'}
        ),

        # Output area wrapped in a loading indicator. It starts hidden
        # (display: none); the toggle_loading callback sets its style.
        dcc.Loading(
            id="loading",
            type="circle",
            children=[
                html.Div(id="output-container")
            ],
            className="loading-container",
            style={"display": "none"}
        )
    ],
    style={'margin': '1em'}
)

In [None]:
@app.callback(
    [Output("output-container", "children"), Output('column-dropdown', 'options')],
    [Input("action-button", "n_clicks")],
    [State("url-input", "value"), State("action-dropdown", "value"), State('column-dropdown', 'value')],
    prevent_initial_call=True
)
def update_output(n_clicks, url, action, target):
    """Load the dataset and render the selected action when "Go" is clicked.

    The data is downloaded afresh on every click. If a column is selected in
    the column dropdown it is renamed with a ``target_`` prefix so the
    plotting functions treat it as the target.

    Args:
        n_clicks: Click count of the "Go" button.
        url: Address of the zipped CSV to load.
        action: Key into ``GRAPH_CONFIG`` or ``None``.
        target: Name of the column chosen as target, or ``None``.

    Returns:
        ``(output_component, column_options)``. On a load failure or an
        empty dataset the component is a message and the options are empty.

    Raises:
        PreventUpdate: If ``n_clicks`` is ``None``.
    """
    if n_clicks is None:
        raise PreventUpdate

    try:
        df = load_data(url)
    except Exception:
        # Network, zip or CSV errors all surface as the same user message;
        # unlike a bare ``except`` this lets KeyboardInterrupt/SystemExit pass.
        return html.Div('try another URL'), []

    # Guard against a loader that hands back something other than a frame.
    if not isinstance(df, pd.DataFrame) or df.empty:
        return html.Div('the dataset is empty'), []

    renamed_target = None
    if target is not None and target in df.columns:
        renamed_target = f'target_{target}'
        df = df.rename(columns={target: renamed_target})
    df = clean_data(df, 0)

    # Offer the original column names: the data is reloaded with its
    # original names on every click, so a ``target_``-prefixed option would
    # never match a column on the next run.
    option_names = [target if col == renamed_target else col for col in df.columns]
    columns = [{'label': name, 'value': name} for name in option_names]

    if action in GRAPH_CONFIG:
        graph_function = GRAPH_CONFIG[action]['function']
        return graph_function(df), columns
    return html.Div([]), columns

@app.callback(
    Output("loading", "style"),
    [Input("action-button", "n_clicks")]
)
def toggle_loading(n_clicks):
    """Show the loading wrapper once the "Go" button has been clicked.

    Args:
        n_clicks: Click count of the "Go" button; may be ``None``.

    Returns:
        A style dict with ``display`` set to ``"block"`` after at least one
        click and ``"none"`` otherwise.
    """
    clicked = n_clicks is not None and n_clicks > 0
    if not clicked:
        return {"display": "none"}
    return {"display": "block"}

# Run server

In [None]:
# Start the app. mode='inline' is handed to JupyterDash — presumably this
# renders the dashboard inside the notebook output cell; confirm against the
# jupyter-dash documentation.
app.run_server(mode='inline')