### Load and Explore Data

Titanic dataaset dashboard visualization

In [2]:
# Import libraries
import pandas as pd

# Load data
train_df = pd.read_csv('data/raw/train.csv')
test_df = pd.read_csv('data/raw/test.csv')
gender_df = pd.read_csv('data/raw/gender_submission.csv')

In [3]:
print(train_df.shape)
print(test_df.shape)
print(gender_df.shape)

(891, 12)
(418, 11)
(418, 2)


In [4]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [5]:
train_df.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [6]:
# Fill missing values of age with the mean
train_df['Age'].fillna(train_df['Age'].mean(), inplace=True)
# Drop cain column (too many missing values)
train_df.drop('Cabin', axis=1, inplace=True)
# Fill missing values of Embarked with the mode
train_df['Embarked'].fillna(train_df['Embarked'].mode()[0], inplace=True)
train_df.isnull().sum()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['Age'].fillna(train_df['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train_df['Embarked'].fillna(train_df['Embarked'].mode()[0], inplace=True)


PassengerId    0
Survived       0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Embarked       0
dtype: int64

In [7]:
test_df.isnull().sum()

PassengerId      0
Pclass           0
Name             0
Sex              0
Age             86
SibSp            0
Parch            0
Ticket           0
Fare             1
Cabin          327
Embarked         0
dtype: int64

In [8]:
# Fill missing values of age with the mean
test_df['Age'].fillna(test_df['Age'].mean(), inplace=True)
# Drop cain column (too many missing values)
test_df.drop('Cabin', axis =1, inplace=True)
# Fill missing values of Fare with the mean
test_df['Fare'].fillna(test_df['Fare'].mean(), inplace=True)
test_df.isnull().sum()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df['Age'].fillna(test_df['Age'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  test_df['Fare'].fillna(test_df['Fare'].mean(), inplace=True)


PassengerId    0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Embarked       0
dtype: int64

In [None]:
gender_df.isnull().sum()
gender_df.head()

NameError: name 'gender_df' is not defined

In [None]:
gender_df.describe()
gender_df.describe(include='all')

In [10]:
from dash import Dash, dcc, html, Input, Output
import plotly.express as px

In [11]:
# Initalize Dash app
app  = Dash(__name__)

In [12]:
# Layout for the dashboard
app.layout = html.Div([
    html.H1('Titanic dashboard', style={'textaAlign': 'center'}),
    dcc.Dropdown(
        id='dataset-dropdown',
        options=[
            {'label': 'Train Dataset', 'value': 'train'},
            {'label': 'Test Dataset', 'value': 'test'}
        ],
        value='train',
        placeholder='Select Datadset'
    ),
    dcc.Graph(id='age-distribution'),
    dcc.Graph(id='fare-distribution')
])

In [13]:
# Callbacks
@app.callback(
    Output('age-distribution', 'figure'),
    Input('dataset-dropdown', 'value')
)
def update_age_distribution(selected_dataset):
    data = train_df if selected_dataset == 'train' else test_df
    fig = px.histogram(data, x='Age', title='Age Distribution')
    return fig

In [14]:
# Callbacks
@app.callback(
    Output('fare-distribution', 'figure'),
    Input('dataset-dropdown', 'value')
)
def update_fare_distributions(selected_dataset):
    data = train_df if selected_dataset == 'train' else test_df
    fig = px.histogram(data, x='Fare', title='Fare Distribution')
    return fig

In [15]:
# Run the app
if __name__ == '__main__':
    app.run_server(debug=False)

[2025-01-12 18:45:06,679] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "/home/jparep/miniconda3/envs/dash-env/lib/python3.11/site-packages/flask/app.py", line 1511, in wsgi_app
    response = self.full_dispatch_request()
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/jparep/miniconda3/envs/dash-env/lib/python3.11/site-packages/flask/app.py", line 919, in full_dispatch_request
    rv = self.handle_user_exception(e)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/jparep/miniconda3/envs/dash-env/lib/python3.11/site-packages/flask/app.py", line 917, in full_dispatch_request
    rv = self.dispatch_request()
         ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/jparep/miniconda3/envs/dash-env/lib/python3.11/site-packages/flask/app.py", line 902, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^