# K-pop Dashboard Project

## Import libraries

In [None]:
from dash import Dash, dcc, html, Output, Input
import dash_bootstrap_components as dbc
import pandas as pd
# from plotly_calplot import calplot
import plotly.express as px
from summarytools import dfSummary

## Exploratory data analysis

### Load dataset

In [None]:
# Dataset downloaded from Kaggle:
# https://www.kaggle.com/datasets/nicolsalayoarias/all-kpop-idols/?select=kpopidolsv3.csv
# The path is relative, so the CSV must be in the current working directory.
data = pd.read_csv(filepath_or_buffer='kpopidolsv3.csv')

# Preview the first five rows as a quick check that the load worked.
data.head(n=5)

### Data cleaning

In [None]:
# Keep only the required columns.
required_columns = [
    "Full Name",
    "Group",
    "Former Group",
    "Date of Birth",
    "Debut",
    "Country",
    "Gender",
]
df = data[required_columns]

# Summarise the selected columns.
dfSummary(df)

In [None]:
# Keep only idols that can be identified and that have a real debut date:
#   - rows without a Full Name cannot be attributed to anyone;
#   - a Debut of "0/01/1900" is a placeholder meaning "not yet debuted".
# .copy() gives df its own data, so the column assignments below (and in later
# cells) do not trigger pandas' SettingWithCopyWarning.
has_name = df["Full Name"].notna()
has_debuted = df["Debut"] != "0/01/1900"
df = df.loc[has_name & has_debuted].copy()

# Convert columns to the required datatypes.
# Text columns: stringify only the values that are present. A plain astype(str)
# would turn missing entries into the literal string "nan", which would then be
# treated as a real group / country / gender instead of a missing value.
text_columns = ["Full Name", "Group", "Former Group", "Country", "Gender"]
df[text_columns] = df[text_columns].astype(str).where(df[text_columns].notna())

# Date columns are parsed as day/month/year.
df["Date of Birth"] = pd.to_datetime(df["Date of Birth"], format="%d/%m/%Y")
df["Debut"] = pd.to_datetime(df["Debut"], format="%d/%m/%Y")

df.dtypes

In [None]:
# Summarise df again at this point of the data-cleaning section.
dfSummary(df)

### Feature engineering

In [None]:
# Add new columns
df["Debut Year"] = df["Debut"].dt.year
df["Debut Month"] = df["Debut"].dt.month
df["Birth Year"] = df["Date of Birth"].dt.year
df["Birth Month"]  =df["Date of Birth"].dt.month
df["Debut Age"] = df["Debut Year"] - df["Birth Year"]

In [None]:
# Cast the derived columns to integers.
# NaN cannot be stored in an int column, so missing values are replaced with 0
# before the cast.
# NOTE: after this step a value of 0 in any of these columns stands for
# "missing", not for a real year, month or age.
derived_columns = ["Debut Year", "Debut Month", "Birth Year", "Birth Month", "Debut Age"]
filled = df[derived_columns].fillna(0)
df[derived_columns] = filled.astype(int)

df.dtypes

## Data visualisation

### Idol birthday analysis

1. Calendar plot: idol birthday distribution

2. Histogram plot: idol birth year distribution (all / male / female)

3. Histogram plot: idol birth month distribution (all / male / female)

### Debut analysis

1. Calendar plot: debut anniversary distribution

2. Histogram plot: debut anniversary distribution by month

3. Bubble plot: debut age distribution (all / male / female)

4. Bar plot: number of debuted idols per year

5. Bar plot: number of debuted groups per year

### Meta-analysis

1. Pie chart: distribution of groups by gender

2. Bar plot: number of members per group

3. Pie chart: distribution of idols' countries of origin

## Dashboard

### Dash app setup

In [None]:
# Create the Dash application, styled with the VAPOR theme from
# dash-bootstrap-components.
app = Dash(
    __name__,
    external_stylesheets=[dbc.themes.VAPOR],
)

### Dashboard layout and callback

### Run dashboard 

In [None]:
# Start the Dash development server only when this code runs as the main
# program, not when it is imported as a module.
if __name__ == '__main__':
    # app.run is the supported entry point; app.run_server is deprecated and is
    # no longer available in current Dash releases.
    app.run(debug=True)