## __Instalasi Plotly__

In [124]:
pip install plotly

Note: you may need to restart the kernel to use updated packages.


## __Impor Modul__

In [125]:
import plotly.graph_objects as go
import numpy as np 
import pandas as pd

## __Load Dataset Insurance__

In [126]:
# Read the insurance dataset from the working directory into the shared frame.
# NOTE(review): the preview shows an 'Unnamed: 0' column, so the CSV appears
# to carry a saved index; consider loading it with index_col=0 - confirm.
csv_path = 'insurance.csv'
df = pd.read_csv(csv_path)

# Preview the first rows of the loaded data.
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


## __Visualisasi Data dengan Plotly Graph Objects__

### __1. Distribusi Usia (Pie Chart)__

In [127]:
# 1. Age distribution (pie chart)
# Bin edges and the label of each bin. The intervals are closed on the right
# and the first one also includes its lower edge, i.e. [0, 18], (18, 30],
# (30, 40], (40, 50], (50, 60], (60, 100]. This makes every label match the
# ages it actually contains: an 18-year-old falls in '0-18' and a 30-year-old
# in '19-30'. (With left-closed bins those ages were placed one group too high.)
age_bins = [0, 18, 30, 40, 50, 60, 100]
age_labels = ['0-18', '19-30', '31-40', '41-50', '51-60', '61+']
df['age_group'] = pd.cut(
    df['age'],
    bins=age_bins,
    labels=age_labels,
    right=True,
    include_lowest=True,
)

# Number of rows per age group; the group labels become the slice names.
age_distribution = df['age_group'].value_counts()

fig = go.Figure(data=[go.Pie(labels=age_distribution.index, values=age_distribution.values)])
fig.update_layout(title='Distribusi Usia')
fig.show()

### __2. Distribusi Usia dan Jenis Kelamin berdasarkan kebiasaan merokok (stacked bar chart)__

In [128]:
# 2. Age and sex distribution by smoking habit (stacked bar chart)
# Count the rows for every (age, sex, smoker) combination.
df_grouped = (
    df.groupby(['age', 'sex', 'smoker'])
    .size()
    .reset_index(name='count')
)

fig2 = go.Figure()

# Add one bar trace per sex / smoker-status pair, visiting the values in
# the order in which they first appear in the grouped frame.
for sex_value in df_grouped['sex'].unique():
    rows_for_sex = df_grouped[df_grouped['sex'] == sex_value]
    for smoker_value in rows_for_sex['smoker'].unique():
        rows_for_pair = rows_for_sex[rows_for_sex['smoker'] == smoker_value]
        pair_bar = go.Bar(
            x=rows_for_pair['age'],
            y=rows_for_pair['count'],
            name=f'{sex_value} - {smoker_value}',
            hoverinfo='x+y+name',
        )
        fig2.add_trace(pair_bar)

# barmode='stack' piles the traces on top of each other at each age.
fig2_layout = {
    'title': 'Distribusi Usia berdasarkan Jenis Kelamin dan Kebiasaan Merokok',
    'xaxis_title': 'Usia',
    'yaxis_title': 'Frekuensi',
    'barmode': 'stack',
}
fig2.update_layout(**fig2_layout)
fig2.show()

### __3. Distribusi BMI (Histogram)__

In [129]:
# 3. BMI distribution (histogram)
# Histogram trace over the raw BMI values; a thin outline is drawn around
# each bar.
bmi_histogram = go.Histogram(
    x=df['bmi'],
    name='BMI',
    opacity=0.75,
    marker={'line': {'width': 0.5}},
)

fig3 = go.Figure(data=[bmi_histogram])
fig3.update_layout(title='Distribusi BMI', xaxis_title='BMI', yaxis_title='Jumlah')
fig3.show()

### __4. Rata-rata biaya berdasarkan status merokok (Bar Chart)__

In [130]:
# 4. Mean charges by smoking status (bar chart)
# Average the charges within each smoker group; reset_index turns the group
# key back into a regular 'smoker' column.
charges_by_smoker = df.groupby('smoker')['charges']
avg_charges_smoker = charges_by_smoker.mean().reset_index()

smoker_bar = go.Bar(
    x=avg_charges_smoker['smoker'],
    y=avg_charges_smoker['charges'],
)
fig4 = go.Figure(data=[smoker_bar])
fig4.update_layout(
    title='Rata-rata Biaya Berdasarkan Status Merokok',
    xaxis_title='Status Merokok',
    yaxis_title='Rata-rata Biaya',
)
fig4.show()

### __5. Biaya berdasarkan jumlah anak (Box Plot)__

In [None]:
# 5. Charges by number of children (box plot)
# One box per distinct value of 'children', summarising the charges.
children_box = go.Box(x=df['children'], y=df['charges'])

fig5 = go.Figure(data=[children_box])
fig5.update_layout(
    title='Biaya Berdasarkan Jumlah Anak',
    xaxis_title='Jumlah Anak',
    yaxis_title='Biaya',
)
fig5.show()

### __6. Rata-rata biaya (charges) per region (Line chart)__

In [132]:
# 6. Mean charges per region (line chart)
# Average the charges within each region and keep 'region' as a column.
charges_by_region = df.groupby('region')['charges']
average_charges = charges_by_region.mean().reset_index()

# Straight line segments between regions, with a marker on every point.
region_line = go.Scatter(
    x=average_charges['region'],
    y=average_charges['charges'],
    mode='lines+markers',
    name='Rata-rata Charges',
    line={'shape': 'linear'},
    marker={'size': 8},
)

fig6 = go.Figure(data=[region_line])
fig6.update_layout(
    title='Rata-rata Charges Berdasarkan Region',
    xaxis_title='Region',
    yaxis_title='Rata-rata Charges',
)
fig6.show()

### __7. Rata-rata charges untuk setiap kombinasi region dan smoker (Heatmap)__

In [None]:
# 7. Mean charges for every region / smoker combination (heatmap)
# Pivot the grouped means into a table: one row per region, one column per
# smoker status.
mean_by_region_smoker = df.groupby(['region', 'smoker'])['charges'].mean()
heatmap_data = mean_by_region_smoker.unstack()

region_smoker_heatmap = go.Heatmap(
    z=heatmap_data.values,
    x=heatmap_data.columns,
    y=heatmap_data.index,
    colorscale='Plasma',
)

fig7 = go.Figure(data=region_smoker_heatmap)
fig7.update_layout(
    title='Rata-rata Charges berdasarkan Region dan Status Merokok',
    xaxis_title='Status Merokok',
    yaxis_title='Region',
)
fig7.show()

### __8. Usia Vs Biaya (scatter plot)__

In [134]:
# 8. Age vs. charges (scatter plot)
# Marker styling: every point is coloured by its BMI on the Plasma scale and
# a colour bar titled 'BMI' is displayed next to the plot.
bmi_marker = {
    'size': 10,
    'color': df['bmi'],
    'colorscale': 'Plasma',
    'colorbar': {'title': 'BMI'},
    'showscale': True,
}

age_charges_points = go.Scatter(
    x=df['age'],
    y=df['charges'],
    mode='markers',
    marker=bmi_marker,
)

fig8 = go.Figure(data=[age_charges_points])
fig8.update_layout(
    title='Scatter Plot: Usia vs Biaya',
    xaxis_title='Usia',
    yaxis_title='Biaya',
)
fig8.show()

### __9. Total Biaya per Usia (Area Chart)__

In [135]:
# 9. Total charges per age (area chart)
# Sum the charges of everyone sharing the same age.
charges_by_age = df.groupby('age')['charges']
total_charges = charges_by_age.sum().reset_index()

# fill='tozeroy' shades the area between the line and y = 0.
total_charges_area = go.Scatter(
    x=total_charges['age'],
    y=total_charges['charges'],
    mode='lines',
    fill='tozeroy',
    name='Total Charges',
    line={'color': 'Blue'},
    marker={'size': 8},
)

fig9 = go.Figure(data=[total_charges_area])
fig9.update_layout(
    title='Area Chart: Total Charges per Age',
    xaxis_title='Age',
    yaxis_title='Total Charges',
)
fig9.show()

### __10. Usia vs Biaya berdasarkan Region dan Status Merokok (Bubble Chart)__

In [136]:
# 10. Age vs. charges by region and smoking status (bubble chart)
# Bubble diameter grows with the number of children. A constant base size is
# added because a marker of size 0 is not visible, which would otherwise hide
# every person without children.
bubble_base_size = 6
bubble_size_per_child = 10
bubble_sizes = bubble_base_size + df['children'] * bubble_size_per_child

# The legend is hidden below, so the hover text carries everything the marker
# encodes: region, smoking status (colour) and number of children (size).
bubble_hover_text = (
    'Region: ' + df['region'].astype(str)
    + '<br>Smoker: ' + df['smoker'].astype(str)
    + '<br>Children: ' + df['children'].astype(str)
)

fig10 = go.Figure()
fig10.add_trace(go.Scatter(
    x=df['age'],
    y=df['charges'],
    mode='markers',
    marker=dict(
        size=bubble_sizes,
        color=df['smoker'].map({'yes': 'yellow', 'no': 'blue'}),  # colour by smoking status
        opacity=0.5,
        line=dict(width=1, color='DarkSlateGrey')
    ),
    text=bubble_hover_text,  # shown on hover
    name='Data Points'
))
fig10.update_layout(
    title='Bubble Chart: Age vs Charges',
    xaxis_title='Age',
    yaxis_title='Charges',
    showlegend=False  # single trace; the details are available on hover
)
fig10.show()

### __11. Biaya berdasarkan Usia dan BMI (Surface Plot)__

In [137]:
# 11. Charges as a function of age and BMI (surface plot)
# Build a regular 100 x 100 grid spanning the observed age and BMI ranges.
age_range = np.linspace(df['age'].min(), df['age'].max(), 100)
bmi_range = np.linspace(df['bmi'].min(), df['bmi'].max(), 100)
age_grid, bmi_grid = np.meshgrid(age_range, bmi_range)

# Fill the grid by nearest-neighbour lookup (not interpolation): every grid
# point takes the charges of the data row closest to it in squared Euclidean
# distance over (age, bmi).
#
# The lookup works on plain numpy arrays and positional indices, so it does
# not depend on the frame's index labels, and it is vectorised over one grid
# axis instead of running a pandas computation for each of the 10,000 points.
ages = df['age'].to_numpy(dtype=float)
bmis = df['bmi'].to_numpy(dtype=float)
charges = df['charges'].to_numpy()

charges_grid = np.zeros_like(age_grid)

# Squared age distance from every grid age to every row, shape (100, n_rows).
# It is the same for all BMI rows of the grid, so compute it once.
age_sq_dist = (ages[None, :] - age_range[:, None]) ** 2

for j, bmi_value in enumerate(bmi_range):
    sq_dist = age_sq_dist + (bmis - bmi_value) ** 2
    # nanargmin ignores rows with a missing age or BMI and returns the first
    # minimum on ties.
    nearest_rows = np.nanargmin(sq_dist, axis=1)
    # Row j of the grid holds one BMI value across all ages (meshgrid layout).
    charges_grid[j, :] = charges[nearest_rows]

# Create the surface plot.
fig11 = go.Figure(data=[go.Surface(z=charges_grid, x=age_grid, y=bmi_grid)])
fig11.update_layout(
    title='Surface Plot: Charges based on Age and BMI',
    scene=dict(
        xaxis_title='Age',
        yaxis_title='BMI',
        zaxis_title='Charges'
    )
)
fig11.show()

### __12. 3D Scatter Plot: Age, BMI, and Charges__

In [138]:
# 12. 3D scatter plot: age, BMI and charges
# Colour every point by smoking status: yellow for smokers, blue otherwise.
smoker_colors = df['smoker'].map({'yes': 'yellow', 'no': 'blue'})

points_3d = go.Scatter3d(
    x=df['age'],
    y=df['bmi'],
    z=df['charges'],
    mode='markers',
    marker={
        'size': 10,
        'color': smoker_colors,
        'opacity': 0.7,
        'line': {'width': 1},
    },
    text=df['region'],  # region is shown on hover
)

fig12 = go.Figure(data=[points_3d])

# Axis titles of a 3D plot live under the 'scene' layout entry.
scene_axes = {
    'xaxis_title': 'Age',
    'yaxis_title': 'BMI',
    'zaxis_title': 'Charges',
}
fig12.update_layout(
    title='3D Scatter Plot: Age, BMI, and Charges',
    scene=scene_axes,
)
fig12.show()

### __13. Tahapan Peserta: Total Peserta berdasarkan Status Merokok dan Jumlah Anak (Funnel Chart)__

In [139]:
# 13. Participant stages: totals by smoking status and number of children (funnel chart)
# Overall row count and the smoker / non-smoker split. The value counts are
# computed once; .get(..., 0) covers a category that does not occur.
total_count = len(df)
smoker_counts = df['smoker'].value_counts()
smoker_count = smoker_counts.get('yes', 0)
non_smoker_count = smoker_counts.get('no', 0)

# Rows per number of children, ordered by the number of children.
children_counts = df['children'].value_counts().sort_index()
one_child_count = children_counts.get(1, 0)
# Select "2 or more" by label with a boolean mask. A positional slice such as
# children_counts[2:] is only correct when both 0 and 1 occur in the index.
two_plus_children_count = children_counts[children_counts.index >= 2].sum()

# Data for the funnel chart: one value per stage, in display order.
stages = ['Total Participants', 'Smokers', 'Non-Smokers', 'Participants with 1 Child', 'Participants with 2+ Children']
values = [total_count, smoker_count, non_smoker_count, one_child_count, two_plus_children_count]

fig13 = go.Figure()
fig13.add_trace(go.Funnel(
    name='Funnel Chart',
    y=stages,
    x=values,
    textinfo='value+percent initial',  # show the value and its share of the first stage
))
fig13.update_layout(
    title='Funnel Chart: Participant Stages',
)
fig13.show()

In [140]:
# Layout with a margin of 10 on all four sides, and a new empty figure that
# uses it. Note that this rebinds `fig`.
tight_margin = go.layout.Margin(l=10, r=10, t=10, b=10)
layout = go.Layout(margin=tight_margin)
fig = go.Figure(layout=layout)