In [11]:
import pandas as pd 
import altair as alt
# Turn off Altair's max-rows limit on the data transformer so the charts in
# this notebook can be built from the full DataFrame.
# The call is the last expression in the cell, so its return value is echoed
# as the cell output.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [12]:
# Sanity check: show the name of the data transformer Altair is currently using.
print(alt.data_transformers.active)

default


In [13]:
# Load the dataset from the CSV file (path is relative to the notebook's
# working directory).
csv_path = "Breast_Cancer.csv"
df = pd.read_csv(csv_path)

# Preview the first rows to confirm the columns were parsed as expected.
df.head()

Unnamed: 0,Age,Race,Marital Status,T Stage,N Stage,6th Stage,differentiate,Grade,A Stage,Tumor Size,Estrogen Status,Progesterone Status,Regional Node Examined,Reginol Node Positive,Survival Months,Status
0,68,White,Married,T1,N1,IIA,Poorly differentiated,3,Regional,4,Positive,Positive,24,1,60,Alive
1,50,White,Married,T2,N2,IIIA,Moderately differentiated,2,Regional,35,Positive,Positive,14,5,62,Alive
2,58,White,Divorced,T3,N3,IIIC,Moderately differentiated,2,Regional,63,Positive,Positive,14,7,75,Alive
3,58,White,Married,T1,N1,IIA,Poorly differentiated,3,Regional,18,Positive,Positive,2,1,84,Alive
4,47,White,Married,T2,N1,IIB,Poorly differentiated,3,Regional,41,Positive,Positive,3,1,50,Alive


In [14]:
# Stages in the order they should appear on the x-axis. The same list drives
# the row filter, the axis sort and the colour-scale domain.
stage_order = ['IIA', 'IIB', 'IIIA', 'IIIB', 'IIIC']
stage_colors = ['blue', 'green', 'yellow', 'orange', 'red']

# Keep only the two columns the chart needs, restricted to the listed stages.
df_filtered = df.loc[
    df['6th Stage'].isin(stage_order),
    ['Survival Months', '6th Stage'],
].copy()

# One fixed colour per stage, paired position-by-position with stage_order.
color_scale = alt.Scale(domain=stage_order, range=stage_colors)

# Bar chart: one bar per stage, bar height = mean of 'Survival Months'.
chart = (
    alt.Chart(df_filtered)
    .mark_bar()
    .encode(
        x=alt.X('6th Stage:N', sort=stage_order, title='Stage'),
        y=alt.Y('mean(Survival Months):Q', title='Average Survival Months'),
        # Legend is hidden because the x-axis already labels every stage.
        color=alt.Color('6th Stage:N', scale=color_scale, legend=None),
        tooltip=['6th Stage', 'mean(Survival Months)'],
    )
    .properties(
        title="Average Survival Months by Stage",
        width=600,
        height=400,
    )
)

chart


In [16]:
# Histogram of the Age distribution: one bar per age bin, bar height = number
# of rows (cases) falling in that bin.
#
# A single bin definition is shared by the x-axis and the tooltip. Binning the
# tooltip with `bin=True` would use default bin parameters instead of
# maxbins=20, so the "Age Range" shown on hover could disagree with the bar
# under the cursor, and the extra bin field would become a second grouping key
# for the count.
age_bin = alt.Bin(maxbins=20)

histogram = alt.Chart(df).mark_bar().encode(
    x=alt.X('Age:Q', bin=age_bin, title='Age (Binned)'),  # Bin ages into intervals
    y=alt.Y('count():Q', title='Total Breast Cancer Cases'),
    tooltip=[
        alt.Tooltip('count():Q', title='Total Cases'),
        # Same binning as the x channel so the range matches the hovered bar.
        alt.Tooltip('Age:Q', bin=age_bin, title='Age Range')
    ]
).properties(
    title="Histogram of Age vs Total Breast Cancer Cases",
    width=800,
    height=400
)

histogram

In [24]:
# Total number of cases per race, drawn as a bar chart.

# Race on the categorical x-axis, ordered by descending bar height.
race_axis = alt.X('Race:N', title='Race', sort='-y')
# Bar height is the number of rows for each race.
case_count_axis = alt.Y('count():Q', title='Total Cases')
# Each race gets its own colour; the legend is hidden since the axis labels
# already name the categories.
race_color = alt.Color('Race:N', legend=None)

bar_plot_cases = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=race_axis,
        y=case_count_axis,
        color=race_color,
        tooltip=['Race', 'count():Q'],  # hover shows the race and its case count
    )
    .properties(
        title="Total Cases by Race",
        width=600,
        height=400,
    )
)

bar_plot_cases