In [1]:
import os
import warnings

import altair as alt
import numpy as np
import pandas as pd
# Notebook-wide settings.
# Suppress every Python warning for the rest of the session.
warnings.simplefilter('ignore')
# Remove Altair's cap on the number of rows a chart may embed.
alt.data_transformers.disable_max_rows()

In [3]:
# Location of the Netflix CSV. Set the NETFLIX_CSV environment variable to
# point at a local copy; when it is unset the original author's path is used,
# so existing setups keep working unchanged.
f = os.environ.get(
    'NETFLIX_CSV',
    r'C:\Users\Stern\Desktop\Netflix Dataset\netflix.csv',
)

# Load the dataset and store the release year as text.
# NOTE(review): presumably so the year is shown as a plain label rather than a
# formatted number in chart tooltips - confirm.
df = pd.read_csv(f).astype({'Year_Released': 'str'})

In [4]:
# Bar chart: number of titles per genre.

# Top-to-bottom order of the bars (unchanged from the original chart).
bar_genre_order = ['Drama', 'Comedy', 'Action', 'Family', 'Documentary', 'Other']

# Colours are paired with the genres above so that every genre keeps the same
# colour it has in the duration charts of this notebook
# (Documentary = dimgrey, Family = saddlebrown). Previously the palette was
# listed in the other charts' order, which swapped those two colours here.
bar_genre_colors = ['darkred', 'darkorange', 'dodgerblue', 'saddlebrown', 'dimgrey', 'black']

chart = alt.Chart(df).mark_bar().encode(
    color=alt.Color(
        'Genre',
        legend=None,  # the y axis already labels each bar
        sort=bar_genre_order,
        scale=alt.Scale(domain=bar_genre_order, range=bar_genre_colors),
    ),
    x=alt.X('count()', title=''),
    y=alt.Y('Genre', sort=bar_genre_order, title=''),
).configure_axis(
    labelFontSize=20,
    titleFontSize=20
).properties(
    title={
        "text": ["# of Netflix Movies by Genre"],
        "subtitle": [""]
    },
    width=800,
    height=300
).configure_title(fontSize=24)

# Last expression of the cell: render the chart.
chart

In [58]:
# Box plot: distribution of running time (minutes) for each genre.

# Genre order drives both the row order and the colour assignment.
box_genre_order = ['Drama', 'Comedy', 'Action', 'Documentary', 'Family', 'Other']
box_genre_palette = ['darkred', 'darkorange', 'dodgerblue', 'dimgrey', 'saddlebrown', 'black']

box_color = alt.Color(
    'Genre',
    legend=None,  # rows are already labelled on the y axis
    sort=box_genre_order,
    scale=alt.Scale(domain=box_genre_order, range=box_genre_palette),
)

box_title = {
    "text": ["Netflix Movie Duration by Genre"],
    "subtitle": [""],
}

duration_boxplot = (
    alt.Chart(df)
    .mark_boxplot()
    .encode(
        color=box_color,
        y=alt.Y('Genre', sort=box_genre_order, title=''),
        x=alt.X('Duration', title='Minutes'),
    )
    .configure_axis(labelFontSize=18, titleFontSize=18)
    .properties(title=box_title, width=800, height=300)
    .configure_title(fontSize=24)
)

# Last expression of the cell: render the chart.
duration_boxplot

In [7]:
# Alternate view of duration by genre: one density curve per genre, drawn as a
# horizontally oriented area centred on its baseline, with one facet column
# per genre.

density_genre_order = ['Drama', 'Comedy', 'Action', 'Documentary', 'Family', 'Other']
density_genre_palette = ['darkred', 'darkorange', 'dodgerblue', 'dimgrey', 'saddlebrown', 'black']

density_color = alt.Color(
    'Genre',
    sort=density_genre_order,
    scale=alt.Scale(domain=density_genre_order, range=density_genre_palette),
)

# Density axis: centred stacking, no tick labels or grid, a single tick at 0.
density_axis = alt.X(
    'density:Q',
    stack='center',
    impute=None,
    title=None,
    axis=alt.Axis(labels=False, values=[0], grid=False, ticks=True),
)

# Facet header placed underneath each column.
genre_columns = alt.Column(
    'Genre:N',
    header=alt.Header(
        titleOrient='bottom',
        labelOrient='bottom',
        labelPadding=0,
    ),
)

density_title = {
    "text": ["Netflix Movie Duration by Genre - Alternate Chart"],
    "subtitle": [""],
}

duration_density = (
    alt.Chart(df)
    # Estimate the Duration density separately per genre over 0-300.
    .transform_density(
        'Duration',
        as_=['Duration', 'density'],
        extent=[0, 300],
        groupby=['Genre'],
    )
    .mark_area(orient='horizontal')
    .encode(
        y='Duration:Q',
        color=density_color,
        x=density_axis,
        column=genre_columns,
    )
    .properties(title=density_title, width=100)
    .configure_facet(spacing=0)
    .configure_view(stroke=None)
)

# Last expression of the cell: render the chart.
duration_density

In [19]:
# Scatter plot of IMDB rating against running time, coloured by rating
# category. Filtering by genre and by legend is attached in a later cell.

rating_order = ['G', 'PG', 'R', 'MA', 'NR']
rating_palette = ['forestgreen', 'darkolivegreen', 'indianred', 'crimson', 'black']

rating_color = alt.Color(
    'Rating',
    legend=alt.Legend(),
    sort=rating_order,
    scale=alt.Scale(domain=rating_order, range=rating_palette),
)

# Fields listed when hovering over a point.
scatter_tooltip = ['Title', 'Year_Released', 'Primary_Genre', 'Rating', 'Duration', 'IMDB_Rating']

scatter_title = {
    "text": ["NETFLIX MOVIES BY SELECTED GENRE"],
    "subtitle": ["Mouse scroll to zoom in/out - Select point for film details - Filter on MPAA rating using legend and genre using dropdown box"],
}

scatter = (
    alt.Chart(df)
    .mark_circle(size=45)
    .encode(
        color=rating_color,
        x=alt.X('Duration:Q', title='Minutes'),
        # Fixed 0-10 domain for the IMDB rating axis.
        y=alt.Y('IMDB_Rating:Q', title='IMDB Rating', scale=alt.Scale(domain=[0, 10])),
        tooltip=scatter_tooltip,
    )
    .properties(title=scatter_title, width=600, height=250)
    .interactive()  # enables zooming and panning
)

In [20]:
# Interactive filters for the scatter plot.

# Distinct values present in the data.
genres = df['Genre'].unique()
ratings = df['Rating'].unique()

# Dropdown bound to a single-value selection on Genre, starting on 'Drama'.
genre_dropdown = alt.binding_select(options=genres)
genre_select = alt.selection_single(
    name="Select the ",
    fields=['Genre'],
    bind=genre_dropdown,
    init={'Genre': 'Drama'},
)

# Multi-value selection on Rating, driven by clicks on the colour legend.
legend_select = alt.selection_multi(fields=['Rating'], bind='legend')

# Attach both selections to the scatter plot, then filter by each in turn.
filter__scatter_genres = scatter.add_selection(genre_select, legend_select)
for active_selection in (genre_select, legend_select):
    filter__scatter_genres = filter__scatter_genres.transform_filter(active_selection)

In [22]:

# Text-based "table" listing top-ranked titles for the current filters, shown
# next to the filtered scatter plot.

# Rank rows by IMDB rating, highest first.
rating_descending = [alt.SortField(field='IMDB_Rating', order='descending')]

# Shared base for the table columns. Transform order matters:
#   1. number the rows (used for vertical placement),
#   2. apply the genre and legend filters,
#   3. rank what is left by IMDB rating,
#   4. keep rows whose rank is below 11.
ranked_text = (
    alt.Chart(df)
    .mark_text()
    .encode(y=alt.Y('row_number:O', axis=None))
    .transform_window(row_number='row_number()')
    .add_selection(genre_select)
    .transform_filter(genre_select)
    .transform_filter(legend_select)
    .transform_window(rank='rank(IMDB_Rating)', sort=rating_descending)
    .transform_filter(alt.datum.rank < 11)
)

# One text column per displayed field.
a = ranked_text.encode(text='Title').properties(title='')
b = ranked_text.encode(text='Duration').properties(title='Minutes')

table_title = {
    "text": [""],
    "subtitle": ["Try one of these top rated selections..."],
}
text = alt.hconcat(a, b).properties(title=table_title)

# Final layout: filtered scatter on the left, table on the right, with
# independent colour legends and no view border.
dashboard = (
    alt.hconcat(filter__scatter_genres, text)
    .resolve_legend(color="independent")
    .configure_view(strokeWidth=0)
)

# Last expression of the cell: render the dashboard.
dashboard