<a href="https://colab.research.google.com/github/noONE17o/Data-Visualization-/blob/main/DataVisualizationipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Import libraries and load the dataset**

In [2]:
from google.colab import drive

# Mount Google Drive at /content/drive so files stored there can be read.
drive.mount('/content/drive')

# Base folder for notebook data inside the mounted drive. The folder name was
# previously misspelled ("Cloab"), which did not match the folder the CSV is
# actually read from.
# NOTE(review): this path is relative, so it only resolves when the working
# directory is /content — confirm before relying on it.
path = 'drive/My Drive/Colab Notebooks/'

Mounted at /content/drive


In [3]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Load the Oscar awards table from the mounted Drive folder into `df`,
# the frame every later cell reads from.
df = pd.read_csv(
    filepath_or_buffer='/content/drive/My Drive/Colab Notebooks/the_oscar_award.csv'
)

## **Number of Awards Over the Years**


In [4]:

# Count winning rows per ceremony year. The result has two columns:
# 'year_ceremony' and 'winner' (the per-year count of winning rows).
awards_over_years = (
    df[df['winner']]
    .groupby("year_ceremony")['winner']
    .count()
    .reset_index()
)

# Single line-and-marker trace. The hover text (the yearly count) is attached
# here at construction time instead of through a later update_traces call.
awards_trace = go.Scatter(
    x=awards_over_years['year_ceremony'],
    y=awards_over_years['winner'],
    mode='lines+markers',
    name='Awards',
    line={'color': 'blue', 'width': 2},
    marker={'size': 8, 'color': 'blue', 'symbol': 'circle'},
    hoverinfo="text",
    text=awards_over_years['winner'],
)

# Grid/line styling shared by both axes.
axis_style = {
    'showgrid': True,
    'zeroline': False,
    'showline': True,
    'gridcolor': 'lightgrey',
}

fig = go.Figure(data=[awards_trace])
fig.update_layout(
    title={
        'text': "Number of Awards Over the Years",
        'x': 0.5,
        'xanchor': 'center',
    },
    xaxis={
        **axis_style,
        'title': {'text': "Year"},
        # One tick every 5 years, anchored at 1928.
        'tickmode': 'linear',
        'tick0': 1928,
        'dtick': 5,
    },
    yaxis={
        **axis_style,
        'title': {'text': "Number of Awards"},
    },
    plot_bgcolor='white',
    hovermode='x unified',
)

fig.show()


# **Top Award-Winning Films**

In [7]:

# Count wins per individual film and keep the ten films with the most wins.
#
# A title alone is not a unique key: remakes and unrelated films can share a
# title, and grouping by title only would add their wins together into a single
# bar. The film year is therefore part of the group key.
# NOTE(review): assumes every row of one film carries the same 'year_film' —
# confirm against the dataset.
top_winning_films = (
    df[df['winner']]
    .groupby(['film', 'year_film'])
    .agg(number_of_awards=('winner', 'count'))
    .sort_values(by='number_of_awards', ascending=False)
    .head(10)
    .reset_index()
)

# Film year as text, used for the hover. The column name is kept from the
# earlier version of this cell; each group now has exactly one year, so the
# year is no longer repeated once per award.
top_winning_films['years_of_wins'] = top_winning_films['year_film'].astype(str)

# Axis label that stays unique even if two same-titled films both reach the
# top ten (identical x categories would otherwise be drawn on top of each other).
top_winning_films['film_label'] = (
    top_winning_films['film'].astype(str)
    + ' ('
    + top_winning_films['years_of_wins']
    + ')'
)

fig = go.Figure()

fig.add_trace(go.Bar(
    x=top_winning_films['film_label'],
    y=top_winning_films['number_of_awards'],
    # Award count printed above each bar.
    text=top_winning_films['number_of_awards'],
    textposition='outside',
    marker=dict(color='green'),
    name='Awards',
    # hoverinfo='text' makes the hover show only hovertext (the film year).
    hovertext=top_winning_films['years_of_wins'],
    hoverinfo='text',
    width=0.4
))

fig.update_layout(
    title={
        'text': "Top Award-Winning Films",
        'x': 0.5,
        'xanchor': 'center'
    },
    xaxis_title="Film",
    yaxis_title="Number of Awards",
    xaxis=dict(
        showgrid=False,
        zeroline=False,
        showline=True,
        # Slanted, small tick labels so long film titles fit.
        tickangle=45,
        tickfont=dict(size=10)
    ),
    yaxis=dict(
        showgrid=True,
        zeroline=False,
        showline=True,
        gridcolor='lightgrey',
        title_standoff=10
    ),
    plot_bgcolor='white',
    hovermode='x unified',
    height=600,
    width=1000
)

fig.show()


# **Who are the actors with the most Oscar wins?**


In [10]:


# Winning rows whose category mentions ACTOR or ACTRESS (case-insensitive).
# na=False drops rows with a missing category instead of propagating NaN
# into the boolean mask.
is_acting_category = df['category'].str.contains('ACTOR|ACTRESS', case=False, na=False)
actor_actress_winners = df[is_acting_category & df['winner']]

# Distinct winning categories per person, as one comma-separated string.
# set() removes the repeats that appear when someone wins the same category
# more than once.
categories_per_person = (
    actor_actress_winners.groupby('name')['category']
    .apply(lambda cats: ', '.join(sorted(set(cats))))
    .reset_index(name='categories')
)

# Total wins per person. Counting per (name, category) would split one
# person's wins across differently named acting categories and could list the
# same person several times in the "top 10".
top_actors_actresses = (
    actor_actress_winners.groupby('name')
    .size()
    .reset_index(name='wins')
    .sort_values(by='wins', ascending=False)
    .head(10)
    # A left merge keeps the sorted row order of the left frame.
    .merge(categories_per_person, on='name', how='left')
)

# Colour key: 'Actress' if any of the person's categories mentions ACTRESS.
top_actors_actresses['gender'] = top_actors_actresses['categories'].apply(
    lambda cats: 'Actress' if 'ACTRESS' in cats.upper() else 'Actor'
)

fig = px.bar(
    top_actors_actresses,
    x='wins',
    y='name',
    orientation='h',
    title='Top 10 Actors and Actresses with the Most Oscar Wins',
    labels={'wins': 'Number of Wins', 'name': 'Name', 'gender': 'Gender'},
    hover_data={'wins': True, 'categories': True},
    color='gender',
    color_discrete_map={'Actress': 'pink', 'Actor': 'blue'},
    # Without a text column the textposition setting below has nothing to place.
    text='wins'
)

fig.update_traces(textposition='outside')
fig.update_layout(
    xaxis_title="Number of Wins",
    yaxis_title="Name",
    # Reverse the axis so the first (highest) row is drawn at the top.
    yaxis=dict(autorange="reversed"),
    plot_bgcolor="white",
    legend_title="Gender",
    height=400
)

fig.show()


# **Number of nominees per category**

In [13]:

# Number of non-null 'name' entries per category, largest first, as a
# two-column frame: 'category' and 'nominee_count'.
nominee_count_by_category = (
    df.groupby('category')['name']
    .count()
    .sort_values(ascending=False)
    .reset_index(name='nominee_count')
)

# Horizontal bars, coloured and labelled by the same count.
fig = px.bar(
    data_frame=nominee_count_by_category,
    x='nominee_count',
    y='category',
    orientation='h',
    text='nominee_count',
    color='nominee_count',
    color_continuous_scale='Viridis',
    title='Number of Nominees per Category',
    labels={'category': 'Category', 'nominee_count': 'Number of Nominees'},
)

fig.update_traces(marker_line_width=0.5, textposition='outside')

fig.update_layout(
    height=1400,
    plot_bgcolor="white",
    xaxis={'title': {'text': "Number of Nominees"}},
    yaxis={
        'title': {'text': "Category"},
        'tickmode': "linear",
        'automargin': True,
        'tickfont': {'size': 10},
    },
    coloraxis={'colorbar': {'title': {'text': "Nominees"}}},
)

fig.show()


# **Nominees and Winners Percentage per Category**

In [15]:

# Per-category counts of winning and non-winning rows.
category_counts = (
    df.groupby(['category', 'winner'])
    .size()
    .unstack(fill_value=0)
    # Guarantee both outcome columns exist even if one never occurs in the data.
    .reindex(columns=[False, True], fill_value=0)
    .rename(columns={True: 'winners', False: 'non_winners'})
    .reset_index()
)

# Total nominees = winners + non-winners. Using only the non-winning rows as
# "nominees" understates the total, turns the percentage into a
# winners-to-losers ratio, and divides by zero when every row of a category
# is a winner.
category_counts['nominees'] = category_counts['winners'] + category_counts['non_winners']

# Share of a category's nominees that won. Every category present in the
# groupby has at least one row, so the denominator is never zero.
category_counts['winner_percentage'] = (category_counts['winners'] / category_counts['nominees']) * 100

fig = go.Figure()

# Background bar: all nominees in the category.
fig.add_trace(go.Bar(
    x=category_counts['nominees'],
    y=category_counts['category'],
    orientation='h',
    name='Total Nominees',
    marker=dict(color='blue', opacity=0.6),
    text=category_counts['nominees'],
    textposition='inside'
))

# Foreground bar: winners, labelled with their share of the total.
fig.add_trace(go.Bar(
    x=category_counts['winners'],
    y=category_counts['category'],
    orientation='h',
    name='Winners',
    marker=dict(color='orange', opacity=0.8),
    text=category_counts['winner_percentage'].apply(lambda x: f'{x:.1f}%'),
    textposition='outside'
))

fig.update_layout(
    title='Nominees and Winners Percentage per Category',
    xaxis_title='Number of Nominees',
    yaxis_title='Category',
    yaxis=dict(
        tickmode='linear',
        automargin=True,
        tickfont=dict(size=10)
    ),
    # Overlay draws the winners bar on top of the total bar instead of side by side.
    barmode='overlay',
    plot_bgcolor='white',
    height=1400
)

fig.show()


# **Percentage Distribution of Awards by Decades**

In [16]:

# Decade of each row's film year (e.g. 1997 -> 1990). The column is written
# onto `df` itself, as before, so it stays available to any later cell.
df['decade'] = (df['year_film'] // 10) * 10

# Count awards (winning rows) per decade. The chart is titled as a distribution
# of awards, and the awards-over-years chart counts awards as rows where
# 'winner' is true, so only those rows are counted here rather than every
# nomination.
decade_distribution = df.loc[df['winner'], 'decade'].value_counts().reset_index()
decade_distribution.columns = ['decade', 'count']
decade_distribution = decade_distribution.sort_values(by='decade')

fig = px.pie(
    decade_distribution,
    values='count',
    names='decade',
    title='Percentage Distribution of Awards by Decades',
    hole=0.4,
    labels={'decade': 'Decade', 'count': 'Count'},
)

fig.update_traces(
    textinfo='percent+label',
    hoverinfo='label+value+percent',
    # Pie traces reorder slices from largest to smallest by default, which
    # would discard the chronological sort above; keep the data order instead.
    sort=False
)
fig.update_layout(
    title_x=0.5,
    showlegend=True,
    legend_title_text="Decades"
)

fig.show()
