In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()


import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots

 # Constructors Section

## Importing data, simple preliminary analysis

In [None]:
cons = pd.read_csv('../input/fia-f1-19502019-data/constructors_championship_1958-2020.csv')

In [None]:
cons.head()

In [None]:
cons.isna().sum()

In [None]:
cons.info()

### Check details about any Dataframe

In [None]:
def master_dataframe(dataframe):
    """Summarise every column of ``dataframe`` in a single table.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``dataframe`` with the columns ``Datatype``,
        ``Total Elements`` (result of ``count()``), ``Null Count``,
        ``Null Percentage`` (rounded to 3 decimals) and
        ``No Of Unique Values`` (result of ``nunique()``).
    """
    # Compute the null tally once and derive the percentage from it.
    null_count = dataframe.isna().sum()
    null_share = (null_count / len(dataframe) * 100).round(3)

    summary = {
        'Datatype': dataframe.dtypes,
        "Total Elements": dataframe.count(),
        "Null Count": null_count,
        "Null Percentage": null_share,
        'No Of Unique Values': dataframe.nunique(),
    }
    return pd.DataFrame(summary)

In [None]:
master_dataframe(cons)

### Check descriptive Analysis

In [None]:
cons.describe(include='all').T

### Get only those teams who have won a Grand Prix

In [None]:
# Build a derived frame instead of aliasing `cons`: `grpx_w = cons` would only
# create a second name for the same object, so assigning a column through it
# would also rewrite `cons["Position"]` behind the reader's back.
# Non-numeric positions are coerced to NaN and therefore drop out of the
# `== 1` filter.
# NOTE(review): the source file is a constructors' *championship* table, so
# Position == 1 presumably marks a season title rather than a single Grand
# Prix win -- confirm the wording used in the headings and chart titles.
grpx_w = (
    cons
    .assign(Position=lambda d: pd.to_numeric(d["Position"], errors='coerce'))
    .loc[lambda d: d["Position"] == 1]
)
grpx_w.head()

### Get top 10 teams who won the Grand Prix

In [None]:
# Count first-place rows per team, keep the ten largest counts and order them
# ascending so the bars grow from left to right.
# Every step returns a new frame: sorting a `.head(10)` slice with
# `inplace=True` mutates a derived frame and can trigger pandas'
# SettingWithCopyWarning.
top_ten_teams = (
    grpx_w
    .groupby(by=['Team'])
    .Position.count()
    .reset_index()
    .rename(columns={'Position': 'Wins'})
    .sort_values(by=['Wins'], ascending=False)
    .head(10)
    .sort_values(by=['Wins'], ascending=True)
)

fig, ax = plt.subplots(figsize=(20, 6))

plots = sns.barplot(ax=ax,
                    data=top_ten_teams,
                    x='Team',
                    y='Wins',
                    palette='flare',
                    saturation=0.5)

ax.set_title('Top 10 Teams That Won The Grand Prix', fontsize=16)
ax.set_xlabel('Team', fontsize=16)
ax.set_ylabel('Wins', fontsize=16)
ax.set_xticklabels(top_ten_teams['Team'], rotation=-45, fontsize=12)

# Label each bar with its win count. Bar heights are floats, so format them
# without decimals ("16" rather than "16.0").
for bar in plots.patches:
    plots.annotate(text=f"{bar.get_height():.0f}",
                   xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
                   size=15,
                   ha='center',
                   va='bottom',
                   xytext=(0, 8),
                   textcoords="offset points")
    

 

In [None]:
# Interactive Plotly version of the top-ten chart, coloured by win count.
fig = px.bar(top_ten_teams, x='Team', y='Wins', color='Wins', width=750, height=500)

# Centre the title horizontally and place it near the top of the figure.
fig.update_layout(title=dict(text='Teams with most Wins', x=0.5, y=0.95))

fig.show()

### Let's check the total points scored by teams

In [None]:
cons['Points'].unique()

In [None]:
# Coerce Points to numbers (unparseable entries become NaN). The converted
# column is written back to `cons` because this notebook aggregates
# cons['Points'] again in later cells.
cons['Points'] = pd.to_numeric(cons['Points'], errors='coerce')

# Total points per team, ascending, restricted to teams with more than 100.
total_points_by_team = (
    cons
    .groupby(by=['Team'])
    .Points.sum()
    .reset_index()
    .sort_values(by=['Points'], ascending=True)
    .loc[lambda d: d['Points'].gt(100)]
)

fig = px.bar(total_points_by_team, x='Team', y='Points', color='Points', width=750, height=500)

fig.update_layout(title=dict(text='Points scored by Teams', x=0.5, y=0.95))

fig.show()


### Let's see how many times each team finished on the podium (positions 1–3)

In [None]:
# Take a three-column copy of the constructors table and make Position numeric
# (non-numeric entries become NaN, so they can never match 1, 2 or 3).
teams_to_podium = (
    cons
    .loc[:, ['Team', 'Position', 'Points']]
    .assign(Position=lambda d: pd.to_numeric(d['Position'], errors='coerce'))
)

# Keep only the rows with a top-three position. An earlier per-team Points sum
# assigned to this same name was overwritten immediately and never used, so it
# has been dropped.
count_pod_by_team = teams_to_podium[teams_to_podium['Position'].isin([1, 2, 3])]

# No `values` column is passed, so the tiles are expected to be sized by the
# number of rows per team -- NOTE(review): confirm against the px.treemap docs.
fig = px.treemap(data_frame=count_pod_by_team,
                 path=['Team'],
                 height=500)

fig.update_layout(title={'text': 'Number of Podiums per Team', 'x': 0.5})

fig.show()

### Let's check how the teams did in the latest (2020) season

In [None]:
# Sum the points of every team for the 2020 season, lowest total first.
season_2020 = (
    cons[cons['Year'] == 2020]
    .groupby(by=['Team'])
    .Points.sum()
    .reset_index()
    .sort_values(by=['Points'])
)

# One bar (and one colour) per team.
fig = px.bar(season_2020, x='Team', y='Points', color='Team', height=500)

fig.update_layout(title=dict(text='Team performance in 2020 season', x=0.5))

fig.show()

### Let's check the top four constructors' achievements over the years

In [None]:
# Names of the four teams with the highest total points.
top_four_teams = (
    cons
    .groupby(by=['Team'])
    .Points.sum()
    .reset_index()
    .sort_values(by=['Points'], ascending=False)
    .loc[:, 'Team']
    .head(4)
)

# Teams to chart, each paired with its (row, col) cell in the 2x2 grid.
# NOTE(review): this list is hard-coded rather than taken from
# `top_four_teams` -- confirm that the two agree for the current data.
subplot_cells = {
    'Ferrari': (1, 1),
    'Mercedes': (1, 2),
    'McLaren Mercedes': (2, 1),
    'Red Bull Racing Renault': (2, 2),
}

# `make_subplots` lives in plotly.subplots and must be imported with the other
# plotly imports at the top of the notebook.
fig = make_subplots(rows=2, cols=2, x_title='Year', y_title='Points')

# One points-per-year line per team; a single loop replaces four copies of the
# same filter / sort / add_trace stanza.
for team, (row, col) in subplot_cells.items():
    team_seasons = cons[cons['Team'] == team].sort_values(by='Year', ascending=False)
    fig.add_trace(
        go.Scatter(x=team_seasons['Year'], y=team_seasons['Points'], name=team),
        row=row,
        col=col,
    )

fig.update_layout(title={'text': 'Top 4 Constructors', 'x': 0.5})

fig.show()

# Drivers Section

In [None]:
drivers = pd.read_csv('../input/fia-f1-19502019-data/drivers_championship_1950-2020.csv')
drivers.head()

In [None]:
master_dataframe(drivers)

In [None]:
drivers.info()

In [None]:
drivers.describe(include='all').T

In [None]:
drivers.isna().sum()

### Checking which drivers have won the most championships and points

In [None]:
# Total points per driver: take the 20 highest totals, then order them
# ascending so the bars grow from left to right.
champ = (
    drivers
    .groupby(by=['Name'])
    .Points.sum()
    .reset_index()
    .sort_values(by=['Points'], ascending=False)
    .head(20)
    .sort_values(by=['Points'], ascending=True)
)

fig = px.bar(champ, x='Name', y='Points', color='Points', width=800, height=500)

fig.update_layout(title=dict(text='Drivers with most Championship Points', x=0.5))

fig.show()

In [None]:
# Count, per driver, the rows whose Position is 1 and keep the ten highest
# counts (shown in ascending order).
# - `.assign` builds a derived frame; `most_wins = drivers` would only alias
#   the table, so a column assignment through it would also rewrite
#   `drivers['Position']`.
# - Rows are counted with `.size()` instead of summing the constant Position
#   value 1.0, which states the intent and yields integer counts.
# Non-numeric positions are coerced to NaN and never match `== 1`.
most_wins = (
    drivers
    .assign(Position=lambda d: pd.to_numeric(d['Position'], errors='coerce'))
    .loc[lambda d: d['Position'] == 1]
    .groupby(by=['Name'])
    .size()
    .reset_index(name='Number Of Wins')
    .sort_values(by=['Number Of Wins'], ascending=False)
    .head(10)
    .sort_values(by=['Number Of Wins'], ascending=True)
)

fig = px.bar(data_frame=most_wins,
             x='Name',
             y='Number Of Wins',
             color='Number Of Wins',
             width=800,
             height=500)

fig.update_layout(title={'text': 'Drivers with most Wins', 'x': 0.5})

fig.show()

### Let's see how many podium finishes (positions 1–3) the drivers of each country achieved

In [None]:
# Four-column copy of the drivers table with a numeric Position (non-numeric
# entries become NaN), filtered down to the rows placed 1st, 2nd or 3rd.
podium_wins_country = (
    drivers
    .loc[:, ['Name', 'Nationality', 'Position', 'Points']]
    .assign(Position=lambda d: pd.to_numeric(d['Position'], errors='coerce'))
    .loc[lambda d: d['Position'].isin([1, 2, 3])]
)

# One tile per nationality.
fig = px.treemap(podium_wins_country, path=['Nationality'])

fig.update_layout(title=dict(text='Podiums won by Countries', x=0.5))

fig.show()

### Let's check how the drivers did in the latest (2020) season

In [None]:
# Points per driver for the 2020 season: take the 20 highest totals, then
# order them ascending for plotting.
season_2020 = (
    drivers
    .loc[drivers['Year'] == 2020, ['Name', 'Year', 'Points']]
    .groupby(by=['Name'])
    .Points.sum()
    .reset_index()
    .sort_values(by=['Points'], ascending=False)
    .head(20)
    .sort_values(by=['Points'], ascending=True)
)

fig = px.bar(season_2020, x='Name', y='Points', color='Points', width=800, height=500)

fig.update_layout(title=dict(text='Top Drivers of 2020', x=0.5))

fig.show()

In [None]:
# Histogram of the Points column across all rows of the drivers table.
fig = px.histogram(data_frame=drivers, x='Points', opacity=0.8)

# Give the figure a centred title like every other chart in this notebook,
# so it can stand alone when the notebook is skimmed.
fig.update_layout(title={'text': 'Distribution of Driver Points', 'x': 0.5})

fig.show()