In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import warnings
import time
import scipy.stats as stats
# Silence library warnings so they do not clutter the rendered notebook output.
warnings.filterwarnings('ignore')

# Raise the display limits so long/wide frames are shown without truncation.
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 100)

# Load the winners dataset; the file is decoded with the 'unicode_escape' codec.
World_Marathon = pd.read_csv(
    "../input/world-marathons-majors/world_marathon_majors.csv",
    encoding='unicode_escape',
)

In [None]:
# Preview the first five rows of the loaded data.
World_Marathon.head(n=5)

In [None]:
# Derive the winning time in minutes (float) from the hours:minutes:seconds
# strings in the 'time' column. The conversion is vectorised with pandas
# instead of parsing every row in a Python loop.
World_Marathon['Winner_Time_in_Min'] = (
    pd.to_timedelta(World_Marathon['time']).dt.total_seconds() / 60
)

In [None]:
# Keep only the columns used in the analysis, in presentation order.
World_Marathon = World_Marathon.loc[
    :, ['year', 'marathon', 'winner', 'gender', 'country', 'time', 'Winner_Time_in_Min']
]
World_Marathon.head()

In [None]:
# Number of missing values in each column.
World_Marathon.isnull().sum()

## Overall Genderwise Winner Marathon

In [None]:
# Count winners per gender once and reuse the result for labels and values.
gender_counts = World_Marathon['gender'].value_counts()
labels = gender_counts.index.tolist()
values = gender_counts.tolist()

# Slice colours, applied in the same order as the labels above.
colors = ['Blue', 'Red']

fig = go.Figure(data=[go.Pie(labels=labels, values=values)])
fig.update_traces(
    hoverinfo='label+percent',
    textinfo='value',
    textfont_size=25,
    marker=dict(colors=colors, line=dict(color='#000000', width=1)),
)
fig.update_layout(title_text='Overall Genderwise Winner Marathon ')
fig.show()


In [None]:
# Histogram of rows per year, coloured by gender.
fig = (
    px.histogram(World_Marathon, x="year", color="gender")
    .update_layout(title_text='Yearly Marathon organized for Male/Female')
)
fig.show()

## Marathon Organized By Cities

In [None]:
# Number of rows per marathon, one colour per marathon, largest bar first.
fig = (
    px.histogram(World_Marathon, x='marathon', color="marathon")
    .update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
    .update_layout(title='Marathon Organized By Cities')
)
fig.show()


## Most Countries Winning Marathon

In [None]:
# Number of rows per winner's country, largest bar first, drawn as
# semi-transparent red bars with a dark outline.
fig = (
    px.histogram(World_Marathon, x='country')
    .update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
    .update_layout(title='Most Countries Winning Marathon')
    .update_traces(
        marker_color='rgb(255,0,0)',
        marker_line_color='rgb(8,48,107)',
        marker_line_width=1.5,
        opacity=0.6,
    )
)
fig.show()


## Marathon Winner throughout the Years

In [None]:
# Sunburst hierarchy: marathon -> gender -> year -> country -> winner.
hierarchy = ['marathon', 'gender', 'year', 'country', 'winner']
fig = (
    px.sunburst(World_Marathon, path=hierarchy)
    .update_layout(title='Marathon Winner throughout the Years ')
)
fig.show()

## TIME

Let's Check the **time** taken by winners and its relation with other factors.

In [None]:
# Split the winners by gender. Each filter result is re-indexed with a chained
# reset_index() (instead of an in-place reset on a filtered slice), giving every
# subset a fresh 0..n-1 index while the previous labels move to an 'index' column.
World_Marathon_male = World_Marathon[World_Marathon['gender'] == 'Male'].reset_index()

World_Marathon_female = World_Marathon[World_Marathon['gender'] == 'Female'].reset_index()

In [None]:
# Distribution of the men's winning times (minutes) as a box plot.
fig = (
    px.box(World_Marathon_male, y="Winner_Time_in_Min")
    .update_layout(title='Male Marathon Time in Minutes ')
)
fig.show()

Generally, male marathon winners completed their race in between 2 hours 7 minutes and 2 hours 19 minutes.

In [None]:
# Distribution of the women's winning times (minutes) as a box plot.
fig = (
    px.box(World_Marathon_female, y="Winner_Time_in_Min")
    .update_layout(title='Female Marathon Time in Minutes ')
)
fig.show()

Generally, female marathon winners completed their race in between 2 hours 23 minutes and 2 hours 30 minutes.

## MALE PARTICIPATION

In [None]:
# Male winners only, re-indexed from 0 (previous labels move to an 'index' column).
# The reset is chained onto the filter rather than applied in place to a slice;
# the former mid-cell .head() call produced no output and was dropped.
World_Marathon_male = World_Marathon[World_Marathon['gender'] == 'Male'].reset_index()

## Tokyo Men's Marathon and their Time

In [None]:
# Tokyo rows of the men's table. The index is reset so that row labels and row
# positions coincide; without it, positional lookups driven by idxmin() labels
# go out of bounds.
Tokyo_marathon_men = World_Marathon_male[World_Marathon_male['marathon'] == 'Tokyo'].reset_index()

In [None]:
# Re-select the Tokyo rows AND reset the index: a bare filter would leave the
# subset carrying the row labels of World_Marathon_male, so labels returned by
# idxmin() would no longer match row positions in this frame.
Tokyo_marathon_men = World_Marathon_male[World_Marathon_male['marathon'] == 'Tokyo'].reset_index()

# Winning time per winner's country, coloured by year.
fig = px.scatter(Tokyo_marathon_men, x="country", y="Winner_Time_in_Min", color='year')
fig.update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
fig.update_layout(
    title='Tokyo Men Marathon Winners with their Time')
fig.show()

In [None]:
print("Male Marathon Winner with the Fastest Record to Win the Tokyo Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
Tokyo_Record = Tokyo_marathon_men.loc[
    Tokyo_marathon_men.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Tokyo_Record)
print("                                    ")
print('=================================================================================')

## Berlin Men's Marathon and their Time

In [None]:
# Berlin rows of the men's table, re-indexed from 0.
Berlin_marathon_men = World_Marathon_male[World_Marathon_male['marathon'] == 'Berlin'].reset_index()

# Winning time per winner's country, coloured by year.
fig = (
    px.scatter(Berlin_marathon_men, x="country", y="Winner_Time_in_Min", color='year')
    .update_layout(title='Berlin Men Marathon Winners with their Time')
    .update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
)
fig.show()

In [None]:
print("Male Marathon Winner with the Fastest Record to Win the Berlin Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
Berlin_Record = Berlin_marathon_men.loc[
    Berlin_marathon_men.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Berlin_Record)
print("                                    ")
print('=================================================================================')

## Boston Men's Marathon and their Time

In [None]:
# Boston rows of the men's table, re-indexed from 0.
Boston_marathon_men = World_Marathon_male[World_Marathon_male['marathon'] == 'Boston'].reset_index()

# Winning time per winner's country, coloured by year.
fig = (
    px.scatter(Boston_marathon_men, x="country", y="Winner_Time_in_Min", color='year')
    .update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
    .update_layout(title='Boston Men Marathon Winners with their Time')
)
fig.show()

In [None]:
print("Male Marathon Winner with the Fastest Record to Win the Boston Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
Boston_Record = Boston_marathon_men.loc[
    Boston_marathon_men.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Boston_Record)
print("                                    ")
print('=================================================================================')

## NYC Men's Marathon and their Time

In [None]:
# NYC rows of the men's table, re-indexed from 0.
NYC_marathon_men = World_Marathon_male[World_Marathon_male['marathon'] == 'NYC'].reset_index()

# Plot the NYC subset (the cell previously plotted the Boston frame under an
# NYC title). Winning time per winner's country, coloured by year.
fig = px.scatter(NYC_marathon_men, x="country", y="Winner_Time_in_Min", color='year')
fig.update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
fig.update_layout(
    title='NYC Men Marathon Winners with their Time')
fig.show()

In [None]:
print("Male Marathon Winner with the Fastest Record to Win the NYC Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
NYC_Record = NYC_marathon_men.loc[
    NYC_marathon_men.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(NYC_Record)
print("                                    ")
print('=================================================================================')

## London Men's Marathon and their Time

In [None]:
# London rows of the men's table, re-indexed from 0.
London_marathon_men = World_Marathon_male[World_Marathon_male['marathon'] == 'London'].reset_index()

# Winning time per winner's country, coloured by year.
fig = (
    px.scatter(London_marathon_men, x="country", y="Winner_Time_in_Min", color='year')
    .update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
    .update_layout(title='London Men Marathon Winners with their Time')
)
fig.show()

In [None]:
print("Male Marathon Winner with the Fastest Record to Win the London Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
London_Record = London_marathon_men.loc[
    London_marathon_men.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(London_Record)
print("                                    ")
print('=================================================================================')

## Chicago Men's Marathon and their Time

In [None]:
# Chicago rows of the men's table, re-indexed from 0.
Chicago_marathon_men = World_Marathon_male[World_Marathon_male['marathon'] == 'Chicago'].reset_index()

# Plot the Chicago subset (the cell previously plotted the London frame under a
# Chicago title). Winning time per winner's country, coloured by year.
fig = px.scatter(Chicago_marathon_men, x="country", y="Winner_Time_in_Min", color='year')
fig.update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
fig.update_layout(
    title='Chicago Men Marathon Winners with their Time')
fig.show()

In [None]:
print("Male Marathon Winner with the Fastest Record to Win the Chicago Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
Chicago_Record = Chicago_marathon_men.loc[
    Chicago_marathon_men.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Chicago_Record)
print("                                    ")
print('=================================================================================')

From Above Analysis 

1. Every men's marathon record belongs to **Kenya**.
2. From the country-wise analysis of winning times above, we can see that **Kenya** is dominating the men's marathon competition, and its runners hold the best records for finishing the marathon in the shortest time.
3. In the earlier years of the marathons, the **USA** was the leader in marathon competition.

## Most Marathon Male winner wrt City and Year

In [None]:
# One bar per winner, with 'year' on the y axis and segments coloured by marathon.
# The title given to px.bar is replaced by the update_layout call below.
fig = (
    px.bar(World_Marathon_male, x="winner", y="year", color="marathon", title="Marathon Winner")
    .update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=10))
    .update_layout(title='Most Marathon Male winner wrt City and Year')
)
fig.show()

#### Bill Rodgers from the USA is one of the most successful marathon runners, winning 8 marathons

## Individual Male Winners with Countries

In [None]:
# One tile per winner, coloured by country; winner and country appear on hover.
# (A stale title left over from an unrelated notebook was removed; it was
# overwritten immediately and never shown.)
fig = px.treemap(World_Marathon_male, path=['winner'], hover_data=['winner', 'country'], color='country')
fig.update_layout(
    title='Individual Male Winners with Countries')
fig.show()

In [None]:
print("Male Marathon Winner with the Fastest Record to Win the Marathon")
print('_________________________________________________________________________________')
# idxmin()/idxmax() yield index *labels*, so rows are selected by label with .loc;
# .iloc is positional and only matches when the index is exactly 0..n-1.
Fastest_Marathon_Runner = World_Marathon_male.loc[
    World_Marathon_male.groupby(['gender'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Fastest_Marathon_Runner)
print("                                    ")
print('=================================================================================')
print('                                                                                 ')
print("Male Marathon Winner with the Slowest Record to Win the Marathon")
print('_________________________________________________________________________________')
Slowest_Marathon_Runner = World_Marathon_male.loc[
    World_Marathon_male.groupby(['gender'])['Winner_Time_in_Min'].idxmax(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Slowest_Marathon_Runner)
print("                                    ")
print('=================================================================================')

## FEMALE PARTICIPATION

In [None]:
# Female winners only, re-indexed from 0 (previous labels move to an 'index' column).
# The reset is chained onto the filter rather than applied in place to a slice;
# the former mid-cell .head() call produced no output and was dropped.
World_Marathon_female = World_Marathon[World_Marathon['gender'] == 'Female'].reset_index()

## Tokyo Female's Marathon and their Time

In [None]:
# Tokyo rows of the women's table, re-indexed from 0.
Tokyo_marathon_women = World_Marathon_female[World_Marathon_female['marathon'] == 'Tokyo'].reset_index()

# Winning time per winner's country, coloured by year.
# The title now names Tokyo; it previously said "Boston" for this Tokyo subset.
fig = px.scatter(Tokyo_marathon_women, x="country", y="Winner_Time_in_Min", color='year')
fig.update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
fig.update_layout(
    title='Tokyo Women Marathon Winners with their Time')
fig.show()

In [None]:
print("Female Marathon Winner with the Fastest Record to Win the Tokyo Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
Tokyo_Record = Tokyo_marathon_women.loc[
    Tokyo_marathon_women.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Tokyo_Record)
print("                                    ")
print('=================================================================================')

## Boston Female's Marathon and their Time

In [None]:
# Boston rows of the women's table, re-indexed from 0.
Boston_marathon_women = World_Marathon_female[World_Marathon_female['marathon'] == 'Boston'].reset_index()

# Winning time per winner's country, coloured by year.
fig = (
    px.scatter(Boston_marathon_women, x="country", y="Winner_Time_in_Min", color='year')
    .update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
    .update_layout(title='Boston Women Marathon Winners with their Time')
)
fig.show()

In [None]:
print("Female Marathon Winner with the Fastest Record to Win the Boston Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
Boston_Record = Boston_marathon_women.loc[
    Boston_marathon_women.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Boston_Record)
print("                                    ")
print('=================================================================================')

## Berlin Female's Marathon and their Time

In [None]:
# Berlin rows of the women's table, re-indexed from 0.
Berlin_marathon_women = World_Marathon_female[World_Marathon_female['marathon'] == 'Berlin'].reset_index()

# Winning time per winner's country, coloured by year.
# The title now names Berlin; it previously said "Boston" for this Berlin subset.
fig = px.scatter(Berlin_marathon_women, x="country", y="Winner_Time_in_Min", color='year')
fig.update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
fig.update_layout(
    title='Berlin Women Marathon Winners with their Time')
fig.show()

In [None]:
print("Female Marathon Winner with the Fastest Record to Win the Berlin Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
Berlin_Record = Berlin_marathon_women.loc[
    Berlin_marathon_women.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Berlin_Record)
print("                                    ")
print('=================================================================================')

## NYC Female's Marathon and their Time

In [None]:
# NYC rows of the women's table, re-indexed from 0.
NYC_marathon_women = World_Marathon_female[World_Marathon_female['marathon'] == 'NYC'].reset_index()

# Winning time per winner's country, coloured by year.
fig = (
    px.scatter(NYC_marathon_women, x="country", y="Winner_Time_in_Min", color='year')
    .update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
    .update_layout(title='NYC Women Marathon Winners with their Time')
)
fig.show()

In [None]:
print("Female Marathon Winner with the Fastest Record to Win the NYC Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
NYC_Record = NYC_marathon_women.loc[
    NYC_marathon_women.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(NYC_Record)
print("                                    ")
print('=================================================================================')

## London Female's Marathon and their Time

In [None]:
# London rows of the women's table, re-indexed from 0.
London_marathon_women = World_Marathon_female[World_Marathon_female['marathon'] == 'London'].reset_index()

# Winning time per winner's country, coloured by year.
# The title now names London; it previously said "NYC" for this London subset.
fig = px.scatter(London_marathon_women, x="country", y="Winner_Time_in_Min", color='year')
fig.update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
fig.update_layout(
    title='London Women Marathon Winners with their Time')
fig.show()

In [None]:
print("Female Marathon Winner with the Fastest Record to Win the London Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
London_Record = London_marathon_women.loc[
    London_marathon_women.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(London_Record)
print("                                    ")
print('=================================================================================')

## Chicago Female's Marathon and their Time

In [None]:
# Chicago rows of the women's table, re-indexed from 0.
Chicago_marathon_women = World_Marathon_female[World_Marathon_female['marathon'] == 'Chicago'].reset_index()

# Winning time per winner's country, coloured by year.
fig = (
    px.scatter(Chicago_marathon_women, x="country", y="Winner_Time_in_Min", color='year')
    .update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=14))
    .update_layout(title='Chicago Women Marathon Winners with their Time')
)
fig.show()

In [None]:
print("Female Marathon Winner with the Fastest Record to Win the Chicago Marathon")
print('_________________________________________________________________________________')
print("                                    ")
# idxmin() yields the index *label* of the fastest row, so the row is selected by
# label with .loc; .iloc is positional and only matches when the index is 0..n-1.
Chicago_Record = Chicago_marathon_women.loc[
    Chicago_marathon_women.groupby(['marathon'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Chicago_Record)
print("                                    ")
print('=================================================================================')

From Above Analysis 

1. Most women's marathon records belong to **Kenya**.
2. From the country-wise analysis of winning times above, we can see that **Kenya** and **Ethiopia** are dominating the women's marathon competition, and their runners hold the best records for finishing the marathon in the shortest time.
3. In the earlier years of the marathons, the **USA** was the leader in marathon competition.

## Most Marathon Female winner wrt City and Year

In [None]:
# One bar per winner, with 'year' on the y axis and segments coloured by marathon.
# The title given to px.bar is replaced by the update_layout call below.
fig = (
    px.bar(World_Marathon_female, x="winner", y="year", color="marathon", title="Marathon Winner")
    .update_xaxes(categoryorder="total descending", tickangle=270, tickfont=dict(size=10))
    .update_layout(title='Most Marathon Female winner wrt City and Year')
)
fig.show()

#### Grete Waitz is one of the most successful female marathon runners, winning 11 marathons

## Individual Female Winners with Countries

In [None]:
# One tile per winner, coloured by country; winner and country appear on hover.
# (A stale title left over from an unrelated notebook was removed; it was
# overwritten immediately and never shown.)
fig = px.treemap(World_Marathon_female, path=['winner'], hover_data=['winner', 'country'], color='country')
fig.update_layout(
    title='Individual Female Winners with Countries')
fig.show()

In [None]:
print("Female Marathon Winner with the Fastest Record to Win the Marathon")
print('_________________________________________________________________________________')
# idxmin()/idxmax() yield index *labels*, so rows are selected by label with .loc;
# .iloc is positional and only matches when the index is exactly 0..n-1.
Fastest_Marathon_Runner = World_Marathon_female.loc[
    World_Marathon_female.groupby(['gender'])['Winner_Time_in_Min'].idxmin(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Fastest_Marathon_Runner)
print("                                    ")
print('=================================================================================')
print('                                                                                 ')
print("Female Marathon Winner with the Slowest Record to Win the Marathon")
print('_________________________________________________________________________________')
Slowest_Marathon_Runner = World_Marathon_female.loc[
    World_Marathon_female.groupby(['gender'])['Winner_Time_in_Min'].idxmax(),
    ['year', 'marathon', 'winner', 'country', 'gender', 'time'],
]
print(Slowest_Marathon_Runner)
print("                                    ")
print('=================================================================================')

## Work in progress... If you liked this notebook, please upvote it