In [241]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

# EDA

## Read data

In [242]:
# Location of the World Cup summary CSV inside the Kaggle input directory.
DATA_PATH = '/kaggle/input/fifa-world-cup-winners/WorldCups - WorldCups.csv'

# Load the raw table; every later cell works on `df`.
df = pd.read_csv(DATA_PATH)

In [243]:
# Print the column names, non-null counts and dtypes to check the load.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21 entries, 0 to 20
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Year            21 non-null     int64 
 1   Country         21 non-null     object
 2   Winner          21 non-null     object
 3   Runners-Up      21 non-null     object
 4   Third           21 non-null     object
 5   Fourth          21 non-null     object
 6   GoalsScored     21 non-null     int64 
 7   QualifiedTeams  21 non-null     int64 
 8   MatchesPlayed   21 non-null     int64 
dtypes: int64(4), object(5)
memory usage: 1.6+ KB


In [244]:
# Show the loaded table as the cell's rich output.
df

Unnamed: 0,Year,Country,Winner,Runners-Up,Third,Fourth,GoalsScored,QualifiedTeams,MatchesPlayed
0,1930,Uruguay,Uruguay,Argentina,USA,Yugoslavia,70,13,18
1,1934,Italy,Italy,Czechoslovakia,Germany,Austria,70,16,17
2,1938,France,Italy,Hungary,Brazil,Sweden,84,15,18
3,1950,Brazil,Uruguay,Brazil,Sweden,Spain,88,13,22
4,1954,Switzerland,Germany FR,Hungary,Austria,Uruguay,140,16,26
5,1958,Sweden,Brazil,Sweden,France,Germany FR,126,16,35
6,1962,Chile,Brazil,Czechoslovakia,Chile,Yugoslavia,89,16,32
7,1966,England,England,Germany FR,Portugal,Soviet Union,89,16,32
8,1970,Mexico,Brazil,Italy,Germany FR,Uruguay,95,16,32
9,1974,Germany,Germany FR,Netherlands,Poland,Brazil,97,16,38


## Replace Data
#### `Germany FR` (the Federal Republic of Germany) is replaced with `Germany` so that its results are counted together with Germany's in the charts below. The Federal Republic of Germany was admitted to the United Nations on September 18, 1973; with the accession of the German Democratic Republic in 1990, the two German states united to form one sovereign state, Germany.

In [245]:
# Rename 'Germany FR' to 'Germany' in every column so both labels are counted
# as one team. With regex=True the pattern is matched as a regular expression
# inside each string value rather than against the whole cell.
df = df.replace(
    to_replace='Germany FR',
    value='Germany',
    regex=True,
)

# Display the table after the replacement.
df

Unnamed: 0,Year,Country,Winner,Runners-Up,Third,Fourth,GoalsScored,QualifiedTeams,MatchesPlayed
0,1930,Uruguay,Uruguay,Argentina,USA,Yugoslavia,70,13,18
1,1934,Italy,Italy,Czechoslovakia,Germany,Austria,70,16,17
2,1938,France,Italy,Hungary,Brazil,Sweden,84,15,18
3,1950,Brazil,Uruguay,Brazil,Sweden,Spain,88,13,22
4,1954,Switzerland,Germany,Hungary,Austria,Uruguay,140,16,26
5,1958,Sweden,Brazil,Sweden,France,Germany,126,16,35
6,1962,Chile,Brazil,Czechoslovakia,Chile,Yugoslavia,89,16,32
7,1966,England,England,Germany,Portugal,Soviet Union,89,16,32
8,1970,Mexico,Brazil,Italy,Germany,Uruguay,95,16,32
9,1974,Germany,Germany,Netherlands,Poland,Brazil,97,16,38


## Adding the latest data (World Cup 2022)

In [246]:
# Summary row for the 2022 World Cup; the keys mirror the existing columns.
new_data = {
    'Year': [2022],
    'Country': ['Qatar'],
    'Winner': ['Argentina'],
    'Runners-Up': ['France'],
    'Third': ['Croatia'],
    'Fourth': ['Morocco'],
    'GoalsScored': [172],
    'QualifiedTeams': [32],
    'MatchesPlayed': [64]
}

new_row = pd.DataFrame(new_data)

# Only append when the year is not present yet, so re-running this cell does
# not add a duplicate 2022 row.
if not df['Year'].isin(new_row['Year']).any():
    # pd.concat is the public replacement for DataFrame.append, which was
    # removed in pandas 2.0; `_append` is a private method and should not be
    # relied on.
    df = pd.concat([df, new_row], ignore_index=True)

df

Unnamed: 0,Year,Country,Winner,Runners-Up,Third,Fourth,GoalsScored,QualifiedTeams,MatchesPlayed
0,1930,Uruguay,Uruguay,Argentina,USA,Yugoslavia,70,13,18
1,1934,Italy,Italy,Czechoslovakia,Germany,Austria,70,16,17
2,1938,France,Italy,Hungary,Brazil,Sweden,84,15,18
3,1950,Brazil,Uruguay,Brazil,Sweden,Spain,88,13,22
4,1954,Switzerland,Germany,Hungary,Austria,Uruguay,140,16,26
5,1958,Sweden,Brazil,Sweden,France,Germany,126,16,35
6,1962,Chile,Brazil,Czechoslovakia,Chile,Yugoslavia,89,16,32
7,1966,England,England,Germany,Portugal,Soviet Union,89,16,32
8,1970,Mexico,Brazil,Italy,Germany,Uruguay,95,16,32
9,1974,Germany,Germany,Netherlands,Poland,Brazil,97,16,38


## Year-to-year statistics

In [247]:
# Plot goals scored, qualified teams and matches played per tournament.
# Passing a list to `y` puts plotly express in wide-form mode, where the
# series name is exposed as an implicit 'variable' column. Mapping `line_dash`
# to that column is what makes `line_dash_sequence` take effect; without a
# `line_dash` mapping every line is drawn with the first pattern only.
fig = px.line(
    df,
    x='Year',
    y=['GoalsScored', 'QualifiedTeams', 'MatchesPlayed'],
    line_dash='variable',
    labels={'value': 'Total'},
    line_dash_sequence=['solid', 'dash', 'dot'],
)

# Centre the title and apply the dark theme.
fig.update_layout(
    title='FIFA World Cup Stats (1930-2022)',
    title_x=0.5,
    template='plotly_dark',
)
fig.show()

## Which country has won the most?

In [248]:
# Number of titles per country, most frequent winner first.
most_win = df['Winner'].value_counts()
countries, wins = most_win.index, most_win.values

# One bar per country, coloured by country.
fig = px.bar(
    x=countries,
    y=wins,
    color=countries,
    labels={'y': 'Number of Wins', 'x': 'Country'},
)

# Title, axis captions and theme are applied after the figure is built.
layout_options = {
    'title': 'Fifa World Cup Wins (1930-2022)',
    'title_x': 0.5,
    'xaxis_title': 'Country',
    'yaxis_title': 'Number of Wins',
    'template': 'plotly_dark',
}
fig.update_layout(**layout_options)
fig.show()

## Which country has appeared in the most finals?

In [249]:
# Stack the winner and runner-up columns into one long series so that every
# final contributes both of its teams.
finalist_names = df[['Winner', 'Runners-Up']].stack()
finalists_df = pd.DataFrame({'Country': finalist_names, 'Result': 'Finalist'})

# Appearances in a final per country, most frequent first.
finalists_counts = finalists_df['Country'].value_counts()
countries, appearances = finalists_counts.index, finalists_counts.values

# One bar per country, coloured by country.
fig = px.bar(
    x=countries,
    y=appearances,
    color=countries,
    labels={'y': 'Number of Finalist Appearances', 'x': 'Country'},
    title='Countries with Most Finalist Appearances (1930-2022)',
    template='plotly_dark',
)

# Centre the title and set the axis captions.
layout_options = {
    'title_x': 0.5,
    'xaxis_title': 'Country',
    'yaxis_title': 'Number of Finalist Appearances',
}
fig.update_layout(**layout_options)

fig.show()

# Thank You :)