In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import datetime as dt

In [2]:
# Relative path of the raw FIFA ranking export.
fifa_csv = "data/fifa_ranking_2020_11_26.csv"

# Read the complete file into a DataFrame and preview its first rows.
fifa_df = pd.read_csv(filepath_or_buffer=fifa_csv)
fifa_df.head(n=5)

Unnamed: 0,id,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
0,43948,1,Germany,GER,57,0,0,UEFA,1992-12-31
1,43873,107,Mozambique,MOZ,9,0,0,CAF,1992-12-31
2,43816,108,Indonesia,IDN,9,0,0,AFC,1992-12-31
3,1882218,109,Antigua and Barbuda,ATG,8,0,0,CONCACAF,1992-12-31
4,43820,110,Jordan,JOR,8,0,0,AFC,1992-12-31


In [3]:
# Drop every row that has at least one missing value.
#
# Only `how` is passed: pandas >= 2.0 raises TypeError when `how` and `thresh`
# are both supplied (even as `thresh=None`). The result is rebound instead of
# using `inplace=True`, so re-running this cell is harmless.
fifa_df = fifa_df.dropna(axis=0, how="any")

In [4]:
# Show the column labels of the loaded ranking frame.
fifa_df.columns

Index(['id', 'rank', 'country_full', 'country_abrv', 'total_points',
       'previous_points', 'rank_change', 'confederation', 'rank_date'],
      dtype='object')

In [5]:
# Keep only the identifier, rank, country name, confederation and ranking date;
# the points and abbreviation columns are left behind.
selected_columns = ["id", "rank", "country_full", "confederation", "rank_date"]
fifa_data_df = fifa_df.loc[:, selected_columns]

# Preview the reduced frame.
fifa_data_df.head()

Unnamed: 0,id,rank,country_full,confederation,rank_date
0,43948,1,Germany,UEFA,1992-12-31
1,43873,107,Mozambique,CAF,1992-12-31
2,43816,108,Indonesia,AFC,1992-12-31
3,1882218,109,Antigua and Barbuda,CONCACAF,1992-12-31
4,43820,110,Jordan,AFC,1992-12-31


In [6]:
# NOTE: abandoned draft of the 2015-2019 date filter, kept for reference only.
# It is not executed. Problems with this draft:
#   * the first attempt compares against unquoted `2015-1-1`, which is integer
#     arithmetic, not a date;
#   * both attempts combine the bounds with `|` (OR); a range filter needs `&`.
# The following two cells apply the lower and upper bound one after the other.
# # fifa_data_df = fifa_data_df[(fifa_data_df["rank_date"] >= 2015-1-1 | fifa_data_df["rank_date"] <= 2019-12-31)]
# from_date = '2015-01-1'
# to_date = '2019-12-31'

# fifa_data_clean = fifa_data_df[(fifa_data_df['rank_date'] >= from_date) | (fifa_data_df['rank_date'] <= to_date)]

In [7]:
# Lower bound of the study window: keep rankings dated 1 January 2015 or later.
# The comparison is done against a "YYYY-MM-DD" string literal.
on_or_after_2015 = fifa_data_df["rank_date"] >= "2015-01-01"

fifa_data_clean2015 = fifa_data_df.loc[on_or_after_2015]
fifa_data_clean2015

Unnamed: 0,id,rank,country_full,confederation,rank_date
48970,43961,67,Norway,UEFA,2015-01-08
48971,43910,75,Jamaica,CONCACAF,2015-01-08
48972,43908,73,Haiti,CONCACAF,2015-01-08
48973,43906,73,Guatemala,CONCACAF,2015-01-08
48974,43909,72,Honduras,CONCACAF,2015-01-08
...,...,...,...,...,...
62209,1930407,76,Curaçao,CONCACAF,2020-11-26
62210,43914,78,Panama,CONCACAF,2020-11-26
62211,43889,79,Uganda,CAF,2020-11-26
62212,43850,81,Cabo Verde,CAF,2020-11-26


In [8]:
# Upper bound of the study window: keep rankings dated 31 December 2019 or earlier.
# Applied on top of the 2015+ subset, this leaves the 2015-2019 rows.
through_2019 = fifa_data_clean2015["rank_date"] <= "2019-12-31"

fifa_data_clean = fifa_data_clean2015.loc[through_2019]
fifa_data_clean

Unnamed: 0,id,rank,country_full,confederation,rank_date
48970,43961,67,Norway,UEFA,2015-01-08
48971,43910,75,Jamaica,CONCACAF,2015-01-08
48972,43908,73,Haiti,CONCACAF,2015-01-08
48973,43906,73,Guatemala,CONCACAF,2015-01-08
48974,43909,72,Honduras,CONCACAF,2015-01-08
...,...,...,...,...,...
60739,43814,141,Hong Kong,AFC,2019-12-19
60740,43919,141,Suriname,CONCACAF,2019-12-19
60741,43980,141,Solomon Islands,OFC,2019-12-19
60742,43842,144,Yemen,AFC,2019-12-19


In [9]:
# How many distinct country names appear in the cleaned rankings.
# dropna=False makes nunique() count exactly what len(unique()) counts.
country_count = fifa_data_clean["country_full"].nunique(dropna=False)
country_count

211

In [10]:
# How many distinct rank values occur in the cleaned rankings.
# (Author's note: ranks run from 1 to 211 in some instances; verified in Excel.)
rank_count = fifa_data_clean["rank"].nunique(dropna=False)
rank_count

211

In [11]:
# How many distinct confederations are represented in the cleaned rankings.
confederation_count = fifa_data_clean["confederation"].nunique(dropna=False)
confederation_count

6

In [12]:
# List every country present in the cleaned rankings.
# The value shown next to each name is the number of distinct country names
# inside that country's own group, so it is 1 for every row by construction;
# the useful part of this output is the index (the country names).
df_country_groupby = fifa_data_clean.groupby(by="country_full")
df_country_nunique = df_country_groupby["country_full"].nunique()
df_country_nunique

country_full
Afghanistan       1
Albania           1
Algeria           1
American Samoa    1
Andorra           1
                 ..
Vietnam           1
Wales             1
Yemen             1
Zambia            1
Zimbabwe          1
Name: country_full, Length: 211, dtype: int64

AFC - Asian Football Confederation
CAF - Confederation of African Football
CONCACAF - Confederation of North, Central America and Caribbean Association Football
CONMEBOL - South American Football Confederation
OFC - Oceania Football Confederation
UEFA - Union of European Football Associations

In [13]:
# Distinct-value counts per confederation for every column.
# The `country_full` column of the result is the number of national teams
# in each confederation.
df_confederation_groupby = fifa_data_clean.groupby(by="confederation")
df_confederation_groupby.nunique()

Unnamed: 0_level_0,id,rank,country_full,confederation,rank_date
confederation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AFC,46,186,46,1,56
CAF,54,190,54,1,56
CONCACAF,35,200,35,1,56
CONMEBOL,10,88,10,1,56
OFC,11,89,11,1,56
UEFA,55,205,55,1,56


In [14]:
def _confederation_mean(confederation):
    """Return the column means of the numeric columns for one confederation.

    Parameters
    ----------
    confederation : str
        Confederation code as stored in the ``confederation`` column
        (e.g. ``"AFC"``).

    Returns
    -------
    pandas.Series
        Mean of each numeric column (``id`` and ``rank``) over the rows of
        ``fifa_data_clean`` that belong to that confederation. Only the
        ``rank`` entry is meaningful; ``id`` is averaged simply because it is
        numeric.
    """
    members = fifa_data_clean[fifa_data_clean["confederation"] == confederation]
    # numeric_only=True is required: the frame also holds text columns, and
    # pandas >= 2.0 raises TypeError when asked to average those.
    return members.mean(numeric_only=True)


# One named result per confederation, as before.
df_AFC_average_rank = _confederation_mean("AFC")
df_CAF_average_rank = _confederation_mean("CAF")
df_CONCACAF_average_rank = _confederation_mean("CONCACAF")
df_CONMEBOL_average_rank = _confederation_mean("CONMEBOL")
df_OFC_average_rank = _confederation_mean("OFC")
df_UEFA_average_rank = _confederation_mean("UEFA")

# Print each confederation's averages followed by a separator line.
for label, averages in [
    ("AFC", df_AFC_average_rank),
    ("CAF", df_CAF_average_rank),
    ("CONCACAF", df_CONCACAF_average_rank),
    ("CONMEBOL", df_CONMEBOL_average_rank),
    ("OFC", df_OFC_average_rank),
    ("UEFA", df_UEFA_average_rank),
]:
    print(label)
    print(averages)
    print("----------")

AFC
id      81921.760870
rank      131.996506
dtype: float64
----------
CAF
id      112328.796296
rank       106.419643
dtype: float64
----------
CONCACAF
id      200120.771429
rank       132.582653
dtype: float64
----------
CONMEBOL
id      43926.500000
rank       27.467857
dtype: float64
----------
OFC
id      207844.058824
rank       174.774510
dtype: float64
----------
UEFA
id      123117.298488
rank        64.958580
dtype: float64
----------


In [15]:
# Mean of the numeric columns (id, rank) over Germany's rows in the cleaned data.
# numeric_only=True keeps the text columns out of the reduction; without it
# pandas >= 2.0 raises TypeError on this mixed-dtype frame.
df_germany_average_rank = fifa_data_clean[fifa_data_clean["country_full"] == "Germany"].mean(numeric_only=True)
df_germany_average_rank

id      43948.000000
rank        5.160714
dtype: float64

In [16]:
# Mean of the numeric columns (id, rank) over France's rows in the cleaned data.
# numeric_only=True keeps the text columns out of the reduction; without it
# pandas >= 2.0 raises TypeError on this mixed-dtype frame.
df_france_average_rank = fifa_data_clean[fifa_data_clean["country_full"] == "France"].mean(numeric_only=True)
df_france_average_rank

id      43946.000000
rank        9.517857
dtype: float64

In [17]:
# Mean of the numeric columns (id, rank) over the USA's rows in the cleaned data.
# numeric_only=True keeps the text columns out of the reduction; without it
# pandas >= 2.0 raises TypeError on this mixed-dtype frame.
df_usa_average_rank = fifa_data_clean[fifa_data_clean["country_full"] == "USA"].mean(numeric_only=True)
df_usa_average_rank

id      43921.000
rank       26.625
dtype: float64

In [18]:
# Mean of the numeric columns (id, rank) over Brazil's rows in the cleaned data.
# numeric_only=True keeps the text columns out of the reduction; without it
# pandas >= 2.0 raises TypeError on this mixed-dtype frame.
df_brazil_average_rank = fifa_data_clean[fifa_data_clean["country_full"] == "Brazil"].mean(numeric_only=True)
df_brazil_average_rank

id      43924.000000
rank        3.785714
dtype: float64

In [19]:
# Mean of the numeric columns (id, rank) over Russia's rows in the cleaned data.
# numeric_only=True keeps the text columns out of the reduction; without it
# pandas >= 2.0 raises TypeError on this mixed-dtype frame.
df_russia_average_rank = fifa_data_clean[fifa_data_clean["country_full"] == "Russia"].mean(numeric_only=True)
df_russia_average_rank

id      43965.000000
rank       45.178571
dtype: float64

In [20]:
# Mean of the numeric columns (id, rank) over India's rows in the cleaned data.
# numeric_only=True keeps the text columns out of the reduction; without it
# pandas >= 2.0 raises TypeError on this mixed-dtype frame.
df_india_average_rank = fifa_data_clean[fifa_data_clean["country_full"] == "India"].mean(numeric_only=True)
df_india_average_rank

id      43815.000000
rank      126.357143
dtype: float64

In [23]:
# Write the cleaned 2015-2019 rankings to disk, keeping the original row index
# as the first CSV column.
# The return value is deliberately not assigned: to_csv() returns None when
# given a path, so rebinding `fifa_data_clean` would discard the DataFrame and
# break any re-run of this cell or of the cells above.
fifa_data_clean.to_csv("data/fifa_data_clean.csv", index=True)