In [None]:
# Optional environment setup, kept commented out; uncomment to install the
# packages listed in requirements.txt and the development build of
# pandas-profiling from GitHub.
# !pip3 install -r requirements.txt
# !pip3 install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip

In [196]:
import pandas as pd
import streamlit
import altair as alt
from vega_datasets import data
# import pandas_profiling as pp

In [197]:
# Read both raw CSVs (paths are relative to the working directory)
fifa, gps = (pd.read_csv(path) for path in ('fifa19.csv', 'worldcities.csv'))

In [198]:
# Give both frames a shared 'Nationality Country' key column, seeded from each
# dataset's own country field, so the names can be harmonized later without
# modifying the original columns.
for frame, country_col in ((fifa, 'Nationality'), (gps, 'country')):
    frame['Nationality Country'] = frame[country_col].copy()

In [199]:
# How many FIFA nationalities have no identically named country in the GPS data?
nationalities, countries = (
    frame['Nationality Country'].unique() for frame in (fifa, gps)
)
unmatched = set(nationalities).difference(set(countries))
print(len(unmatched))

26


In [200]:
# Replace country names in GPS dataset based on geographically equivalent but differently named country in FIFA 19 dataset
gps['Nationality Country'] = gps['Nationality Country'].replace('Congo (Kinshasa)', 'DR Congo')
gps['Nationality Country'] = gps['Nationality Country'].replace('Congo (Brazzaville)', 'Congo')
gps['Nationality Country'] = gps['Nationality Country'].replace('Gambia, The', 'Gambia')
gps['Nationality Country'] = gps['Nationality Country'].replace('Sao Tome And Principe', 'São Tomé & Príncipe')
gps['Nationality Country'] = gps['Nationality Country'].replace('Antigua And Barbuda', 'Antigua & Barbuda')
gps['Nationality Country'] = gps['Nationality Country'].replace('Czechia', 'Czech Republic')
gps['Nationality Country'] = gps['Nationality Country'].replace('Korea, South', 'Korea Republic')
gps['Nationality Country'] = gps['Nationality Country'].replace('Korea, North', 'Korea DPR')
gps['Nationality Country'] = gps['Nationality Country'].replace('China', 'China PR')
gps['Nationality Country'] = gps['Nationality Country'].replace('Cabo Verde', 'Cape Verde')
gps['Nationality Country'] = gps['Nationality Country'].replace('Ireland', 'Republic of Ireland')
gps['Nationality Country'] = gps['Nationality Country'].replace('Saint Kitts And Nevis', 'Saint Kitts & Nevis')
gps['Nationality Country'] = gps['Nationality Country'].replace('Trinidad And Tobago', 'Trinidad & Tobago')
gps['Nationality Country'] = gps['Nationality Country'].replace('Bosnia And Herzegovina', 'Bosnia & Herzegovina')

In [201]:
# The GPS data only knows 'United Kingdom', so fold the four home nations
# that FIFA lists separately into that single label (FIFA => GPS).
UK_HOME_NATIONS = ['England', 'Scotland', 'Wales', 'Northern Ireland']
fifa['Nationality Country'] = fifa['Nationality Country'].replace(
    UK_HOME_NATIONS, 'United Kingdom'
)

In [202]:
# Rename FIFA-side countries to the spelling used in the GPS dataset for the
# same territory. Keys are the FIFA names, values are the GPS names.
FIFA_TO_GPS_NAMES = {
    'FYR Macedonia': 'Macedonia',
    'Central African Rep.': 'Central African Republic',
    'Guinea Bissau': 'Guinea-Bissau',
    'Curacao': 'Curaçao',
    'Bosnia Herzegovina': 'Bosnia & Herzegovina',
    'St Kitts Nevis': 'Saint Kitts & Nevis',
    'St Lucia': 'Saint Lucia',
    'Ivory Coast': 'Côte D’Ivoire',
}
fifa['Nationality Country'] = fifa['Nationality Country'].replace(FIFA_TO_GPS_NAMES)

In [204]:
# Second pass of the coverage check, after the names were harmonized:
# collect the FIFA nationalities that still have no GPS country.
nationalities, countries = (
    frame['Nationality Country'].unique() for frame in (fifa, gps)
)
x = set(nationalities).difference(set(countries))
print(len(x))
x

2


{'Montserrat', 'Palestine'}

In [233]:
# Collapse the GPS rows to one point per country by taking the mean latitude
# and the mean longitude of all rows that share a 'Nationality Country'.
country_avg_gps = (
    gps.groupby('Nationality Country')
    .agg({'lat': ['mean'], 'lng': ['mean']})
    .reset_index()
)

# Flatten the two-level (column, statistic) header into plain columns
gps_avg = pd.DataFrame({
    'Nationality Country': country_avg_gps['Nationality Country'],
    'Lat': country_avg_gps[('lat', 'mean')],
    'Lng': country_avg_gps[('lng', 'mean')],
})

In [251]:
# Manually add GPS info for Montserrat and Palestine based on Google search result.
#
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# the rows are added with pd.concat instead.
#
# De-duplicating on the country key keeps this cell idempotent: re-running it
# would otherwise stack repeated rows, and duplicate keys in gps_avg multiply
# player rows in any merge on 'Nationality Country'. keep='last' lets the
# manual values win if a country is already present.
manual_gps = pd.DataFrame([
    {'Nationality Country': 'Montserrat', 'Lat': 16.7425, 'Lng': -62.1874},
    {'Nationality Country': 'Palestine', 'Lat': 31.9522, 'Lng': 35.2332},
])
gps_avg = (
    pd.concat([gps_avg, manual_gps], ignore_index=True)
    .drop_duplicates(subset='Nationality Country', keep='last')
    .reset_index(drop=True)
)

In [255]:
# Save updated dfs to csv files (disabled; uncomment to write the files).
# NOTE: DataFrame.to_csv writes the row index as an extra leading column by
# default; pass index=False if that column is not wanted when the file is
# read back.
# fifa.to_csv('clean_fifa.csv')
# gps_avg.to_csv('clean_worldcities.csv')

In [267]:
# World map of the per-country average coordinates, labelled on hover.
# Adapted from: https://altair-viz.github.io/gallery/world_map.html

# Generated geometry for the globe outline and the latitude/longitude grid
sphere, graticule = alt.sphere(), alt.graticule()

# Country outlines from the vega_datasets world topology
source = alt.topo_feature(data.world_110m.url, 'countries')

# Stack ocean, grid lines and land into one projected backdrop.
# The view configuration lives on this layered chart, which is why the final
# composition below extends it with `+` rather than nesting it in a new layer.
ocean = alt.Chart(sphere).mark_geoshape(fill='lightblue')
grid = alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.2)
land = alt.Chart(source).mark_geoshape(fill='lightgray', stroke='black')
background = (
    alt.layer(ocean, grid, land)
    .project('equirectangular')
    .properties(width=600, height=400)
    .configure_view(stroke=None)
)

# Single-item selection driven by mouseover, snapping to the nearest marker
hover = alt.selection(type='single', on='mouseover', nearest=True, fields=['Lat', 'Lng'])

# Position encoding shared by the markers and their labels
base = alt.Chart(gps_avg).encode(latitude='Lat:Q', longitude='Lng:Q')

# Black markers that grow from size 30 to 100 while selected; they carry the selection
points = (
    base.mark_point()
    .encode(
        size=alt.condition(~hover, alt.value(30), alt.value(100)),
        color=alt.value('black'),
    )
    .add_selection(hover)
)

# Country labels are fully transparent unless their marker is the selected one
text = base.mark_text(dy=-5, align='right').encode(
    text=alt.Text('Nationality Country', type='nominal'),
    opacity=alt.condition(~hover, alt.value(0), alt.value(1)),
)

background + points + text

In [238]:
# Left join: every player row is kept and gains the Lat/Lng of its
# 'Nationality Country' from gps_avg.
fifa_and_gps = pd.merge(fifa, gps_avg, how='left', on='Nationality Country')
fifa_and_gps

Unnamed: 0.1,Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,...,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Release Clause,Nationality Country,Lat,Lng
0,0,158023,L. Messi,31,https://cdn.sofifa.org/players/4/19/158023.png,Argentina,https://cdn.sofifa.org/flags/52.png,94,94,FC Barcelona,...,26.0,6.0,11.0,15.0,14.0,8.0,€226.5M,Argentina,-34.305020,-62.605795
1,1,20801,Cristiano Ronaldo,33,https://cdn.sofifa.org/players/4/19/20801.png,Portugal,https://cdn.sofifa.org/flags/38.png,94,94,Juventus,...,23.0,7.0,11.0,15.0,14.0,11.0,€127.1M,Portugal,39.585171,-9.315866
2,2,190871,Neymar Jr,26,https://cdn.sofifa.org/players/4/19/190871.png,Brazil,https://cdn.sofifa.org/flags/54.png,92,93,Paris Saint-Germain,...,33.0,9.0,9.0,15.0,15.0,11.0,€228.1M,Brazil,-18.886964,-47.965362
3,3,193080,De Gea,27,https://cdn.sofifa.org/players/4/19/193080.png,Spain,https://cdn.sofifa.org/flags/45.png,91,93,Manchester United,...,13.0,90.0,85.0,87.0,88.0,94.0,€138.6M,Spain,39.541228,-3.773221
4,4,192985,K. De Bruyne,27,https://cdn.sofifa.org/players/4/19/192985.png,Belgium,https://cdn.sofifa.org/flags/7.png,91,92,Manchester City,...,51.0,15.0,13.0,5.0,10.0,13.0,€196.4M,Belgium,50.694714,4.416038
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18202,18202,238813,J. Lundstram,19,https://cdn.sofifa.org/players/4/19/238813.png,England,https://cdn.sofifa.org/flags/14.png,47,65,Crewe Alexandra,...,47.0,10.0,13.0,7.0,8.0,9.0,€143K,United Kingdom,52.751392,-1.663178
18203,18203,243165,N. Christoffersson,19,https://cdn.sofifa.org/players/4/19/243165.png,Sweden,https://cdn.sofifa.org/flags/46.png,47,63,Trelleborgs FF,...,19.0,10.0,9.0,9.0,5.0,12.0,€113K,Sweden,59.352114,15.446151
18204,18204,241638,B. Worman,16,https://cdn.sofifa.org/players/4/19/241638.png,England,https://cdn.sofifa.org/flags/14.png,47,67,Cambridge United,...,11.0,6.0,5.0,10.0,6.0,13.0,€165K,United Kingdom,52.751392,-1.663178
18205,18205,246268,D. Walker-Rice,17,https://cdn.sofifa.org/players/4/19/246268.png,England,https://cdn.sofifa.org/flags/14.png,47,66,Tranmere Rovers,...,27.0,14.0,6.0,14.0,8.0,9.0,€143K,United Kingdom,52.751392,-1.663178


In [63]:
# Get average age, overall rating, and potential rating for each country of player origin.
#
# The player table is loaded as `fifa` in this notebook; `df` is not defined in
# any visible cell, so the old expression only worked with leftover kernel state.
#
# The three numeric columns are selected before averaging so that text columns
# (names, URLs, clubs, ...) never reach mean(), which raises a TypeError on
# pandas >= 2.0. The result has the same columns as before:
# Nationality, Age, Overall, Potential.
nationality_avg = (
    fifa.groupby('Nationality', as_index=False)[['Age', 'Overall', 'Potential']]
    .mean()
)

In [64]:
# Scatter of the per-nationality averages: Age on x, Overall on y, one colour
# per nationality, with a tooltip listing all averaged fields.
(
    alt.Chart(nationality_avg)
    .mark_point()
    .encode(
        alt.X('Age'),
        alt.Y('Overall'),
        alt.Color('Nationality'),
        tooltip=['Nationality', 'Age', 'Overall', 'Potential'],
    )
    .interactive()
)

In [65]:
# Count number of players of each nationality.
# Reads from `fifa`, the frame this notebook loads; `df` is not defined in any
# visible cell, so the original line failed on a fresh kernel.
nationality_counts = fifa.groupby('Nationality')['Nationality'].count()
nationality_counts

Nationality
Afghanistan      4
Albania         40
Algeria         60
Andorra          1
Angola          15
              ... 
Uzbekistan       2
Venezuela       67
Wales          129
Zambia           9
Zimbabwe        13
Name: Nationality, Length: 164, dtype: int64

In [60]:
# Load the stadium GPS table from the local CSV
stadiums_df = pd.read_csv(filepath_or_buffer="stadiums-with-GPS-coordinates.csv")

In [61]:
# Compare stadium team names with FIFA club names to see how well they line up.
#
# The stadium table is loaded as `stadiums_df` and the player table as `fifa`;
# the names `stadiums` and `df` used previously are not defined in any visible
# cell, so this cell raised NameError on a fresh kernel.
teams = set(stadiums_df['Team'])
clubs = set(fifa['Club'])

# Names present on only one side, then the symmetric difference of both sets
print("Teams without Clubs: %d" % len(teams - clubs))
print("Clubs without Teams: %d" % len(clubs - teams))
print("Missing pairs: %d" % len(teams ^ clubs))

Teams without Clubs: 136
Clubs without Teams: 652
Missing pairs: 788


In [5]:
# Stand-alone world basemap (no data points) in the 'naturalEarth1' projection.
# Adapted from: https://altair-viz.github.io/gallery/world_map.html

# Generated geometry for the globe outline and the latitude/longitude grid
sphere, graticule = alt.sphere(), alt.graticule()

# Country outlines from the vega_datasets world topology
source = alt.topo_feature(data.world_110m.url, 'countries')

# Ocean, grid lines and land, drawn bottom to top
basemap_layers = [
    alt.Chart(sphere).mark_geoshape(fill='lightblue'),
    alt.Chart(graticule).mark_geoshape(stroke='white', strokeWidth=0.5),
    alt.Chart(source).mark_geoshape(fill='ForestGreen', stroke='black'),
]
(
    alt.layer(*basemap_layers)
    .project('naturalEarth1')
    .properties(width=600, height=400)
    .configure_view(stroke=None)
)

In [11]:
# URL of the state-capitals dataset, and the 'states' feature of the US
# topology, both taken from vega_datasets
capitals = data.us_state_capitals.url
states = alt.topo_feature(data.us_10m.url, feature='states')

In [15]:
# Preview the capitals table (lon, lat, state, city) read from the dataset URL
pd.read_json(path_or_buf=capitals)

Unnamed: 0,lon,lat,state,city
0,-86.279118,32.361538,Alabama,Montgomery
1,-134.41974,58.301935,Alaska,Juneau
2,-112.073844,33.448457,Arizona,Phoenix
3,-92.331122,34.736009,Arkansas,Little Rock
4,-121.468926,38.555605,California,Sacramento
5,-104.984167,39.739167,Colorado,Denver
6,-72.677,41.767,Connecticut,Hartford
7,-75.526755,39.161921,Delaware,Dover
8,-84.27277,30.4518,Florida,Tallahassee
9,-84.39,33.76,Georgia,Atlanta


In [None]:
# Map of US state capitals: the marker nearest the mouse is enlarged and its
# city name is revealed.

# Light-gray state outlines in the Albers USA projection
background = (
    alt.Chart(states)
    .mark_geoshape(fill='lightgray', stroke='white')
    .project('albersUsa')
    .properties(title='US State Capitols', width=650, height=400)
)

# Single-item selection driven by mouseover, snapping to the nearest marker
hover = alt.selection(
    type='single', on='mouseover', nearest=True, fields=['lat', 'lon']
)

# Position encoding shared by the markers and their labels
base = alt.Chart(capitals).encode(latitude='lat:Q', longitude='lon:Q')

# Black markers that grow from size 30 to 100 while selected; they carry the selection
points = (
    base.mark_point()
    .encode(
        size=alt.condition(~hover, alt.value(30), alt.value(100)),
        color=alt.value('black'),
    )
    .add_selection(hover)
)

# City labels are fully transparent unless their marker is the selected one
text = base.mark_text(dy=-5, align='right').encode(
    text=alt.Text('city', type='nominal'),
    opacity=alt.condition(~hover, alt.value(0), alt.value(1)),
)

background + points + text