In [1]:
# All imports come first so a fresh kernel resolves every dependency
# before any work is done.
import altair as alt
import pandas as pd

# Load the transfer table that every later cell reads through `data`.
# The preview of the first rows lives in the next cell; a bare
# `data.head()` in the middle of this cell would not be displayed.
data = pd.read_csv('premierLeague.csv')

In [2]:
# Preview the first five rows of the loaded table.
data.head(n=5)

Unnamed: 0,club_name,player_name,age,position,fee_millions,year,season
0,Chelsea FC,Eden Hazard,21.0,Winger,35.0,2012,2012/2013
1,Chelsea FC,Oscar,20.0,Attacking Midfield,32.0,2012,2012/2013
2,Chelsea FC,Victor Moses,21.0,Winger,11.5,2012,2012/2013
3,Chelsea FC,César Azpilicueta,22.0,Full-Back,8.8,2012,2012/2013
4,Chelsea FC,Marko Marin,23.0,Attacking Midfield,8.0,2012,2012/2013


In [3]:
# Multi-selection keyed on `season`, triggered by mouseover.
selection = alt.selection(type="multi", on="mouseover", fields=["season"])

# Parent chart that the detail view is derived from.
base = alt.Chart(data).properties(height=250, width=700)

# Overview: summed fee per season. Bars inside the selection are drawn in
# orange, all others in light grey.
overview = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x="season",
        y="sum(fee_millions)",
        color=alt.condition(
            selection,
            alt.value("orange"),
            alt.value("lightgrey"),
        ),
    )
    .add_selection(selection)
    .properties(width=250, height=250)
)

# Detail: summed fee per club, restricted to the rows matching the selection.
# The final .properties() call overrides the size inherited from `base`.
detail = hist = (
    base
    .mark_bar()
    .encode(x="club_name", y="sum(fee_millions)")
    .transform_filter(selection)
    .properties(width=500, height=250)
)

# Show both views side by side.
overview | detail

In [4]:
# Multi-selection keyed on `position`, triggered by mouseover. It is shared
# by all three views built in this cell.
selection = alt.selection(type="multi", fields=["position"], on='mouseover')

# NOTE(review): `base` is not referenced by any chart in this cell; it is
# kept only so the notebook namespace stays the same. Confirm before removing.
base = alt.Chart(data).properties(width=500, height=250)

# Overview: mean fee per position. Bars inside the selection are red,
# all others light grey.
overview = alt.Chart(data).mark_bar().encode(
    y="mean(fee_millions)",
    x="position",
    color=alt.condition(selection, alt.value("red"), alt.value("lightgrey"))
).add_selection(selection).properties(height=250, width=250)

# Detail: one circle per row, fee against season, coloured by position.
# Points outside the selection are made (almost) fully transparent rather
# than filtered out.
detail = alt.Chart(data).mark_circle().encode(
    x="season",
    y="fee_millions",
    color="position",
    tooltip=["player_name", "club_name"],
    opacity=alt.condition(selection, alt.value(1), alt.value(.0001))
).add_selection(selection)

# Mean fee per season, one line per position, restricted to the selection.
# No mark-level colour is set: the `color` encoding determines the line
# colour, so a fixed mark colour would have no visible effect.
mean_line = alt.Chart(data).mark_line().encode(
    x="season",
    y="mean(fee_millions)",
    color="position"
).transform_filter(selection)

# Give the mean-line view an explicit size.
mean_chart = mean_line.properties(width=500, height=250)

# Layout: overview and detail side by side (|), mean chart underneath (&).
combined_chart = (overview | detail) & mean_chart

combined_chart




In [6]:
# Dropdown widget listing every distinct season found in the data.
dropdown = alt.binding_select(
    name="Select season:",
    options=data["season"].unique(),
)

# Two single-value selections: one on `age` driven by mouseover, one on
# `season` bound to the dropdown above.
selection = alt.selection(type="single", on="mouseover", fields=["age"])
selection2 = alt.selection(type="single", bind=dropdown, fields=["season"])

# Overview: fee against age, one circle per row. Points outside the age
# selection are faded to 5% opacity.
overview = (
    alt.Chart(data)
    .mark_circle()
    .encode(
        x="age",
        y="fee_millions",
        tooltip=["player_name", "club_name"],
        opacity=alt.condition(selection, alt.value(1), alt.value(.05)),
    )
    .add_selection(selection, selection2)
)

# Detail: histogram of fees in bins of width 5, filtered first by the age
# selection and then by the season selection.
detail = histogram = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x=alt.X(
            'fee_millions:Q',
            bin=alt.Bin(step=5),
            title='Transfer Fee (in millions)',
        ),
        y='count():Q',
        color=alt.value('steelblue'),
    )
    .properties(width=600, height=400, title="Histogram of Transfer Fees")
    .transform_filter(selection)
    .transform_filter(selection2)
)

# Show both views side by side.
overview | detail

In [11]:

# Mean fee per season and per club, each as a two-column frame
# (group key, fee_millions).
avg_fee_per_season = data.groupby('season')['fee_millions'].mean().reset_index()
avg_fee_per_club = data.groupby('club_name')['fee_millions'].mean().reset_index()

# Attach both averages to every row, then compute how far each fee lies
# from its season average and from its club average. The merge suffixes
# produce the columns `fee_millions_avg_season` and `fee_millions_avg_club`.
data_merged = (
    data
    .merge(avg_fee_per_season, on='season', suffixes=('', '_avg_season'))
    .merge(avg_fee_per_club, on='club_name', suffixes=('', '_avg_club'))
    .assign(
        fee_diff_season=lambda df: df['fee_millions'] - df['fee_millions_avg_season'],
        fee_diff_club=lambda df: df['fee_millions'] - df['fee_millions_avg_club'],
    )
)

# Encodings shared by the scatter plot, the facet grid and the fitted line.
x_diff = alt.X('fee_diff_season:Q', title='Player Fee - Average Season Fee')
y_diff = alt.Y('fee_diff_club:Q', title='Player Fee - Average Club Fee')

# Scatter plot: one circle per row, coloured by club.
plot1 = scatter_plot = alt.Chart(data_merged).mark_circle().encode(
    x=x_diff,
    y=y_diff,
    color=alt.Color('club_name:N', legend=alt.Legend(title='Club Name')),
    tooltip=['player_name:N', 'club_name:N', 'season:N', 'fee_millions:Q']
).properties(
    width=600,
    height=400,
    title='Player Fees vs. Average Season and Club Fees'
)

# Same scatter at a smaller size, repeated as one row per season.
facet_grid = scatter_plot.properties(
    width=300,
    height=220
).facet(
    row='season:N'
)

# Degree-1 polynomial fit of fee_diff_club on fee_diff_season.
# The line is built from its own chart with only x/y encodings: the
# regression output contains just the two regression fields, so inheriting
# the scatter's `club_name` colour and tooltip encodings would point at
# fields that no longer exist and would override the red mark colour.
best_fit_line = alt.Chart(data_merged).transform_regression(
    'fee_diff_season', 'fee_diff_club', method='poly', order=1
).mark_line(color='red').encode(
    x=x_diff,
    y=y_diff
)

# Interactive scatter with the fitted line on top, facet grid underneath.
((plot1 + best_fit_line).interactive() & facet_grid)


 




