In [61]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from constants import city_list, nba_teams
import preprocessing
import plotly.express as px 
import plotly.graph_objects as go
import plotly.io as pio
import os

# Render every plotly figure in this notebook with the dark theme.
pio.templates.default = "plotly_dark"

# Directory that receives every exported figure. The default is the original
# author's local checkout; set CONSERVATIONOFWINS_OUTPUT_DIR to export
# somewhere else without editing the notebook.
output_dir = os.environ.get(
    "CONSERVATIONOFWINS_OUTPUT_DIR",
    "/Users/joeschlessinger/Documents/jschless.github.io/_includes/assets/conservationofwins",
)

# One row per team-season, judging by the preview below — TODO confirm
# against preprocessing.preprocess().
df = preprocessing.preprocess()
# Keep only seasons before 2021.
df = df.query('year < 2021')
df.head()

Unnamed: 0,win_pct,place,sport,team_name,area,year,n_teams
0,0.0,Columbus,NFL,Columbus Tigers,Ohio,1922,1
1,0.0,Evansville,NFL,Evansville Crimson Giants,Chicago,1922,1
2,0.0,Hammond,NFL,Hammond Pros,Ohio,1922,1
3,0.0,Rochester,NFL,Rochester Jeffersons,New York,1922,1
4,0.25,Louisville,NFL,Louisville Colonels,Louisville,1922,1


## Chicago Case Study

In [96]:
# Leagues of interest; not referenced again in the visible part of the notebook.
sports = {"nfl", "mlb", "nba"}
city = "Chicago"
# Mean winning percentage across the city's teams, one row per season after 1920.
chicago = (
    df.query(f"area == '{city}' and year > 1920")
    .groupby("year")
    .win_pct.mean()
    .reset_index()
)
def temp(df):
    """Return one ``"<team>: <win_pct><br>"`` fragment per row, concatenated.

    ``df`` must expose ``team_name`` and ``win_pct`` columns. The winning
    percentage is cut to the first five characters of its string form
    (truncated, not rounded). An empty frame yields an empty string.
    """
    return "".join(
        row.team_name + ": " + str(row.win_pct)[:5] + "<br>"
        for _, row in df.iterrows()
    )

# Attach the per-season hover text to the yearly averages. The text is built
# from the same `city` used for `chicago`, so changing `city` keeps both halves
# of the merge in sync. The unnamed result of groupby-apply lands in column 0.
blah = chicago.merge(
    df.query(f"area == '{city}'").groupby(["year"]).apply(temp).reset_index(),
    on="year",
)

fig = go.Figure(go.Scatter(
    x = blah.year.tolist(),
    y = blah.win_pct.tolist(),
    text= blah[0].tolist(),
    hovertemplate =
    '<br><b>Year</b>: %{x}<br>'+
    '<b>Winning Pct.</b>: %{y:.2f}<br>'+
    '%{text}' +
    '<extra></extra>'
    ))
fig.update_layout(
    title=f"Average Winning Percentage for {city} Sports Teams",
    xaxis_title="Year",
    yaxis_title="Winning Percentage",
)
# Dashed reference line at a .500 record.
fig.add_hline(y=.5, line_width=1, line_dash="dash", line_color="white")
fig.write_html(os.path.join(output_dir,"chicago_avg.html"))
fig.show()

In [97]:
# Count seasons after 1940 as plain numbers. Series.count() returns one scalar,
# whereas DataFrame.count() printed a per-column Series.
recent = chicago.query("year > 1940")
print("Total Seasons:", recent.win_pct.count())
# NOTE(review): the label says "above .500" but the filter is >= .5, so
# exactly-.500 seasons are included — confirm which is intended.
print("Seasons with winning percentage above .500:", recent.query("win_pct >= .5").win_pct.count())

Total Seasons: year       80
win_pct    80
dtype: int64
Seasons with winning percentage above .500: year       36
win_pct    36
dtype: int64


## Visualizing Multiple Cities Together

In [129]:
def temp(df):
    """Concatenate a ``"<team>: <win_pct><br>"`` line for every row of ``df``.

    Reads the ``team_name`` and ``win_pct`` columns; the percentage is the
    first five characters of ``str(win_pct)``.
    """
    pieces = []
    for _, season in df.iterrows():
        pieces.append(season.team_name + ": " + str(season.win_pct)[:5] + "<br>")
    return "".join(pieces)
# Mean winning percentage per (year, area) after 1920, joined with the hover
# text listing each team's record for that season. The merge is an inner join,
# so only the post-1920 rows survive.
temp_df = df.query('year > 1920').groupby(["year", "area"]).win_pct.mean().reset_index()
temp_df = temp_df.merge(df.groupby(["year", "area"]).apply(temp).reset_index(), on=["year", "area"]).rename(columns={0: "teams"})
cities = ["Chicago", "New York", "Boston", "Los Angeles", "Atlanta", "Cleveland", "Philadelphia", "Detroit", "Minnesota"]
fig = go.Figure()
for city in cities:
    # Use a dedicated name for the per-city slice so the `temp` helper used
    # above is not overwritten by a DataFrame.
    city_df = temp_df.query(f"area == '{city}'")
    # Mean of the yearly averages, cut to five characters for the legend.
    win_pct = str(city_df.win_pct.mean())[:5]
    print(city, win_pct)
    fig.add_trace(
        go.Scatter(x=city_df.year.tolist(), 
        y=city_df.win_pct.tolist(),
        text=city_df.teams.tolist(),
        mode="lines",
        hovertemplate =
        '<br><b>Year</b>: %{x}<br>'+
        '<b>Winning Pct.</b>: %{y:.2f}<br>'+
        '%{text}' +
        '<extra></extra>',
        name=city + f" ({win_pct})"))
# Dashed reference line at a .500 record.
fig.add_hline(y=.5, line_width=1, line_dash="dash", line_color="white")
fig.update_layout(
    title= "Winning Percentage by City Over Time",
    xaxis_title="Year",
    yaxis_title="Winning Percentage",
    legend_title_text="City (Lifetime Winning Pct.)"
)
fig.write_html(os.path.join(output_dir,"city_avg.html"))
fig.show()


Chicago 0.505
New York 0.490
Boston 0.516
Los Angeles 0.499
Atlanta 0.478
Cleveland 0.503
Philadelphia 0.473
Detroit 0.486
Minnesota 0.454


## Breakdown By Sport

In [99]:
city = "Chicago"
# Winning percentage per team and season for the selected city.
temp = df.query(f"area == '{city}'").groupby(["team_name", "year"]).win_pct.mean().reset_index()
teams = ['Chicago Bears', 'Chicago Blackhawks',
       'Chicago Bulls', 'Chicago Cubs','Chicago White Sox']
fig = go.Figure()
for t in teams: 
    # Select the team's seasons once and reuse the slice for the legend
    # value and both axes.
    team_seasons = temp.query(f"team_name == '{t}'")
    win_pct = str(team_seasons.win_pct.mean())[:5]
    fig.add_trace(go.Scatter(x=team_seasons.year.tolist(), 
    y=team_seasons.win_pct.tolist(),
                        mode='lines',
                        name=t+ f" ({win_pct})"))
    
    print("average winning percentage for", t, win_pct)

fig.update_layout(
    title= "Winning Percentage of Chicago Teams Over Time",
    xaxis_title="Year",
    yaxis_title="Winning Percentage",
    legend_title_text="Team (Lifetime Winning Pct.)"
)
# Dashed reference line at a .500 record.
fig.add_hline(y=.5, line_width=1, line_dash="dash", line_color="white")

fig.write_html(os.path.join(output_dir,"chicago_by_sport.html"))
fig.show()


average winning percentage for Chicago Bears 0.576
average winning percentage for Chicago Blackhawks 0.441
average winning percentage for Chicago Bulls 0.520
average winning percentage for Chicago Cubs 0.507
average winning percentage for Chicago White Sox 0.502


### Exploring Cities with 3+ Teams

In [101]:
# Number of team rows for each (year, area) pair.
teams_per_place = df.groupby(["year", "area"])["team_name"].count()
# Index of the (year, area) pairs that have more than two teams.
has_three_plus = teams_per_place > 2
teams_3 = teams_per_place[has_three_plus].index
def temp(df):
    """Format every row of ``df`` as ``"<team>: <win_pct><br>"`` and join them.

    Uses the ``team_name`` and ``win_pct`` columns. ``win_pct`` is rendered as
    the first five characters of its string representation.
    """
    lines = [
        row.team_name + ": " + str(row.win_pct)[:5] + "<br>"
        for _, row in df.iterrows()
    ]
    return "".join(lines)
# Hover text for every (year, area) pair, indexed by that pair.
season_groups = df.groupby(["year", "area"])
teams = season_groups.apply(temp)

In [103]:
# Mean winning percentage for every (year, area) pair with 3+ teams, with the
# matching hover text attached. `assign` aligns `teams` on the (year, area)
# index, so no reset_index/set_index round trip is needed.
temp = (
    df.groupby(["year", "area"]).win_pct.mean()[teams_3]
    .to_frame()
    .assign(teams=teams)
    .reset_index()
)
temp

Unnamed: 0,year,area,win_pct,teams
0,1903,New York,0.552112,Brooklyn Superbas: 0.514<br>New York Giants: 0...
1,1904,New York,0.556032,Brooklyn Superbas: 0.366<br>New York Giants: 0...
2,1905,New York,0.492858,Brooklyn Superbas: 0.315<br>New York Giants: 0...
3,1906,New York,0.553939,Brooklyn Superbas: 0.434<br>New York Giants: 0...
4,1907,New York,0.482703,Brooklyn Superbas: 0.439<br>New York Giants: 0...
...,...,...,...,...
1034,2020,Philadelphia,0.447290,Philadelphia Eagles: 0.281<br>Philadelphia Phi...
1035,2020,Pittsburgh,0.548792,Pittsburgh Steelers: 0.75<br>Pittsburgh Pirate...
1036,2020,San Francisco,0.468155,San Francisco 49ers: 0.375<br>San Francisco Gi...
1037,2020,Tampa Bay,0.656317,Tampa Bay Buccaneers: 0.688<br>Tampa Bay Rays:...


In [104]:
# NOTE(review): `blah` (seasons after 1950) is computed here, but the figure
# below plots every row of `temp` — confirm whether the filter was meant to
# apply to the chart.
blah = temp[temp.year > 1950]

season_hover = (
    '<br><b>Year</b>: %{x}<br>'
    + '<b>Winning Pct.</b>: %{y:.2f}<br>'
    + '%{text}'
    + '<extra></extra>'
)
# One marker per (year, area) season with three or more teams.
season_points = go.Scatter(
    x=temp.year.tolist(),
    y=temp.win_pct.tolist(),
    text=temp.teams.tolist(),
    mode="markers",
    hovertemplate=season_hover,
)
fig = go.Figure(season_points)
fig.update_layout(
    title="Average Winning Percentage for Sports Locales Over Time",
    xaxis_title="Year",
    yaxis_title="Winning Percentage",
)
# Dashed reference line at a .500 record.
fig.add_hline(y=.5, line_width=.5, line_dash="dash", line_color="white")
fig.write_html(os.path.join(output_dir, "season_scatter.html"))
fig.show()

Almost every season falls in the .400–.600 range. The truly exceptional (and truly dismal) seasons are the few points outside that band; the best and worst are listed below.

## Best and Worst Seasons

In [60]:
# Ten highest average winning percentages, printed as a markdown table.
# NOTE(review): the stored output shows a `place` column while `temp` is built
# on `area`, so this cell looks like it was last run against an older frame.
best_seasons = temp.sort_values('win_pct', ascending=False).head(10)
print(best_seasons.to_markdown(index=False))

|   year | place        |   win_pct | teams                                                                                                      |
|-------:|:-------------|----------:|:-----------------------------------------------------------------------------------------------------------|
|   1927 | New York     |  0.733563 | New York Giants: 0.889<br>New York Giants: 0.597<br>New York Yankees: 0.714<br>                            |
|   1998 | Atlanta      |  0.71644  | Atlanta Falcons: 0.875<br>Atlanta Braves: 0.654<br>Atlanta Hawks: 0.62<br>                                 |
|   1972 | Boston       |  0.689898 | Boston Red Sox: 0.548<br>Boston Celtics: 0.829<br>Boston Bruins: 0.692<br>                                 |
|   1937 | Chicago      |  0.687446 | Chicago Bears: 0.9<br>Chicago White Sox: 0.558<br>Chicago Cubs: 0.603<br>                                  |
|   1926 | Chicago      |  0.687293 | Chicago Bears: 1.0<br>Chicago White Sox: 0.529<br>Chicago Cubs: 0.532<br>       

In [58]:
# Ten lowest average winning percentages, printed as a markdown table.
worst_seasons = temp.sort_values('win_pct').head(10)
print(worst_seasons.to_markdown(index=False))

|   year | place        |   win_pct | teams                                                                                                                    |
|-------:|:-------------|----------:|:-------------------------------------------------------------------------------------------------------------------------|
|   1971 | Buffalo      |  0.215564 | Buffalo Bills: 0.071<br>Buffalo Braves: 0.268<br>Buffalo Sabres: 0.307<br>                                               |
|   1972 | Philadelphia |  0.250135 | Philadelphia Eagles: 0.179<br>Philadelphia Phillies: 0.378<br>Philadelphia 76ers: 0.11<br>Philadelphia Flyers: 0.333<br> |
|   1939 | Philadelphia |  0.253285 | Philadelphia Eagles: 0.1<br>Philadelphia Athletics: 0.361<br>Philadelphia Phillies: 0.298<br>                            |
|   1940 | Philadelphia |  0.256149 | Philadelphia Eagles: 0.091<br>Philadelphia Athletics: 0.350<br>Philadelphia Phillies: 0.326<br>                          |
|   1936 | Philadelphia |  0.26001

## All Places by Historical Win Percentage

In [128]:
# An area is included only if it has more than this many rows in `df`
# (presumably one row per team-season — TODO confirm).
SEASONS_THRESH = 40
# Rows with a non-null `sport` per area, kept separate from the final set so
# `places_to_include` only ever holds one kind of object.
seasons_per_area = df.groupby('area').sport.count()
places_to_include = set(seasons_per_area[seasons_per_area > SEASONS_THRESH].index)

# Lifetime mean winning percentage per area, best first, for the included areas.
temp_df = df.groupby(["area"]).win_pct.mean().reset_index()
temp_df = temp_df[temp_df.area.isin(places_to_include)].sort_values("win_pct", ascending=False)

def temp(df):
    """Join a ``"<team>: <win_pct><br>"`` entry for each row of ``df``.

    Expects ``team_name`` and ``win_pct`` columns; ``win_pct`` is shown as the
    first five characters of ``str(win_pct)``.
    """
    def _entry(indexed_row):
        # iterrows yields (index, row) pairs; only the row is needed.
        _, row = indexed_row
        return row.team_name + ": " + str(row.win_pct)[:5] + "<br>"

    return "".join(map(_entry, df.iterrows()))

# Lifetime mean per (area, team), collapsed to one hover string per area.
per_team_avg = df.groupby(["area", "team_name"]).win_pct.mean().reset_index()
average_performances = per_team_avg.groupby("area").apply(temp).reset_index()

# The hover strings arrive in the unnamed column 0 after the merge.
temp_df = temp_df.merge(average_performances, on="area")
customdata = temp_df[0].tolist()
hovertemplate = (
    '<br><b>Area</b>: %{x}<br>'
    + '<b>Winning Pct.</b>: %{y:.2f}<br>'
    + '%{customdata}'
    + '<extra></extra>'
)

fig = go.Figure()
# One bar per area, labelled with its winning percentage cut to five characters.
bar_labels = [str(x)[:5] for x in temp_df.win_pct.tolist()]
fig.add_trace(go.Bar(
    x=temp_df.area.tolist(),
    y=temp_df.win_pct.tolist(),
    text=bar_labels,
    customdata=customdata,
    hovertemplate=hovertemplate,
))
fig.update_layout(
    title="Lifetime Winning Percentage By City",
    yaxis_title="Winning Percentage",
)
fig.write_html(os.path.join(output_dir, "alltime_bar.html"))
fig.show()

### Largest Improvements and Declines

# Winning PCT

In [8]:
# Distribution of winning percentages over every row of `df`, exported as a
# static image.
fig = px.histogram(df, x="win_pct", nbins=30)
fig.update_layout(
    title="Histogram of Winning Percentages",
    xaxis_title="Winning Percentage",
    yaxis_title="# Seasons",
)
hist_path = os.path.join(output_dir, "hist.jpeg")
fig.write_image(hist_path)

# Summary statistics of the same column.
mean_win_pct = df.win_pct.mean()
std_win_pct = df.win_pct.std()
print(f"Mean win pct is {mean_win_pct}")
print(f"Std dev is {std_win_pct}")

Mean win pct is 0.4883278372078791
Std dev is 0.15160039855772944
