In [32]:
# import libraries
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

# set working directory
# The project folder defaults to the original local path; set the
# FANTASY_FOOTBALL_DIR environment variable to run this notebook elsewhere
# without editing the code.
PROJECT_DIR = os.environ.get(
    'FANTASY_FOOTBALL_DIR',
    '/Users/ryan-saloma/Python Projects/fantasy_football',
)
os.chdir(PROJECT_DIR)

# load data (file name is resolved relative to the working directory set above)
data = pd.read_csv('fantasy_2013.csv')


In [None]:
# Tidy the Player column in two steps:
#   1. drop every character that is not a letter, digit or whitespace
#      (this removes markers such as '*' or '+', and also punctuation
#      like apostrophes, periods and hyphens)
#   2. cast the result to plain strings
player_names = data['Player'].str.replace(r'[^A-Za-z0-9\s]+', '', regex=True)
data['Player'] = player_names.astype(str)

In [None]:
# plot Player vs. PPR points for the 100 highest-scoring players

# rank every player by PPR points (best first) and keep the top 100
sorted_data = data.sort_values('PPR', ascending=False)
top_100 = sorted_data.head(100)

# fixed color per position so bar colors stay the same across re-runs
color_map = {'QB': 'red', 'RB': 'blue', 'WR': 'green', 'TE': 'purple'}

# seaborn raises when a hue level has no entry in a dict palette, so any
# position in the top 100 that is not listed in color_map falls back to gray
bar_palette = {
    pos: color_map.get(pos, 'gray')
    for pos in top_100['FantPos'].dropna().unique()
}

# Player (categorical) by PPR points (numerical), colored by position.
# dodge=False keeps each bar full-width and centered on its tick: every
# player has a single position, so there is nothing to place side by side.
plt.figure(figsize=(20, 10))
sns.barplot(
    x='Player',
    y='PPR',
    data=top_100,
    hue='FantPos',
    palette=bar_palette,
    dodge=False,
)
plt.xticks(rotation=90)
plt.show()

# box plot of PPR points for each position, using ALL players
fig_box, ax_box = plt.subplots(figsize=(20, 10))
sns.boxplot(data=data, x='FantPos', y='PPR', ax=ax_box)
plt.show()

# plot histogram of PPR points, ALL players
plt.figure(figsize=(20, 10))
sns.histplot(data['PPR'], kde=True)
plt.title('PPR points, all players')
plt.show()  # render here, like the bar and box plots, rather than at cell end

# plot histogram of PPR points by position, ALL players
plt.figure(figsize=(20, 10))
sns.histplot(data, x='PPR', hue='FantPos', kde=True)
plt.title('PPR points by position, all players')
plt.show()  # also keeps the trailing Axes repr out of the cell output

It looks like there are differences in PPR by position. The minimum points seem similar across all positions, but the maximum and average are higher for quarterbacks. The spread looks larger for wide receivers than for any other group, while the spread for tight ends is small.

In [None]:
# average PPR points per position across ALL players,
# ordered from the highest-scoring position to the lowest
mean_ppr_per_pos = data.groupby('FantPos')['PPR'].mean()
ppr_by_pos_all = mean_ppr_per_pos.sort_values(ascending=False)