In [None]:
# Example:
# 1. Read a CSV file into a Pandas DataFrame.
# 2. Don't repeat yourself - create functions!
# 3. Create a function to clean the data.
# 4. Create a function to draw a scatter plot with a trend line.
# 5. Create a function to draw a box-and-whisker plot.
# 6. Call the functions you created to draw the plots.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from numpy.polynomial.polynomial import polyfit
%matplotlib inline

In [None]:
# Read the CSV file into a DataFrame. The path is relative, so it is
# resolved against the notebook's current working directory.
file = "Resources/pass_att.csv"
df_original = pd.read_csv(file)
# Show the first rows as the cell output to sanity-check the load.
df_original.head()

In [None]:
# Function to clean the data
def clean(orig_df):
    """Return the rows of ``orig_df`` that pass three validity filters.

    A row is kept only when all of the following hold:

    * ``yards_per_pass_attempt`` is greater than -100
    * ``passes_had_intercepts`` is greater than or equal to 0
    * ``qb_sacks`` is greater than or equal to 0

    Rows where any of these columns is missing (NaN) are dropped as well,
    because comparisons against NaN evaluate to False.

    Parameters
    ----------
    orig_df : pandas.DataFrame
        Frame to filter. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The subset of ``orig_df`` that satisfies every filter, with the
        original index labels preserved.

    Raises
    ------
    KeyError
        If any of the three columns is absent from ``orig_df``.
    """
    # NOTE(review): -100 and negative counts look like sentinel values for
    # bad or missing records -- confirm against the data source.
    # Filter the frame that was passed in, not the notebook-level global,
    # so the function works for any caller-supplied DataFrame.
    keep = (
        (orig_df["yards_per_pass_attempt"] > -100)
        & (orig_df["passes_had_intercepts"] >= 0)
        & (orig_df["qb_sacks"] >= 0)
    )
    return orig_df[keep]

In [None]:
# Draw a scatter plot
def draw_scatter_plot(df, x_var_name, y_var_name, title, display_grid=False):
    """Draw a scatter plot of two DataFrame columns with a linear trend line.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both columns.
    x_var_name : str
        Column plotted on the x-axis; also used (via ``clean_label``) as
        the x-axis label.
    y_var_name : str
        Column plotted on the y-axis; also used (via ``clean_label``) as
        the y-axis label.
    title : str
        Plot title; passed through ``clean_label`` before display.
    display_grid : bool, optional
        When truthy, grid lines are switched on. Defaults to False.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    # Keep the Axes returned by pandas so every later call targets this
    # plot explicitly instead of pyplot's implicit "current axes".
    ax = df.plot(kind='scatter', x=x_var_name, y=y_var_name, color='blue',
                 s=1.2, label="NFL 1993 to 2020")

    ax.set_title(clean_label(title))
    ax.set_xlabel(clean_label(x_var_name))
    ax.set_ylabel(clean_label(y_var_name))

    # Fit only on rows where both values are present; the least-squares
    # fit does not skip missing values on its own.
    x_values = df[x_var_name]
    y_values = df[y_var_name]
    has_both = x_values.notna() & y_values.notna()
    x_fit = x_values[has_both]
    y_fit = y_values[has_both]

    # A straight-line fit needs at least two points; otherwise show the
    # scatter plot alone rather than failing.
    if len(x_fit) >= 2:
        # polyfit returns coefficients lowest degree first: intercept, slope.
        intercept, slope = polyfit(x_fit, y_fit, 1)
        ax.plot(x_fit, intercept + slope * x_fit, '-', color='red')

    ax.legend()

    if display_grid:
        # Pass True explicitly: a bare grid() call toggles the current
        # state and would hide a grid that rcParams already enabled.
        ax.grid(True)

    plt.show()

In [None]:
# Draw a box plot
def draw_box_plot(df, x_var_name, y_var_name, title, display_grid=False):
    """Draw a single box-and-whisker plot of one DataFrame column.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the column to summarize.
    x_var_name : str
        Column whose distribution is plotted; also used (via
        ``clean_label``) as the y-axis label.
    y_var_name : str
        Unused by this function. Kept so the signature parallels
        ``draw_scatter_plot`` and existing callers keep working.
    title : str
        Plot title; passed through ``clean_label`` before display.
    display_grid : bool, optional
        When truthy, grid lines are switched on. Defaults to False.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    fig, ax = plt.subplots()
    ax.boxplot(df[x_var_name])
    # Name the single box after drawing instead of passing `labels=`,
    # which newer Matplotlib releases deprecate in favour of `tick_labels=`;
    # setting the tick labels directly works on old and new versions alike.
    ax.set_xticklabels(["NFL 1993 to 2020"])

    ax.set_title(clean_label(title))
    ax.set_ylabel(clean_label(x_var_name))

    if display_grid:
        # Pass True explicitly: a bare grid() call toggles the current
        # state and would hide a grid that rcParams already enabled.
        ax.grid(True)

    plt.show()

In [None]:
# Clean the data. The filtered result is bound to `df`; `df_original`
# is not reassigned, so the raw frame stays available.
df = clean(df_original)
# Summary statistics of the cleaned frame, shown as the cell output.
df.describe()

In [None]:
# Convert a label into nice text: replace underscores with spaces and
# capitalize the first letter of each word.
# Hint: the built-in str.title() method already does the capitalization.
def clean_label(label):
    """Turn an identifier-style label into display text.

    Underscores become spaces, then ``str.title()`` is applied, which
    upper-cases the first letter of each word and lower-cases the rest
    (so ``"qb_sacks"`` becomes ``"Qb Sacks"``).
    """
    words = label.split("_")
    spaced = " ".join(words)
    return spaced.title()

In [None]:
# Plot defaults applied to every figure created after this cell:
# base font size, and figure size as (width, height).
plt.rcParams.update({
    'font.size': 14,
    'figure.figsize': (6, 3),
})

In [None]:
# Yards-per-pass-attempt scatter plot, restricted to rows whose winning
# margin is non-negative. Drawn with grid lines.
df2 = df.loc[df["winning_margin"] >= 0]
draw_scatter_plot(
    df2,
    x_var_name='yards_per_pass_attempt',
    y_var_name='winning_margin',
    title="yards_per_pass_attempt difference",
    display_grid=True,
)

In [None]:
# Box plot of yards per pass attempt for the same filtered rows (df2)
# used by the scatter plot above. Drawn with grid lines.
draw_box_plot(
    df2,
    x_var_name='yards_per_pass_attempt',
    y_var_name='winning_margin',
    title="yards_per_pass_attempt difference",
    display_grid=True,
)

In [None]:
# Scatter plot of total rushing against winning margin (all cleaned rows).
draw_scatter_plot(
    df,
    x_var_name='total_rushing',
    y_var_name='winning_margin',
    title="Number of Rushing Attempts",
)

In [None]:
# Scatter plot of quarterback sacks against winning margin (all cleaned rows).
draw_scatter_plot(
    df,
    x_var_name='qb_sacks',
    y_var_name='winning_margin',
    title="Quarterback Sacks",
)

In [None]:
# Scatter plot of intercepted passes against winning margin (all cleaned rows).
draw_scatter_plot(
    df,
    x_var_name='passes_had_intercepts',
    y_var_name='winning_margin',
    title="Passes Had Intercepts",
)