In [107]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [144]:
def pca_per_year(year, df):
    """Run PCA on one season of player statistics and show a biplot.

    Rows of ``df`` whose ``'Year'`` equals ``year`` are selected, the
    identifier and record columns are dropped, and the remaining columns are
    standardized with ``StandardScaler`` before being decomposed with ``PCA``.
    A biplot of the first two components is displayed: each red arrow is a
    variable's loading (stretched by the largest score on that axis so arrows
    and points share a scale) and each blue dot is one player's score.

    Parameters
    ----------
    year
        Value compared against ``df['Year']`` to pick the season.
    df : pandas.DataFrame
        Must contain the columns ``'PLAYER'``, ``'TEAM'``, ``'Year'``,
        ``'Unnamed: 0'``, ``'W'`` and ``'L'``. Every other column is passed to
        the scaler, so it is assumed to be numeric and free of missing
        values -- TODO confirm against the input file.

    Returns
    -------
    pca_out : pandas.DataFrame
        Columns ``'Player'``, ``'Component 1'``, ``'Component 2'``; indexed
        like the selected rows of ``df``.
    pca_components : pandas.DataFrame
        Columns ``'Variables'``, ``'Loadings X'``, ``'Loadings Y'``; one row
        per input variable.

    Raises
    ------
    KeyError
        If one of the columns to drop is missing from ``df``.
    ValueError
        If no row matches ``year``, or if the season has fewer than two
        players or fewer than two variables (two components are required).
    """
    season = df[df['Year'] == year]
    if season.empty:
        raise ValueError(f"no rows found for Year == {year!r}")

    names = season['PLAYER']
    features = season.drop(['PLAYER', 'TEAM', 'Year', 'Unnamed: 0', 'W', 'L'], axis=1)
    feature_names = list(features.columns.values)

    # Perform PCA analysis
    scaled = StandardScaler().fit_transform(features)
    # PCA rejects n_components larger than min(n_samples, n_features), which
    # happens for a season with fewer players than variables.
    n_components = min(scaled.shape)
    if n_components < 2:
        raise ValueError(
            f"Year {year!r} has {scaled.shape[0]} player(s) and "
            f"{scaled.shape[1]} variable(s); at least two of each are needed"
        )
    pca = PCA(n_components=n_components)
    pca.fit(scaled)
    # Project once and reuse the result for both axes.
    scores = pca.transform(scaled)

    x_vector, y_vector = pca.components_[0], pca.components_[1]
    xs, ys = scores[:, 0], scores[:, 1]
    # Loop-invariant scale factors for the loading arrows.
    x_scale, y_scale = xs.max(), ys.max()

    fig, ax = plt.subplots(figsize=(15, 10))
    for name, load_x, load_y in zip(feature_names, x_vector, y_vector):
        tip_x, tip_y = load_x * x_scale, load_y * y_scale
        ax.arrow(0, 0, tip_x, tip_y, color='r', width=0.005, head_width=0.0025)
        ax.text(tip_x, tip_y, name, color='r')

    # 'bo' is marker-only, so one call draws the same dots as a per-point loop.
    ax.plot(xs, ys, 'bo', alpha=0.5)
    ax.set_title(f"PCA biplot, {year}")
    ax.set_xlabel("Component 1")
    ax.set_ylabel("Component 2")
    plt.show()
    # Release the figure; this function is typically called once per season.
    plt.close(fig)

    pca_out = pd.DataFrame({'Player': names, 'Component 1': xs, 'Component 2': ys})
    pca_components = pd.DataFrame({'Variables': features.columns.values,
                                   'Loadings X': x_vector,
                                   'Loadings Y': y_vector})
    return pca_out, pca_components

In [146]:
def main(data_path="WNBA - Players - General.csv",
         pca_dir="/Users/madiforman/Desktop/WNBA/pca_out",
         loadings_dir="/Users/madiforman/Desktop/WNBA/loadings",
         years=range(2014, 2025)):
    """Run ``pca_per_year`` for each season and write the results to CSV.

    For every year, the player scores are written to
    ``<pca_dir>/PCA_out<year>.csv`` and the variable loadings to
    ``<loadings_dir>/principal_component_loadings<year>.csv``.

    Parameters
    ----------
    data_path : str or path-like
        CSV file holding the player statistics for all seasons.
    pca_dir : str or path-like
        Directory receiving the per-player component scores.
    loadings_dir : str or path-like
        Directory receiving the per-variable loadings.
    years : iterable
        Seasons to process; each value is matched against the ``'Year'``
        column by ``pca_per_year``.

    Notes
    -----
    The defaults reproduce the original hard-coded locations, which are
    specific to one machine; pass other paths to run elsewhere.
    """
    from pathlib import Path

    pca_dir = Path(pca_dir)
    loadings_dir = Path(loadings_dir)
    # to_csv does not create missing directories, so make sure they exist.
    pca_dir.mkdir(parents=True, exist_ok=True)
    loadings_dir.mkdir(parents=True, exist_ok=True)

    player_data = pd.read_csv(data_path)
    for year in years:
        pca_out, principal_components = pca_per_year(year, player_data)
        pca_out.to_csv(pca_dir / f"PCA_out{year}.csv")
        principal_components.to_csv(
            loadings_dir / f"principal_component_loadings{year}.csv"
        )
# Run the per-year PCA export when this cell is executed.
main()

                                      Variables  Loadings X  Loadings Y
0                                           AGE    0.064255    0.119820
1                                  games played    0.167931    0.067166
2                          avg minutes per game    0.272794    0.062747
3                                           PTS    0.275860    0.069903
4   FGM (field goals made AKA num baskets made)    0.275596    0.015298
5                                           FGA    0.269802    0.094802
6                                           FG%    0.140879   -0.157811
7                                           3PM    0.112247    0.407906
8                                           3PA    0.107672    0.416435
9                                           3P%    0.067510    0.306427
10                                          FTM    0.251956    0.070285
11                                          FTA    0.256148    0.033391
12                                          FT%    0.111337    0