In [None]:
import os
import sys
# Put the sibling "Analysis" and "Scrapers" folders (one level above the current working
# directory) at the front of the import path, "Analysis" first, so the project modules
# imported below can be found.
parentdir = os.path.dirname(os.getcwd())
sys.path[:0] = [os.path.join(parentdir, folder) for folder in ("Analysis", "Scrapers")]
import numpy as np
import pandas as pd
from sklearn.svm import SVC
import scrape_game_stats as game_scrape
import analysis

## Scraping NBA.com for game statistics

In [None]:
# Build the NBA.com games URL for one date. Judging by the URL shown below
# (date=2018-01-10) the arguments appear to be (day, month, year) — confirm against
# scrape_game_stats.get_scores_url.
day_url = game_scrape.get_scores_url(10, 1, 2018)

https://www.nba.com/games?date=2018-01-10

In [None]:
# Show the URL that was generated for the chosen date.
print(day_url)

In [None]:
# Request the "scores" resource using the same three date arguments as the URL above.
# The returned object exposes .json(), which the next cell calls.
response = game_scrape.get_response("scores", 10, 1, 2018)

In [None]:
# Parse the response payload through its .json() method.
response_data = response.json()

In [None]:
# Extract the game identifiers from the parsed payload; the result is indexed
# positionally (game_ids[0]) further down.
game_ids = game_scrape.data_to_game_ids(response_data)

In [None]:
# Show the identifiers found for the chosen date.
print(game_ids)

In [None]:
# Scrape the observation for the first game returned for the chosen date.
# Fail with an explicit message when the scrape returned no games, instead of the bare
# IndexError that game_ids[0] would raise.
if len(game_ids) == 0:
    raise ValueError("No game ids were returned for the requested date; nothing to scrape.")
observation = game_scrape.get_observation(game_ids[0])

In [None]:
# Column labels applied, in this positional order, to a scraped game observation.
# The line grouping below follows the column names only.
columns = [
    "game_id", "home_flag", "team_id", "abb",
    "wins", "loss", "wl%",
    "asts", "rebs", "orebs", "tovs",
    "fga", "fg%",
    "2pa", "2p%",
    "3pa", "3p%",
    "fta", "ft%",
    "pfs", "pts",
    "ref1", "ref2", "ref3",
    "net_score", "won",
]

In [None]:
# Tabulate the scraped observation under the labels in `columns`, then key the rows by
# (game_id, home_flag) and sort on that index.
df = (
    pd.DataFrame(data=observation, columns=columns)
    .set_index(["game_id", "home_flag"])
    .sort_index()
)

In [None]:
# Display the indexed, sorted observation frame.
df

## Running Classification

In [None]:
# Folder holding the scraped data: <parent of the working directory>/Data/SuccessfulScrape.
# Each path component is passed separately so os.path.join supplies the platform's own
# separator instead of relying on a hard-coded "/".
data_path = os.path.join(os.path.dirname(os.getcwd()), "Data", "SuccessfulScrape")

In [None]:
# Run the analysis module's entry point for 2018 on the data folder with the "SVM" option and
# unpack its five return values; the first four are printed as scores in the next cell.
# NOTE(review): the meaning of the positional True flag is not visible here — confirm
# against analysis.main. `finish` is not used by any later cell shown in this notebook.
train, test, predict, control, finish = analysis.main(2018, data_path, True, "SVM")

In [None]:
# Report the four scores returned by analysis.main, scaled by 100 and shown with two
# decimals, one per line.
print(
    f"  Train Score: {train*100:3.2f}",
    f"   Test Score: {test*100:3.2f}",
    f"Predict Score: {predict*100:3.2f}",
    f"Control Score: {control*100:3.2f}",
    sep="\n",
)

In [None]:
# Load the game-level frame for 2018 from the data folder. Later cells expect it to
# contain "game_id" and "home_flag" columns.
game_df = analysis.get_single_season_df(2018, data_path)

In [None]:
# Preview the first rows of the loaded frame.
game_df.head()

In [None]:
# Pick the modelling columns by position from the flat frame, then key the frame by
# (game_id, home_flag).
#   features   - columns 6 up to (not including) the last 6
#   regression - second-to-last column
#   label      - last column
# The slices are positional, so they are only meaningful while game_id/home_flag are still
# ordinary columns. The guard makes a re-run of this cell a no-op; without it a re-run would
# first overwrite features/cols with slices of the already-indexed frame and then fail with
# KeyError in set_index.
index_cols = ["game_id", "home_flag"]
if list(game_df.index.names) != index_cols:
    features = list(game_df.columns[6:-6])
    regression = game_df.columns[-2]
    label = game_df.columns[-1]
    cols = features + [regression] + [label]

    game_df = game_df.set_index(index_cols)
game_df.head()

In [None]:
# Split the indexed games into train / validation / test frames and build the model inputs.
# NOTE: `game_ids` rebinds the name assigned earlier from data_to_game_ids; from here on it
# holds the first-level values of game_df's index.
# NOTE(review): 0.75, 0, 0.25 are presumably the train/validation/test fractions, matching the
# three returned frames — confirm against analysis.split_dfs. `val_df` is not used in the
# cells shown. If split_dfs shuffles, no seed is set in this notebook, so scores may differ
# between runs — confirm.
game_ids = np.array(game_df.index.levels[0])
train_df, val_df, test_df = analysis.split_dfs(game_df, game_ids, 0.75, 0, 0.25)
X_train, y_train = analysis.get_x_y(train_df, cols)
X_test, y_test = analysis.get_x_y(test_df, cols)

In [None]:
# Preview the selected modelling columns of the training split.
train_df[cols].head()

In [None]:
# Preview the selected modelling columns of the test split.
test_df[cols].head()

In [None]:
# Build the per-team feature table for the "2017-18" season: keep that season's rows, index
# them by team_id, and retain only the feature columns. Written as one chain so the filtered
# selection of team_df is never modified in place (the previous version called
# set_index(inplace=True) on a .loc slice).
# NOTE(review): "2017-18" is presumably the season that corresponds to the 2018 argument
# used when loading game_df — confirm against the analysis module.
team_df = analysis.get_team_df(data_path)
team_df_year = (
    team_df.loc[team_df["season"] == "2017-18"]
    .set_index("team_id")
    .loc[:, features]
)

In [None]:
# Preview the per-team feature table.
team_df_year.head()

In [None]:
# Build a second input/target pair from the test split, the per-team table and the feature
# list. NOTE(review): how get_predict_x_y combines test_df with team_df_year is not visible
# here — confirm against the analysis module.
X_predict, y_predict = analysis.get_predict_x_y(test_df, team_df_year, features)

In [None]:
# Print the shapes of the three input matrices, one per line.
print(np.shape(X_train), np.shape(X_test), np.shape(X_predict), sep="\n")

In [None]:
# Fit an RBF-kernel support vector classifier on the training split (fit returns the
# estimator itself), then score it on the train, test and predict inputs.
clf = SVC(kernel="rbf", gamma="scale").fit(X_train, y_train)
train_score, test_score, predict_score = (
    clf.score(inputs, targets)
    for inputs, targets in (
        (X_train, y_train),
        (X_test, y_test),
        (X_predict, y_predict),
    )
)

# Score computed by the analysis module from the test split and the per-team table, shown
# next to the classifier scores.
control_score = analysis.test_control_score(test_df, team_df_year)

In [None]:
# Report the four scores computed above, scaled by 100 and shown with two decimals, one per
# line.
print(
    f"  Train Score: {train_score*100:3.2f}",
    f"   Test Score: {test_score*100:3.2f}",
    f"Predict Score: {predict_score*100:3.2f}",
    f"Control Score: {control_score*100:3.2f}",
    sep="\n",
)