In [None]:
#Libraries
import numpy as np
import pandas as pd 
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Dataset: load the game results and give the columns explicit names.
NBA = pd.read_csv(
    "../input/d/mahshad/nba-20192020-season/basketball.csv",
    encoding="utf-16",
    parse_dates=["Date"],  # parsed before the header is replaced below
)
# Overwrite the CSV header with the ten names used throughout this notebook.
NBA.columns = [
    "Date",
    "Start ET",
    "Visitor Team",
    "VisitorPts",
    "Home Team",
    "HomePts",
    "OT?",
    "Score Type",
    "Notes",
    "?",
]

NBA.head()

In [None]:
# Standings table; later cells read its "Team" and "Rk" columns.
ST = pd.read_csv(
    "../input/d/mahshad/nba-20192020-season/standing.csv",
    encoding="utf-16",
)

ST.head()

# **Extracting New Features**

In [None]:
# Home Win: True when the home side scored strictly more points than the visitor.
NBA["HomeWin"] = NBA["HomePts"].gt(NBA["VisitorPts"])
# Fraction of games won by the home team.
NBA["HomeWin"].mean()

In [None]:
# Home Last Win & Visitor Last Win
# For every game (in row order) record whether each side won the previous
# game it appeared in. Teams not seen yet default to 0.
from collections import defaultdict

NBA["HomeLastWin"] = 0
NBA["VisitorLastWin"] = 0

won_last = defaultdict(int)

games = zip(NBA.index, NBA["Home Team"], NBA["Visitor Team"], NBA["HomeWin"])
for idx, home, visitor, home_won in games:
    # Write the features first: they must only reflect earlier games.
    NBA.at[idx, "HomeLastWin"] = won_last[home]
    NBA.at[idx, "VisitorLastWin"] = won_last[visitor]
    # Then remember this game's outcome for each team's next appearance.
    home_result = int(home_won)
    won_last[home] = home_result
    won_last[visitor] = 1 - home_result

NBA.head(15)

In [None]:
#Home Team Rank Higher
# 1 when the home team's "Rk" value in the standings is smaller than the
# visitor's, else 0.
#
# Build the team -> rank lookup once instead of filtering ST twice per game.
# keep="first" matches the previous behaviour of taking the first matching row.
rank_by_team = (
    ST.drop_duplicates(subset="Team", keep="first")
      .set_index("Team")["Rk"]
)

# Fail loudly, naming the offending teams, if any scheduled team has no
# standings row (the per-row lookup used to die with a bare IndexError).
scheduled_teams = set(NBA["Home Team"]) | set(NBA["Visitor Team"])
missing_teams = scheduled_teams.difference(rank_by_team.index)
if missing_teams:
    raise KeyError(
        "Teams missing from standings: {}".format(sorted(missing_teams, key=str))
    )

home_rank = NBA["Home Team"].map(rank_by_team)
visitor_rank = NBA["Visitor Team"].map(rank_by_team)
# int64 keeps the same dtype the column had when it was initialised with 0.
NBA["HomeTeamRankHigher"] = (home_rank < visitor_rank).astype("int64")

NBA.head(15)

In [None]:
# Home Team Won Last
# 1 when the home team won the previous meeting between the same two teams
# (regardless of venue), else 0. First meetings get 0 because the lookup
# defaults to the integer 0, which never equals a team name.
NBA["HomeTeamWonLast"] = 0
last_match_winner = defaultdict(int)

for idx, game in NBA.iterrows():
    home, visitor = game["Home Team"], game["Visitor Team"]
    # Sorted pair so that A-vs-B and B-vs-A share one key.
    pairing = tuple(sorted((home, visitor)))
    NBA.at[idx, "HomeTeamWonLast"] = int(last_match_winner[pairing] == home)
    # Update only after the feature is written, so it reflects earlier games.
    last_match_winner[pairing] = home if game["HomeWin"] else visitor

NBA["HomeTeamWonLast"].mean()

In [None]:
# Teams
# Map team names to integer labels. The encoder is fitted on the home-team
# column and then applied to both columns.
encoding = LabelEncoder().fit(NBA["Home Team"].values)
home_teams, visitor_teams = (
    encoding.transform(NBA[column].values)
    for column in ("Home Team", "Visitor Team")
)
# Two columns per game: [home label, visitor label].
X_teams = np.array([home_teams, visitor_teams]).T
X_teams

In [None]:
# One-hot encode the two integer team-label columns of X_teams.
# .toarray() yields a plain ndarray. The previous .todense() returned a
# numpy.matrix, which recent scikit-learn releases reject during input
# validation, breaking the cross-validation that consumes this array.
onehot = OneHotEncoder()
X_teams_expanded = onehot.fit_transform(X_teams).toarray()
X_teams_expanded

# **Decision Tree**

In [None]:
# Target vector: the HomeWin column as a NumPy array.
y_true = NBA["HomeWin"].to_numpy()
y_true

In [None]:
# 1: features = whether each side won its previous game.
previous_win_columns = ["HomeLastWin", "VisitorLastWin"]
X_previouswins = NBA[previous_win_columns].to_numpy()
X_previouswins

In [None]:
# Cross-validated accuracy of a decision tree on the previous-win features.
CLF = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(CLF, X_previouswins, y_true, scoring="accuracy")
mean_accuracy_pct = scores.mean() * 100
print("Accuracy: {0:.1f}%".format(mean_accuracy_pct))

In [None]:
# 2: add the standings-rank comparison to the previous-win features.
home_higher_columns = ["HomeTeamRankHigher", "HomeLastWin", "VisitorLastWin"]
X_homehigher = NBA[home_higher_columns].to_numpy()
X_homehigher

In [None]:
# Cross-validated accuracy of a decision tree on rank + previous-win features.
CLF = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(CLF, X_homehigher, y_true, scoring="accuracy")
mean_accuracy_pct = scores.mean() * 100
print("Accuracy: {0:.1f}%".format(mean_accuracy_pct))

In [None]:
# 3: additionally include who won the last meeting between the two teams.
last_winner_columns = [
    "HomeTeamWonLast",
    "HomeTeamRankHigher",
    "HomeLastWin",
    "VisitorLastWin",
]
X_lastwinner = NBA[last_winner_columns].to_numpy()
X_lastwinner

In [None]:
# Cross-validated accuracy of a decision tree on the four engineered features.
CLF = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(CLF, X_lastwinner, y_true, scoring="accuracy")
mean_accuracy_pct = scores.mean() * 100
print("Accuracy: {0:.1f}%".format(mean_accuracy_pct))

In [None]:
# 4: decision tree on the one-hot encoded team identities only.
CLF = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(CLF, X_teams_expanded, y_true, scoring="accuracy")
mean_accuracy_pct = scores.mean() * 100
print("Accuracy: {0:.1f}%".format(mean_accuracy_pct))

# **Random Forests**

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest on the integer-encoded team labels (X_teams, not the one-hot
# expansion), scored with the same cross-validated accuracy as the trees above.
clf = RandomForestClassifier(random_state=14)
scores = cross_val_score(clf, X_teams, y_true, scoring="accuracy")
mean_accuracy_pct = scores.mean() * 100
print("Accuracy: {0:.1f}%".format(mean_accuracy_pct))