In [None]:
# coding: utf-8
import os
import numpy as np
import pandas as pd
import pymysql
from collections import defaultdict
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import cross_val_score
from sklearn.grid_search import GridSearchCV
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier

# NOTE(review): connection settings are hard-coded in source, including the
# password. Consider moving them to configuration / environment variables.
DB_HOST = "cs540.cbxoiclfiyyp.us-west-2.rds.amazonaws.com"
DB_USER = "masterUsername"
DB_PASSWORD = "1234567890"

GAME_COLUMNS = ['Date', 'VisitorTeam', 'VisitorPts', 'HomeTeam', 'HomePts']

# Team names that must be rewritten before they are looked up.
TEAM_RENAMES = {"New Orleans Hornets": "New Orleans Pelicans"}


def canonical_team_name(name):
    """Return the name under which *name* is looked up (applies TEAM_RENAMES)."""
    return TEAM_RENAMES.get(name, name)


def load_tables():
    """Read the team game log and the ladder table from the project database.

    Returns:
        A ``(results, ladder)`` tuple of DataFrames.
    """
    db = pymysql.connect(host=DB_HOST, user=DB_USER, password=DB_PASSWORD)
    try:
        results = pd.read_sql('SELECT * FROM project.`TeamGameLog_2014-2015`', db)
        ladder = pd.read_sql('SELECT * FROM project.`2014-2015`', db)
    finally:
        # Release the connection even when one of the queries fails.
        db.close()
    return results, ladder


def build_games(results):
    """Collapse the per-team game log into one row per game.

    Rows are consumed in consecutive pairs (positions 0-1, 2-3, ...); each
    pair is taken to be the two sides of one game. A ``MATCHUP`` containing
    ``"@"`` marks that row's team as the visitor, otherwise as the home team.
    The date comes from the first row of the pair. A trailing unpaired row is
    ignored.

    Args:
        results: DataFrame with ``TEAM_NAME``, ``MATCHUP``, ``PTS`` and
            ``GAME_DATE`` columns.

    Returns:
        DataFrame with the columns listed in ``GAME_COLUMNS``.
    """
    records = []
    for pos in range(0, len(results) - 1, 2):
        first = results.iloc[pos]
        second = results.iloc[pos + 1]
        if "@" in first["MATCHUP"]:
            visitor, home = first, second
        else:
            home, visitor = first, second
        records.append({
            'Date': first['GAME_DATE'],
            'VisitorTeam': visitor['TEAM_NAME'],
            'VisitorPts': visitor['PTS'],
            'HomeTeam': home['TEAM_NAME'],
            'HomePts': home['PTS'],
        })
    return pd.DataFrame(records, columns=GAME_COLUMNS)


def add_last_win_features(df):
    """Add ``HomeLastWin`` / ``VisitorLastWin`` columns to *df* in place.

    Each flag says whether that team won its previous game in row order;
    a team with no earlier game gets ``False``. Requires the ``HomeTeam``,
    ``VisitorTeam`` and ``HomeWin`` columns.
    """
    won_last = defaultdict(bool)
    home_flags, visitor_flags = [], []
    for home, visitor, home_win in zip(df["HomeTeam"], df["VisitorTeam"], df["HomeWin"]):
        # Record the state *before* this game, then update it with the result.
        home_flags.append(won_last[home])
        visitor_flags.append(won_last[visitor])
        won_last[home] = bool(home_win)
        won_last[visitor] = not home_win
    df["HomeLastWin"] = home_flags
    df["VisitorLastWin"] = visitor_flags


def add_win_streak_features(df):
    """Add ``HomeWinStreak`` / ``VisitorWinStreak`` columns to *df* in place.

    Each value is the number of consecutive wins the team had going into the
    game (0 after a loss or before its first game). Requires the ``HomeTeam``,
    ``VisitorTeam`` and ``HomeWin`` columns.
    """
    win_streak = defaultdict(int)
    home_streaks, visitor_streaks = [], []
    for home, visitor, home_win in zip(df["HomeTeam"], df["VisitorTeam"], df["HomeWin"]):
        home_streaks.append(win_streak[home])
        visitor_streaks.append(win_streak[visitor])
        if home_win:
            win_streak[home] += 1
            win_streak[visitor] = 0
        else:
            win_streak[home] = 0
            win_streak[visitor] += 1
    df["HomeWinStreak"] = home_streaks
    df["VisitorWinStreak"] = visitor_streaks


def rank_lookup(ladder):
    """Return a ``{team name: rank}`` dict built from *ladder*.

    Uses the ``TEAM_NAME`` and ``Rank`` columns; if a name occurs more than
    once, the first occurrence wins.
    """
    ranks = {}
    for team, rank in zip(ladder["TEAM_NAME"], ladder["Rank"]):
        ranks.setdefault(team, rank)
    return ranks


def add_rank_feature(df, ranks):
    """Add the ``HomeTeamRanksHigher`` column to *df* in place.

    The value is ``int(home_rank > visitor_rank)``, i.e. 1 when the home
    team's rank *number* is the larger one.

    Args:
        df: games DataFrame with ``HomeTeam`` and ``VisitorTeam`` columns.
        ranks: mapping from team name to rank, as built by ``rank_lookup``.

    Raises:
        KeyError: if a team (after renaming) is missing from *ranks*.
    """
    df["HomeTeamRanksHigher"] = [
        int(ranks[canonical_team_name(home)] > ranks[canonical_team_name(visitor)])
        for home, visitor in zip(df["HomeTeam"], df["VisitorTeam"])
    ]


def add_head_to_head_feature(df):
    """Add the ``HomeTeamWonLast`` column to *df* in place.

    The value is 1 when the home team won the previous game between the same
    two teams (in row order), else 0.

    Returns:
        Dict mapping each sorted ``(team, team)`` pair to the winner of the
        most recent game between them.
    """
    last_match_winner = {}
    flags = []
    for home, visitor, home_win in zip(df["HomeTeam"], df["VisitorTeam"], df["HomeWin"]):
        pairing = tuple(sorted([home, visitor]))  # Sort for a consistent ordering
        flags.append(int(last_match_winner.get(pairing) == home))
        last_match_winner[pairing] = home if home_win else visitor
    df["HomeTeamWonLast"] = flags
    return last_match_winner


if __name__ == '__main__':
    print("START INIT")

    results, ladder = load_tables()

    df = build_games(results)
    df["HomeWin"] = df["VisitorPts"] < df["HomePts"]
    # Our "class values"
    y_true = df["HomeWin"].values

    # Did the home and visitor teams win their last game?
    add_last_win_features(df)
    # Create a dataset with just the necessary information
    X_previouswins = df[["HomeLastWin", "VisitorLastWin"]].values

    # What about win streaks?
    add_win_streak_features(df)
    X_winstreak = df[["HomeLastWin", "VisitorLastWin", "HomeWinStreak", "VisitorWinStreak"]].values

    # Rank teams by their row order in the ladder table (first row -> 1).
    ladder['Rank'] = ladder.index + 1
    team_ranks = rank_lookup(ladder)

    add_rank_feature(df, team_ranks)
    X_homehigher = df[["HomeLastWin", "VisitorLastWin", "HomeTeamRanksHigher"]].values

    last_match_winner = add_head_to_head_feature(df)
    X_home_higher = df[["HomeTeamRanksHigher", "HomeTeamWonLast"]].values

    encoding = LabelEncoder()
    encoding.fit(df["HomeTeam"].values)
    home_teams = encoding.transform(df["HomeTeam"].values)
    visitor_teams = encoding.transform(df["VisitorTeam"].values)
    X_teams = np.vstack([home_teams, visitor_teams]).T

    onehot = OneHotEncoder()
    # toarray() yields a plain ndarray rather than the np.matrix from todense().
    X_teams = onehot.fit_transform(X_teams).toarray()
    X_all = np.hstack([X_home_higher, X_teams])

    parameter_space = {
        "max_features": [2, 10, 'auto'],
        "n_estimators": [100],
        "criterion": ["gini", "entropy"],
        "min_samples_leaf": [2, 4, 6],
    }
    clf = RandomForestClassifier()
    grid = GridSearchCV(clf, parameter_space)
    grid.fit(X_all, y_true)

    print("INIT DONE")
    known_teams = set(encoding.classes_)
    while True:
        m_home = input("Enter home team: ").strip()
        if m_home == "end":
            break
        m_visitor = input("Enter visitor team: ").strip()
        if m_visitor == "end":
            break

        m_home = canonical_team_name(m_home)
        m_visitor = canonical_team_name(m_visitor)

        # Reject names the encoder or the ladder has never seen instead of
        # crashing the prompt loop.
        unknown = [team for team in (m_home, m_visitor)
                   if team not in known_teams or team not in team_ranks]
        if unknown:
            print("Unknown team: " + ", ".join(unknown))
            continue

        home_value = encoding.transform([m_home])
        visitor_value = encoding.transform([m_visitor])
        # Reuse the fitted encoder so the query has exactly the training
        # column layout, with independent home and visitor halves.
        team_features = onehot.transform(
            np.array([[home_value[0], visitor_value[0]]])).toarray()

        # Same definition as the HomeTeamRanksHigher training feature,
        # computed from the teams that were actually entered.
        h_higher = int(team_ranks[m_home] > team_ranks[m_visitor])

        teams = tuple(sorted([m_home, m_visitor]))  # Sort for a consistent ordering
        # Who won the last encounter between these two teams?
        h_won_last = int(last_match_winner.get(teams) == m_home)

        res = np.hstack([[[h_higher, h_won_last]], team_features])
        result = grid.predict(res)
        if result[0]:
            print(m_home + " would Win")
        else:
            print(m_visitor + " would Win")

START INIT
INIT DONE
Enter home team: Golden State Warriors
Enter visitor team: Portland Trail Blazers
Golden State Warriors would Win
