# Part 1

#### Completing `bust_probability` function

In [1]:
import numpy as np
import copy
import matplotlib.pyplot as plt
import torch.nn as nn
import torch
import torch.optim as optim
import sklearn
from sklearn import metrics as metrics
import pandas as pd
import math

In [2]:
def data_loader(path, table_idx, player_or_dealer):
    """
    Load the spy and card series of one participant at one table.

    path: CSV file with a three-row column header (table, role, field)
    table_idx: table number; selects the 'table_<table_idx>' columns
    player_or_dealer: second-level column key, e.g. "player" or "dealer"

    output: 2D numpy array with one row per CSV row and columns [spy, card]
    """
    frame = pd.read_csv(path, header=[0, 1, 2])
    table_key = f'table_{table_idx}'
    columns = [
        frame[(table_key, player_or_dealer, field)]
        for field in ('spy', 'card')
    ]
    # Stack as rows, then transpose so each row is one (spy, card) pair.
    return np.array(columns).T

In [3]:
def bust_probability(dealer_cards):
    """
    dealer_cards: list -> integer series of player denoting value of cards observed upto this point

    Current body is random for now, change it accordingly
    
    output: probability of going bust on this table
    """
    bust_count = 0
    total_cards = len(dealer_cards)
    score = 0
    for card in dealer_cards:
        score += card
        if score > 16:
            if score > 21:
                bust_count += 1
            score = 0
    return bust_count/total_cards

In [4]:
# Print the bust_probability estimate for the dealer at each of tables 0-4.
for table_idx in range(0, 5):
    dealer_data = data_loader("data/train.csv", table_idx, "dealer")
    # data_loader returns [spy, card] columns; keep only the card column.
    dealer_cards = dealer_data[:,1]
    print(table_idx, bust_probability(dealer_cards))

0 0.18865
1 0.10155
2 0.09175
3 0.07995
4 0.00055


### Two additional points for "willingness to play"

### **1. Effective House Edge (EHE)**  
#### **Definition:**  
This metric estimates the actual house edge at a given table based on observed outcomes rather than theoretical expectations. It measures the average percentage of a player’s bet lost per round.  

#### **Formula:**  
$$
EHE = 1 - \frac{\text{Total player winnings}}{\text{Total player bets}}
$$
where:  
- **Total player winnings** = sum of all amounts won by players.  
- **Total player bets** = sum of all bets placed by players.  

#### **Strengths:**  
- Captures the real-world profitability of a table.  
- Accounts for both dealer performance and player behavior.  

#### **Weaknesses:**  
- Requires tracking actual bet sizes and payouts.  
- Can be skewed if a few players make bad decisions.  

---

### **2. Volatility Score (VS)**  
#### **Definition:**  
Measures how unpredictable the dealer’s bust rate is across different hands.  
A high volatility score suggests that the dealer’s performance is inconsistent.

#### **Formula:**  
$$
VS = \sigma_b = \sqrt{\frac{1}{N} \sum_{i=1}^{N} (B_i - \mu_b)^2}
$$
where:  
- $B_i$ is the bust probability observed in past hands.  
- $\mu_b$ is the mean bust probability.  
- $\sigma_b$ is the standard deviation of bust probabilities.

#### **Strengths:**  
- Identifies tables with consistent dealer behavior (lower volatility is preferable).  
- Helps risk-averse players avoid unpredictable situations.

#### **Weaknesses:**  
- Doesn’t directly indicate profitability.  
- Requires enough historical data for accurate estimation.

# Part 2

In [None]:
def get_card_value_from_spy_value(value : float) -> int:
    """
    Map a spy value to the card value it encodes.

    The mapping is periodic with period 10: the last digit of
    floor(value + 2.5) is the card value, except that digits 0 and 1 stand
    for the cards 10 and 11.

    value: a value from the spy series as a float

    output: the card value as an int in the range 2..11
    """
    # The +100 offset is redundant under floor, but it is kept so that results
    # for the previously supported range (value >= -102.5) stay bit-for-bit
    # identical to the earlier trunc-based version.
    shifted = value + 100
    shifted += 2.5
    # floor (not trunc) keeps the period-10 pattern intact for very negative
    # spy values, where truncation toward zero would shift the digit by one.
    digit = math.floor(shifted) % 10
    if digit <= 1:
        digit += 10
    return digit

## Motivation
For each table index, we compared the card values with the corresponding spy values. We grouped the spy values by card value and sorted them within each group. From this, we observed that the spy values for a given card repeat in jumps of 10, and that each cluster of spy values stays within a range of 0.5 around its centre.


# Part 3

In [9]:
import numpy as np
import copy
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import sklearn
from sklearn import metrics as metrics
import pandas as pd
import math
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import pickle
import os

def data_loader(path, table_idx, player_or_dealer):
    """Return the (spy, card) rows of one role at one table as a 2D array."""
    frame = pd.read_csv(path, header=[0, 1, 2])
    table_key = f'table_{table_idx}'
    columns = [
        frame[(table_key, player_or_dealer, field)]
        for field in ('spy', 'card')
    ]
    # Stack as rows, then transpose so each row is one (spy, card) pair.
    return np.array(columns).T

class SimpleNN(nn.Module):
    """Feed-forward regressor: one hidden ReLU layer and a single scalar output."""

    def __init__(self, input_size, hidden_size=16):
        super().__init__()
        hidden = nn.Linear(input_size, hidden_size)
        activation = nn.ReLU()
        head = nn.Linear(hidden_size, 1)
        # Stored under `model` so parameter names stay `model.0.*` / `model.2.*`.
        self.model = nn.Sequential(hidden, activation, head)

    def forward(self, x):
        """Run the input (last dimension `input_size`) through the network."""
        return self.model(x)

class MyPlayer:
    """
    Spy-value forecaster for one table.

    Holds one regression model for the player's spy series and one for the
    dealer's. On construction the models are loaded from files in the working
    directory when a complete set exists; otherwise they are trained from
    'train.csv' and saved.
    """

    def __init__(self, table_index):
        self.table_index = table_index
        self.player_model = None
        self.dealer_model = None
        self.player_model_type = None
        self.dealer_model_type = None
        self.player_scaler = None
        self.dealer_scaler = None
        self._load_or_train_models()

    def _artifact_paths(self, role):
        """Return the (model, type, scaler) file names used for `role`."""
        suffix = f'table_{self.table_index}'
        return (
            f'{role}_model_{suffix}.pkl',
            f'{role}_type_{suffix}.txt',
            f'{role}_scaler_{suffix}.pkl',
        )

    def _use_unfitted_fallback(self):
        """Install placeholder linear models when nothing can be loaded or trained."""
        # NOTE(review): these estimators are never fitted, so calling
        # predict() on them is expected to raise sklearn's NotFittedError -
        # confirm whether a usable default prediction is wanted instead.
        self.player_model_type = 'linear'
        self.dealer_model_type = 'linear'
        self.player_model = LinearRegression()
        self.dealer_model = LinearRegression()

    def _load_role(self, role):
        """Read the saved model, model type and optional scaler for `role`."""
        model_path, type_path, scaler_path = self._artifact_paths(role)
        # NOTE: pickle.load can execute arbitrary code; only files written by
        # _save_models should ever be placed at these paths.
        with open(model_path, 'rb') as f:
            model = pickle.load(f)
        with open(type_path, 'r') as f:
            model_type = f.read().strip()
        scaler = None
        if os.path.exists(scaler_path):
            with open(scaler_path, 'rb') as f:
                scaler = pickle.load(f)
        return model, model_type, scaler

    def _load_or_train_models(self):
        """Load pre-trained models or train new ones"""
        train_file = 'train.csv'
        # A cached set is only usable when both the model and its type file
        # exist for both roles; otherwise fall through to (re)training instead
        # of failing on a missing type file.
        required = [
            path
            for role in ('player', 'dealer')
            for path in self._artifact_paths(role)[:2]
        ]
        if all(os.path.exists(path) for path in required):
            (self.player_model,
             self.player_model_type,
             self.player_scaler) = self._load_role('player')
            (self.dealer_model,
             self.dealer_model_type,
             self.dealer_scaler) = self._load_role('dealer')
        elif os.path.exists(train_file):
            self._train_models(train_file)
        else:
            self._use_unfitted_fallback()

    def _train_models(self, train_file):
        """
        Train models using data from train.csv

        Any error while reading the data or fitting is reported and the
        placeholder fallback models are installed. An error while saving is
        reported but the freshly trained models are kept in memory.
        """
        try:
            data = pd.read_csv(train_file, header=[0, 1, 2])
            table_key = f'table_{self.table_index}'
            player_spy = data[(table_key, 'player', 'spy')].values
            dealer_spy = data[(table_key, 'dealer', 'spy')].values
            X_player, y_player = self._create_sequence_data(player_spy)
            X_dealer, y_dealer = self._create_sequence_data(dealer_spy)
            player_info = self._select_and_train_best_model(X_player, y_player, 'player')
            dealer_info = self._select_and_train_best_model(X_dealer, y_dealer, 'dealer')
        except Exception as e:
            # Deliberate best-effort: training problems must not stop
            # construction of the player object.
            print(f"Error during training: {e}")
            self._use_unfitted_fallback()
            return

        self.player_model = player_info['model']
        self.player_model_type = player_info['type']
        self.player_scaler = player_info.get('scaler')
        self.dealer_model = dealer_info['model']
        self.dealer_model_type = dealer_info['type']
        self.dealer_scaler = dealer_info.get('scaler')

        try:
            self._save_models()
        except (OSError, pickle.PicklingError) as e:
            # Saving is only a cache; keep the trained models on failure.
            print(f"Error saving models: {e}")

    def _create_sequence_data(self, series, lag=5):
        """
        Create sequence data with lag features for time series prediction

        Row i of X holds `lag` consecutive values of `series`; y[i] is the
        value that immediately follows them.
        """
        X = [series[i - lag:i] for i in range(lag, len(series))]
        y = [series[i] for i in range(lag, len(series))]
        return np.array(X), np.array(y)

    def _fit_nn(self, X_tensor, y_tensor, epochs):
        """Train a fresh SimpleNN with full-batch Adam (lr=0.01) on MSE loss."""
        model = SimpleNN(X_tensor.shape[1])
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)
        for _ in range(epochs):
            loss = criterion(model(X_tensor), y_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        return model

    def _select_and_train_best_model(self, X, y, role):
        """
        Select and train the best model for the data based on simple validation

        With fewer than 20 samples a linear regression is fitted directly.
        Otherwise linear regression, a random forest and (when the training
        part has more than 50 samples) a small neural network are compared by
        MSE on the last 20% of the samples, and the winner is refitted on all
        samples.

        Returns a dict with keys 'model' and 'type', plus 'scaler' when the
        neural network wins. `role` is currently unused.
        """
        if len(X) < 20:
            model = LinearRegression()
            model.fit(X, y)
            return {'model': model, 'type': 'linear'}

        # Chronological split: the most recent 20% is the validation set.
        split = int(0.8 * len(X))
        X_train, X_val = X[:split], X[split:]
        y_train, y_val = y[:split], y[split:]

        candidates = {
            'linear': LinearRegression(),
            'forest': RandomForestRegressor(n_estimators=100, random_state=42),
        }
        best_mse = float('inf')
        best_model_info = None
        for model_type, model in candidates.items():
            model.fit(X_train, y_train)
            mse = np.mean((y_val - model.predict(X_val)) ** 2)
            if mse < best_mse:
                best_mse = mse
                best_model_info = {'model': model, 'type': model_type}

        if len(X_train) > 50:
            scaler = StandardScaler()
            X_train_tensor = torch.FloatTensor(scaler.fit_transform(X_train))
            X_val_tensor = torch.FloatTensor(scaler.transform(X_val))
            y_train_tensor = torch.FloatTensor(y_train.reshape(-1, 1))
            model = self._fit_nn(X_train_tensor, y_train_tensor, epochs=200)
            model.eval()
            with torch.no_grad():
                y_pred = model(X_val_tensor).numpy().flatten()
            mse = np.mean((y_val - y_pred) ** 2)
            if mse < best_mse:
                best_mse = mse
                best_model_info = {'model': model, 'type': 'nn', 'scaler': scaler}

        # Refit the winner on every sample before it is used for prediction.
        if best_model_info['type'] == 'nn':
            # The scaler object is refitted in place on the full data set.
            X_tensor = torch.FloatTensor(best_model_info['scaler'].fit_transform(X))
            y_tensor = torch.FloatTensor(y.reshape(-1, 1))
            best_model_info['model'] = self._fit_nn(X_tensor, y_tensor, epochs=300)
        else:
            best_model_info['model'].fit(X, y)

        return best_model_info

    def _save_models(self):
        """Save trained models to disk"""
        for role in ('player', 'dealer'):
            model_path, type_path, scaler_path = self._artifact_paths(role)
            with open(model_path, 'wb') as f:
                pickle.dump(getattr(self, f'{role}_model'), f)
            with open(type_path, 'w') as f:
                f.write(getattr(self, f'{role}_model_type'))
            scaler = getattr(self, f'{role}_scaler')
            if scaler is not None:
                with open(scaler_path, 'wb') as f:
                    pickle.dump(scaler, f)

    def get_card_value_from_spy_value(self, value):
        """
        value: a value from the spy series as a float
        Output: return the card value (an int in 2..11) encoded by the spy value

        The last digit of floor(value + 2.5) is the card value, with the
        digits 0 and 1 standing for the cards 10 and 11.
        """
        # The +100 offset is redundant under floor, but keeps results for
        # value >= -102.5 bit-for-bit identical to the earlier trunc version.
        shifted = value + 100
        shifted += 2.5
        # floor (not trunc) keeps the period-10 pattern for very negative values.
        digit = math.floor(shifted) % 10
        if digit <= 1:
            digit += 10
        return digit

    def _predict_spy(self, model, model_type, scaler, hist):
        """Predict the next spy value from one history window with the given model."""
        X = np.asarray(hist).reshape(1, -1)
        if model_type == 'nn':
            X_tensor = torch.FloatTensor(scaler.transform(X))
            model.eval()
            with torch.no_grad():
                return model(X_tensor).item()
        return model.predict(X)[0]

    def get_player_spy_prediction(self, hist):
        """
        hist: a 1D numpy array (or list) of size (len_history,) len_history=5
        Output: return a scalar value of the prediction
        """
        return self._predict_spy(
            self.player_model, self.player_model_type, self.player_scaler, hist
        )

    def get_dealer_spy_prediction(self, hist):
        """
        hist: a 1D numpy array (or list) of size (len_history,) len_history=5
        Output: return a scalar value of the prediction
        """
        return self._predict_spy(
            self.dealer_model, self.dealer_model_type, self.dealer_scaler, hist
        )

## Metric Chosen
For modeling both the player and dealer series, we optimized for Mean Squared Error (MSE). 
MSE is an appropriate choice for this time series prediction task because:
1. It heavily penalizes large prediction errors
2. It's differentiable, making it suitable for gradient-based optimization in our neural network models
3. It directly measures the prediction accuracy our models need for the spy value forecasting task

## Model Architecture Chosen
We implemented a model selection approach that evaluates three different architectures:
- Linear Regression: A simple baseline model
- Random Forest Regressor: A more complex, ensemble-based model
- Neural Network: A simple feedforward network with one hidden layer

Our selection process uses validation-based performance to choose the best model for each series.

## Feature Engineering
The primary feature engineering technique we employed is sequence-based features through lag transformation.
This approach:
1. Uses the previous 5 values (lag=5) to predict the next value
2. Creates a sliding window over the time series
3. Transforms the original univariate time series into a supervised learning problem

## Results:
| Table Index | Player MSE           | Dealer MSE           |
|------------|----------------------|----------------------|
| 0          | 0.01434972237259315  | 0.089254206383908   |
| 1          | 0.015573171871968152 | 0.014661734169542386 |
| 2          | 147.19203721635256   | 0.04285106797249309  |
| 3          | 43.656949538811936   | 0.024870984553638842 |
| 4          | 14.226120387807345   | 0.0015798439714928886 |

## Note:
We tried LSTMs and XGBoost, but they were very time-consuming to train and did not give satisfactory results. Our current model does not give the best possible results, but we settled on it because of the trade-off between accuracy and training time.

# Part 4

Using our model from Part 3, we were able to get the next turn "player_total" and "dealer_total" and make decisions on it.

On the player's turn, regardless of the dealer's strategy, the player hits if the predicted total after the next card is <= 21, and stands otherwise.

On the dealer's turn, regardless of the dealer's strategy, the player continues if we predict that the dealer goes bust on the next card; otherwise, if the dealer's predicted total after the next card is greater than our total, the player surrenders to avoid losing all the money at the end of the game.

In this part we know the dealer's strategy in advance, so we added a specific condition: if the current dealer_total is > 16 (the dealer will stand until the end of the game), the player continues when the player's total is at least the dealer's total, and surrenders otherwise.

Since our predicted next turn total values seemed quite reliable, we used them in our strategy to make decisions.

## Code:

In [7]:
import numpy as np
import copy
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import sklearn
from sklearn import metrics as metrics
import pandas as pd
import math
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import pickle
import os

def data_loader(path, table_idx, player_or_dealer):
    """Return the (spy, card) rows of one role at one table as a 2D array."""
    frame = pd.read_csv(path, header=[0, 1, 2])
    table_key = f'table_{table_idx}'
    columns = [
        frame[(table_key, player_or_dealer, field)]
        for field in ('spy', 'card')
    ]
    # Stack as rows, then transpose so each row is one (spy, card) pair.
    return np.array(columns).T

class SimpleNN(nn.Module):
    """Feed-forward regressor: one hidden ReLU layer and a single scalar output."""

    def __init__(self, input_size, hidden_size=16):
        super().__init__()
        hidden = nn.Linear(input_size, hidden_size)
        activation = nn.ReLU()
        head = nn.Linear(hidden_size, 1)
        # Stored under `model` so parameter names stay `model.0.*` / `model.2.*`.
        self.model = nn.Sequential(hidden, activation, head)

    def forward(self, x):
        """Run the input (last dimension `input_size`) through the network."""
        return self.model(x)

class MyPlayer:
    """
    Blackjack player for one table, driven by spy-value forecasts.

    Holds one regression model for the player's spy series and one for the
    dealer's. On construction the models are loaded from files in the working
    directory when a complete set exists; otherwise they are trained from
    'train.csv' and saved. get_player_action turns the forecasts into
    hit/stand and continue/surrender decisions.
    """

    def __init__(self, table_index):
        self.table_index = table_index
        self.player_model = None
        self.dealer_model = None
        self.player_model_type = None
        self.dealer_model_type = None
        self.player_scaler = None
        self.dealer_scaler = None
        self._load_or_train_models()

    def _artifact_paths(self, role):
        """Return the (model, type, scaler) file names used for `role`."""
        suffix = f'table_{self.table_index}'
        return (
            f'{role}_model_{suffix}.pkl',
            f'{role}_type_{suffix}.txt',
            f'{role}_scaler_{suffix}.pkl',
        )

    def _use_unfitted_fallback(self):
        """Install placeholder linear models when nothing can be loaded or trained."""
        # NOTE(review): these estimators are never fitted, so calling
        # predict() on them is expected to raise sklearn's NotFittedError -
        # confirm whether a usable default prediction is wanted instead.
        self.player_model_type = 'linear'
        self.dealer_model_type = 'linear'
        self.player_model = LinearRegression()
        self.dealer_model = LinearRegression()

    def _load_role(self, role):
        """Read the saved model, model type and optional scaler for `role`."""
        model_path, type_path, scaler_path = self._artifact_paths(role)
        # NOTE: pickle.load can execute arbitrary code; only files written by
        # _save_models should ever be placed at these paths.
        with open(model_path, 'rb') as f:
            model = pickle.load(f)
        with open(type_path, 'r') as f:
            model_type = f.read().strip()
        scaler = None
        if os.path.exists(scaler_path):
            with open(scaler_path, 'rb') as f:
                scaler = pickle.load(f)
        return model, model_type, scaler

    def _load_or_train_models(self):
        """Load pre-trained models or train new ones"""
        train_file = 'train.csv'
        # A cached set is only usable when both the model and its type file
        # exist for both roles; otherwise fall through to (re)training instead
        # of failing on a missing type file.
        required = [
            path
            for role in ('player', 'dealer')
            for path in self._artifact_paths(role)[:2]
        ]
        if all(os.path.exists(path) for path in required):
            (self.player_model,
             self.player_model_type,
             self.player_scaler) = self._load_role('player')
            (self.dealer_model,
             self.dealer_model_type,
             self.dealer_scaler) = self._load_role('dealer')
        elif os.path.exists(train_file):
            self._train_models(train_file)
        else:
            self._use_unfitted_fallback()

    def _train_models(self, train_file):
        """
        Train models using data from train.csv

        Any error while reading the data or fitting is reported and the
        placeholder fallback models are installed. An error while saving is
        reported but the freshly trained models are kept in memory.
        """
        try:
            data = pd.read_csv(train_file, header=[0, 1, 2])
            table_key = f'table_{self.table_index}'
            player_spy = data[(table_key, 'player', 'spy')].values
            dealer_spy = data[(table_key, 'dealer', 'spy')].values
            X_player, y_player = self._create_sequence_data(player_spy)
            X_dealer, y_dealer = self._create_sequence_data(dealer_spy)
            player_info = self._select_and_train_best_model(X_player, y_player, 'player')
            dealer_info = self._select_and_train_best_model(X_dealer, y_dealer, 'dealer')
        except Exception as e:
            # Deliberate best-effort: training problems must not stop
            # construction of the player object.
            print(f"Error during training: {e}")
            self._use_unfitted_fallback()
            return

        self.player_model = player_info['model']
        self.player_model_type = player_info['type']
        self.player_scaler = player_info.get('scaler')
        self.dealer_model = dealer_info['model']
        self.dealer_model_type = dealer_info['type']
        self.dealer_scaler = dealer_info.get('scaler')

        try:
            self._save_models()
        except (OSError, pickle.PicklingError) as e:
            # Saving is only a cache; keep the trained models on failure.
            print(f"Error saving models: {e}")

    def _create_sequence_data(self, series, lag=5):
        """
        Create sequence data with lag features for time series prediction

        Row i of X holds `lag` consecutive values of `series`; y[i] is the
        value that immediately follows them.
        """
        X = [series[i - lag:i] for i in range(lag, len(series))]
        y = [series[i] for i in range(lag, len(series))]
        return np.array(X), np.array(y)

    def _fit_nn(self, X_tensor, y_tensor, epochs):
        """Train a fresh SimpleNN with full-batch Adam (lr=0.01) on MSE loss."""
        model = SimpleNN(X_tensor.shape[1])
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=0.01)
        for _ in range(epochs):
            loss = criterion(model(X_tensor), y_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        return model

    def _select_and_train_best_model(self, X, y, role):
        """
        Select and train the best model for the data based on simple validation

        With fewer than 20 samples a linear regression is fitted directly.
        Otherwise linear regression, a random forest and (when the training
        part has more than 50 samples) a small neural network are compared by
        MSE on the last 20% of the samples, and the winner is refitted on all
        samples.

        Returns a dict with keys 'model' and 'type', plus 'scaler' when the
        neural network wins. `role` is currently unused.
        """
        if len(X) < 20:
            model = LinearRegression()
            model.fit(X, y)
            return {'model': model, 'type': 'linear'}

        # Chronological split: the most recent 20% is the validation set.
        split = int(0.8 * len(X))
        X_train, X_val = X[:split], X[split:]
        y_train, y_val = y[:split], y[split:]

        candidates = {
            'linear': LinearRegression(),
            'forest': RandomForestRegressor(n_estimators=100, random_state=42),
        }
        best_mse = float('inf')
        best_model_info = None
        for model_type, model in candidates.items():
            model.fit(X_train, y_train)
            mse = np.mean((y_val - model.predict(X_val)) ** 2)
            if mse < best_mse:
                best_mse = mse
                best_model_info = {'model': model, 'type': model_type}

        if len(X_train) > 50:
            scaler = StandardScaler()
            X_train_tensor = torch.FloatTensor(scaler.fit_transform(X_train))
            X_val_tensor = torch.FloatTensor(scaler.transform(X_val))
            y_train_tensor = torch.FloatTensor(y_train.reshape(-1, 1))
            model = self._fit_nn(X_train_tensor, y_train_tensor, epochs=200)
            model.eval()
            with torch.no_grad():
                y_pred = model(X_val_tensor).numpy().flatten()
            mse = np.mean((y_val - y_pred) ** 2)
            if mse < best_mse:
                best_mse = mse
                best_model_info = {'model': model, 'type': 'nn', 'scaler': scaler}

        # Refit the winner on every sample before it is used for prediction.
        if best_model_info['type'] == 'nn':
            # The scaler object is refitted in place on the full data set.
            X_tensor = torch.FloatTensor(best_model_info['scaler'].fit_transform(X))
            y_tensor = torch.FloatTensor(y.reshape(-1, 1))
            best_model_info['model'] = self._fit_nn(X_tensor, y_tensor, epochs=300)
        else:
            best_model_info['model'].fit(X, y)

        return best_model_info

    def _save_models(self):
        """Save trained models to disk"""
        for role in ('player', 'dealer'):
            model_path, type_path, scaler_path = self._artifact_paths(role)
            with open(model_path, 'wb') as f:
                pickle.dump(getattr(self, f'{role}_model'), f)
            with open(type_path, 'w') as f:
                f.write(getattr(self, f'{role}_model_type'))
            scaler = getattr(self, f'{role}_scaler')
            if scaler is not None:
                with open(scaler_path, 'wb') as f:
                    pickle.dump(scaler, f)

    def get_card_value_from_spy_value(self, value):
        """
        value: a value from the spy series as a float
        Output: return the card value (an int in 2..11) encoded by the spy value

        The last digit of floor(value + 2.5) is the card value, with the
        digits 0 and 1 standing for the cards 10 and 11.
        """
        # The +100 offset is redundant under floor, but keeps results for
        # value >= -102.5 bit-for-bit identical to the earlier trunc version.
        shifted = value + 100
        shifted += 2.5
        # floor (not trunc) keeps the period-10 pattern for very negative values.
        digit = math.floor(shifted) % 10
        if digit <= 1:
            digit += 10
        return digit

    def _predict_spy(self, model, model_type, scaler, hist):
        """Predict the next spy value from one history window with the given model."""
        X = np.asarray(hist).reshape(1, -1)
        if model_type == 'nn':
            X_tensor = torch.FloatTensor(scaler.transform(X))
            model.eval()
            with torch.no_grad():
                return model(X_tensor).item()
        return model.predict(X)[0]

    def get_player_spy_prediction(self, hist):
        """
        hist: a 1D numpy array (or list) of size (len_history,) len_history=5
        Output: return a scalar value of the prediction
        """
        return self._predict_spy(
            self.player_model, self.player_model_type, self.player_scaler, hist
        )

    def get_dealer_spy_prediction(self, hist):
        """
        hist: a 1D numpy array (or list) of size (len_history,) len_history=5
        Output: return a scalar value of the prediction
        """
        return self._predict_spy(
            self.dealer_model, self.dealer_model_type, self.dealer_scaler, hist
        )

    def _pad_history(self, history, length=5):
        """Return the last `length` values of `history`, left-padded with zeros."""
        recent = list(history)[-length:]
        return np.array([0] * (length - len(recent)) + recent)

    def get_player_action(self,
                        curr_spy_history_player, 
                        curr_spy_history_dealer, 
                        curr_card_history_player, 
                        curr_card_history_dealer, 
                        curr_player_total, 
                        curr_dealer_total, 
                        turn,
                        game_index,
                        ):
        """
        Decide the next action from the predicted next card.

        Arguments:
        curr_spy_history_player: list -> real number spy value series of player observed upto this point
        curr_spy_history_dealer: list -> real number spy value series of dealer observed upto this point
        curr_card_history_player: list -> integer series of player denoting value of cards observed upto this point
        curr_card_history_dealer: list -> integer series of dealer denoting value of cards observed upto this point
        curr_player_total: integer score of player
        curr_dealer_total: integer score of dealer
        turn: string -> either "player" or "dealer" denoting if its the player drawing right now or the dealer opening her cards
        game_index: integer -> tells which game is going on. Can be useful to figure if a new game has started

        Note that corresponding series of card and spy values are of the same length.
        The card histories and game_index are currently not used. Spy histories
        shorter than 5 values are left-padded with zeros before prediction.

        Output:
            if turn=="player" output either string "hit" or "stand" based on your decision
            else if turn=="dealer" output either string "surrender" or "continue" based on your decision
        """
        if turn == 'player':
            # Hit only if the predicted next card keeps the player at or below 21.
            spy = self.get_player_spy_prediction(
                self._pad_history(curr_spy_history_player)
            )
            predicted_total = curr_player_total + self.get_card_value_from_spy_value(spy)
            return "hit" if predicted_total <= 21 else "stand"

        # Dealer's turn. Above 16 the dealer's total is treated as final, so
        # the decision is a direct comparison of the two totals.
        if curr_dealer_total > 16:
            if curr_player_total >= curr_dealer_total:
                return "continue"
            return "surrender"

        spy = self.get_dealer_spy_prediction(
            self._pad_history(curr_spy_history_dealer)
        )
        predicted_dealer_total = curr_dealer_total + self.get_card_value_from_spy_value(spy)
        if predicted_dealer_total > 21:
            # The dealer is predicted to bust on the next card.
            return "continue"
        if predicted_dealer_total > curr_player_total:
            return "surrender"
        return "continue"

## Note:
We also tried replicating the dealer's strategy as the player's strategy, but we found that it resulted in a loss, since the player always has to play first.

## Results:
| Table Index | Earnings |
|------------|----------|
| 0          | 498.0    |
| 1          | 244.5    |
| 2          | 272.5    |
| 3          | -138.0   |
| 4          | -284.5   |

We see that, under our strategy, tables 3 and 4 are not profitable, with table 4 performing worst.

# Part 5

Here we do not know the dealer's strategy in advance, so our decisions have to cover as many rational possibilities as possible.

On the player's turn, regardless of the dealer's strategy, the player hits if the predicted total after the next card is <= 21, and stands otherwise.

On the dealer's turn, regardless of the dealer's strategy, the player continues if we predict that the dealer goes bust on the next card; otherwise, if the dealer's predicted total after the next card is greater than our total, the player surrenders to avoid losing all the money at the end of the game.

In [None]:
import numpy as np
import copy
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import sklearn
from sklearn import metrics as metrics
import pandas as pd
import math
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import pickle
import os

def data_loader(path, table_idx, player_or_dealer):
    """Return the (spy, card) rows of one role at one table as a 2D array."""
    frame = pd.read_csv(path, header=[0, 1, 2])
    table_key = f'table_{table_idx}'
    columns = [
        frame[(table_key, player_or_dealer, field)]
        for field in ('spy', 'card')
    ]
    # Stack as rows, then transpose so each row is one (spy, card) pair.
    return np.array(columns).T

class SimpleNN(nn.Module):
    """Small feed-forward regressor: Linear -> ReLU -> Linear with one output."""

    def __init__(self, input_size, hidden_size=16):
        super().__init__()
        layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
        ]
        # The attribute stays named ``model`` so parameter names are unchanged.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        prediction = self.model(x)
        return prediction

class MyPlayer:
    """Blackjack player for one table whose dealer strategy is not known.

    For each role (player and dealer) a regression model is fitted on the
    table's spy series from ``train.csv``: the previous ``LAG`` spy values are
    the features and the following spy value is the target. At play time the
    forecast spy value is converted into a card value, and that predicted card
    decides between "hit"/"stand" (player turn) or "continue"/"surrender"
    (dealer turn).

    Trained models are cached in the working directory as
    ``<role>_model_table_<i>.pkl``, ``<role>_type_table_<i>.txt`` and, for
    neural-network models, ``<role>_scaler_table_<i>.pkl``.
    """

    LAG = 5  # number of past spy values fed to the forecasting models

    def __init__(self, table_index):
        self.table_index = table_index
        self.player_model = None
        self.dealer_model = None
        self.player_model_type = None
        self.dealer_model_type = None
        self.player_scaler = None
        self.dealer_scaler = None
        self._load_or_train_models()

    def _artifact_paths(self, role):
        """Return the cache file names used for ``role`` ('player' or 'dealer')."""
        return {
            'model': f'{role}_model_table_{self.table_index}.pkl',
            'type': f'{role}_type_table_{self.table_index}.txt',
            'scaler': f'{role}_scaler_table_{self.table_index}.pkl',
        }

    def _load_or_train_models(self):
        """Load cached models, else train from ``train.csv``, else fall back.

        The cache is used only when the model *and* type files of both roles
        exist; a partially written cache is treated as missing so that the
        models are retrained instead of failing on a missing file.
        """
        train_file = 'train.csv'
        roles = ('player', 'dealer')
        paths = {role: self._artifact_paths(role) for role in roles}
        cache_complete = all(
            os.path.exists(paths[role][kind])
            for role in roles
            for kind in ('model', 'type')
        )
        if cache_complete:
            (self.player_model,
             self.player_model_type,
             self.player_scaler) = self._load_role(paths['player'])
            (self.dealer_model,
             self.dealer_model_type,
             self.dealer_scaler) = self._load_role(paths['dealer'])
        elif os.path.exists(train_file):
            self._train_models(train_file)
        else:
            self._use_fallback_models()

    @staticmethod
    def _load_role(paths):
        """Read one role's cached ``(model, model_type, scaler)`` from disk.

        ``scaler`` is None when no scaler file exists.
        """
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # cache files that this class wrote itself, never untrusted files.
        with open(paths['model'], 'rb') as f:
            model = pickle.load(f)
        with open(paths['type'], 'r') as f:
            model_type = f.read().strip()
        scaler = None
        if os.path.exists(paths['scaler']):
            with open(paths['scaler'], 'rb') as f:
                scaler = pickle.load(f)
        return model, model_type, scaler

    def _use_fallback_models(self):
        """Install trivially fitted linear models so prediction cannot crash.

        An unfitted ``LinearRegression`` raises on ``predict``; fitting it on
        all-zero data yields a model that always forecasts a spy value of 0.
        """
        X = np.zeros((2, self.LAG))
        y = np.zeros(2)
        self.player_model_type = 'linear'
        self.dealer_model_type = 'linear'
        self.player_model = LinearRegression().fit(X, y)
        self.dealer_model = LinearRegression().fit(X, y)
        self.player_scaler = None
        self.dealer_scaler = None

    def _train_models(self, train_file):
        """Train both role models from ``train_file`` and cache them.

        Any failure while reading the data or fitting is reported and the
        fallback models are installed instead. A failure while writing the
        cache is reported but keeps the freshly trained models in memory.
        """
        try:
            data = pd.read_csv(train_file, header=[0, 1, 2])
            table = f'table_{self.table_index}'
            player_spy = data[(table, 'player', 'spy')].values
            dealer_spy = data[(table, 'dealer', 'spy')].values
            X_player, y_player = self._create_sequence_data(player_spy)
            X_dealer, y_dealer = self._create_sequence_data(dealer_spy)
            player_info = self._select_and_train_best_model(X_player, y_player, 'player')
            dealer_info = self._select_and_train_best_model(X_dealer, y_dealer, 'dealer')
        except Exception as e:
            # Best-effort boundary: a broken training file must not prevent
            # the player object from being created.
            print(f"Error during training: {e}")
            self._use_fallback_models()
            return

        self.player_model = player_info['model']
        self.player_model_type = player_info['type']
        self.player_scaler = player_info.get('scaler')
        self.dealer_model = dealer_info['model']
        self.dealer_model_type = dealer_info['type']
        self.dealer_scaler = dealer_info.get('scaler')

        try:
            self._save_models()
        except (OSError, pickle.PickleError, AttributeError, TypeError) as e:
            # Caching is only an optimisation; keep the trained models.
            print(f"Could not cache trained models: {e}")

    def _create_sequence_data(self, series, lag=LAG):
        """Build lag features: row i holds ``lag`` values, target is the next one.

        Returns ``(X, y)`` as numpy arrays; both are empty when ``series`` has
        no more than ``lag`` elements.
        """
        targets = range(lag, len(series))
        X = [series[i - lag:i] for i in targets]
        y = [series[i] for i in targets]
        return np.array(X), np.array(y)

    @staticmethod
    def _fit_nn(X_scaled, y, epochs):
        """Train a fresh ``SimpleNN`` on already scaled features with Adam/MSE."""
        inputs = torch.as_tensor(np.asarray(X_scaled), dtype=torch.float32)
        targets = torch.as_tensor(np.asarray(y).reshape(-1, 1), dtype=torch.float32)
        net = SimpleNN(inputs.shape[1])
        criterion = nn.MSELoss()
        optimizer = optim.Adam(net.parameters(), lr=0.01)
        for _ in range(epochs):
            optimizer.zero_grad()
            loss = criterion(net(inputs), targets)
            loss.backward()
            optimizer.step()
        return net

    def _select_and_train_best_model(self, X, y, role):
        """Pick the model type with the lowest validation MSE and refit it.

        With fewer than 20 samples a linear regression is fitted directly.
        Otherwise the first 80% of the samples are used for fitting and the
        rest for validation; linear regression and a random forest are always
        compared, and a small neural network joins when more than 50 training
        samples are available. The winner is refitted on all samples.

        ``role`` is accepted for interface compatibility and is not used.

        Returns a dict with keys ``'model'`` and ``'type'`` ('linear',
        'forest' or 'nn'), plus ``'scaler'`` when the type is 'nn'.
        """
        if len(X) < 20:
            model = LinearRegression()
            model.fit(X, y)
            return {'model': model, 'type': 'linear'}

        split = int(0.8 * len(X))
        X_train, X_val = X[:split], X[split:]
        y_train, y_val = y[:split], y[split:]

        candidates = {
            'linear': LinearRegression(),
            'forest': RandomForestRegressor(n_estimators=100, random_state=42),
        }
        best_mse = float('inf')
        best_model_info = None
        for model_type, model in candidates.items():
            model.fit(X_train, y_train)
            mse = np.mean((y_val - model.predict(X_val)) ** 2)
            if mse < best_mse:
                best_mse = mse
                best_model_info = {'model': model, 'type': model_type}

        if len(X_train) > 50:
            scaler = StandardScaler()
            net = self._fit_nn(scaler.fit_transform(X_train), y_train, epochs=200)
            X_val_tensor = torch.as_tensor(scaler.transform(X_val), dtype=torch.float32)
            net.eval()
            with torch.no_grad():
                y_pred = net(X_val_tensor).numpy().flatten()
            mse = np.mean((y_val - y_pred) ** 2)
            if mse < best_mse:
                best_mse = mse
                best_model_info = {'model': net, 'type': 'nn', 'scaler': scaler}

        if best_model_info is None:
            # Every candidate produced a NaN error (e.g. NaNs in the data).
            raise ValueError("no candidate model produced a finite validation error")

        # Refit the winner on every sample before it is used for play.
        if best_model_info['type'] == 'nn':
            X_scaled = best_model_info['scaler'].fit_transform(X)
            best_model_info['model'] = self._fit_nn(X_scaled, y, epochs=300)
        else:
            best_model_info['model'].fit(X, y)
        return best_model_info

    def _save_models(self):
        """Write the models, their type tags and any scalers to the cache files."""
        artifacts = {
            'player': (self.player_model, self.player_model_type, self.player_scaler),
            'dealer': (self.dealer_model, self.dealer_model_type, self.dealer_scaler),
        }
        for role, (model, model_type, scaler) in artifacts.items():
            paths = self._artifact_paths(role)
            with open(paths['model'], 'wb') as f:
                pickle.dump(model, f)
            with open(paths['type'], 'w') as f:
                f.write(model_type)
            if scaler is not None:
                with open(paths['scaler'], 'wb') as f:
                    pickle.dump(scaler, f)

    def get_card_value_from_spy_value(self, value):
        """
        value: a value from the spy series as a float
        Output: the card value as an int in the range 2..11

        The spy value is shifted by 102.5 and truncated; the last decimal
        digit of the result is the card, with digits 0 and 1 mapped to 10
        and 11.
        """
        value += 100
        value += 2.5
        value = math.trunc(value)
        value = value % 10
        if value <= 1:
            value += 10
        return value

    @staticmethod
    def _predict_spy(model, model_type, scaler, hist):
        """Forecast the next spy value from one window of past spy values."""
        X = np.asarray(hist, dtype=float).reshape(1, -1)
        if model_type == 'nn':
            X_tensor = torch.as_tensor(scaler.transform(X), dtype=torch.float32)
            model.eval()
            with torch.no_grad():
                return model(X_tensor).item()
        return model.predict(X)[0]

    def get_player_spy_prediction(self, hist):
        """
        hist: a 1D numpy array of size (len_history,) len_history=5
        Output: return a scalar value of the prediction
        """
        return self._predict_spy(
            self.player_model, self.player_model_type, self.player_scaler, hist
        )

    def get_dealer_spy_prediction(self, hist):
        """
        hist: a 1D numpy array of size (len_history,) len_history=5
        Output: return a scalar value of the prediction
        """
        return self._predict_spy(
            self.dealer_model, self.dealer_model_type, self.dealer_scaler, hist
        )

    @staticmethod
    def _last_window(history, lag=LAG):
        """Return the latest ``lag`` values of ``history``, left-padded with zeros."""
        recent = list(history)[-lag:]
        return np.array([0] * (lag - len(recent)) + recent, dtype=float)

    def get_player_action(self,
                        curr_spy_history_player, 
                        curr_spy_history_dealer, 
                        curr_card_history_player, 
                        curr_card_history_dealer, 
                        curr_player_total, 
                        curr_dealer_total, 
                        turn,
                        game_index,
                        ):
        """
        Arguments:
        curr_spy_history_player: list -> real number spy value series of player observed upto this point
        curr_spy_history_dealer: list -> real number spy value series of dealer observed upto this point
        curr_card_history_player: list -> integer series of player denoting value of cards observed upto this point
        curr_card_history_dealer: list -> integer series of dealer denoting value of cards observed upto this point
        curr_player_total: integer score of player
        curr_dealer_total: integer score of dealer
        turn: string -> either "player" or "dealer" denoting if its the player drawing right now or the dealer opening her cards
        game_index: integer -> tells which game is going on. Can be useful to figure if a new game has started

        Note that corresponding series of card and spy values are of the same length.
        The card histories and game_index are not used by this strategy.

        Output:
            if turn=="player": "hit" when the total after the predicted next
            card stays <= 21, otherwise "stand"
            else: "surrender" when the dealer's predicted total does not bust
            and exceeds the player's total, otherwise "continue"
        """
        if turn == 'player':
            # Histories shorter than LAG are zero-padded on the left so the
            # model always receives a full window.
            window = self._last_window(curr_spy_history_player)
            player_spy_prediction = self.get_player_spy_prediction(window)
            player_card_prediction = self.get_card_value_from_spy_value(player_spy_prediction)
            modified_player_total = curr_player_total + player_card_prediction
            if modified_player_total <= 21:
                return "hit"
            return "stand"

        window = self._last_window(curr_spy_history_dealer)
        dealer_spy_prediction = self.get_dealer_spy_prediction(window)
        dealer_card_prediction = self.get_card_value_from_spy_value(dealer_spy_prediction)
        modified_dealer_total = curr_dealer_total + dealer_card_prediction
        if modified_dealer_total > 21:
            return "continue"
        if modified_dealer_total > curr_player_total:
            return "surrender"
        return "continue"

## Results:
| Table Index | Earnings |
|------------|----------|
| 0          | 104.0    |
| 1          | 70.5     |
| 2          | 54.0     |
| 3          | -18.5    |
| 4          | -84.0    |

We see that table index 4 was the least profitable, while tables 0–2 made a profit.

# Part 6

Here we used a similar strategy as in Part 5. We used correlation to find a linear shift between the spy values of different tables. A high correlation means that the spy values are likely linearly shifted, so we can use this information to predict the next spy value.

In [None]:
import numpy as np
import copy
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import sklearn
from sklearn import metrics as metrics
import pandas as pd
import math

def data_loader(path, table_idx, player_or_dealer):
    """Return the (spy, card) pairs of one role at table ``table_idx``.

    ``path`` is a CSV with a three-row header. The result is a 2-D numpy
    array with one row per CSV row: spy value first, card value second.
    """
    frame = pd.read_csv(path, header=[0, 1, 2])
    prefix = (f'table_{table_idx}', player_or_dealer)
    spy_series = frame[prefix + ('spy',)]
    card_series = frame[prefix + ('card',)]
    stacked = np.array([spy_series, card_series])
    return stacked.T

def sicily_data_loader(path, player_or_dealer):
    """Return the (spy, card) pairs of one role at the ``special_table``.

    ``path`` is a CSV with a three-row header. The result is a 2-D numpy
    array with one row per CSV row: spy value first, card value second.
    """
    frame = pd.read_csv(path, header=[0, 1, 2])
    table_key = 'special_table'
    series = [
        frame[(table_key, player_or_dealer, field)]
        for field in ('spy', 'card')
    ]
    return np.array(series).T

class MyPlayerMulti:
    """Player that acts on several tables at once, the last being Sicilian.

    The chosen regular tables come first in every per-table list and the
    Sicilian table is the last entry. Windows of ``lag`` spy values from a
    regular table, shifted back in time, are correlated with the latest
    window of the Sicilian table; when a strong correlation is found, the
    regular table's spy value at that shift is used as the forecast for the
    Sicilian table. Without such a correlation the next spy value is
    extrapolated linearly from the table's own recent history.
    """

    def __init__(self, lag=5):
        self.lag = lag
        self.player_models = {}
        self.dealer_models = {}
        self.game_history = {}
        # Maps "<role>_<table_idx>_sicily" -> {'shift': int, 'correlation': float}.
        self.table_correlations = {}
        self.prediction_history = {}
        # Latest per-table spy histories, refreshed by get_player_action_multi.
        # Initialised here so _predict_next_card is safe to call on its own.
        self.current_spy_histories_player = []
        self.current_spy_histories_dealer = []

    def get_card_value_from_spy_value(self, value):
        """Convert a spy value to a card value in the range 2..11.

        The spy value is shifted by 102.5 and truncated; the last decimal
        digit of the result is the card, with digits 0 and 1 mapped to 10
        and 11.
        """
        value += 100
        value += 2.5
        value = math.trunc(value)
        value = value % 10
        if value <= 1:
            value += 10
        return value

    def choose_tables(self):
        """
        Return the indices of the tables to play on.
        The Sicilian table will be appended to this list automatically.

        Based on preliminary analysis, tables 0 and 1 seem to have useful patterns
        that might correlate with the Sicilian table.
        """
        return [0, 1]

    def _record_correlation(self, role, table_idx, histories, sicily_idx, shift):
        """Store the shift for ``role`` if the shifted window correlates strongly.

        Compares the ``lag`` values of table ``table_idx`` that end ``shift``
        steps before the latest one with the latest ``lag`` values of the
        Sicilian table; an absolute correlation above 0.7 is recorded.
        """
        table_history = histories[table_idx]
        if len(table_history) < shift + self.lag:
            return
        sicily_history = histories[sicily_idx]
        if len(sicily_history) < self.lag:
            return
        shifted_window = table_history[-(shift + self.lag):-shift]
        sicily_window = sicily_history[-self.lag:]
        try:
            correlation = np.corrcoef(shifted_window, sicily_window)[0, 1]
            is_strong = abs(correlation) > 0.7
        except (ValueError, TypeError, FloatingPointError):
            # Malformed windows are skipped; the analysis is best effort.
            return
        # A NaN correlation (constant window) compares False and is ignored.
        if is_strong:
            self.table_correlations[f"{role}_{table_idx}_sicily"] = {
                'shift': shift,
                'correlation': correlation
            }

    def _analyze_correlations(self, list_spy_history_player, list_spy_history_dealer):
        """Analyze correlations between table spy values to detect patterns.

        Does nothing until every table has at least ``lag`` player spy
        values. Shifts 1..9 are tried for each regular table.
        """
        if any(len(hist) < self.lag for hist in list_spy_history_player):
            return
        sicily_idx = len(list_spy_history_player) - 1
        for table_idx in range(sicily_idx):
            max_shift = min(len(list_spy_history_player[table_idx]), 10)
            # NOTE(review): a later qualifying shift overwrites an earlier
            # one, so the stored shift is the largest one above the threshold,
            # not necessarily the most strongly correlated one.
            for shift in range(1, max_shift):
                self._record_correlation(
                    "player", table_idx, list_spy_history_player, sicily_idx, shift
                )
                self._record_correlation(
                    "dealer", table_idx, list_spy_history_dealer, sicily_idx, shift
                )

    def _predict_next_card(self, spy_history, player_or_dealer, table_idx):
        """Predict the next card based on spy history.

        - Empty history: a spy value of 0 is assumed, because a mean over no
          values is NaN and cannot be converted to a card.
        - Fewer than ``lag`` values: the mean of the history is used.
        - Sicilian table with a recorded correlation: the correlated table's
          spy value ``shift`` steps from the end is used.
        - Otherwise: the last ``lag`` values are extrapolated linearly.
        """
        if len(spy_history) == 0:
            return self.get_card_value_from_spy_value(0.0)
        if len(spy_history) < self.lag:
            return self.get_card_value_from_spy_value(np.mean(spy_history))

        sicily_idx = len(self.choose_tables())
        if table_idx == sicily_idx:
            if player_or_dealer == 'player':
                correlated_spy_histories = self.current_spy_histories_player
            else:
                correlated_spy_histories = self.current_spy_histories_dealer
            for corr_table_idx in range(sicily_idx):
                corr_info = self.table_correlations.get(
                    f"{player_or_dealer}_{corr_table_idx}_sicily"
                )
                if corr_info is None:
                    continue
                shift = corr_info['shift']
                if (corr_table_idx < len(correlated_spy_histories)
                        and len(correlated_spy_histories[corr_table_idx]) > shift):
                    correlated_spy = correlated_spy_histories[corr_table_idx][-shift]
                    return self.get_card_value_from_spy_value(correlated_spy)

        recent_spy = spy_history[-self.lag:]
        if len(recent_spy) > 1:
            # Average step over the window, applied once more to the last value.
            slope = (recent_spy[-1] - recent_spy[0]) / (len(recent_spy) - 1)
            next_spy = recent_spy[-1] + slope
        else:
            next_spy = recent_spy[-1]
        return self.get_card_value_from_spy_value(next_spy)

    def _make_decision(self, player_total, dealer_total, predicted_next_card, turn, table_idx):
        """Make a strategic decision based on current totals and predicted next card.

        Player turn: "hit" if the total after the predicted card stays <= 21,
        else "stand". Dealer turn on the Sicilian table: always "continue".
        Dealer turn on a regular table: once the dealer total is above 16,
        "continue" only if the player is not behind; before that, "surrender"
        when the dealer's predicted total does not bust and beats the player.
        """
        if turn == 'player':
            # The decision is the same with or without a strong Sicilian
            # correlation, so no table-specific branch is needed here.
            if player_total + predicted_next_card <= 21:
                return "hit"
            return "stand"

        if table_idx == len(self.choose_tables()):
            return "continue"

        if dealer_total > 16:
            if player_total >= dealer_total:
                return "continue"
            return "surrender"
        modified_dealer_total = dealer_total + predicted_next_card
        if modified_dealer_total > 21:
            return "continue"
        if modified_dealer_total > player_total:
            return "surrender"
        return "continue"

    def get_player_action_multi(self,
                            list_curr_spy_history_player, 
                            list_curr_spy_history_dealer,
                            list_curr_card_history_player, 
                            list_curr_card_history_dealer, 
                            list_curr_player_total, 
                            list_curr_dealer_total, 
                            turn, 
                            active_tables,
                            game_index,
                            ):
        """
        Make decisions for all tables simultaneously.

        Every ``list_*`` argument holds one entry per table, the Sicilian
        table last. ``active_tables`` flags the tables that need a decision;
        inactive tables get "stand" (player turn) or "continue" (dealer turn).
        The card histories and ``game_index`` are not used.

        Returns a list of actions for each table:
        - For player's turn: "hit" or "stand"
        - For dealer's turn: "continue" or "surrender"
        """
        self.current_spy_histories_player = list_curr_spy_history_player
        self.current_spy_histories_dealer = list_curr_spy_history_dealer
        self._analyze_correlations(list_curr_spy_history_player, list_curr_spy_history_dealer)

        is_player_turn = turn == "player"
        actions = []
        for i, active in enumerate(active_tables):
            if not active:
                actions.append("stand" if is_player_turn else "continue")
                continue
            if is_player_turn:
                predicted_card = self._predict_next_card(
                    list_curr_spy_history_player[i], "player", i
                )
            else:
                predicted_card = self._predict_next_card(
                    list_curr_spy_history_dealer[i], "dealer", i
                )
            actions.append(
                self._make_decision(
                    list_curr_player_total[i],
                    list_curr_dealer_total[i],
                    predicted_card,
                    turn,
                    i,
                )
            )
        return actions

## Results:
We got a payoff of **+21.5** using our given strategy, which is decently good.