In [5]:
import json
import numpy as np
import pandas as pd
import gym
from gym import spaces
import torch
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv


class TaxOptimizationEnv(gym.Env):
    def __init__(self, data, deduction_policy):
        super(TaxOptimizationEnv, self).__init__()
        self.data = data.sample(frac=1).reset_index(drop=True)  # Shuffle data
        self.deduction_policy = deduction_policy
        self.current_index = 0  # Track transaction index
        
        # Initialize deduction limits
        self.deduction_limits = {rule["category"]: rule["max_limit"] for rule in deduction_policy["deductions"]}
        
        # Action space: 0 = Don't deduct, 1 = Deduct
        self.action_space = spaces.Discrete(2)
        
        # Observation space: 5 features + deduction limits
        self.observation_space = spaces.Box(low=0, high=1, shape=(5 + len(self.deduction_limits),), dtype=np.float32)
        
        self.state = self._get_next_transaction()
        self.episode_ended = False

    def _get_next_transaction(self):
        """Fetch the next transaction and convert to a state vector."""
        if self.current_index >= len(self.data):
            self.current_index = 0  # Restart from beginning
            self.data = self.data.sample(frac=1).reset_index(drop=True)  # Shuffle again
        
        row = self.data.iloc[self.current_index]
        self.current_index += 1
        
        # Normalize deduction limits
        deduction_limits = np.array([self.deduction_limits[cat] for cat in self.deduction_limits], dtype=np.float32)
        deduction_limits /= max(deduction_limits)  # Normalize to [0, 1]
        
        return np.concatenate([
            np.array([
                row["amount"],  
                row["category"],  
                row["merchant"],  
                row["payment_method"],  
                row["tax_deductible"]  
            ], dtype=np.float32),
            deduction_limits
        ])

    def reset(self):
        self.state = self._get_next_transaction()
        self.episode_ended = False
        return self.state

    def step(self, action):
        if self.episode_ended:
            return self.reset()

        # Get transaction details
        amount = self.state[0]
        category = self.state[1]
        tax_deductible = self.state[4]
        
        # Get deduction rule for the category
        deduction_rule = next(rule for rule in self.deduction_policy["deductions"] if rule["category"] == category)
        rate = deduction_rule["rate"]
        max_limit = deduction_rule["max_limit"]
        
        # Calculate reward
        if action == 1:  # Deduct
            if tax_deductible == 1 and self.deduction_limits[category] >= amount * rate:
                reward = amount * rate  # Reward = tax savings
                self.deduction_limits[category] -= amount * rate
            else:
                reward = -10  # Penalty for invalid deduction
        else:  # Don't deduct
            reward = 0
        
        self.state = self._get_next_transaction()
        done = False  # In this case, episodes can continue indefinitely
        return self.state, reward, done, {}

This is for indiviual users the code above is just for testing purposes

In [6]:
import json
import numpy as np
import pandas as pd
import gym
from gym import spaces
from sklearn.preprocessing import LabelEncoder
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
# Assuming this import exists in your project
#from tax_report_generator import generate_tax_report
import os

# Custom label encoder that handles unseen values
class SafeLabelEncoder(LabelEncoder):
    def __init__(self):
        super().__init__()
        self.unknown_value = -1
        
    def fit(self, y):
        super().fit(y)
        return self
    
    def transform(self, y):
        try:
            return super().transform(y)
        except ValueError:
            # Handle unseen labels by assigning a default value
            result = np.zeros(len(y), dtype=np.int32)
            for i, val in enumerate(y):
                if val in self.classes_:
                    result[i] = super().transform([val])[0]
                else:
                    result[i] = self.unknown_value
            return result

# Load deduction policy
with open("../../data/deduction_policy.json", "r") as f:
    deduction_policy = json.load(f)

class TaxOptimizationEnv(gym.Env):
    def __init__(self, df, deduction_policy, label_encoders):
        super(TaxOptimizationEnv, self).__init__()
        self.df = df.reset_index(drop=True)
        self.deduction_policy = deduction_policy
        self.label_encoders = label_encoders
        self.current_index = 0
        self.observation_space = spaces.Box(low=-1, high=1, shape=(5,), dtype=np.float32)
        self.action_space = spaces.Discrete(2)

    def reset(self):
        self.current_index = 0
        return self._get_observation()

    def _get_observation(self):
        if self.current_index >= len(self.df):
            return np.zeros(5, dtype=np.float32)
        
        row = self.df.iloc[self.current_index]
        
        # Use safely transformed values
        encoded_category = self.label_encoders.get("category").transform([row["category"]])[0]
        encoded_merchant = self.label_encoders.get("merchant").transform([row["merchant"]])[0]
        encoded_payment = self.label_encoders.get("payment_method").transform([row["payment_method"]])[0]
        
        category_classes = self.label_encoders["category"].classes_
        is_deductible = float(row["category"] in category_classes and any(
            d["category"] == row["category"] for d in self.deduction_policy["deductions"]
        ))
        
        # Handle negative values in normalization
        cat_norm = (encoded_category + 1) / (len(category_classes) + 1) if encoded_category >= 0 else 0
        merch_norm = (encoded_merchant + 1) / (len(self.label_encoders["merchant"].classes_) + 1) if encoded_merchant >= 0 else 0
        payment_norm = (encoded_payment + 1) / (len(self.label_encoders["payment_method"].classes_) + 1) if encoded_payment >= 0 else 0
        
        return np.array([
            cat_norm,
            merch_norm,
            row["amount"],
            payment_norm,
            is_deductible
        ], dtype=np.float32)

    def step(self, action):
        row = self.df.iloc[self.current_index]
        category = row["category"]
        amount = row["amount"]
        
        deduction_rule = next((rule for rule in self.deduction_policy["deductions"] if rule["category"] == category), None)
        reward = min(amount * deduction_rule["rate"], deduction_rule["max_limit"]) if action == 1 and deduction_rule else 0
        
        self.current_index += 1
        done = self.current_index >= len(self.df)
        return self._get_observation(), reward, done, {}

# Load and preprocess initial data
df = pd.read_csv("../../data/freelancer_tax_deductions.csv")
label_encoders = {col: SafeLabelEncoder().fit(df[col]) for col in ["category", "merchant", "payment_method"]}

df["amount"] = (df["amount"] - df["amount"].min()) / (df["amount"].max() - df["amount"].min() + 1e-9)  # Avoid division by zero

# Train model
env = DummyVecEnv([lambda: TaxOptimizationEnv(df, deduction_policy, label_encoders)])
model = DQN("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)
model.save("tax_savings_model")

def calculate_user_tax_savings(user_id, user_transactions, retrain=False):
    global label_encoders
    df = pd.DataFrame(user_transactions)
    
    # Handle retraining with new data
    if retrain:
        for col in ["category", "merchant", "payment_method"]:
            # Update label encoders with new values
            new_values = np.append(label_encoders[col].classes_, df[col].unique())
            unique_values = np.unique(new_values)
            label_encoders[col] = SafeLabelEncoder().fit(unique_values)
        
        # Normalize amounts
        orig_df = pd.read_csv("../../data/freelancer_tax_deductions.csv")
        combined_df = pd.concat([orig_df, df])
        min_amt = combined_df["amount"].min()
        max_amt = combined_df["amount"].max()
        df["amount"] = (df["amount"] - min_amt) / (max_amt - min_amt + 1e-9)
        
        # Retrain model with expanded data
        env = DummyVecEnv([lambda: TaxOptimizationEnv(df, deduction_policy, label_encoders)])
        model = DQN("MlpPolicy", env, verbose=1)
        model.learn(total_timesteps=5000)
        model.save("tax_savings_model")
    else:
        # Normalize amounts for prediction only
        min_amt = df["amount"].min()
        max_amt = df["amount"].max()
        df["amount"] = (df["amount"] - min_amt) / (max_amt - min_amt + 1e-9)
    
    # Create environment for prediction
    env = DummyVecEnv([lambda: TaxOptimizationEnv(df, deduction_policy, label_encoders)])
    model = DQN.load("tax_savings_model")  
    obs = env.reset()
    total_reward = 0
    
    # Run predictions
    for _ in range(len(user_transactions)):
        action, _states = model.predict(obs)
        obs, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
    
    tax_savings = total_reward * 1000  
    print(f"Total Tax Savings: {tax_savings}")
    #generate_tax_report(user_id, tax_savings)
    return {"user_id": user_id, "tax_savings": tax_savings}




Using cpu device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.24     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 810      |
|    time_elapsed     | 0        |
|    total_timesteps  | 800      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0274   |
|    n_updates        | 174      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 754      |
|    time_elapsed     | 2        |
|    total_timesteps  | 1600     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.00698  |
|    n_updates        | 374      |
----------------------------------
----------------------------------
| rollout/            |          |
|  

In [7]:
new_transactions = [
    {
        "transaction_id": "5280833a-4da9-416e-92f3-713ae32f63b2",
        "user_id": 101,
        "date": "2024-10-29",
        "amount": 7923.67,
        "currency": "EUR",
        "category": "Marketing & Advertising",
        "description": "Google Ads campaign",
        "deduction_rate": 1.0,
        "max_limit": 10000,
        "merchant": "Uber",
        "payment_method": "Bank Transfer",
        "country": "Germany",
        "tax_deductible": True
    },
    {
        "transaction_id": "23d051ef-6839-40f6-bf22-e3b381043aa3",
        "user_id": 101,
        "date": "2024-09-09",
        "amount": 6105.9,
        "currency": "EUR",
        "category": "Work Equipment",
        "description": "Ergonomic keyboard and mouse",
        "deduction_rate": 0.5,
        "max_limit": 5000,
        "merchant": "Microsoft",
        "payment_method": "Bank Transfer",
        "country": "USA",
        "tax_deductible": False
    }
]

result = calculate_user_tax_savings("102", new_transactions, retrain=True)
print(result)

Using cpu device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.985    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1333     |
|    time_elapsed     | 0        |
|    total_timesteps  | 8        |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.97     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1454     |
|    time_elapsed     | 0        |
|    total_timesteps  | 16       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.954    |
| time/               |          |
|    episodes         | 12       |
|    fps              | 1411     |
|    time_elapsed     | 0        |
|    total_timesteps  | 24       |
----------------------------------
----------------------------------
| r



----------------------------------
| rollout/            |          |
|    exploration_rate | 0.65     |
| time/               |          |
|    episodes         | 92       |
|    fps              | 1101     |
|    time_elapsed     | 0        |
|    total_timesteps  | 184      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0122   |
|    n_updates        | 20       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.635    |
| time/               |          |
|    episodes         | 96       |
|    fps              | 1090     |
|    time_elapsed     | 0        |
|    total_timesteps  | 192      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.0124   |
|    n_updates        | 22       |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rat

