In [19]:
import json
import numpy as np
import pandas as pd
import gym
from gym import spaces
import torch
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv

class TaxOptimizationEnv(gym.Env):
    def __init__(self, data, deduction_policy):
        super(TaxOptimizationEnv, self).__init__()
        self.data = data.sample(frac=1).reset_index(drop=True)  # Shuffle data
        self.deduction_policy = deduction_policy
        self.current_index = 0  # Track transaction index
        
        # Initialize deduction limits
        self.deduction_limits = {rule["category"]: rule["max_limit"] for rule in deduction_policy["deductions"]}
        
        # Action space: 0 = Don't deduct, 1 = Deduct
        self.action_space = spaces.Discrete(2)
        
        # Observation space: 5 features + deduction limits
        self.observation_space = spaces.Box(low=0, high=1, shape=(5 + len(self.deduction_limits),), dtype=np.float32)
        
        self.state = self._get_next_transaction()
        self.episode_ended = False

    def _get_next_transaction(self):
        """Fetch the next transaction and convert to a state vector."""
        if self.current_index >= len(self.data):
            self.current_index = 0  # Restart from beginning
            self.data = self.data.sample(frac=1).reset_index(drop=True)  # Shuffle again
        
        row = self.data.iloc[self.current_index]
        self.current_index += 1
        
        # Normalize deduction limits
        deduction_limits = np.array([self.deduction_limits[cat] for cat in self.deduction_limits], dtype=np.float32)
        deduction_limits /= max(deduction_limits)  # Normalize to [0, 1]
        
        return np.concatenate([
            np.array([
                row["amount"],  
                row["category"],  
                row["merchant"],  
                row["payment_method"],  
                row["tax_deductible"]  
            ], dtype=np.float32),
            deduction_limits
        ])

    def reset(self):
        self.state = self._get_next_transaction()
        self.episode_ended = False
        return self.state

    def step(self, action):
        if self.episode_ended:
            return self.reset()

        # Get transaction details
        amount = self.state[0]
        category = self.state[1]
        tax_deductible = self.state[4]
        
        # Get deduction rule for the category
        deduction_rule = next(rule for rule in self.deduction_policy["deductions"] if rule["category"] == category)
        rate = deduction_rule["rate"]
        max_limit = deduction_rule["max_limit"]
        
        # Calculate reward
        if action == 1:  # Deduct
            if tax_deductible == 1 and self.deduction_limits[category] >= amount * rate:
                reward = amount * rate  # Reward = tax savings
                self.deduction_limits[category] -= amount * rate
            else:
                reward = -10  # Penalty for invalid deduction
        else:  # Don't deduct
            reward = 0
        
        self.state = self._get_next_transaction()
        done = False  # In this case, episodes can continue indefinitely
        return self.state, reward, done, {}

In [22]:
import pandas as pd
import json
import numpy as np
import gym
from gym import spaces
from sklearn.preprocessing import LabelEncoder
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv

# Load dataset
df = pd.read_csv("../../data/freelancer_tax_deductions.csv")

# Initialize or update LabelEncoders
label_encoders = {}

for col in ["category", "merchant", "payment_method"]:
    if col not in label_encoders:
        label_encoders[col] = LabelEncoder()
    
    # Fit on the entire column (including unseen categories)
    df[col] = label_encoders[col].fit_transform(df[col])

# Normalize amount
df["amount"] = (df["amount"] - df["amount"].min()) / (df["amount"].max() - df["amount"].min())

# Load deduction policy
with open("../../data/deduction_policy.json", "r") as f:
    deduction_policy = json.load(f)

# Environment Class
class TaxOptimizationEnv(gym.Env):
    def __init__(self, df, deduction_policy, label_encoders):
        super(TaxOptimizationEnv, self).__init__()

        self.df = df
        self.deduction_policy = deduction_policy
        self.label_encoders = label_encoders
        self.current_index = 0

        # State space: (category, merchant, amount, payment_method, tax_deductible)
        self.observation_space = spaces.Box(low=0, high=1, shape=(5,), dtype=np.float32)

        # Action space: 0 = Accept Transaction, 1 = Claim Deduction
        self.action_space = spaces.Discrete(2)

    def reset(self):
        self.current_index = 0
        return self._get_observation()

    def _get_observation(self):
        if self.current_index >= len(self.df):
            return np.zeros(5)

        row = self.df.iloc[self.current_index]
        return np.array([
            row["category"] / max(1, len(self.label_encoders["category"].classes_)),  # Normalize category
            row["merchant"] / max(1, len(self.label_encoders["merchant"].classes_)),  # Normalize merchant
            row["amount"],  # Already normalized
            row["payment_method"] / max(1, len(self.label_encoders["payment_method"].classes_)),  # Normalize payment method
            1.0 if row["category"] in [self.label_encoders["category"].transform([d["category"]])[0] for d in self.deduction_policy["deductions"] if d["category"] in self.label_encoders["category"].classes_] else 0.0
        ], dtype=np.float32)

    def step(self, action):
        row = self.df.iloc[self.current_index]
        category = row["category"]
        amount = row["amount"]

        # Try to find a matching category in deduction policy
        deduction_rule = next(
            (rule for rule in self.deduction_policy["deductions"] 
             if rule["category"] in self.label_encoders["category"].classes_ and 
             self.label_encoders["category"].transform([rule["category"]])[0] == category),
            None
        )

        reward = 0
        if deduction_rule:
            rate = deduction_rule["rate"]
            max_limit = deduction_rule["max_limit"]
            reward = min(amount * rate, max_limit) if action == 1 else 0

        self.current_index += 1
        done = self.current_index >= len(self.df)
        return self._get_observation(), reward, done, {}

# Create environment
env = TaxOptimizationEnv(df, deduction_policy, label_encoders)
env = DummyVecEnv([lambda: env])  # Stable-Baselines3 requires vectorized environments

# Initialize the DQN agent
model = DQN('MlpPolicy', env, learning_rate=1e-3, buffer_size=10000, batch_size=64, verbose=1)

# Train the model
model.learn(total_timesteps=10000)

# Evaluate the model
obs = env.reset()
total_reward = 0

for _ in range(1000):  # Simulate 1000 steps
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    total_reward += reward
    if done:
        obs = env.reset()

print(f"Total Tax Savings: {total_reward}")




Using cuda device
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 304      |
|    time_elapsed     | 13       |
|    total_timesteps  | 4000     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0044   |
|    n_updates        | 974      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 285      |
|    time_elapsed     | 28       |
|    total_timesteps  | 8000     |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.00518  |
|    n_updates        | 1974     |
----------------------------------
Total Tax Savings: [271.40906]


In [None]:
tax_savings = total_reward * 1000  # Adjust based on your dataset's scale

# Generate the tax report with actual savings
report_id = generate_tax_report(tax_savings)

print(f"Total Tax Savings: ${tax_savings:,.2f}")
print(f"Tax Report Generated: https://docs.google.com/document/d/{report_id}/edit")

This is for indiviual users the code above is just for testing purposes

In [None]:
import pandas as pd
import json
import numpy as np
import gym
from gym import spaces
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from tax_report_generator import generate_tax_report

# Load deduction policy
with open("../../data/deduction_policy.json", "r") as f:
    deduction_policy = json.load(f)

class TaxOptimizationEnv(gym.Env):
    def __init__(self, df, deduction_policy, label_encoders):
        super(TaxOptimizationEnv, self).__init__()
        self.df = df
        self.deduction_policy = deduction_policy
        self.label_encoders = label_encoders
        self.current_index = 0

        self.observation_space = spaces.Box(low=0, high=1, shape=(5,), dtype=np.float32)
        self.action_space = spaces.Discrete(2)

    def reset(self):
        self.current_index = 0
        return self._get_observation()

    def _get_observation(self):
        if self.current_index >= len(self.df):
            return np.zeros(5)
        
        row = self.df.iloc[self.current_index]
        return np.array([
            row["category"] / len(self.label_encoders["category"].classes_),
            row["merchant"] / len(self.label_encoders["merchant"].classes_),
            row["amount"],  
            row["payment_method"] / len(self.label_encoders["payment_method"].classes_),
            1.0 if row["category"] in [self.label_encoders["category"].transform([d["category"]])[0] for d in self.deduction_policy["deductions"]] else 0.0
        ], dtype=np.float32)

    def step(self, action):
        row = self.df.iloc[self.current_index]
        category = row["category"]
        amount = row["amount"]
        
        deduction_rule = next(
            (rule for rule in self.deduction_policy["deductions"] if self.label_encoders["category"].transform([rule["category"]])[0] == category),
            None
        )

        reward = 0
        if deduction_rule:
            rate = deduction_rule["rate"]
            max_limit = deduction_rule["max_limit"]
            reward = min(amount * rate, max_limit) if action == 1 else 0
        
        self.current_index += 1
        done = self.current_index >= len(self.df)
        return self._get_observation(), reward, done, {}

def calculate_user_tax_savings(user_id, user_name, user_transactions):
    """Calculate tax savings for an individual user and generate a report."""
    df = pd.DataFrame(user_transactions)  # Convert user transactions to DataFrame
    
    # Normalize data if necessary (ensure consistency with training)
    df["amount"] = (df["amount"] - df["amount"].min()) / (df["amount"].max() - df["amount"].min())

    label_encoders = {}
    for col in ["category", "merchant", "payment_method"]:
        label_encoders[col] = LabelEncoder()
        df[col] = label_encoders[col].fit_transform(df[col])

    env = TaxOptimizationEnv(df, deduction_policy, label_encoders)
    env = DummyVecEnv([lambda: env])

    model = DQN.load("tax_savings_model")  # Load pre-trained model
    obs = env.reset()
    total_reward = 0

    for _ in range(len(user_transactions)):  # Process all transactions
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            obs = env.reset()

    tax_savings = total_reward * 1000  # Adjust based on real-world scale
    report_id = generate_tax_report(user_name, tax_savings)

    return {
        "user_id": user_id,
        "user_name": user_name,
        "tax_savings": tax_savings,
        "report_link": f"https://docs.google.com/document/d/{report_id}/edit"
    }
    
    
    


In [None]:
user_id = "example_user_123"  # Replace with actual user ID
user_name = "John Doe"  # Replace with actual user name
user_transactions = get_user_transactions(user_id)

if user_transactions:
    tax_savings_result = calculate_user_tax_savings(user_id, user_name, user_transactions)
    print(tax_savings_result)
else:
    print("No transactions found for this user.")

In [None]:
# Load processed receipt data
df = pd.read_csv("receipts_data.csv")

# Continue with tax optimization
env = TaxOptimizationEnv(df, deduction_policy, label_encoders)
env = DummyVecEnv([lambda: env])  # Stable-Baselines3 requires vectorized environments

# Train and evaluate as usual
model = DQN('MlpPolicy', env, learning_rate=1e-3, buffer_size=10000, batch_size=64, verbose=1)
model.learn(total_timesteps=10000)

obs = env.reset()
total_reward = 0

for _ in range(1000):
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    total_reward += reward
    if done:
        obs = env.reset()

print(f"Total Tax Savings: {total_reward}")
