In [3]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the dataset
data = pd.read_csv('D:/Intelligent System/WineQT.csv')

# Define the features and target
features = ['alcohol', 'sulphates', 'citric acid', 'fixed acidity']
target = 'quality'

# Split the data into features (X) and target (y)
X = data[features]
y = data[target]

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train).astype('float32')  # Cast to float32
X_test_scaled = scaler.transform(X_test).astype('float32')  # Cast to float32

# Convert the data to TensorFlow datasets
train_dataset = tf.data.Dataset.from_tensor_slices((X_train_scaled, y_train.astype('float32'))).batch(32)  # Ensure y is also float32
test_dataset = tf.data.Dataset.from_tensor_slices((X_test_scaled, y_test.astype('float32'))).batch(32)

# Create a simple linear model for boosting
class BoostingModel(tf.Module):
    def __init__(self):
        super().__init__()
        self.W = tf.Variable(tf.random.normal([len(features), 1], dtype=tf.float32))
        self.b = tf.Variable(tf.zeros([1], dtype=tf.float32))
    
    def __call__(self, x):
        return tf.matmul(x, self.W) + self.b

# Loss function (mean squared error)
def loss_fn(y_true, y_pred):
    return tf.reduce_mean(tf.square(y_true - y_pred))

# Gradient descent optimizer
optimizer = tf.optimizers.Adam(learning_rate=0.01)

# Training loop for gradient boosting
def train_boosting_model(num_boosting_rounds=100):
    models = []
    residuals = y_train.astype('float32')  # Ensure residuals are float32

    for round in range(num_boosting_rounds):
        # Create a new model for each boosting round
        model = BoostingModel()

        # Training loop for the current boosting model
        for epoch in range(10):  # Adjust epochs based on your data size
            for x_batch, y_batch in train_dataset:
                with tf.GradientTape() as tape:
                    predictions = model(x_batch)
                    loss = loss_fn(y_batch, predictions)
                
                gradients = tape.gradient(loss, [model.W, model.b])
                optimizer.apply_gradients(zip(gradients, [model.W, model.b]))
        
        # Add the trained model to the boosting list
        models.append(model)

        # Update residuals
        residuals -= model(X_train_scaled).numpy().flatten()

    return models

# Function to make predictions using all boosting models
def predict_boosting_model(models, X):
    final_predictions = tf.zeros_like(tf.matmul(X, models[0].W) + models[0].b)

    for model in models:
        final_predictions += model(X)

    return final_predictions

# Train the boosting model
boosting_models = train_boosting_model()

# Make predictions on the test set
y_pred_test = predict_boosting_model(boosting_models, X_test_scaled).numpy().flatten()

# Evaluate the model
mse = mean_squared_error(y_test, y_pred_test)
mae = mean_absolute_error(y_test, y_pred_test)

print(f'Mean Squared Error: {mse}')
print(f'Mean Absolute Error: {mae}')


Mean Squared Error: 279048.3974744134
Mean Absolute Error: 528.2455761505527
