In [None]:
import mysql.connector
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from transformers import AutoModel, AutoTokenizer

# Read the stored user input from the local MySQL database "dfa".
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    password="",
    database="dfa"
)

# try/finally guarantees that the cursor and the connection are released even
# when the query raises.
try:
    cursor = mydb.cursor()
    try:
        # SELECT * is order-sensitive: get_user_input() reads the row by position.
        cursor.execute("SELECT * FROM userinput")

        # Fetching all rows from the result set
        rows = cursor.fetchall()
    finally:
        cursor.close()
finally:
    mydb.close()

# Only the first row is used by the rest of the script. Fail with an explicit
# message (same exception type as the bare rows[0] lookup) if the table is empty.
if not rows:
    raise IndexError("table 'userinput' returned no rows; nothing to process")
row1 = rows[0]


# Define the autoencoder class
class Autoencoder(nn.Module):
    """Fully connected autoencoder with one 64-unit hidden layer on each side.

    The encoder maps ``input_size`` features to ``encoding_size`` values, with
    a ReLU after each linear layer. The decoder mirrors it back to
    ``input_size`` values and ends in a sigmoid, so reconstructed values lie
    between 0 and 1.
    """

    def __init__(self, input_size, encoding_size):
        super().__init__()
        hidden_size = 64  # width of the intermediate layer in both halves

        # Compression half: input_size -> 64 -> encoding_size
        self.encoder = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, encoding_size),
            nn.ReLU(),
        )

        # Reconstruction half: encoding_size -> 64 -> input_size
        self.decoder = nn.Sequential(
            nn.Linear(encoding_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, input_size),
            nn.Sigmoid(),  # squashes the reconstruction into the 0..1 range
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        return self.decoder(self.encoder(x))

def get_user_input(row=None):
    """Convert one database row into the list of numeric model inputs.

    Values are read by position, in this order: monthly_income,
    savings_till_now, loans, emi_on_loans, monthly_expenses,
    health_care_costs, num_dependents, insurance_per_month,
    period_of_investment, risk. num_dependents, period_of_investment and
    risk are converted with ``int``; every other field with ``float``.

    Args:
        row: Indexable sequence with at least 10 entries. When omitted, the
            module-level ``row1`` (first row fetched from the database) is
            used, which is the original behaviour.

    Returns:
        A list of 10 numbers in the order given above.

    Raises:
        IndexError: If ``row`` has fewer than 10 entries.
        ValueError, TypeError: If an entry cannot be converted; the message
            names the offending column.
    """
    if row is None:
        row = row1

    # (field name, converter) pairs in the positional order of the row.
    fields = (
        ("monthly_income", float),
        ("savings_till_now", float),
        ("loans", float),
        ("emi_on_loans", float),
        ("monthly_expenses", float),
        ("health_care_costs", float),
        ("num_dependents", int),
        ("insurance_per_month", float),
        ("period_of_investment", int),
        ("risk", int),
    )

    if len(row) < len(fields):
        raise IndexError(
            f"expected at least {len(fields)} columns, got {len(row)}"
        )

    values = []
    for index, (name, convert) in enumerate(fields):
        try:
            values.append(convert(row[index]))
        except (TypeError, ValueError) as err:
            # Re-raise with the same exception type, but say which column failed.
            raise type(err)(f"column {index} ({name}): {err}") from err
    return values

def get_digit_count(number):
    """Return how many decimal digits the integer part of ``number`` has.

    The count is taken from the truncated absolute value, so the sign is not
    counted and floats that ``str()`` would render in exponent notation
    (e.g. ``1e16``) are handled correctly. Values with a zero integer part
    (``0``, ``0.5``) count as one digit.

    Args:
        number: A real number accepted by ``int()`` (int, float, Decimal, ...).

    Returns:
        The digit count as an ``int`` (always >= 1).

    Raises:
        ValueError, OverflowError: If ``number`` is NaN or infinite.
    """
    return len(str(abs(int(number))))

# Scale each raw value down by 10 ** (digit count of its integer part),
# then pack the scaled values into a float32 tensor for the autoencoder.
user_input = get_user_input()
digit_counts = list(map(get_digit_count, user_input))
normalized_user_input = [
    value / 10 ** n_digits
    for value, n_digits in zip(user_input, digit_counts)
]

input_parameters_tensor = torch.tensor(normalized_user_input, dtype=torch.float32)

# Model dimensions
input_size = len(normalized_user_input)  # Number of input parameters
encoding_size = 6  # Size to which we want to reduce the parameters

# Initialize the autoencoder
autoencoder = Autoencoder(input_size, encoding_size)

# Reconstruction loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

# Training loop: fit the autoencoder to reproduce the single input vector.
# Nothing is printed per epoch, so stdout carries only the final result.
epochs = 1000
for epoch in range(epochs):
    # Forward pass
    reconstructed_parameters = autoencoder(input_parameters_tensor)

    # Compute the loss
    loss = criterion(reconstructed_parameters, input_parameters_tensor)

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Training is finished; the final encoding/reconstruction are plain values,
# so skip building an autograd graph for them.
with torch.no_grad():
    encoded_parameters = autoencoder.encoder(input_parameters_tensor)
    decoded_parameters = autoencoder.decoder(encoded_parameters)

# Pre-trained checkpoint to load; swap in another name (e.g., 'gpt2', 'roberta-base') if desired
model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

# Plain-Python copy of the encoded tensor: a flat list of floats for a 1-D
# tensor, nested lists for higher-rank tensors
encoded_parameters_list = encoded_parameters.tolist()

# One string per top-level entry: nested lists are space-joined, scalars are
# stringified as-is
encoded_parameters_strings = [
    ' '.join(map(str, parameter)) if isinstance(parameter, list) else str(parameter)
    for parameter in encoded_parameters_list
]

# Each string is tokenized as its own sequence (one batch row per entry)
tokenized_parameters = tokenizer(
    encoded_parameters_strings,
    padding='max_length',
    truncation=True,
    return_tensors='pt',
)

# Example: Classification head for investment strategy recommendation
class InvestmentClassifier(nn.Module):
    """Single linear layer mapping ``input_dim`` features to ``num_classes`` scores."""

    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.fc = nn.Linear(in_features=input_dim, out_features=num_classes)

    def forward(self, x):
        """Return the raw (unnormalised) class logits for ``x``."""
        return self.fc(x)

# Example usage (replace with your actual logic)
classifier = InvestmentClassifier(model.config.hidden_size, 3)  # 3 classes (e.g., conservative, moderate, aggressive)

# Run the tokenized batch through the pre-trained model. Nothing in this
# script trains that model, so the forward pass runs without autograd
# bookkeeping.
with torch.no_grad():
    outputs = model(**tokenized_parameters)
last_hidden_state = outputs.last_hidden_state[:, 0, :]  # Take the first token's representation

# Logits are the raw classifier outputs before any activation (e.g., softmax);
# the class with the highest score is the predicted one.
# NOTE(review): the classifier is freshly initialised and never trained in the
# visible code, so these scores depend on its random initial weights.
logits = classifier(last_hidden_state)

# Only the first class score of the first sequence is emitted.
print(logits.tolist()[0][0])

Epoch [0/1000], Loss: 0.0713
Epoch [100/1000], Loss: 0.0000
Epoch [200/1000], Loss: 0.0000
Epoch [300/1000], Loss: 0.0000
Epoch [400/1000], Loss: 0.0000
Epoch [500/1000], Loss: 0.0000
Epoch [600/1000], Loss: 0.0000
Epoch [700/1000], Loss: 0.0000
Epoch [800/1000], Loss: 0.0000
Epoch [900/1000], Loss: 0.0000
tensor([[ 0.1929, -0.2102, -0.5405],
        [ 0.1929, -0.2102, -0.5405],
        [ 0.1929, -0.2102, -0.5405],
        [ 0.1929, -0.2102, -0.5405],
        [ 0.1929, -0.2102, -0.5405],
        [ 0.0611, -0.2507, -0.4959]], grad_fn=<AddmmBackward0>)
