In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from sklearn.metrics import accuracy_score
from google.colab import drive

# Montarea unității Google Drive
drive.mount('/content/drive')

# Citirea datelor
data_path = "/content/drive/My Drive/"
data = pd.read_csv(data_path + "wdbc.data", header=None)

# Setarea numelor coloanelor
column_names = ['ID', 'Diagnosis']
for i in range(1, 31):
    column_names.append(f'Feature_{i}')
data.columns = column_names

# Preprocesarea datelor
# Convertim valoarea de diagnostic (M/B) în 0 și 1
data['Diagnosis'] = data['Diagnosis'].map({'M': 1, 'B': 0})

# Separarea caracteristicelor și etichetelor
X = data[['Feature_3', 'Feature_4']].values  # Raza și textura
y = data['Diagnosis'].values

# Împărțirea datelor în set de antrenare și set de testare
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizarea datelor
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Definirea funcției sigmoidă pentru regresia logistică
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Definirea funcției de pierdere (cross-entropy)
def loss_function(y_true, y_pred):
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

class LogisticRegressionTF:
    def __init__(self, num_features, learning_rate=0.01, num_epochs=100):
        self.weights = tf.Variable(tf.zeros([num_features, 1], dtype=tf.float64))  # Convertim la float64
        self.bias = tf.Variable(0.0, dtype=tf.float64)  # Convertim la float64
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs

    def fit(self, X, y):
        for epoch in range(self.num_epochs):
            with tf.GradientTape() as tape:
                z = tf.matmul(X, self.weights) + self.bias
                y_pred = tf.sigmoid(z)
                loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=z))

            gradients = tape.gradient(loss, [self.weights, self.bias])
            self.weights.assign_sub(self.learning_rate * gradients[0])
            self.bias.assign_sub(self.learning_rate * gradients[1])

    def predict(self, X):
        z = tf.matmul(X, self.weights) + self.bias
        y_pred = tf.sigmoid(z)
        return tf.round(y_pred)


# Antrenarea modelului
model = LogisticRegressionTF(num_features=X_train.shape[1])
model.fit(X_train, y_train)

# Evaluarea performanței modelului pe setul de antrenare
y_pred_train = model.predict(X_train).numpy().flatten()
train_accuracy = accuracy_score(y_train, y_pred_train)
print("Train Accuracy:", train_accuracy)

# Evaluarea performanței modelului pe setul de testare
y_pred_test = model.predict(X_test).numpy().flatten()
test_accuracy = accuracy_score(y_test, y_pred_test)
print("Test Accuracy:", test_accuracy)


Mounted at /content/drive


ValueError: `logits` and `labels` must have the same shape, received ((455, 1) vs (455,)).