In [1]:
import numpy as np

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``, element-wise.

    The exponential is only ever evaluated at ``-|z|``, so its value stays
    at or below 1 and cannot overflow. The textbook formula calls
    ``exp(-z)`` directly, which overflows (and emits a RuntimeWarning) once
    ``z`` drops below roughly -709.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # exp(-|z|) <= 1, so no overflow is possible
    # z >= 0: textbook form 1 / (1 + exp(-z)).
    # z <  0: algebraically equal form exp(z) / (1 + exp(z)).
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()]  # unwrap 0-d arrays so scalar input yields a scalar

def compute_cost(X, y, theta):
    """Return the mean binary cross-entropy cost of a logistic model.

    Mathematically this is
    ``-(1/m) * sum(y * log(h) + (1 - y) * log(1 - h))`` with
    ``h = sigmoid(X @ theta)``. It is evaluated in the algebraically equal
    form ``log(1 + exp(z)) - y * z`` (via ``np.logaddexp``), so predictions
    that saturate to exactly 0 or 1 no longer hit ``log(0)`` and turn the
    cost into ``inf`` or ``nan``.

    Args:
        X: Feature matrix with one row per training sample.
        y: Labels, one per row of ``X``.
        theta: Parameter vector with one entry per column of ``X``.

    Returns:
        The cost averaged over the ``len(y)`` samples.

    Raises:
        ZeroDivisionError: If ``y`` is empty.
    """
    m = len(y)  # number of training samples
    y = np.asarray(y)
    z = np.dot(X, theta)  # linear scores, z = X * theta

    # Per-sample cross-entropy written without an explicit log(h) / log(1 - h).
    per_sample_loss = np.logaddexp(0, z) - y * z
    return (1 / m) * np.sum(per_sample_loss)

def gradient_descent(X, y, theta, learning_rate, iterations):
    """Fit logistic-regression parameters with batch gradient descent.

    Args:
        X: Feature matrix with one row per training sample.
        y: Labels, one per row of ``X``.
        theta: Initial parameter vector. It is copied, so the caller's array
            is left unchanged.
        learning_rate: Step size applied to the gradient at every update.
        iterations: Number of update steps to perform.

    Returns:
        A tuple ``(theta, cost_history)``: the parameters after the last
        update, and a list with the cost recorded after each update.
    """
    m = len(y)  # number of training samples

    # Work on a private float copy. Updating the argument in place would
    # overwrite the caller's initial vector, and ``-=`` with a float
    # gradient raises a casting error when an integer array is passed in.
    theta = np.array(theta, dtype=float)
    cost_history = []  # cost after each update

    for _ in range(iterations):
        h = sigmoid(np.dot(X, theta))  # predicted probabilities

        # Gradient of the mean cross-entropy cost with respect to theta.
        gradient = (1 / m) * np.dot(X.T, (h - y))

        theta -= learning_rate * gradient

        # Record the cost after this update.
        cost_history.append(compute_cost(X, y, theta))

    return theta, cost_history

# Example data set. The first column of X is the constant 1 (bias term) and
# the second column is the feature; y holds the true labels.
X = np.array([
    [1, 2],
    [1, 3],
    [1, 4],
    [1, 5],
])
y = np.array([0, 0, 1, 1])

# Model parameters start at zero, one entry per column of X.
theta = np.zeros(X.shape[-1])

# Gradient-descent settings.
learning_rate = 0.1
iterations = 1000

# Run gradient descent to obtain the fitted parameters.
theta_optimal, cost_history = gradient_descent(
    X,
    y,
    theta,
    learning_rate=learning_rate,
    iterations=iterations,
)

# Report the result; the last ten costs show how the run is converging.
print("Theta optimal:", theta_optimal)
print("History biaya:", cost_history[-10:])


Theta optimal: [-5.97303274  1.7764563 ]
History biaya: [np.float64(0.21097826037977715), np.float64(0.2108616648304053), np.float64(0.21074523694405634), np.float64(0.21062897632622063), np.float64(0.21051288258364279), np.float64(0.21039695532431688), np.float64(0.21028119415748178), np.float64(0.21016559869361556), np.float64(0.2100501685444313), np.float64(0.20993490332287176)]


In [None]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    ``fit`` and ``predict`` prepend a column of ones (the bias term) to the
    features themselves, so callers pass the raw feature matrix only.

    Attributes:
        learning_rate: Step size applied to the gradient at every update.
        iterations: Number of gradient-descent updates performed by ``fit``.
        theta: Parameter vector (bias first) set by ``fit``; ``None`` until
            ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.1, iterations=1000):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.theta = None  # populated by fit()

    def sigmoid(self, z):
        """Return the logistic sigmoid of ``z``, element-wise.

        Only ``exp(-|z|)`` is evaluated, which is at most 1, so large
        negative inputs do not overflow the way ``exp(-z)`` would.
        """
        z = np.asarray(z)
        decay = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
        result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
        return result[()]  # unwrap 0-d arrays so scalar input yields a scalar

    def compute_cost(self, X, y):
        """Return the mean binary cross-entropy for the current ``theta``.

        ``X`` must already contain the bias column. The cost is evaluated as
        ``log(1 + exp(z)) - y * z``, which equals
        ``-(y * log(h) + (1 - y) * log(1 - h))`` but stays finite when the
        predicted probability saturates to exactly 0 or 1.
        """
        m = len(y)
        y = np.asarray(y)
        z = np.dot(X, self.theta)
        return (1 / m) * np.sum(np.logaddexp(0, z) - y * z)

    def gradient_descent(self, X, y):
        """Run ``self.iterations`` updates of ``self.theta``.

        ``X`` must already contain the bias column.

        Returns:
            A list with the cost recorded after each update.
        """
        m = len(y)
        cost_history = []

        for _ in range(self.iterations):
            h = self.sigmoid(np.dot(X, self.theta))
            gradient = (1 / m) * np.dot(X.T, (h - y))
            self.theta -= self.learning_rate * gradient
            cost_history.append(self.compute_cost(X, y))

        return cost_history

    def fit(self, X, y):
        """Train the model on features ``X`` and labels ``y``.

        Args:
            X: Feature matrix with one row per sample, without bias column.
            y: Labels, one per row of ``X``.

        Returns:
            A list with the cost recorded after each update.

        Raises:
            ValueError: If ``X`` and ``y`` have different numbers of samples.
        """
        n_samples = X.shape[0]
        if n_samples != len(y):
            raise ValueError(
                f"X has {n_samples} samples but y has {len(y)} labels"
            )
        X = np.c_[np.ones((n_samples, 1)), X]  # prepend the bias column
        self.theta = np.zeros(X.shape[1])  # start from all-zero parameters
        return self.gradient_descent(X, y)

    def predict(self, X):
        """Return 0/1 predictions for the rows of ``X``.

        A row is assigned class 1 when its predicted probability is at
        least 0.5.

        Raises:
            ValueError: If called before ``fit``.
        """
        if self.theta is None:
            raise ValueError("predict() called before fit(): model has no parameters")
        X = np.c_[np.ones((X.shape[0], 1)), X]  # prepend the bias column
        h = self.sigmoid(np.dot(X, self.theta))
        return (h >= 0.5).astype(int)

# Example usage with text data
if __name__ == "__main__":
    # Each example text is stored together with its sentiment label
    # (0 = negative, 1 = positive) so the number of texts and labels always
    # matches; train_test_split rejects inputs of different lengths.
    # TODO(review): the entries marked "placeholder" are illustrative
    # sentences added so that every label has a text -- replace them with
    # the real data set.
    examples = [
        ("lingkungan kerja nyaman dan rekan kerja ramah", 1),  # placeholder
        ("gaji terlalu sedikit untuk kebutuhan hidup di kota ini", 0),
        ("atasan sangat mendukung pengembangan karier", 1),  # placeholder
        ("rekan kerja tidak baik", 0),
        ("gaji dan tunjangan sangat memuaskan", 1),  # placeholder
        ("beban kerja terlalu berat dan sering lembur", 0),  # placeholder
        ("jam kerja fleksibel dan suasana kantor menyenangkan", 1),  # placeholder
    ]
    texts = [text for text, _label in examples]
    labels = np.array([label for _text, label in examples])

    # Convert the texts into a dense TF-IDF feature matrix.
    # NOTE(review): the vectorizer is fitted on all texts before the split,
    # so the test rows contribute to the vocabulary and IDF weights.
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(texts).toarray()

    # Split the data into training and test sets.
    X_train, X_test, y_train, y_test = train_test_split(
        X, labels, test_size=0.3, random_state=42
    )

    # Create and train the logistic-regression model.
    model = LogisticRegression(learning_rate=0.1, iterations=1000)
    model.fit(X_train, y_train)

    # Predict on the held-out test set.
    predictions = model.predict(X_test)

    # Report the accuracy.
    accuracy = accuracy_score(y_test, predictions)
    print(f"Akurasi model: {accuracy * 100:.2f}%")


Akurasi model: 33.33%


In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Input texts used to fit the vectorizer
texts = [
    "Alasan klasik pindah kerja karena gaji kecil",
    "Kerja pindah alas pindah kerja cuman gaji",
]

# Create the vectorizer, then fit it and transform the texts in one call
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(texts)

# Show the learned feature names, then the dense TF-IDF matrix
print("Fitur (Kata-kata):", vectorizer.get_feature_names_out(), sep="\n")
print("\nVektor TF-IDF:", tfidf_matrix.toarray(), sep="\n")


Fitur (Kata-kata):
['alas' 'alasan' 'cuman' 'gaji' 'karena' 'kecil' 'kerja' 'klasik' 'pindah']

Vektor TF-IDF:
[[0.         0.42567716 0.         0.30287281 0.42567716 0.42567716
  0.30287281 0.42567716 0.30287281]
 [0.39054766 0.         0.39054766 0.27787788 0.         0.
  0.55575576 0.         0.55575576]]
