In [1]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, ParameterGrid


In [2]:
# Load dataset (gantikan dengan dataset sebenar anda)
dataset_path = "TRAIN FAKE N NOT FAKE list.xlsx"  # Gantikan dengan nama fail sebenar anda
df = pd.read_excel(dataset_path)

# Preprocessing
target_column = "label_preprocess2"  # Gantikan dengan nama kolum target sebenar
X = df[['cluster','Built_Up_SF','Bathroom','Furnishing','Bedroom','Tenure','Car_Park','Negeri','Property_Type','Latitude','Longitude','Occupancy','Unit_Type']]  # Features
y = df[target_column]  # Sasaran (target)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [3]:
# Define the grid for class weights
param_grid = {
    "class_weight_0": np.linspace(0.1, 1.0, 10),  # Pemberat untuk kelas 'fake'
    "class_weight_1": np.linspace(0.1, 1.0, 10),  # Pemberat untuk kelas 'not fake'
}

# Convert the grid to a list of parameter combinations
grid = list(ParameterGrid(param_grid))


In [4]:
# Initialize variables to store the best results
best_accuracy = 0
best_params = None
results = []


In [5]:
# Perform Grid Search
for params in grid:
    class_weights = {0: params["class_weight_0"], 1: params["class_weight_1"]}

    # Train logistic regression model
    model = LogisticRegression(class_weight=class_weights, max_iter=1000, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate model
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Save results
    results.append({"class_weight_0": params["class_weight_0"], "class_weight_1": params["class_weight_1"], "accuracy": accuracy})

    # Update best parameters if necessary
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = class_weights


In [6]:
# Convert results to a DataFrame for analysis
results_df = pd.DataFrame(results)

# Display the best results
print(f"Best Accuracy: {best_accuracy}")
print(f"Best Class Weights: {best_params}")


Best Accuracy: 0.8870431893687708
Best Class Weights: {0: 0.30000000000000004, 1: 0.5}


In [7]:
# Save results to Excel
results_df.to_excel("Grid_Search_Results.xlsx", index=False)
print("Results saved to 'Grid_Search_Results.xlsx'")


Results saved to 'Grid_Search_Results.xlsx'
