In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA

possible_n_vals = [9, 12, 15, 18, 24]
possible_n_vals = [15]
possible_e_vals = [1, 3, 5, 7, 15]


def run_model(n, e):

    X = np.load('Datasets/kryptonite-%s-X.npy'%(n))
    y = np.load('Datasets/kryptonite-%s-y.npy'%(n))

    # Shuffle and split the data
    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.6, random_state=42)  # 60% training
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)  # 20% validation, 20% test


    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    x_test_scaled = scaler.transform(X_test)

    # print(np.mean(X_train_scaled, axis=0))
    # print(np.std(X_train_scaled, axis=0))

    feature_creator = PCA(n_components=min(e, X_train.shape[1]))
    X_train_features = feature_creator.fit_transform(X_train_scaled)
    X_val_features = feature_creator.transform(X_val_scaled)
    X_test_features = feature_creator.transform(x_test_scaled)
    
    
    # print("Created features")

    features = X_train_features.shape[-1]

    print(f'n:[{n}], e:[{e}]')
    print('Shape of train features:', X_train_features.shape)

    # Initialize and fit logistic regression
    classifier = RandomForestClassifier(n_estimators=100, max_depth=20, random_state=42, n_jobs=-1, max_features='log2')
    classifier.fit(X_train_features, y_train)
    print("Fit Model")

    # Evaluate on the validation set
    y_val_pred = classifier.predict(X_val_features)
    val_accuracy = accuracy_score(y_val, y_val_pred)
    print(f"Validation Accuracy: {val_accuracy:.4f}")

    # Evaluate on the test set
    y_test_pred = classifier.predict(X_test_features)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    print(f"Test Accuracy: {test_accuracy:.4f}")
    return test_accuracy, features
        


from tqdm import tqdm

acc_by_n = []
feat_by_n = []
for n in tqdm(possible_n_vals):
    single_n = []
    single_feat = []
    for e in tqdm(possible_e_vals):
        acc, feat = run_model(n, e)
        single_n.append(acc)
        single_feat.append(feat)
    acc_by_n.append(single_n)
    feat_by_n.append(single_feat)

print(acc_by_n)

  0%|          | 0/1 [00:00<?, ?it/s]

n:[15], e:[1]
Shape of train features: (12000, 1)




Fit Model
Validation Accuracy: 0.5031
Test Accuracy: 0.5063
n:[15], e:[3]
Shape of train features: (12000, 3)




Fit Model
Validation Accuracy: 0.5031
Test Accuracy: 0.5072
n:[15], e:[5]
Shape of train features: (12000, 5)




Fit Model
Validation Accuracy: 0.5174
Test Accuracy: 0.5198
n:[15], e:[7]
Shape of train features: (12000, 7)




Fit Model
Validation Accuracy: 0.5219
Test Accuracy: 0.5322
n:[15], e:[15]
Shape of train features: (12000, 15)


100%|██████████| 5/5 [00:02<00:00,  2.05it/s]
100%|██████████| 1/1 [00:02<00:00,  2.44s/it]

Fit Model
Validation Accuracy: 0.5406
Test Accuracy: 0.5544
[[0.5063333333333333, 0.5072222222222222, 0.5197777777777778, 0.5322222222222223, 0.5544444444444444]]





In [9]:
from tqdm import tqdm

acc_by_n = []
feat_by_n = []
for n in tqdm(possible_n_vals):
    single_n = []
    single_feat = []
    for e in tqdm(possible_e_vals):
        acc, feat = run_model(n, e)
        single_n.append(acc)
        single_feat.append(feat)
    acc_by_n.append(single_n)
    feat_by_n.append(single_feat)

print(acc_by_n)

In [None]:
(sees(haya, X) ∧ dog(X) ∧ owns(lucinda, X)) ∧ (sees(haya, Y) ∧ dog(Y) ∧ owns(lucinda, Y)) ∧
(¬(X = Y)))
