In [44]:
import numpy as np

def load_numpy_array_data(file_path: str) -> np.ndarray:
    """
    Load a NumPy array from a binary ``.npy`` file.

    file_path: str location of file to load
    return: np.ndarray data loaded
    raises: whatever exception opening or parsing the file produces
        (e.g. FileNotFoundError). The error is printed and then re-raised,
        so a failed load stops here instead of returning None and
        surfacing later as an unrelated AttributeError.
    """
    try:
        with open(file_path, 'rb') as file_obj:
            # NOTE(security): allow_pickle=True lets np.load unpickle object
            # arrays, which can execute arbitrary code. Only load files
            # from a trusted source.
            return np.load(file_obj, allow_pickle=True)
    except Exception as e:
        print(e)
        # Re-raise with the original traceback rather than silently
        # returning None to the caller.
        raise

In [45]:
# Load the train and test splits (in that order) with the same helper.
train_arr, test_arr = map(load_numpy_array_data, ("train.npy", "test.npy"))

In [46]:
# Check the dimensions of the training array.
train_arr.shape

(1005, 10)

In [47]:
# Display the training array (NumPy abbreviates the repr of large arrays).
train_arr

array([[1.0, 1.0, 2.0, ..., 0.0, -0.5367548655561731, 1],
       [1.0, 2.0, 1.0, ..., 1.0, -0.07899566203929799, 1],
       [4.0, 1.0, 0.0, ..., 0.0, 0.4069639389902209, 0],
       ...,
       [1.0, 2.0, 1.0, ..., 1.0, 0.34484613861932867, 1],
       [4.0, 2.0, 1.0, ..., 1.0, 0.46945076266332375, 1],
       [0.0, 0.0, 0.0, ..., 0.0, -0.07899566203929799, 1]], dtype=object)

In [None]:
# Inspect the first row of the training array in full.
train_arr[0]

In [None]:
# Extract the last column of the training array. For the 10-column array
# shown above this is index 9, the same column the later train/test split
# treats as the target.
target_column = train_arr[:, -1]

# Get the distinct values in that column and how often each occurs
unique, counts = np.unique(target_column, return_counts=True)

# Print one line per distinct value with its count
for value, count in zip(unique, counts):
    print(f"Value: {value}, Count: {count}")

In [48]:
# Check the dimensions of the test array.
test_arr.shape

(252, 10)

In [49]:
# Display the test array (NumPy abbreviates the repr of large arrays).
test_arr

array([[1.0, 0.0, 1.0, ..., 0.0, -0.1967989106772582, 1],
       [3.0, 2.0, 2.0, ..., 0.0, 1.116923977873487, 0],
       [2.0, 2.0, 2.0, ..., 1.0, 1.2871211820713415, 0],
       ...,
       [1.0, 0.0, 0.0, ..., 0.0, -0.5389393369341406, 1],
       [0.0, 2.0, 0.0, ..., 0.0, 0.7245827566792103, 1],
       [4.0, 1.0, 2.0, ..., 0.0, 1.0257973070629611, 0]], dtype=object)

In [None]:
# Display the training labels.
# NOTE(review): in the visible part of this notebook y_train is only assigned
# in a later cell, so on a fresh top-to-bottom run this cell would raise
# NameError - confirm whether it should move below the split.
y_train

In [None]:
from neuro_mf import ModelFactory
import numpy as np
from tqdm import tqdm

# Assumes train_arr and test_arr were loaded by the earlier cells.
# The last column is the target; every column before it is a feature.
x_train, y_train = train_arr[:, :-1], train_arr[:, -1]
x_test, y_test = test_arr[:, :-1], test_arr[:, -1]
# The arrays are stored with dtype=object, so cast the labels to int
y_train = y_train.astype(int)
y_test = y_test.astype(int)
# Create a ModelFactory instance using the model configuration file
model_factory = ModelFactory(model_config_path="model.yaml")

# get_best_model is a single blocking call that reports no intermediate
# progress here, so the bar has exactly one step: it shows that the search is
# running and how long it took, instead of a made-up percentage.
with tqdm(total=1, desc="Model search") as pbar:
    best_model_detail = model_factory.get_best_model(
        X=x_train,
        y=y_train,
        # NOTE(review): if neuro_mf compares base_accuracy against a 0-1
        # score, 60 can never be reached and 0.6 was probably intended -
        # confirm against the neuro_mf documentation.
        base_accuracy=60
    )
    pbar.update(1)

In [1]:
import pandas as pd
# Name of the label column in the CSV.
TARGET_COLUMN = 'treatment'

# Read the test split, then separate the predictors from the label.
test_df = pd.read_csv("test.csv")
x = test_df.drop(columns=TARGET_COLUMN)
# Encode the label as integers: 'Yes' -> 0, 'No' -> 1.
y = test_df[TARGET_COLUMN].map({'Yes': 0, 'No': 1})

In [None]:
import pickle

# Deserialize the trained model from disk.
# NOTE(security): pickle.load can execute arbitrary code, so only open
# pickle files that come from a trusted source.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Predict on the feature frame prepared in the earlier cell.
y_hat_best_model = model.predict(x)

In [59]:
import pickle

# Deserialize the preprocessing object from its pickle file.
# NOTE(security): pickle.load can execute arbitrary code, so only open
# pickle files that come from a trusted source.
with open('Processing.pkl', 'rb') as preprocessor_file:
    preprocessor = pickle.load(preprocessor_file)

In [60]:
# List every configured transformer together with the columns it is applied to.
transformers = preprocessor.transformers
for step_name, _step, step_columns in transformers:
    print(f"Transformer: {step_name}", f"Columns: {step_columns}", sep="\n")

Transformer: LabelEncoder
Columns: ['work_interfere', 'benefits', 'care_options', 'anonymity', 'leave', 'Gender', 'remote_work', 'family_history']
Transformer: Transformer
Columns: ['Age']


In [62]:
# Access the fitted encoder registered under the name 'LabelEncoder'
# (described in this notebook as an OrdinalEncoder).
ordinal_encoder = preprocessor.named_transformers_['LabelEncoder']

# Get the categories learned by the encoder, one entry per encoded column
ordinal_categories = ordinal_encoder.categories_

# Look up the position of 'work_interfere' among the columns handed to this
# encoder rather than hard-coding it, so the lookup stays correct if the
# column order in the preprocessor changes.
encoder_columns = next(
    columns
    for name, _transformer, columns in preprocessor.transformers_
    if name == 'LabelEncoder'
)
work_interfere_index = list(encoder_columns).index('work_interfere')

# Get the categories for 'work_interfere'
work_interfere_labels = ordinal_categories[work_interfere_index]
print(f"Categories for 'work_interfere': {work_interfere_labels}")

# You can then transform the test data


Categories for 'work_interfere': ["Don't know" 'Never' 'Often' 'Rarely' 'Sometimes']


In [65]:
from Mental_Health.pipeline.prediction_pipeline import MentalhealthData, MentalHealthClassifier



# Answers for one sample respondent, keyed by the keyword arguments that
# MentalhealthData accepts.
sample_answers = dict(
    age=29,
    gender='male',
    family_history="Yes",
    benefits="Don't know",
    care_options="Not sure",
    anonymity="Don't know",
    leave="Very easy",
    work_interfere="Don't know",
    remote_work="No",
)
mentalhealth_data = MentalhealthData(**sample_answers)

# Build the input data frame that is passed to the classifier.
mh_data = mentalhealth_data.get_mentalhealth_input_data_frame()

model_predictor = MentalHealthClassifier()

# Keep the first entry of the prediction result, since there is one respondent.
predictions = model_predictor.predict(dataframe=mh_data)
value = predictions[0]

In [66]:
# Show the prediction obtained for the sample respondent.
value

1