# Model

## Import and read Data

In [1]:
import os

import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from train_tabular import fit


# WANDB_SILENT is set before any run is started further down.
os.environ["WANDB_SILENT"] = "true"

# Positional indices (0-based) of the feature columns in both CSV files.
feature_positions = list(range(6, 15))

# Training split: the "Category" column is the target handed to fit() later.
X_train = pd.read_csv(r'../../data/train_data.csv')
Y_train = X_train["Category"]
# NOTE(review): the training frame additionally keeps the last CSV column
# (index -1), which the test frame does not -- presumably the fold assignment
# consumed by fit(); confirm against train_tabular.
X_train = X_train.iloc[:, feature_positions + [-1]]

# Test split: same target column, feature columns only.
X_test = pd.read_csv(r'../../data/test_data.csv')
Y_test = X_test["Category"]
X_test = X_test.iloc[:, feature_positions]

## Preprocessing

In [2]:
# Preprocessing: encode gender, standardise the lab-value columns, and apply the
# SAME fitted transformation to both the training and the test features.
#
# NOTE: this cell overwrites X_test with its transformed version. Re-run the
# data-loading cell before re-running this one, otherwise X_test is scaled twice.

# Encode Patient_gender in place as 1 (male) / 0 (female); other values are left untouched.
for frame in (X_train, X_test):
    frame.loc[frame['Patient_gender'] == 'male', 'Patient_gender'] = 1
    frame.loc[frame['Patient_gender'] == 'female', 'Patient_gender'] = 0

# Columns that are standardised; every other feature column passes through unchanged.
columns_to_scale = ['COR', 'FSH', 'FT4', 'IGF1', 'LH', 'PROL', 'TEST']

# The columns present in X_test are the model features. X_train may carry extra
# columns (the loading cell keeps its last CSV column); they are kept out of the
# pipeline so the fitted transformer can also be applied to X_test, and are
# re-attached to the transformed training frame afterwards.
feature_columns = list(X_test.columns)
extra_train_columns = [col for col in X_train.columns if col not in feature_columns]

preprocessor = ColumnTransformer(
    transformers=[
        ('scaler', StandardScaler(), columns_to_scale),
    ],
    remainder='passthrough',  # non-listed feature columns are appended as-is
)

pipeline = Pipeline([
    ('preprocessor', preprocessor),
])

# Fit the scaling statistics on the training features only.
transformed_data = pipeline.fit_transform(X_train[feature_columns])

# get_feature_names_out() prefixes each name with "<transformer>__"; strip that
# prefix only (maxsplit=1) so column names that contain "__" themselves survive.
preprocessed_columns = [
    name.split('__', 1)[1] if '__' in name else name
    for name in pipeline.named_steps['preprocessor'].get_feature_names_out()
]

# Transformed training features (scaled columns first, then passthrough
# columns), followed by the train-only extra columns in their original order.
X_transformed = pd.concat(
    [
        pd.DataFrame(transformed_data, columns=preprocessed_columns, index=X_train.index),
        X_train[extra_train_columns],
    ],
    axis=1,
)

# Apply the already-fitted pipeline to the test features so they share the
# scaling and column order of the training features.
X_test = pd.DataFrame(
    pipeline.transform(X_test),
    columns=preprocessed_columns,
    index=X_test.index,
)


## Setup Model

In [3]:
# Logistic-regression estimator used by the training cells below, with the
# solver's iteration cap raised to MAX_ITER.
MAX_ITER = 500
model = LogisticRegression(max_iter=MAX_ITER)

In [4]:
# Extra hyperparameters passed to fit() as `wandb_additional_config`. The value
# is read from the estimator itself so the logged config cannot drift from the
# model that is actually trained.
wandbadd = {'max_iter': model.max_iter}

## Fit Model and Evaluate Model

In [5]:
# Train one model per fold (0-4) and collect the results.
# Each fold receives its own unfitted copy of `model`: scikit-learn estimators
# are fitted in place, so handing the same instance to every call could leave
# `models` holding five references to a single object that only reflects the
# last fold. (Whether fit() copies the estimator itself is not visible here;
# cloning is harmless either way.)
models = []
for fold in range(5):
    fitted_model_fold = fit(
        clone(model),
        X_transformed,
        Y_train,
        X_test,
        Y_test,
        fold,
        "Tab-Data-LogReg-0.9Train",
        "LogReg",
        wandb_additional_config=wandbadd,
    )
    models.append(fitted_model_fold)

In [6]:
# Final run: the string 'all' is passed where the per-fold cell passes a fold
# index, and the run is logged under a separate name.
# NOTE(review): presumably 'all' makes fit() train on every fold -- confirm
# against train_tabular.fit.
fitted_model_fold = fit(
    model,
    X_transformed,
    Y_train,
    X_test,
    Y_test,
    'all',
    "Tab-Data-LogReg-All-0.9Train",
    "LogReg",
    wandb_additional_config=wandbadd,
)