In [None]:
import pandas as pd

In [None]:
# Load the cleaned dataset (it carries the 'target' column used below) from parquet,
# reading it with the pyarrow engine.
df = pd.read_parquet(
    '../../data/data_cleaned_target.parquet',
    engine='pyarrow',
)

In [None]:
from sklearn.model_selection import train_test_split

# Features are every column except 'target' itself and 'pct_change_close'
# (presumably the raw value the target is derived from — confirm upstream).
X = df.drop(['pct_change_close', 'target'], axis=1)
y = df['target']

# shuffle=False keeps the original row order: the leading rows become the
# training set and the trailing 30% the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=False,
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Fit a logistic regression with library-default settings on the training split.
model = LogisticRegression().fit(X_train, y_train)
# Echo the fitted estimator so the cell still renders its repr as output.
model


In [None]:
import pickle

# Save the fitted model to disk as a pickle file in the working directory.
with open('regression_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
import joblib
# Persist each train/test split to its own file in the working directory.
joblib.dump(value=X_train, filename='X_train.pkl')
joblib.dump(value=X_test, filename='X_test.pkl')
joblib.dump(value=y_train, filename='y_train.pkl')
joblib.dump(value=y_test, filename='y_test.pkl')

In [None]:
def evaluate_model(model, X_test, y_test):
    """Score a fitted classifier on held-out data, print the scores and return them.

    Args:
        model: Fitted estimator exposing ``predict`` and ``predict_proba``.
            Column 1 of ``predict_proba`` is used as the score for ROC AUC.
        X_test: Feature matrix to evaluate on.
        y_test: True labels aligned with ``X_test``.

    Returns:
        dict: The computed scores under the keys ``"roc_auc"``, ``"accuracy"``,
        ``"precision"``, ``"recall"`` and ``"f1"``, so that results can be
        compared or stored instead of only being read from printed text.
    """
    from sklearn.metrics import (
        accuracy_score,
        classification_report,
        f1_score,
        precision_score,
        recall_score,
        roc_auc_score,
    )

    # ROC AUC is computed from scores (column 1 of predict_proba), not from
    # the hard class predictions used by the other metrics.
    positive_scores = model.predict_proba(X_test)[:, 1]
    y_pred = model.predict(X_test)

    metrics = {
        "roc_auc": roc_auc_score(y_test, positive_scores),
        "accuracy": accuracy_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred),
        "recall": recall_score(y_test, y_pred),
        "f1": f1_score(y_test, y_pred),
    }

    print(f"ROC AUC: {metrics['roc_auc']}")
    print(f"Accuracy: {metrics['accuracy']}")
    print(f"Precision: {metrics['precision']}")
    print(f"Recall: {metrics['recall']}")
    print(f"F1-Score: {metrics['f1']}")

    # Print the label on its own line: prefixing the report's first (header)
    # row with "Report " would shift it out of alignment with the columns below.
    print("Report")
    print(classification_report(y_test, y_pred))

    return metrics

In [None]:
# Report the fitted model's scores on the held-out split.
evaluate_model(model=model, X_test=X_test, y_test=y_test)

In [None]:
from sklearn.metrics import mean_squared_error
# Hard class predictions for the held-out features.
y_pred = model.predict(X_test)

# Coefficients learned by the fitted model.
fitted_coefficients = model.coef_
print("Coefficients: \n", fitted_coefficients)

# Mean squared error between the true and predicted labels.
# NOTE(review): if 'target' is encoded as 0/1 this equals the misclassification rate — confirm.
test_mse = mean_squared_error(y_test, y_pred)
print("Mean squared error: %.2f" % test_mse)

In [None]:
from sklearn.decomposition import PCA

# Reduce the features to 20 principal components.
# The projection is fitted on the training split only and then applied
# unchanged to the test split.
pca = PCA(n_components=20)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Share of the total variance captured by each retained component.
variance_ratios = pca.explained_variance_ratio_
print("'Variance' for each component:", variance_ratios)