In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import sys
import os

# === CONFIG ===
# Root folder that is walked by the processing loop; every sub-directory that
# contains a label.csv is processed. The literal below is the default and can
# be overridden with the SP500_SPLITS_DIR environment variable, so the
# notebook can run on another machine without editing the code.
base_dir = os.environ.get(
    "SP500_SPLITS_DIR",
    r"C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500",
)
# Folder that is put on sys.path so that the `ge` package can be imported.
# Overridable through the GRAPH_EMBEDDING_DIR environment variable.
graph_embedding_dir = os.environ.get(
    "GRAPH_EMBEDDING_DIR",
    r"C:\Users\ns243\Documents\Academic\AI Master\Internship\GraphEmbedding",
)

# Add GraphEmbedding path for imports. Guarded so that re-running the cell
# does not append the same entry to sys.path again.
if graph_embedding_dir not in sys.path:
    sys.path.append(graph_embedding_dir)
from ge import Struc2Vec  # same as in the original code

# === HELPERS ===
def _safe_correlation_matrix(values, epsilon=1e-10):
    """Return the column-wise Pearson correlation matrix with no NaN entries.

    ``np.corrcoef`` divides by each column's standard deviation, so a column
    that is (numerically) constant produces NaN for its whole row and column.
    Overwriting such a column with another constant does not help, because
    the variance is still zero. Instead the result is sanitised afterwards:
    a degenerate column is treated as uncorrelated with every other column
    (0.0) and perfectly correlated with itself (1.0).

    Parameters
    ----------
    values : array-like, shape (n_rows, n_columns)
        Observations in rows, variables in columns.
    epsilon : float
        Columns whose population standard deviation is below this threshold
        are considered constant.

    Returns
    -------
    numpy.ndarray, shape (n_columns, n_columns)
        Correlation matrix that is always 2-D and free of NaN.
    """
    values = np.asarray(values, dtype=float)
    # The invalid/divide warnings are expected for constant columns and are
    # handled explicitly below.
    with np.errstate(invalid='ignore', divide='ignore'):
        # atleast_2d: for a single column np.corrcoef returns a scalar.
        corr = np.atleast_2d(np.corrcoef(values, rowvar=False))

    degenerate = (values.std(axis=0) < epsilon) | np.isnan(np.diag(corr))
    corr[degenerate, :] = 0.0
    corr[:, degenerate] = 0.0
    corr[np.isnan(corr)] = 0.0  # any NaN left over from other degenerate input
    degenerate_idx = np.flatnonzero(degenerate)
    corr[degenerate_idx, degenerate_idx] = 1.0
    return corr


# === PROCESSING LOOP ===
# os.walk silently yields nothing for a missing directory, which would make
# the final success message misleading, so fail loudly instead.
if not os.path.isdir(base_dir):
    raise FileNotFoundError(f"base_dir does not exist or is not a directory: {base_dir}")

for root, _subdirs, files in os.walk(base_dir):
    if "label.csv" not in files:
        continue

    directory = root  # equivalent to the original 'directory' variable
    print(f"\nProcessing directory: {directory}")

    # --- Reading the data ---
    # First CSV column becomes the index and is parsed as datetimes;
    # missing values are replaced by 0.
    df = pd.read_csv(os.path.join(directory, 'label.csv'), index_col=0)
    df.index = pd.to_datetime(df.index)
    df = df.fillna(0)
    print('Data read successfully.')

    # --- Convert data to NumPy array and save as npz ---
    data = df.values
    np.savez(os.path.join(directory, 'flow.npz'), result=data)
    print('Flow array saved as npz successfully.')

    # --- Convert returns to up/down trend classification ---
    # 1 where the value is strictly positive, 0 otherwise.
    trend_indicator = (data > 0).astype(int)
    np.savez(os.path.join(directory, 'trend_indicator.npz'), result=trend_indicator)
    print('Trend indicator saved as npz successfully.')

    # --- Calculate and save correlation matrix ---
    # Constant columns are handled inside the helper so that no NaN ends up
    # in corr_adj.npy or in the edge list weights.
    epsilon = 1e-10
    corr_matrix = _safe_correlation_matrix(data, epsilon)
    np.save(os.path.join(directory, 'corr_adj.npy'), corr_matrix)
    print('Correlation matrix saved successfully.')

    # --- Generate and save edge list ---
    # One "i j weight" line per column pair with i < j. triu_indices yields
    # the pairs in row-major order, i.e. the same order as a nested i/j loop.
    rows, cols = np.triu_indices(corr_matrix.shape[0], k=1)
    edge_list = list(zip(rows, cols, corr_matrix[rows, cols]))
    edgelist_path = os.path.join(directory, 'data.edgelist')
    with open(edgelist_path, 'w') as f:
        f.writelines(f'{i} {j} {weight}\n' for i, j, weight in edge_list)
    print('Edge list saved successfully.')

    # --- Read the graph, train the model, and save embeddings ---
    # nodetype=None keeps the node labels as strings.
    G = nx.read_edgelist(edgelist_path, create_using=nx.DiGraph(),
                         nodetype=None, data=[('weight', float)])
    model = Struc2Vec(G, 10, 80, workers=4, verbose=40)
    model.train(embed_size=128)
    embeddings = model.get_embeddings()

    # --- Convert embeddings to numpy array and save ---
    # NOTE(review): row order is whatever order get_embeddings() returns;
    # presumably graph node order (= column order of label.csv) - confirm
    # before indexing the saved array by column position.
    embedding_array = np.array(list(embeddings.values()))
    np.save(os.path.join(directory, '128_corr_struc2vec_adjgat.npy'), embedding_array)
    print('Embedding array saved successfully.')

print("\n=== All S&P500 subfolders processed successfully! ===")



Processing directory: C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500\SP500_2015-10-05_2018-06-03
Data read successfully.
Flow array saved as npz successfully.
Trend indicator saved as npz successfully.
Correlation matrix saved successfully.
Edge list saved successfully.


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   25.7s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.3min remaining:  1.3min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  2.5min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    0.7s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.9s remaining:    0.9s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    1.1s finished


Learning representation...
Learning representation done!
Embedding array saved successfully.

Processing directory: C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500\SP500_2016-06-21_2019-02-18
Data read successfully.
Flow array saved as npz successfully.
Trend indicator saved as npz successfully.
Correlation matrix saved successfully.
Edge list saved successfully.


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   21.2s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.3min remaining:  1.3min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  3.6min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    1.0s remaining:    1.0s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    1.4s finished


Learning representation...
Learning representation done!
Embedding array saved successfully.

Processing directory: C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500\SP500_2017-03-08_2019-11-05
Data read successfully.
Flow array saved as npz successfully.
Trend indicator saved as npz successfully.
Correlation matrix saved successfully.
Edge list saved successfully.


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   19.2s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.3min remaining:  1.3min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  3.4min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    1.3s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    1.3s remaining:    1.3s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    2.0s finished


Learning representation...
Learning representation done!
Embedding array saved successfully.

Processing directory: C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500\SP500_2017-11-23_2020-07-22
Data read successfully.
Flow array saved as npz successfully.
Trend indicator saved as npz successfully.
Correlation matrix saved successfully.
Edge list saved successfully.


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   20.9s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.3min remaining:  1.3min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  3.4min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    1.0s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    1.2s remaining:    1.2s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    1.7s finished


Learning representation...
Learning representation done!
Embedding array saved successfully.

Processing directory: C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500\SP500_2018-08-10_2021-04-08
Data read successfully.
Flow array saved as npz successfully.
Trend indicator saved as npz successfully.
Correlation matrix saved successfully.
Edge list saved successfully.


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   19.1s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.3min remaining:  1.3min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  3.4min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    1.2s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    1.4s remaining:    1.4s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    1.8s finished


Learning representation...
Learning representation done!
Embedding array saved successfully.

Processing directory: C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500\SP500_2019-04-27_2021-12-24
Data read successfully.
Flow array saved as npz successfully.
Trend indicator saved as npz successfully.
Correlation matrix saved successfully.
Edge list saved successfully.


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   20.8s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.4min remaining:  1.4min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  3.6min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    1.0s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    1.2s remaining:    1.2s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    1.8s finished


Learning representation...
Learning representation done!
Embedding array saved successfully.

Processing directory: C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500\SP500_2020-01-12_2022-09-10
Data read successfully.
Flow array saved as npz successfully.
Trend indicator saved as npz successfully.
Correlation matrix saved successfully.
Edge list saved successfully.


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   19.5s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.2min remaining:  1.2min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  3.3min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    1.0s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    1.2s remaining:    1.2s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    1.8s finished


Learning representation...
Learning representation done!
Embedding array saved successfully.

Processing directory: C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500\SP500_2020-09-28_2023-05-28
Data read successfully.
Flow array saved as npz successfully.
Trend indicator saved as npz successfully.
Correlation matrix saved successfully.
Edge list saved successfully.


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   19.9s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.2min remaining:  1.2min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  3.3min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    1.1s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    1.3s remaining:    1.3s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    1.8s finished


Learning representation...
Learning representation done!
Embedding array saved successfully.

Processing directory: C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500\SP500_2021-06-15_2024-02-12
Data read successfully.
Flow array saved as npz successfully.
Trend indicator saved as npz successfully.
Correlation matrix saved successfully.
Edge list saved successfully.


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   19.5s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.2min remaining:  1.2min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  3.3min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    1.1s remaining:    1.1s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    1.6s finished


Learning representation...
Learning representation done!
Embedding array saved successfully.

Processing directory: C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500\SP500_2022-03-02_2024-10-29
Data read successfully.
Flow array saved as npz successfully.
Trend indicator saved as npz successfully.
Correlation matrix saved successfully.
Edge list saved successfully.


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   19.6s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.2min remaining:  1.2min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  3.4min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    0.8s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    1.0s remaining:    1.0s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    1.4s finished


Learning representation...
Learning representation done!
Embedding array saved successfully.

Processing directory: C:\Users\ns243\Documents\Academic\AI Master\Internship\Data\splits_SP500\SP500_2022-11-17_2025-07-16
Data read successfully.
Flow array saved as npz successfully.
Trend indicator saved as npz successfully.
Correlation matrix saved successfully.
Edge list saved successfully.


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   23.3s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.1min remaining:  1.1min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  2.7min finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    0.9s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    1.1s remaining:    1.1s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    1.2s finished


Learning representation...
Learning representation done!
Embedding array saved successfully.

=== All S&P500 subfolders processed successfully! ===
