In [None]:
import os

# Make the "utils" directory that sits next to the current working directory
# the new working directory for the rest of the notebook.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]  # head of the split == dirname
analyze_path = os.path.join(parent_dir, "utils")
os.chdir(analyze_path)

In [None]:
import ast
import pandas as pd

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from tdam.cover import CubicalCover
from tdam.clustering import FailSafeClustering
from tdam.core_old import MapperAlgorithm

from TrafficTDApythonUtils.utils_v3 import *
from TrafficTDApythonUtils.plots import *

In [None]:
# Call read_data() from the local ``utils`` module and keep its result.
# NOTE(review): this import presumably only resolves because an earlier cell
# changed the working directory to the sibling "utils" folder — confirm before
# moving or re-ordering this cell.
from utils import read_data
combined_data = read_data()

In [None]:
# Feature table that feeds both the PCA lens and the Mapper runs below.
# The path is relative, so it is resolved against the current working directory.
all_features_df = pd.read_csv("../ComputedData/ForModel/all_features.csv")

## Start Mapper

In [None]:
# Project the feature matrix onto its first 10 principal components; the
# projection ``lens`` is what the Mapper runs below use as their filter values.
#
# The PCA is fitted exactly once, so the explained-variance ratios printed
# here describe the same decomposition that produced ``lens``. Fitting two
# separate PCA objects (as before) doubled the work and gave no guarantee
# that both fits were identical.
# NOTE(review): no random_state is passed to PCA; consider pinning one if
# scikit-learn picks a randomized SVD solver for this data — confirm.
pca = PCA(10)
lens = pca.fit_transform(all_features_df.to_numpy())

# Per-component share of variance, followed by the total share retained.
ratios = pca.explained_variance_ratio_
print(ratios)
print(ratios.sum())

In [None]:
import pickle

In [None]:
# Grid of cover settings to try. Each overlap value is divided by 10 before it
# is handed to CubicalCover as ``overlap_frac``.
overlaps = [1, 5]
intervals = [6]

# One dict per (overlap, interval) run, plus the bare second element of each
# Mapper result (stored under the "silhouette" key) in run order.
detailed_results = []
silhouette_for_intervals = []

# Same visiting order as a nested loop: overlaps outermost, intervals innermost.
for overlap, interval in [(o, i) for o in overlaps for i in intervals]:
    print(f"Processing overlap {overlap}, interval {interval}")

    grid_cover = CubicalCover(n_intervals=interval, overlap_frac=overlap / 10)
    grid_clustering = FailSafeClustering(KMeans(n_clusters=2, random_state=42))
    mapper_algo = MapperAlgorithm(
        cover=grid_cover,
        clustering=grid_clustering,
        n_jobs=10,
    )

    mapper_info = mapper_algo.fit_transform(all_features_df.to_numpy(), lens)

    result = {
        "overlap": overlap,
        "interval": interval,
        "silhouette": mapper_info[1],
        "mapper_info": mapper_info,
    }
    silhouette_for_intervals.append(result["silhouette"])
    detailed_results.append(result)

    # Persist every run on its own so it can be reloaded without recomputing.
    with open(f"../ComputedData/ForMatrix/o{overlap}i{interval}.pkl", 'wb') as pkl_file:
        pickle.dump(result, pkl_file)

# Tabular view of all runs (one row per parameter combination).
detailed_results_df = pd.DataFrame(detailed_results)

In [None]:
# Reload one previously saved Mapper result for plotting.
# The file is opened in a ``with`` block so the handle is closed as soon as the
# object has been unpickled (a bare ``pickle.load(open(...))`` leaves it open).
# NOTE(review): despite its name, ``detailed_results_df`` is rebound here to the
# unpickled object (the grid-search cell pickles a plain dict with a
# "mapper_info" key), replacing the DataFrame built by that cell.
# NOTE(review): "o5i2.pkl" is not written by the grid search as configured in
# this notebook (intervals = [6]); it must already exist on disk — confirm that
# this is the intended file.
# SECURITY: pickle.load can execute arbitrary code; only load files you produced.
with open("../ComputedData/ForMatrix/o5i2.pkl", "rb") as pkl_file:
    detailed_results_df = pickle.load(pkl_file)

In [None]:
# Name handed to the plotter's create_mapper_plot / map_colors / plot calls.
# NOTE(review): presumably a column of all_features_df — confirm.
choose = 'youbike_100m_count_mean'

# Plotter built from the reloaded Mapper result and the feature table.
mapper_plotter = MapperPlotter(
    detailed_results_df['mapper_info'],
    all_features_df,
    seed=87,
    iterations=30,
    dim=2,
    range_lst=[-0.5, 0.5, 0.5, -0.5],
)

def avg_label(data):
    """Return the arithmetic mean of ``data``, or 0 when ``data`` is empty.

    ``data`` must support ``len()`` and be summable with ``sum()``.
    """
    count = len(data)
    if count == 0:
        # Nothing to average: fall back to 0 instead of dividing by zero.
        return 0
    return sum(data) / count

# Build the plot for the chosen name, passing avg_label as the callable.
# NOTE(review): presumably avg_label aggregates the values of each Mapper
# node — confirm against MapperPlotter.create_mapper_plot.
mapper_plot = mapper_plotter.create_mapper_plot(choose, avg_label, avg=True)
# Keep whatever extract_data() returns for later inspection.
full_info = mapper_plotter.extract_data()
# The calls below run in this order on the same plotter object; keep the order
# unless MapperPlotter is known to be order-independent.
mapper_plotter.map_colors(choose, size=0, threshold=0)
mapper_plotter.plot(choose, avg=True, set_label=True, size=1000, anchor=1.33)