# In [None]:
import pandas as pd
import numpy as np
import kmapper as km
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from umap import UMAP
from sklearn.cluster import DBSCAN
import json
from dm_lib import load_dataframe_from_disk
from dm_lib import column_str, attributes, create_dataframe_from_columns, undersample_data, oversample_with_smote

def create_mapper(df1, df2, method, param_value, filename, label):
    """Build a KeplerMapper graph for ``df1`` and save it as an HTML page.

    Every column of ``df1`` except ``transaction_id`` and ``fraud`` is
    standardised and projected to two dimensions (t-SNE or UMAP). The
    projection is then mapped with each combination of a small grid of cover
    and DBSCAN settings, and the graph with the most nodes is visualised.

    Args:
        df1: DataFrame holding ``transaction_id``, ``fraud`` and the feature
            columns used to build the graph.
        df2: DataFrame whose rows become the per-sample JSON tooltips.
            NOTE(review): assumed to be row-aligned with ``df1`` - confirm
            against the caller.
        method: Projection to use for the lens, ``'tsne'`` or ``'umap'``.
        param_value: ``perplexity`` for t-SNE, ``n_neighbors`` for UMAP.
        filename: Path of the HTML file to produce.
        label: Title shown in the visualisation.

    Returns:
        None. Any failure is caught and reported on stdout, so one bad
        configuration does not abort a batch of runs.
    """
    from itertools import product

    try:
        # Validate first so an unsupported method is reported clearly instead
        # of surfacing later as an unbound ``lens`` variable.
        if method not in ('tsne', 'umap'):
            raise ValueError(
                f"Unsupported method {method!r}; expected 'tsne' or 'umap'"
            )

        features = df1.drop(['transaction_id', 'fraud'], axis=1)
        df1_scaled = StandardScaler().fit_transform(features)
        mapper = km.KeplerMapper(verbose=1)

        if method == 'tsne':
            lens = TSNE(n_components=2, perplexity=param_value, random_state=42).fit_transform(df1_scaled)
        else:
            lens = UMAP(n_components=2, n_neighbors=param_value, random_state=42).fit_transform(df1_scaled)

        n_cubes_values = [5, 10, 15]
        perc_overlap_values = [0.3, 0.5, 0.7]
        eps_values = [0.3, 0.5, 0.7]
        min_samples_values = [3, 5, 7]

        best_num_nodes = 0
        best_graph = None

        # ``product`` walks the grid in the same order as four nested loops
        # (n_cubes outermost, min_samples innermost).
        grid = product(n_cubes_values, perc_overlap_values, eps_values, min_samples_values)
        for n_cubes, perc_overlap, eps, min_samples in grid:
            print(f"Trying parameters: n_cubes={n_cubes}, perc_overlap={perc_overlap}, eps={eps}, min_samples={min_samples}")

            cover = km.Cover(n_cubes=n_cubes, perc_overlap=perc_overlap)
            clusterer = DBSCAN(eps=eps, min_samples=min_samples)
            graph = mapper.map(lens, df1_scaled, cover=cover, clusterer=clusterer)

            # Strict ">" keeps the first graph among ties and rejects
            # graphs that have no nodes at all.
            num_nodes = len(graph['nodes'])
            if num_nodes > best_num_nodes:
                best_num_nodes = num_nodes
                best_graph = graph

        if best_graph is None:
            raise RuntimeError("Nav atrasti piemēroti parametri")

        # One JSON document per row of the tooltip frame passed in by the
        # caller (not a module-level global).
        tooltips = df2.apply(
            lambda row: json.dumps(row.to_dict(), default=str), axis=1
        ).values

        # Visualise the best graph, not whichever one the grid search built
        # last. ``visualize`` saves the page to ``path_html`` itself, so no
        # second write with the platform default encoding is needed.
        mapper.visualize(best_graph, path_html=filename,
                         custom_tooltips=tooltips, title=label)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

# Generate one Mapper visualisation per data set and per selected attribute
# set. The trailing comma matters: ('B') is just the string 'B', which would
# be iterated character by character. Add 'A' to the tuple to process it too.
for setName in ('B',):
    df = load_dataframe_from_disk('dataSet_' + setName + '.pkl')
    for entry in attributes:
        # Only attribute sets whose type is 'cart' or 'relieff' are processed.
        if entry['type'] not in ('cart', 'relieff'):
            continue
        # Build a fresh list so the shared ``attributes`` entry is not mutated.
        column_list = ['fraud', 'transaction_id', *entry['value']]
        mapper_df = create_dataframe_from_columns(df, column_list)
        tooltip_df = create_dataframe_from_columns(df, column_list)
        # t-SNE variant, currently disabled:
        #for p in [50]:
        #    f = 'mapper/mapper_'+entry['name']+'_tSNE_p'+str(p)+'_set_'+setName+'.html'
        #    title = 'Datu kopa: '+setName+', atribūtu kopa:'+entry['name']+', t-SNE perplexity:'+str(p)
        #    create_mapper(mapper_df,tooltip_df,'tsne',p,f,title)
        for p in [50]:  # UMAP n_neighbors values to try
            f = f"mapper/mapper_{entry['name']}_UMAP_n{p}_set_{setName}.html"
            title = f"Datu kopa: {setName}, atribūtu kopa:{entry['name']}, UMAP n_neighbors:{p}"
            create_mapper(mapper_df, tooltip_df, 'umap', p, f, title)


