# In [None]:
import pandas as pd
from dm_lib import load_dataframe_from_disk
from dm_lib import column_str, attributes, create_dataframe_from_columns, undersample_data, oversample_with_smote
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

def apply_tsne(df, title, fraud_column, filename=None, perplexity=30):
    """Embed the feature columns of *df* in 2-D with t-SNE and plot them by class.

    Every column except *fraud_column* is passed to t-SNE as a feature.
    Rows whose *fraud_column* value is 0 are drawn in blue and rows whose
    value is 1 in orange; rows with any other value are embedded but not
    drawn. Legend and axis texts are in Latvian ("Īsta transakcija" =
    genuine transaction, "Krāpnieciska transakcija" = fraudulent
    transaction).

    Args:
        df: DataFrame holding the feature columns and the label column.
        title: Text shown as the figure title.
        fraud_column: Name of the label column in *df*.
        filename: Where to save the figure. When falsy, the figure is
            shown interactively instead. For path-like values the parent
            directory is created if it does not exist yet.
        perplexity: Forwarded to ``sklearn.manifold.TSNE``.

    Returns:
        None.

    NOTE(review): scikit-learn documents that perplexity must be smaller
    than the number of samples — confirm callers respect this for small
    (e.g. undersampled) frames.
    """
    import os  # local import: only needed to prepare the output directory

    features = df.drop(fraud_column, axis=1)
    labels = df[fraud_column]

    # A fixed random_state keeps the seed identical between runs.
    tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)
    tsne_results = tsne.fit_transform(features)

    # (label value, marker colour, legend text) for each plotted class.
    class_styles = (
        (0, 'tab:blue', 'Īsta transakcija'),
        (1, 'tab:orange', 'Krāpnieciska transakcija'),
    )

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        for label, color, legend_text in class_styles:
            mask = labels == label
            ax.scatter(
                tsne_results[mask, 0],
                tsne_results[mask, 1],
                c=color,
                alpha=0.5,
                label=legend_text,
            )

        ax.set_title(title, fontsize=20)
        ax.legend()
        ax.set_xlabel('t-SNE komponente 1')
        ax.set_ylabel('t-SNE komponente 2')

        if filename:
            # savefig does not create missing directories; do it here so a
            # fresh checkout without the output folder still works.
            # File-like objects are passed through untouched.
            if isinstance(filename, (str, os.PathLike)):
                target_dir = os.path.dirname(filename)
                if target_dir:
                    os.makedirs(target_dir, exist_ok=True)
            fig.savefig(filename)
        else:
            plt.show()
    finally:
        if filename:
            # Release the figure even if drawing or saving failed, so
            # figures do not accumulate across many calls.
            plt.close(fig)

# Render t-SNE plots for every combination of data set, selected attribute
# group, sampling variant and perplexity, saving each one as a PNG.
# One-element tuples are written with a trailing comma: without it the
# parentheses are just grouping and the value is a plain string.
for setName in ('B',):  # candidate set names: 'A', 'B'
    df = load_dataframe_from_disk(f'dataSet_{setName}.pkl')
    for entry in attributes:
        # Tuple membership: an exact name match, not a substring test
        # against the string 'firsttime'.
        if entry['name'] not in ('firsttime',):
            continue
        attr_name = entry['name']
        # 'o' -> SMOTE oversampling, 'u' -> undersampling, '' -> unchanged.
        for sampling in ('o', 'u', ''):
            # Label column first, followed by the attribute group's columns.
            column_list = ['fraud', *entry['value']]
            source_df = create_dataframe_from_columns(df, column_list)
            if sampling == 'o':
                tsne_df = oversample_with_smote(source_df, 'fraud')
            elif sampling == 'u':
                tsne_df = undersample_data(source_df, 'fraud', desired_ratio=1)
            else:
                tsne_df = source_df
            # Alternative, denser grid: 1, 10, 20, ..., 100.
            for p in (5, 30, 50, 100):
                f = f'tSNE/tSNE_{attr_name}_p{p}_set_{setName}{sampling}.png'
                title = (
                    f'Datu kopa: {setName}{sampling}, '
                    f'atribūtu kopa:{attr_name}, perplexity:{p}'
                )
                apply_tsne(tsne_df, title, 'fraud', filename=f, perplexity=p)
