In [None]:
!pip install torch lightning numpy kaggle wandb

In [None]:
from google.colab import files

# Upload the kaggle.json API token from the local machine.
# files.upload() returns a dict mapping each uploaded file name to its bytes.
# Binding it to a name stops the cell from echoing that dict (and with it the
# token's contents) into the notebook output.
uploaded_files = files.upload()


In [None]:
!mkdir ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [None]:
!kaggle datasets download -d ealaxi/paysim1
!unzip paysim1.zip

In [None]:
import sys

import pandas as pd
import plotly.graph_objects as go
import torch
from torch.utils.data import Dataset, DataLoader


In [None]:
# PARAMETERS

# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA instead of failing at the first tensor transfer.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# UTILITY FUNCTIONS

def load_dataframe(dataset_file: str) -> pd.DataFrame:
    """Read a CSV file into a pandas DataFrame.

    Args:
        dataset_file: Path of the CSV file to read.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with its default options.
    """
    frame = pd.read_csv(dataset_file)
    return frame


def find_null_or_empty_records(dataframe: pd.DataFrame):
    """Print and return every record that has a null or empty-string value.

    The check is done with column-wise (vectorized) operations instead of a
    Python loop over all rows, so it stays usable on very large frames and does
    not depend on the index labels being the integers 0..n-1.

    Args:
        dataframe: Table to inspect.

    Returns:
        A DataFrame holding only the flagged records (empty when there are
        none). Each flagged record is also printed to stdout.
    """
    # A record is flagged when any of its cells is null ...
    has_null = dataframe.isnull().any(axis=1)
    # ... or equal to the empty string (non-string cells simply compare False).
    has_empty = dataframe.eq('').any(axis=1)
    flagged = dataframe[has_null | has_empty]

    # Only the offending records are iterated, to print them one by one.
    for _, row in flagged.iterrows():
        print(f"Record con valori nulli o vuoti:\n{row}\n")

    return flagged
            
def print_progress_bar(percentuale, lunghezza_barra=20):
    """Redraw a one-line text progress bar on stdout.

    The line starts with a carriage return so that successive calls overwrite
    each other. The bar always has exactly ``lunghezza_barra`` characters
    between the brackets.

    Args:
        percentuale: Completed fraction; values outside [0, 1] are clamped.
        lunghezza_barra: Number of characters between the brackets.
    """
    # Clamp so that an out-of-range fraction cannot overflow the bar.
    percentuale = min(max(percentuale, 0.0), 1.0)
    blocchi_compilati = int(lunghezza_barra * percentuale)

    # The arrow head takes the place of the last filled block; with no filled
    # block nothing is drawn, which keeps the bar width constant.
    if blocchi_compilati > 0:
        riempimento = "=" * (blocchi_compilati - 1) + ">"
    else:
        riempimento = ""

    barra = "[" + riempimento.ljust(lunghezza_barra) + "]"
    sys.stdout.write(f"\r{barra} {percentuale * 100:.2f}% completo")
    sys.stdout.flush()
    
    
def compute_kind_inconsistence(dataframe, tolerance=1e-6):
    """Compute, per kind of inconsistency, the fraction of affected records.

    Args:
        dataframe: Table with the columns ``amount``, ``oldbalanceOrg``,
            ``newbalanceOrig``, ``oldbalanceDest``, ``newbalanceDest``,
            ``nameOrig`` and ``nameDest``.
        tolerance: Largest absolute gap between a balance change and
            ``amount`` that still counts as matching. A small tolerance avoids
            flagging records that differ only by floating-point rounding;
            pass ``0`` for an exact comparison.

    Returns:
        A dict mapping each inconsistency label to the fraction of records it
        affects (every fraction is ``0.0`` for an empty table).
    """
    labels = (
        "inconsistent orig balance",
        "inconsistent dest balance",
        "zero cash transaction",
        "self-transaction",
    )
    total = len(dataframe)
    if total == 0:
        # Nothing to count; also avoids dividing by zero.
        return {label: 0.0 for label in labels}

    amount = dataframe["amount"]
    orig_delta = (dataframe["oldbalanceOrg"] - dataframe["newbalanceOrig"]).abs()
    dest_delta = (dataframe["oldbalanceDest"] - dataframe["newbalanceDest"]).abs()

    # "not (gap <= tolerance)" rather than "gap > tolerance": records with a
    # missing value compare False and are therefore still counted as inconsistent.
    masks = (
        ~((orig_delta - amount).abs() <= tolerance),
        ~((dest_delta - amount).abs() <= tolerance),
        amount == 0,
        dataframe["nameOrig"] == dataframe["nameDest"],
    )
    return {label: int(mask.sum()) / total for label, mask in zip(labels, masks)}

def plot_histogram(to_plot):
    """Show a Plotly bar chart with one bar per entry of ``to_plot``.

    Args:
        to_plot: Mapping from bar label to bar height.
    """
    # Plotly's data-array properties do not accept dict views, so both the
    # keys and the values are turned into plain lists first.
    labels = list(to_plot.keys())
    values = list(to_plot.values())

    # Build the bar chart.
    fig = go.Figure(data=[go.Bar(x=labels, y=values)])

    # Display it.
    fig.show()



In [None]:
class FraudDetectionDataset(Dataset):
    """Wrapper around a transaction log loaded from a CSV file.

    The loaded table is stored in ``self.raw_data``.

    NOTE(review): this class definition does not implement ``__len__`` or
    ``__getitem__``.
    """

    def __init__(self, dataset_file: str):
        """Load the CSV file at ``dataset_file`` into ``self.raw_data``."""
        self.raw_data = load_dataframe(dataset_file)

    def analize_data(self):
        """Print an overview of the raw data and report null/empty records."""
        print("----HEAD----")
        print(self.raw_data.head())
        print("----INFO----")
        # DataFrame.info() prints its own report and returns None, so it must
        # not be wrapped in print() (that added a stray "None" line).
        self.raw_data.info()
        print("----DESCRIBE----")
        print(self.raw_data.describe())
        find_null_or_empty_records(self.raw_data)

    def extract_inconsistent_transactions(self, tolerance=1e-6):
        """Return the records that fail at least one consistency check.

        A record is returned when any of the following holds:
        the origin balance change does not match ``amount``; the destination
        balance change does not match ``amount``; ``amount`` is zero;
        ``nameOrig`` equals ``nameDest``.

        Args:
            tolerance: Largest absolute gap between a balance change and
                ``amount`` that still counts as matching. A small tolerance
                avoids flagging records that differ only by floating-point
                rounding; pass ``0`` for an exact comparison.

        Returns:
            A DataFrame with the inconsistent records of ``self.raw_data``.
        """
        data = self.raw_data
        amount = data["amount"]
        orig_delta = (data["oldbalanceOrg"] - data["newbalanceOrig"]).abs()
        dest_delta = (data["oldbalanceDest"] - data["newbalanceDest"]).abs()

        # Comparisons involving a missing value are False, so after negation
        # such records are still reported as inconsistent.
        orig_matches = (orig_delta - amount).abs() <= tolerance
        dest_matches = (dest_delta - amount).abs() <= tolerance

        inconsistent = (
            ~orig_matches
            | ~dest_matches
            | (amount == 0)
            | (data["nameOrig"] == data["nameDest"])
        )
        return data[inconsistent]
    
        
    

In [None]:
# Transaction log to analyse (presumably the CSV unpacked from paysim1.zip -- TODO confirm).
TRANSACTIONS_CSV = "PS_20174392719_1491204439457_log.csv"

# Load the log, then keep only the records that fail the consistency checks.
dataset = FraudDetectionDataset(TRANSACTIONS_CSV)
inconsistent_data = dataset.extract_inconsistent_transactions()