# Introduction
This Jupyter notebook is designed to load a dataset of financial transactions and create two visualizations to help analyze the data, particularly focusing on the distribution of transaction amounts and their relationship with fraud. Below is an explanation of each component in the notebook:

# 1. Import Libraries
os: This library is used for interacting with the operating system, specifically to check that the file exists (the CSV check itself is a plain string test on the file path's extension).
pandas: A powerful data manipulation and analysis library used to load and work with the dataset.
matplotlib.pyplot: A plotting library used to create visualizations from the data.

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

# 2. Data Loading (exercise_0)
Purpose: This function loads the transaction data from a CSV file into a Pandas DataFrame.
Functionality:
It checks whether the specified file exists and is a CSV file.
If the file exists and is valid, it reads the file into a DataFrame using pd.read_csv().
If the file is not found or does not have a .csv extension, it raises a FileNotFoundError.

In [None]:
def exercise_0(file_path):
    """
    Load transaction data from a CSV file into a Pandas DataFrame.

    Parameters:
    file_path (str or os.PathLike): The path to the CSV file.

    Returns:
    df (DataFrame): The loaded DataFrame.

    Raises:
    FileNotFoundError: If the path does not exist or does not end in '.csv'.
    """
    # Normalise pathlib.Path / os.PathLike inputs to a plain string so the
    # extension check below works for them too (Path has no .endswith()).
    path = os.fspath(file_path)

    if not (os.path.exists(path) and path.endswith('.csv')):
        raise FileNotFoundError("The specified file does not exist or is not a CSV file.")

    return pd.read_csv(path)

# 3. Visualization 1 (visual_1)
Purpose: This function creates two bar plots to visualize the distribution of transaction amounts and their breakdown by fraud status.
Functionality:
transaction_counts: Calculates the count of transactions for each unique value in the amount column.
transaction_counts_split_by_fraud: Groups transactions by amount and splits them based on whether they are fraudulent (using the isFraud column).
Subplots:
The first subplot shows a bar plot of overall transaction counts by amount.
The second subplot shows a bar plot of transaction counts by amount, further divided by fraud status (isFraud).
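The plots include titles and axis labels (plus a legend on the fraud-status plot), and each bar is annotated with its count for clarity; on the stacked fraud-status plot the annotation is the total count for that amount.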

In [None]:
def visual_1(df):
    """Plot transaction counts by amount, overall and split by fraud status."""
    # The plotting body is elided in this walkthrough cell (the `...`
    # placeholder); the full implementation is in the "Complete Code &
    # Summary" cell at the end of the notebook.
    ...
    # Display the current figure(s).
    plt.show()


# 4. Visualization 2 (visual_2)
Purpose: This function creates a scatter plot to explore the relationship between transaction amounts and the original balance from which the transaction originated (oldbalanceOrg).
Functionality:
Data Filtering: Filters the DataFrame to include only rows where the amount is greater than 0, ensuring meaningful data points for the scatter plot.
Scatter Plot:
The x-axis represents the amount of the transaction.
The y-axis represents the oldbalanceOrg (original balance before the transaction).
The plot includes a title and axis labels, with axis limits set to avoid negative values.

In [None]:
def visual_2(df):
    """Scatter-plot transaction 'amount' against 'oldbalanceOrg'."""
    # The plotting body is elided in this walkthrough cell (the `...`
    # placeholder); the full implementation is in the "Complete Code &
    # Summary" cell at the end of the notebook.
    ...
    # Display the current figure(s).
    plt.show()


# 5. Main Execution
Purpose: This section executes the notebook's main functions.
Functionality:
Data Loading: Calls exercise_0() to load the dataset from transactions.csv into the DataFrame df.
Generate Visualizations: Calls visual_1(df) and visual_2(df) to create and display the visualizations.

In [None]:
# Run the pipeline only when this code is executed as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    # Relative path: resolved against the current working directory.
    file_path = 'transactions.csv'
    # exercise_0 raises FileNotFoundError if the file is missing or not a .csv.
    df = exercise_0(file_path)
    # Each visualization displays its own figure via plt.show().
    visual_1(df)
    visual_2(df)

# Complete Code & Summary
This notebook provides a structured approach to load and analyze a dataset of financial transactions. The first visualization helps to understand the distribution of transaction amounts and the occurrence of fraud within different transaction amounts. The second visualization examines the relationship between transaction amounts and the original balance, which may provide insights into transaction patterns and potential fraud detection.

To use the notebook, simply place the transaction data in a file named transactions.csv (or modify the file_path), and run the cells to load the data and generate the visualizations.

In [None]:
# Import necessary libraries
import os
import pandas as pd
import matplotlib.pyplot as plt

# Function to load data
def exercise_0(file_path):
    """
    Function to load data from a CSV file into a Pandas DataFrame.

    Parameters:
    file_path (str or os.PathLike): The path to the CSV file.

    Returns:
    df (DataFrame): The loaded DataFrame.

    Raises:
    FileNotFoundError: If the path does not exist or does not end in '.csv'.
    """
    # Normalise pathlib.Path / os.PathLike inputs to a plain string so the
    # extension check below works for them too (Path has no .endswith()).
    path = os.fspath(file_path)

    if not (os.path.exists(path) and path.endswith('.csv')):
        raise FileNotFoundError("The specified file does not exist or is not a CSV file.")

    return pd.read_csv(path)

# Visualization 1
def visual_1(df):
    """
    Draw two vertically stacked bar charts of transaction counts per amount.

    The top chart shows how many transactions exist for each distinct
    'amount'; the bottom chart shows the same counts as stacked bars split
    by the 'isFraud' column. Bars are labelled with their (total) count and
    the figure is displayed.

    Parameters:
    df (DataFrame): Transaction data with 'amount' and 'isFraud' columns.
    """
    # Frequency of every distinct amount, ordered by amount
    per_amount = df['amount'].value_counts().sort_index()

    # Same frequencies broken down by fraud flag: one row per amount, one
    # column per 'isFraud' value, absent combinations filled with 0
    per_amount_by_fraud = (
        df.groupby(['amount', 'isFraud'])
        .size()
        .unstack(fill_value=0)
    )

    fig, (ax_overall, ax_split) = plt.subplots(2, 1, figsize=(10, 10))

    # --- Top panel: overall counts, bars positioned at the amount value ---
    ax_overall.bar(per_amount.index, per_amount.values)
    ax_overall.set_title('Transaction Counts by Amount')
    ax_overall.set_xlabel('Amount')
    ax_overall.set_ylabel('Number of Transactions')

    # Label each bar slightly above its top with its count
    for amount, count in zip(per_amount.index, per_amount.values):
        ax_overall.text(amount, count + 0.5, str(count), ha='center')

    # --- Bottom panel: counts stacked by fraud status ---
    per_amount_by_fraud.plot(kind='bar', stacked=True, ax=ax_split)
    ax_split.set_title('Transaction Counts by Amount and Fraud Status')
    ax_split.set_xlabel('Amount')
    ax_split.set_ylabel('Number of Transactions')
    ax_split.legend(title='Fraud Status')

    # Label each stack with its total; labels go at the row positions
    # 0..n-1 rather than at the amount values
    stack_totals = per_amount_by_fraud.sum(axis=1)
    for position, total in enumerate(stack_totals.values):
        ax_split.text(position, total + 0.5, str(total), ha='center')

    plt.tight_layout()
    plt.show()

# Visualization 2
def visual_2(df):
    """
    Show a scatter plot of transaction 'amount' against 'oldbalanceOrg'.

    Only rows with a strictly positive 'amount' are plotted, and both axes
    start at zero.

    Parameters:
    df (DataFrame): Transaction data with 'amount' and 'oldbalanceOrg' columns.
    """
    # Keep only transactions with a strictly positive amount
    has_positive_amount = df['amount'] > 0
    positive = df.loc[has_positive_amount]

    plt.figure(figsize=(10, 6))

    # Semi-transparent markers so overlapping points remain distinguishable
    plt.scatter(
        positive['amount'],
        positive['oldbalanceOrg'],
        alpha=0.5,
    )

    plt.title('Scatter Plot of Amount vs. Old Balance Origin')
    plt.xlabel('Amount')
    plt.ylabel('Old Balance Origin')

    # Clamp both axes at zero; upper limits stay automatic
    plt.xlim(left=0)
    plt.ylim(bottom=0)

    plt.show()

# Main execution
# Run the pipeline only when this code is executed as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    # Load the data. The path is relative, so it is resolved against the
    # current working directory; exercise_0 raises FileNotFoundError if the
    # file is missing or does not end in '.csv'.
    file_path = 'transactions.csv'
    df = exercise_0(file_path)
    
    # Generate visualizations: each call displays its own figure via
    # plt.show().
    visual_1(df)
    visual_2(df)
