In [3]:
import pandas as pd
import numpy as np

# Constants
num_transactions = 100

# Generate dummy data
np.random.seed(42)  # For reproducibility
data = {
    'cmx_id': np.arange(num_transactions),
    'Latitude': np.random.uniform(24.396308, 49.384358, num_transactions),  # Rough bounds of USA
    'Longitude': np.random.uniform(-125.0, -66.93457, num_transactions),
    'transaction_amount': np.random.uniform(1, 1000, num_transactions),
    'Merchant_name': np.random.choice(['Merchant_A', 'Merchant_B', 'Merchant_C'], num_transactions),
    'fraud_score': np.random.uniform(0, 1000, num_transactions)
}

df = pd.DataFrame(data)

# Add a fraud label based on fraud_score
df['fraud_label'] = df['fraud_score'] > 700  # Fraudulent if fraud_score > 700

# Save to CSV
df.to_csv('dummy_transactions.csv', index=False)


In [4]:
import folium
from folium.plugins import HeatMap
import pandas as pd
from IPython.display import display, clear_output
import time

# Load the data
df = pd.read_csv('dummy_transactions.csv')

# Create base map
m = folium.Map(location=[37.0902, -95.7129], zoom_start=5)

def add_markers(df, map_obj, delay=0.1):
    for i, row in df.iterrows():
        # Clear the output to update the map
        clear_output(wait=True)
        
        # Set color based on fraud score
        color = 'red' if row['fraud_label'] else 'blue'
        
        folium.CircleMarker(
            location=[row['Latitude'], row['Longitude']],
            radius=5,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.6,
            tooltip=f"Amount: ${row['transaction_amount']}, Fraud Score: {row['fraud_score']}"
        ).add_to(map_obj)
        
        display(map_obj)
        time.sleep(delay)

# Add markers to map
add_markers(df, m)


In [5]:
import geopandas as gpd
import matplotlib.pyplot as plt

# Load the shapefile of US states
us_states = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres')).query('continent == "North America"')

# Function to determine the state of a transaction
def get_state(lat, lon, states):
    point = gpd.GeoSeries([gpd.points_from_xy([lon], [lat])[0]])
    for state in states.itertuples():
        if point.within(state.geometry).any():
            return state.name
    return 'Unknown'

# Add state to the dataframe
df['state'] = df.apply(lambda row: get_state(row['Latitude'], row['Longitude'], us_states), axis=1)

# Summary statistics
fraud_stats = df[df['fraud_label']].groupby('state').agg(
    total_fraud_amount=('transaction_amount', 'sum'),
    num_fraud_transactions=('cmx_id', 'count')
).reset_index().sort_values(by='total_fraud_amount', ascending=False)

merchant_stats = df[df['fraud_label']].groupby('Merchant_name').agg(
    total_fraud_amount=('transaction_amount', 'sum'),
    num_fraud_transactions=('cmx_id', 'count')
).reset_index().sort_values(by='total_fraud_amount', ascending=False)

print("Fraud Stats by State:")
print(fraud_stats)

print("\nFraud Stats by Merchant:")
print(merchant_stats)

# Save summary statistics to CSV
fraud_stats.to_csv('fraud_stats_by_state.csv', index=False)
merchant_stats.to_csv('fraud_stats_by_merchant.csv', index=False)


AttributeError: The geopandas.dataset has been deprecated and was removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.

In [6]:
import geopandas as gpd

# Path to your downloaded shapefile
shapefile_path = 'ne_10m_admin_1_states_provinces.shx'

# Load the shapefile
north_america = gpd.read_file(shapefile_path)

# Filter for North America (if needed)
# For example, assuming 'continent' column exists
north_america = north_america[north_america['continent'] == 'North America']

# Display the first few rows to ensure it's loaded correctly
print(north_america.head())


DataSourceError: Unable to open ne_10m_admin_1_states_provinces.shp or ne_10m_admin_1_states_provinces.SHP in rb mode.