In [403]:
import random
import faker
import pandas as pd
import numpy as np
import re
import itertools
from itertools import combinations 
from functools import reduce

def conditions(string):
    
    '''Create conditional logic for identifying duplicates based on tiered levels of confidence'''
    
    if 'Full Name' in string and 'Full Address' in string and 'Email' in string and 'Phone' in string:
        
        return 'Exact Match'
    
    elif 'Full Address' in string and 'Email' in string and 'Phone' in string:
        
        return 'High Confidence'
    
    elif 'Full Address' in string and 'Email' in string:
        return 'Medium Confidence'
    else:
        return 'Low Confidence'
    
def create_match_confidence_reference():
    # initialize lists
    list_ = ["Full Name", "Full Address", "Email", "Phone"]
    
    unique_combinations = []
    for n in range(1, 5, 1):
        combos = itertools.combinations(list_, n)
        combos = [sorted(x) for x in combos]
        strings = [', '.join(combo) for combo in sorted(combos)]
        unique_combinations.append(strings)

    unique_combinations = sorted(list(itertools.chain(*unique_combinations)))

    confidence_df = pd.DataFrame({'Combination': unique_combinations})
    confidence_df['Confidence'] = confidence_df['Combination'].apply(conditions)

    custom_order = ['Exact Match', 'High Confidence', 'Medium Confidence', 'Low Confidence']
    confidence_df['Confidence'] = pd.Categorical(confidence_df['Confidence'], categories=custom_order, ordered=True)
    confidence_df = confidence_df.sort_values(by='Confidence').reset_index().drop(columns = 'index')
    confidence_df.loc[len(confidence_df.index)] = ['NO MATCH', 'NO MATCH'] 
    return(confidence_df)

def generate_random_full_name(sample_size):
    full_names = []
    for i in range(sample_size):
        first_names = ["Alice", "Bob", "Charlie", "David", "Eva", "Frank", "Grace", "Henry", "Ivy", "Jack"]
        last_names = ["Smith", "Johnson", "Williams", "Jones", "Brown", "Davis", "Miller", "Wilson", "Moore", "Taylor"]
        full_name = random.choice(first_names) + ' ' + random.choice(last_names)
        full_names.append(full_name)
    return(full_names)

def generate_random_full_address(sample_size):
    full_addresses = []
    for i in range(sample_size):
        street_num = str(random.choice(list(range(1, 9000, 1))))
        street_name = random.choice(["Maple", "Willow", "Cedar", "Oak", "Sunset", "Meadow", "Linden", "River", "Pine", "Grove"])
        street_type = random.choice(["Street", "Avenue", "Boulevard", "Lane", "Road", "Drive", "Court", "Place", "Circle", "Terrace"])
        full_address = ' '.join([street_num, street_name, street_type])
        full_addresses.append(full_address)
    return(full_addresses)

def generate_random_email(sample_size):
    email_addresses = []
    for i in range(sample_size):
        
        adjectives = ["silly", "wacky", "whimsical", "zany", "quirky", "cheerful", "goofy", "playful", "amusing", "lighthearted"]
        nouns = ["banana", "jellybean", "penguin", "snickerdoodle", "dizzygiraffe", "fizzypop", "ticklemonster", "chuckleninja", "gigglyghost", "bumblebee"]

        adjective = random.choice(adjectives)
        noun = random.choice(nouns)
        n = random.randint(1, 4)
        random_num = ''.join(random.choices('0123456789', k=n))
        username = f"{adjective}{noun}{random_num}"
        domain = random.choice(["gmail.com", "yahoo.com", "outlook.com", "hotmail.com", "aol.com", "protonmail.com", "mail.com"])
        email_address = username + "@" + domain
        email_addresses.append(email_address)
    return(email_addresses)

def generate_random_phone_number(sample_size):
    phone_numbers = []
    for i in range(sample_size):
        area_code = ''.join(random.choices('0123456789', k=3))
        central_office_code = ''.join(random.choices('0123456789', k=3))
        station_number = ''.join(random.choices('0123456789', k=4))
        phone_number = '-'.join([area_code, central_office_code, station_number])
        phone_numbers.append(phone_number)
    return(phone_numbers)


def generate_fake_pii_df(sample_size):
    '''
    Simulate data where names, addresses,phone numbers and emails are present across multiple IDs.
    This simulates where duplicate cases are present in the data and how potential fraud could be identified.
    '''
#     df = pd.DataFrame(columns=['Name', 'Address', 'Phone', 'Email'])
#     for i in range(sample_size):
#         name = generate_random_full_name(n)
#         address = generate_random_full_address(n)
#         phone = generate_random_phone_number(n)
#         email = generate_random_email(n)
#         df.loc[i] = [name, address, phone, email]

    n = round(0.20*sample_size)
    df = pd.DataFrame()
    df['Name'] = np.random.choice(generate_random_full_name(n), size = sample_size, replace = True)
    df['Address'] = np.random.choice(generate_random_full_address(n), size = sample_size, replace = True)
    df['Phone'] = np.random.choice(generate_random_phone_number(n), size = sample_size, replace = True)
    df['Email'] = np.random.choice(generate_random_email(n), size = sample_size, replace = True)
    df['Previous_ID'] = range(0, len(df))
    df['Previous_ID'] = df['Previous_ID'].astype(str)
    return(df)

def get_matches(data, ids, column_to_match_on):
    '''
    Iterate over each id and get the value for the PII column and pull a list of IDs that share the same PII.
    If there is no value to pull then simply append its own id to the list. 
    Create a dataframe of the ID, the PII value, and the list of IDs that shared the same value.
    '''
    all_relations = []
    for app_id in ids:
        value = data[data['Previous_ID'] == app_id][column_to_match_on].values[0]
        if value == '':
            related_ids = [app_id]
        else:
            related_ids = data[data[column_to_match_on] == value]['Previous_ID'].values.tolist()
        all_relations.append([app_id, value, related_ids])
        
    match_df = pd.DataFrame(all_relations)
    match_df.columns = ['Previous_ID', column_to_match_on, '{}_Related_Ids'.format(column_to_match_on)]
    return(match_df)


def merge_dataframe_list(df_list, merge_on):
    '''Merge list of matching dataframes for each PII into a single dataframe'''
    #if multiple dataframes want to combine column-wise use reduce from functools
    merged_df = reduce(lambda x, y: pd.merge(x,y, on = merge_on), df_list)
    
    all_related_ids = []
    for row in merged_df.itertuples():
        all_ids = row.Name_Related_Ids + row.Address_Related_Ids + row.Phone_Related_Ids + row.Email_Related_Ids 
        all_ids = list(set(all_ids))
        #all_ids = [ele for ele in all_ids if ele != row.Previous_ID]
        all_related_ids.append(sorted(all_ids))
        
    merged_df['All_Related_Ids'] = all_related_ids
    
    return(merged_df)

def get_match_strings(related_id_cols, match_df):
    '''
    Iterate over each list of related ids for every row and check if id is in any of the PII_Related_ID cols.
    If it is then append the name of the PII_Related_ID column to a string and build out the string specifying all 
    PII matched for each id in the list.
    '''
    confidence_df = create_match_confidence_reference()
    
    #Create a label of pii matched on and calculate similarity scores.
    all_edge_labels = []
    all_related_ids = []
    all_confidences = []
    for row in match_df.itertuples():
        ids = row.All_Related_Ids
        edge_label = []
        new_ids = []
        confidences = []
        for id in ids:
            id_matched_on = []
            if id in row.Name_Related_Ids:
                id_matched_on.append('Full Name')
            if id in row.Address_Related_Ids:
                id_matched_on.append('Full Address')
            if id in row.Email_Related_Ids:
                id_matched_on.append('Email')
            if id in row.Phone_Related_Ids:
                id_matched_on.append('Phone')
            else:
                #id_matched_on.append('NO MATCH')
                pass
            
            sorted_id_matched_on = sorted(list(set(id_matched_on)))
            label = ', '.join(sorted_id_matched_on)
            
            #Remove low confidence matches
            match_type = confidence_df[confidence_df['Combination'] == label]['Confidence'].values[0]
            
            if match_type != 'Low Confidence':
                edge_label.append(label)
                new_ids.append(id)
                confidences.append(match_type)
                
            else:
                pass
                
        all_related_ids.append(new_ids)
        all_edge_labels.append(edge_label)
        all_confidences.append(confidences)
    match_df['Edge_Label'] = all_edge_labels
    match_df['New_All_Related_Ids'] = all_related_ids
    match_df['Confidence_Lists'] = all_confidences
    return(match_df)

def assign_group_id(all_related_ids):
    '''
    Identify a unique list of groups and assign the unique group an ID. Here there will be overlap across groupings.
    This is the case where an id matched on email against one case but matched all other PII with other cases. 
    This would be helpful in trying to determine if fraudulent behavior is occurring across other applications.
    '''
    unique_lists = []
    id = 0
    for lst in all_related_ids:
        if lst not in unique_lists:
            unique_lists.append([id, sorted(lst)])
            id += 1
        else:
            pass
    return(unique_lists)

def similarity_score(label):
    '''Get the proportion of PII matched out of the total number of all PII that could be matched'''
    score = 0
    if 'Full Name' in label:
        score += 1
    if 'Full Address' in label:
        score += 1
    if 'Email' in label:
        score +=1
    if 'Phone' in label:
        score += 1
    return(score/4)

def weighted_similarity_score(label):
    '''Weight PII differently and get the proportion of PII matched out of the total weight of all PII'''
    score = 0
    if 'Full Name' in label:
        score += 2
    if 'Full Address' in label:
        score += 3
    if 'Email' in label:
        score += 2
    if 'Phone' in label:
        score += 1
    return(score/8)


def create_nodes(df):
    nodes = df['Previous_ID'].unique().tolist()
    nodes = ['ID: ' + node for node in nodes ]
    return(nodes)

def create_edges(df):
    edges_df = df[['Previous_ID', 'New_All_Related_Ids', 'Edge_Label']]
    edges_df = edges_df[edges_df['Previous_ID'] != edges_df['New_All_Related_Ids']]
    edges = [('ID: ' + row['Previous_ID'], 'ID: ' + row['New_All_Related_Ids']) for index, row in edges_df.iterrows()]
    edge_labels = edges_df['Edge_Label']
    return(edges, edge_labels)

    
def main():
    #Simulate fake pii data and begin iterating over each row to get list of related ids
    df = generate_fake_pii_df(50)
    
    #Determine a list of ids for which to begin matching against (in this case all the ids in the data)
    ids = df['Previous_ID'].astype(str).tolist()

    columns_to_match = ['Name', 'Email', 'Phone', 'Address']

    dataframes = []
    for col in columns_to_match:
        match_df = get_matches(df, ids, col)
        dataframes.append(match_df)

    merged_df = merge_dataframe_list(dataframes, merge_on = 'Previous_ID')
    
    #Create a column that specifies what each related id matched on
    related_cols = ['Name_Related_Ids', 'Email_Related_Ids', 'Phone_Related_Ids', 'Address_Related_Ids']
    final_df = get_match_strings(related_cols, merged_df)
    
    #Get a list of unique groups and assign the group an id (may have overlap between groups depending on pii matched)
    unique_lists = assign_group_id(final_df['New_All_Related_Ids'])
    
    group_df = pd.DataFrame({'Group_ID' : [i[0] for i in unique_lists],
                             'All_Related_Ids_String': [i[1] for i in unique_lists]})
    
    #Join the related ids together as a string so we can map the group id back to the related ids column
    group_df['All_Related_Ids_String'] = group_df['All_Related_Ids_String'].apply(lambda x: ', '.join(x))
    final_df['All_Related_Ids_String'] = final_df['New_All_Related_Ids'].apply(lambda x: ', '.join(x))
    merged_df = pd.merge(final_df, group_df, on = 'All_Related_Ids_String', how = 'left')
    merged_df = merged_df.drop(columns = 'All_Related_Ids_String')
    
    #Expand the 1:many dataframe to the 1:1 dataframe so that each relationship gets its own row
    exploded_df = merged_df.explode(['New_All_Related_Ids', 'Edge_Label', 'Confidence_Lists'])
    
    #Calculate similarity scores to determine how similar ids are to each other
    exploded_df['Similarity_Score'] = exploded_df['Edge_Label'].apply(lambda x: similarity_score(x))
    exploded_df['Weighted_Similarity_Score'] = exploded_df['Edge_Label'].apply(lambda x: weighted_similarity_score(x))
    
    #Using a particular set of logical conditions, label the ids as a Exact Match, High, Medium, or Low Confidence duplicate 
    func = np.vectorize(conditions)
    exploded_df['Is_Duplicate'] = func(exploded_df['Edge_Label'])
    
    return(merged_df, exploded_df)  

In [404]:
merged_df, exploded_df = main()
merged_df

Unnamed: 0,Previous_ID,Name,Name_Related_Ids,Email,Email_Related_Ids,Phone,Phone_Related_Ids,Address,Address_Related_Ids,All_Related_Ids,Edge_Label,New_All_Related_Ids,Confidence_Lists,Group_ID
0,0,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",cheerfuljellybean0@gmail.com,"[0, 16, 20, 34, 35, 40, 43, 46]",692-019-4344,"[0, 1, 37, 42]",2966 Oak Circle,"[0, 16, 19, 31, 36, 40, 44]","[0, 1, 14, 16, 18, 19, 2, 20, 27, 30, 31, 34, ...","[Email, Full Address, Full Name, Phone, Email,...","[0, 16, 40]","[Exact Match, Medium Confidence, Medium Confid...",0
1,0,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",cheerfuljellybean0@gmail.com,"[0, 16, 20, 34, 35, 40, 43, 46]",692-019-4344,"[0, 1, 37, 42]",2966 Oak Circle,"[0, 16, 19, 31, 36, 40, 44]","[0, 1, 14, 16, 18, 19, 2, 20, 27, 30, 31, 34, ...","[Email, Full Address, Full Name, Phone, Email,...","[0, 16, 40]","[Exact Match, Medium Confidence, Medium Confid...",16
2,0,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",cheerfuljellybean0@gmail.com,"[0, 16, 20, 34, 35, 40, 43, 46]",692-019-4344,"[0, 1, 37, 42]",2966 Oak Circle,"[0, 16, 19, 31, 36, 40, 44]","[0, 1, 14, 16, 18, 19, 2, 20, 27, 30, 31, 34, ...","[Email, Full Address, Full Name, Phone, Email,...","[0, 16, 40]","[Exact Match, Medium Confidence, Medium Confid...",40
3,1,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",amusinggigglyghost2@yahoo.com,"[1, 4, 7, 10, 13, 17, 21, 24, 37, 41]",692-019-4344,"[0, 1, 37, 42]",1173 Willow Terrace,"[1, 3, 6, 8, 23, 43, 45, 46]","[0, 1, 10, 13, 14, 17, 18, 2, 21, 23, 24, 27, ...","[Email, Full Address, Full Name, Phone]",[1],[Exact Match],1
4,2,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",sillyticklemonster8256@hotmail.com,"[2, 9]",577-840-8296,"[2, 8, 16, 18, 21, 33, 49]",7687 Willow Boulevard,"[2, 38, 47]","[0, 1, 14, 16, 18, 2, 21, 27, 30, 33, 38, 39, ...","[Email, Full Address, Full Name, Phone]",[2],[Exact Match],2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,47,Charlie Miller,"[3, 6, 7, 15, 17, 20, 25, 29, 31, 38, 43, 44, 47]",cheerfulsnickerdoodle5@outlook.com,"[3, 12, 14, 15, 29, 30, 38, 47]",654-783-9540,"[3, 23, 36, 40, 47]",7687 Willow Boulevard,"[2, 38, 47]","[12, 14, 15, 17, 2, 20, 23, 25, 29, 3, 30, 31,...","[Email, Full Address, Full Name, Email, Full A...","[38, 47]","[Medium Confidence, Exact Match]",38
76,47,Charlie Miller,"[3, 6, 7, 15, 17, 20, 25, 29, 31, 38, 43, 44, 47]",cheerfulsnickerdoodle5@outlook.com,"[3, 12, 14, 15, 29, 30, 38, 47]",654-783-9540,"[3, 23, 36, 40, 47]",7687 Willow Boulevard,"[2, 38, 47]","[12, 14, 15, 17, 2, 20, 23, 25, 29, 3, 30, 31,...","[Email, Full Address, Full Name, Email, Full A...","[38, 47]","[Medium Confidence, Exact Match]",47
77,48,Ivy Smith,"[11, 22, 35, 41, 42, 46, 48]",playfuldizzygiraffe728@gmail.com,"[6, 23, 28, 31, 32, 45, 48]",851-229-2295,"[5, 32, 34, 35, 48]",5377 River Lane,"[21, 30, 34, 48]","[11, 21, 22, 23, 28, 30, 31, 32, 34, 35, 41, 4...","[Email, Full Address, Full Name, Phone]",[48],[Exact Match],48
78,49,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",quirkygigglyghost537@gmail.com,"[19, 22, 25, 49]",577-840-8296,"[2, 8, 16, 18, 21, 33, 49]",2518 Linden Road,"[4, 11, 14, 20, 22, 41, 42, 49]","[0, 1, 11, 14, 16, 18, 19, 2, 20, 21, 22, 25, ...","[Email, Full Address, Email, Full Address, Ful...","[22, 49]","[Medium Confidence, Exact Match]",22


In [405]:
exploded_df

Unnamed: 0,Previous_ID,Name,Name_Related_Ids,Email,Email_Related_Ids,Phone,Phone_Related_Ids,Address,Address_Related_Ids,All_Related_Ids,Edge_Label,New_All_Related_Ids,Confidence_Lists,Group_ID,Similarity_Score,Weighted_Similarity_Score,Is_Duplicate
0,0,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",cheerfuljellybean0@gmail.com,"[0, 16, 20, 34, 35, 40, 43, 46]",692-019-4344,"[0, 1, 37, 42]",2966 Oak Circle,"[0, 16, 19, 31, 36, 40, 44]","[0, 1, 14, 16, 18, 19, 2, 20, 27, 30, 31, 34, ...","Email, Full Address, Full Name, Phone",0,Exact Match,0,1.00,1.000,Exact Match
0,0,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",cheerfuljellybean0@gmail.com,"[0, 16, 20, 34, 35, 40, 43, 46]",692-019-4344,"[0, 1, 37, 42]",2966 Oak Circle,"[0, 16, 19, 31, 36, 40, 44]","[0, 1, 14, 16, 18, 19, 2, 20, 27, 30, 31, 34, ...","Email, Full Address",16,Medium Confidence,0,0.50,0.625,Medium Confidence
0,0,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",cheerfuljellybean0@gmail.com,"[0, 16, 20, 34, 35, 40, 43, 46]",692-019-4344,"[0, 1, 37, 42]",2966 Oak Circle,"[0, 16, 19, 31, 36, 40, 44]","[0, 1, 14, 16, 18, 19, 2, 20, 27, 30, 31, 34, ...","Email, Full Address, Full Name",40,Medium Confidence,0,0.75,0.875,Medium Confidence
1,0,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",cheerfuljellybean0@gmail.com,"[0, 16, 20, 34, 35, 40, 43, 46]",692-019-4344,"[0, 1, 37, 42]",2966 Oak Circle,"[0, 16, 19, 31, 36, 40, 44]","[0, 1, 14, 16, 18, 19, 2, 20, 27, 30, 31, 34, ...","Email, Full Address, Full Name, Phone",0,Exact Match,16,1.00,1.000,Exact Match
1,0,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",cheerfuljellybean0@gmail.com,"[0, 16, 20, 34, 35, 40, 43, 46]",692-019-4344,"[0, 1, 37, 42]",2966 Oak Circle,"[0, 16, 19, 31, 36, 40, 44]","[0, 1, 14, 16, 18, 19, 2, 20, 27, 30, 31, 34, ...","Email, Full Address",16,Medium Confidence,16,0.50,0.625,Medium Confidence
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,48,Ivy Smith,"[11, 22, 35, 41, 42, 46, 48]",playfuldizzygiraffe728@gmail.com,"[6, 23, 28, 31, 32, 45, 48]",851-229-2295,"[5, 32, 34, 35, 48]",5377 River Lane,"[21, 30, 34, 48]","[11, 21, 22, 23, 28, 30, 31, 32, 34, 35, 41, 4...","Email, Full Address, Full Name, Phone",48,Exact Match,48,1.00,1.000,Exact Match
78,49,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",quirkygigglyghost537@gmail.com,"[19, 22, 25, 49]",577-840-8296,"[2, 8, 16, 18, 21, 33, 49]",2518 Linden Road,"[4, 11, 14, 20, 22, 41, 42, 49]","[0, 1, 11, 14, 16, 18, 19, 2, 20, 21, 22, 25, ...","Email, Full Address",22,Medium Confidence,22,0.50,0.625,Medium Confidence
78,49,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",quirkygigglyghost537@gmail.com,"[19, 22, 25, 49]",577-840-8296,"[2, 8, 16, 18, 21, 33, 49]",2518 Linden Road,"[4, 11, 14, 20, 22, 41, 42, 49]","[0, 1, 11, 14, 16, 18, 19, 2, 20, 21, 22, 25, ...","Email, Full Address, Full Name, Phone",49,Exact Match,22,1.00,1.000,Exact Match
79,49,Henry Jones,"[0, 1, 2, 14, 18, 27, 30, 39, 40, 49]",quirkygigglyghost537@gmail.com,"[19, 22, 25, 49]",577-840-8296,"[2, 8, 16, 18, 21, 33, 49]",2518 Linden Road,"[4, 11, 14, 20, 22, 41, 42, 49]","[0, 1, 11, 14, 16, 18, 19, 2, 20, 21, 22, 25, ...","Email, Full Address",22,Medium Confidence,49,0.50,0.625,Medium Confidence


In [413]:
from pyvis.network import Network

net = Network(notebook=True, bgcolor="#222222", font_color="white")#select_menu = True,filter_menu=True)
edges, edge_labels = create_edges(exploded_df)
net.add_nodes(create_nodes(exploded_df))
#net.add_edges(edges)
for node, edge, edge_label in zip(nodes, edges, edge_labels):
    net.add_edge(edge[0], edge[1], title = edge_label)

net.show('edges.html')

edges.html


In [None]:
# import geopandas as gpd
# import folium

# # Replace 'your_shapefile.shp' with the path to your shapefile
# gdf = gpd.read_file('./data/Wildfires_1878_2019_Polygon_Data/Shapefile/US_Wildfires_1878_2019.shp')
# gdf = gdf.to_crs({'proj':'longlat', 'ellps':'WGS84', 'datum':'WGS84'})
# gdf['centroid'] = gpd.GeoSeries(gdf["geometry"]).centroid
# gdf.to_pickle('geo_dataframe_forest_fires.pkl')

# my_map = folium.Map(tiles = 'cartodbdark_matter')
# for _, r in test_df.iterrows():
#     sim_geo = gpd.GeoSeries(r["geometry"]).simplify(tolerance=0.001)
#     geo_j = sim_geo.to_json()
#     geo_j = folium.GeoJson(data = geo_j, style_function=lambda x: {'fillColor': 'orange',
#                                                                    'color': 'yellow',
#                                                                    'weight': 1,
#                                                                    'fillOpacity': 1})
#     #folium.Popup(r["FireCause"]).add_to(geo_j)
#     geo_j.add_to(my_map)
# #my_map.save("forest_fires.html")
# my_map

In [None]:
# forest_fires_new_df = pd.read_pickle('/Users/andrewcasanova/Documents/geo_dataframe_forest_fires.pkl')
# forest_fires_new_df.head()

In [None]:
#IFrame(src = "forest_fires.html", width = 800, height = 600)

In [None]:
#https://pdf.sciencedirectassets.com/271100/1-s2.0-S0379711219X00028/1-s2.0-S0379711218303941/am.pdf?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEFkaCXVzLWVhc3QtMSJGMEQCIB6%2BrP729s6pOcd3RO8MOnfurh3JEkz3x5YghLvpPrNRAiBaWbgpz6TYtOMzEZKdaczL56O4QENCk5HZO1k2s7LOEyq8BQix%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F8BEAUaDDA1OTAwMzU0Njg2NSIMt816wIlFIr8aw6Y%2FKpAFha3vg1FILlX33M9Yn%2F%2F8Ln4q2wttgJL8A0mUR52lEptZVBRBtDKGZ6pJMEC0NymWt7hlmD4PkxD9uJszMq%2FolInsEQkMDhZcx80cUagpZYH0UR33nDrfmAyjASm%2FR28LUmIdQfkrp0JEIAVdSMxHg7PLzT4fXzYBDekJVyW1teoA4OaBoruJb%2BkGrN6iqtItrvIqbxN86HjuYbsLiKcZTQMLwUdXMK9PzsSuK7ckpuycoMu44vSPDvoNU%2BFGrPIfUAnVNqpmSBUbuOHvl5ULkiuKw8C9LBIitO%2BZcTCizvmAiM%2B6lsWKpLlBx%2BDvNrqu5Tj7mmN75E5o238XwigPV%2BDlC2Sv%2FGZgaVeDsqqawshXT2kE%2B1zrzmtVgA9J495A6Yu%2FMQZzQYgEwHB63bFMNnVonz1YOCUjKXDCXPsSfEY35KRhb04AMWoFjli3wsGlpdwTxqSqEyKhPRsS%2BVx5e%2FXvcszOgQeW4g%2FWRi2p5sY3C7LFTLFXZ%2BS5g8u0%2F4VpGJKkd6NMiAVIBKC13HXwWc%2F0hR%2BZCYCpRIqxCaLuVrcHeRIx90Zv4sdM5G4J%2FzQgnvMqOTmKcORn0v7%2BCknqN7re3cTswxh3%2FkA07LvG%2BYnkBX2mo7bqSe2IFwohM9cRdAvytsUR5Qo7IwQInCMlCN5AseMNbEhZ4os0zMHDaCHUvhaos%2FjDtG4ll0ewMgT7GQKZ5xMIetUSxB9G2C8DkaV4rvolaCebu1QLGsiaf9Sn0BT8OHqixlWpRt3qBSb%2FXengCYwKLATRoefvUfRrM9Ddfr5FRUExuca3yQVRtKz6ibnNZbUbe3bufyV%2FvUDI4ZvaxuWh13zlt1b5LL7bMYqLyyzi4ed2MdJM0emhanQwkd6UqwY6sgGboIm0vi1pPjXxvMG4nfUJoanyhSJ4KZqMOPqmaDr2TOerDPkMmnvVxUpZ1zEPPpg3ew3BhqxDz4%2Be6CDg%2FOLIUOk5njvwEpx0wAM0V0XgdGKxa57JQuk85EYUp9tLZ4%2FmfkYo84onIedu4ig%2BKOZasOPjH%2B54VFx6ekJOw1GHEDh20ZG3XJQ0ZEjTn7ilqhjvpCL1KusuN6y%2B81Pxwjj7WL3GG6PYDYm%2FLdnpwoFcRlMw&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Date=20231128T003309Z&X-Amz-SignedHeaders=host&X-Amz-Expires=300&X-Amz-Credential=ASIAQ3PHCVTYWWV37SQY%2F20231128%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Signature=c33d3df67240330cb588a4eb76b655ecd01191cdb98cd24c5f49c7a922309983&hash=bf059bd61fc8778f809a598de2aada60e4a3e75e672853757544641ac0ed95b2&host=68042c943591013ac2b2430a89b270f6af2c76d8dfd086a07176afe7c76c2c61&pii=S0379711218303941&tid=pdf-9dc187da-f80b-4720-8f62-1882dd8caf87&sid=5afc32371e344648f128f916f88a52edc0d7gxrqa&type=client
#https://towardsdatascience.com/creating-an-interactive-map-of-wildfire-data-using-folium-in-python-7d6373b6334a
#https://www.sciencebase.gov/catalog/item/5ee13de982ce3bd58d7be7e7
#https://palkovic.org/wp-content/uploads/2020/12/USGS_wildfires.html