In [1]:
import pandas as pd
import sqlite3
# Show every column when a DataFrame is rendered (None removes the column cap).
pd.options.display.max_columns = None

In [7]:
# Load the raw Crime_01_2024 export unchanged (original column names, before
# clean_data renames and reorders them) and display it for inspection.
jan = pd.read_csv('uploads/Crime_01_2024.csv')
jan

Unnamed: 0,IncidentDate,OccurredFromTime,IncidentNum,Offense,NIBRS,NIBRSCategory,SRS_UCR,CrimeAgainst,FelMisdCit,IncidentTopSRS_UCR,IncidentLocation,IntersectionOtherLoc,District,Neighborhood,NbhdNum,Latitude,Longitude,IncidentSupplemented,LastSuppDate,VictimNum,FirearmUsed,IncidentNature
0,2020-12-17,18:00:00,20056840,STEALING - $750 OR MORE,23H,All Other Larceny,6.0,Property,F,6.0,8025 S BROADWAY,,1.0,Patch,2,38.544389,-90.262290,Yes,2024-01-29 08:55:56.3400000,,No,Stolen Property - Criminal
1,2020-12-17,18:00:00,20056840,PROPERTY DAMAGE 1ST DEGREE,290,Destruction/Damage/Vandalism of Property,,Property,,6.0,8025 S BROADWAY,,1.0,Patch,2,38.544389,-90.262290,Yes,2024-01-29 08:55:56.3400000,,No,Stolen Property - Criminal
2,2020-12-23,06:05:00,20057591,ROBBERY - 1ST DEGREE,120,Robbery,3.0,Property,,3.0,2600 N 14TH ST,,4.0,Old North St Louis,63,38.650037,-90.196741,Yes,2024-01-27 00:05:05.7070000,,Yes,Carjacking - Criminal
3,2020-12-29,00:52:00,20058325,ROBBERY - 1ST DEGREE,120,Robbery,3.0,Property,,3.0,4922 CLAXTON AVE,,6.0,Mark Twain,71,38.689941,-90.249730,Yes,2024-01-12 13:47:31.0000000,,Yes,Robbery - Criminal
4,2020-12-31,22:20:00,20058746,ASSAULT 1ST DEGREE OR ATTEMPT,13A,Aggravated Assault,4.0,Person,,4.0,811 SPRUCE ST,ROOM 311,4.0,Downtown,35,38.623713,-90.194768,Yes,2024-01-08 08:33:09.0300000,363565.0,Yes,Aggravated Assault - Shooting - Criminal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5870,2024-01-25,12:00:00,24007184,STEALING UNDER $150 (PETTY LARCENY - OTHER),23H,All Other Larceny,6.0,Property,,6.0,5118 WATERMAN BLVD,,5.0,Central West End,38,38.648258,-90.268370,No,,,No,Larceny - Criminal
5871,2024-01-31,00:01:00,24007205,DESTRUCTION OF PRIVATE PROPERTY (CITY CHARGE),290,Destruction/Damage/Vandalism of Property,,Property,,,7208 N BROADWAY,,6.0,North Riverfront,79,38.631053,-90.187720,No,,,No,Destruction of Property - Criminal
5872,2024-01-26,11:00:00,24007385,STEALING,23D,Theft From Building,6.0,Property,F,6.0,4021 LACLEDE AVENUE,,5.0,Central West End,38,38.637080,-90.244946,No,,,No,Larceny - Criminal
5873,2024-01-22,12:00:00,24007544,LEAVING THE SCENE OF A MOTOR VEHICLE ACCIDENT,90Z,All Other Offenses,,Unspecified,,,6014 MARMADUKE AVE,,2.0,Clifton Heights,11,38.609736,-90.290430,No,,,No,Offenses - All Other - Criminal


In [11]:
# Raw CSV column name -> column name used by the crime_data / unfounded_data tables.
_COLUMN_RENAMES = {
    'CrimeAgainst': 'NIBRSCat',
    'NIBRS': 'NIBRSCode',
    'NIBRSCategory': 'NIBRSOffenseType',
    'SRS_UCR': 'UCR_SRS',
    'OccurredFromTime': 'TimeOccurred',
    'Offense': 'SLMPDOffense',
    'FelMisdCit': 'CrimeGrade',
    'IncidentLocation': 'PrimaryLocation',
    'IntersectionOtherLoc': 'SecondaryLocation',
    'NbhdNum': 'NeighborhoodNum',
    'IncidentSupplemented': 'Supplemented',
    'LastSuppDate': 'SupplementDate',
}

# Column order shared by the cleaned frame and every INSERT / SELECT statement.
_ORDERED_COLS = ['IncidentNum', 'IncidentDate', 'TimeOccurred', 'SLMPDOffense',
                 'NIBRSCode', 'NIBRSCat', 'NIBRSOffenseType', 'UCR_SRS', 'CrimeGrade',
                 'PrimaryLocation', 'SecondaryLocation', 'District', 'Neighborhood',
                 'NeighborhoodNum', 'Latitude', 'Longitude', 'Supplemented',
                 'SupplementDate', 'VictimNum', 'FirearmUsed', 'IncidentNature']

# Incidents dated before this day are discarded.
_MIN_INCIDENT_DATE = '2021-01-01'

# Connection-private scratch table holding the incident numbers being replaced.
_KEY_TABLE = 'temp._clean_data_keys'


def _replace_incidents(connection, frame, insert_table, purge_tables):
    """Delete ``frame``'s incidents from ``purge_tables``, then insert its rows into ``insert_table``.

    ``frame`` must have its columns in ``_ORDERED_COLS`` order. Nothing is
    committed here; the caller owns the transaction.
    """
    keys = [(num,) for num in frame['IncidentNum'].dropna().unique().tolist()]

    # A plain-SQL temp table keeps the single-pass "IN (SELECT ...)" delete while
    # avoiding DataFrame.to_sql, which commits and would split the transaction.
    connection.execute('DROP TABLE IF EXISTS {}'.format(_KEY_TABLE))
    connection.execute('CREATE TEMP TABLE _clean_data_keys (IncidentNum)')
    connection.executemany('INSERT INTO {} VALUES (?)'.format(_KEY_TABLE), keys)

    for table in purge_tables:
        connection.execute(
            'DELETE FROM {} WHERE IncidentNum IN (SELECT IncidentNum FROM {})'.format(
                table, _KEY_TABLE))

    insert_sql = 'INSERT INTO {} ({}) VALUES ({})'.format(
        insert_table, ','.join(_ORDERED_COLS), ','.join('?' * len(_ORDERED_COLS)))
    connection.executemany(insert_sql, list(frame.itertuples(index=False, name=None)))

    connection.execute('DROP TABLE IF EXISTS {}'.format(_KEY_TABLE))


def clean_data(csv_loc, connection=None):
    """Clean a crime CSV export and merge it into the SQLite tables.

    The CSV is renamed/reordered to the database column layout, incidents dated
    before 2021-01-01 are removed, and the remaining rows are split into:

    * supplemented rows (``Supplemented == 'Yes'``) -> replace the matching
      incidents in ``crime_data``;
    * unfounded rows (``Supplemented`` missing and offense
      ``'UNFOUNDED INCIDENT'``) -> removed from ``crime_data`` and replaced in
      ``unfounded_data``;
    * new rows (``Supplemented == 'No'``) -> replace the matching incidents in
      ``crime_data``.

    All database changes run in one transaction: they are committed together,
    or rolled back together if any statement fails.

    Parameters
    ----------
    csv_loc : str or path-like
        Location of the CSV file to load.
    connection : sqlite3.Connection, optional
        Database connection to use. Defaults to the notebook-level ``conn`` so
        existing ``clean_data(path)`` calls keep working.

    Returns
    -------
    list
        ``[df, supp_df, unfound_df, new_df, updated_df]`` where ``df`` is the
        cleaned frame, the next three are its partitions, and ``updated_df`` is
        the full ``crime_data`` table sorted by IncidentDate then IncidentNum.

    Raises
    ------
    KeyError
        If the CSV lacks one of the expected columns.
    sqlite3.Error
        If a database statement fails (the transaction is rolled back first).
    """
    if connection is None:
        # Backward-compatible fallback to the global defined elsewhere in the notebook.
        connection = conn

    # .loc returns a new frame, so later column assignment never hits a view;
    # the selection also discards IncidentTopSRS_UCR and any other extra column.
    df = (
        pd.read_csv(csv_loc)
        .rename(columns=_COLUMN_RENAMES)
        .loc[:, _ORDERED_COLS]
    )

    # Remove incidents prior to the cut-off. The negated "<" (rather than ">=")
    # deliberately keeps rows whose date is missing, matching prior behaviour.
    incident_dates = pd.to_datetime(df['IncidentDate'])
    keep = ~(incident_dates < _MIN_INCIDENT_DATE)
    df = (
        df.loc[keep]
        # store the date back as a string column
        .assign(IncidentDate=incident_dates[keep].astype('str'))
        .reset_index(drop=True)
    )

    supp_df = df[df['Supplemented'] == 'Yes']
    unfound_df = df[df['Supplemented'].isna() & (df['SLMPDOffense'] == 'UNFOUNDED INCIDENT')]
    new_df = df[df['Supplemented'] == 'No']

    # Rows that fall into none of the three partitions are not written anywhere.
    if len(df) != len(supp_df) + len(unfound_df) + len(new_df):
        print("Something doesn't add up")

    # The connection context manager commits on success and rolls back on error.
    with connection:
        _replace_incidents(connection, supp_df, 'crime_data', ['crime_data'])
        _replace_incidents(connection, unfound_df, 'unfounded_data',
                           ['crime_data', 'unfounded_data'])
        _replace_incidents(connection, new_df, 'crime_data', ['crime_data'])

    # Return updated table
    select_sql = 'SELECT {} FROM crime_data'.format(','.join(_ORDERED_COLS))
    updated_df = pd.read_sql_query(select_sql, connection)
    updated_df = updated_df.sort_values(['IncidentDate', 'IncidentNum'])

    return [df, supp_df, unfound_df, new_df, updated_df]

In [3]:
# Open the SQLite database file (created if it does not exist yet) and bind it
# to the global name `conn`, which clean_data uses for all of its queries.
conn=sqlite3.connect('test_db.db')

In [14]:
# Merge the Crime_01_2024 file into the database, then unpack the five frames
# clean_data returns: cleaned input, its three partitions, and the resulting
# crime_data table.
jan_loc = 'uploads/Crime_01_2024.csv'
cleaned = clean_data(jan_loc)
clean_df, supp_df, unfound_df, new_df, updated_df = cleaned

In [15]:
# crime_data as read back by clean_data, sorted by IncidentDate then IncidentNum.
updated_df

Unnamed: 0,IncidentNum,IncidentDate,TimeOccurred,SLMPDOffense,NIBRSCode,NIBRSCat,NIBRSOffenseType,UCR_SRS,CrimeGrade,PrimaryLocation,SecondaryLocation,District,Neighborhood,NeighborhoodNum,Latitude,Longitude,Supplemented,SupplementDate,VictimNum,FirearmUsed,IncidentNature
0,21000002,2021-01-01,02:00.0,MISCELLANEOUS WEAPON VIOLATION,520,,,,,1700 CHESTNUT ST,,4.0,36,,38.629573,-90.204995,,,,,
1,21000003,2021-01-01,09:00.0,MISCELLANEOUS WEAPON VIOLATION,520,,,,,4961 LACLEDE AVE,210,5.0,38,,38.641221,-90.264226,,,,,
2,21000003,2021-01-01,09:00.0,PROPERTY DAMAGE,290,,,,,4961 LACLEDE AVE,210,5.0,38,,38.641221,-90.264226,,,,,
3,21000007,2021-01-01,10:00.0,PROPERTY DAMAGE - 2ND DEGREE,290,,,,,6963 PERNOD AVE,,2.0,9,,38.598725,-90.310722,,,,,
4,21000008,2021-01-01,20:00.0,MISCELLANEOUS WEAPON VIOLATION,520,,,,,3500 KINGSLAND CT,,1.0,16,,38.579494,-90.243210,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177534,24004797,2024-01-31,22:00:00,ASSAULT- 4TH DEGREE - PURSUANT TO SUBDIVISION (3),13B,Person,Simple Assault,,M,6055 W FLORISSANT AVE,,6.0,North Pointe,73,38.710429,-90.252241,No,,371360.0,No,Simple Assault - Criminal
175592,24004856,2024-01-31,17:30:00,STEALING - $750 OR MORE,23G,Property,Theft From Motor Vehicle Parts/Accessories,6.0,F,300 N 4TH ST,,4.0,Downtown,35,38.628159,-90.186982,Yes,2024-02-06 14:33:46.0630000,,No,Larceny - Criminal
177558,24005157,2024-01-31,10:00:00,STEALING - MOTOR VEHICLE/WATERCRAFT/AIRCRAFT,240,Property,Motor Vehicle Theft,7.0,F,1250 BLUMEYER ST,,4.0,Covenant Blu Grand Center,77,38.643889,-90.225851,No,,,No,Motor Vehicle Theft - Criminal
177559,24005186,2024-01-31,23:30:00,STEALING - MOTOR VEHICLE/WATERCRAFT/AIRCRAFT,240,Property,Motor Vehicle Theft,7.0,F,I 70 WESTBOUND,MADISON ST,4.0,Old North St Louis,63,38.652490,-90.193730,No,,,No,Motor Vehicle Theft - Criminal


In [None]:
# Show any rows of the crime_data snapshot whose offense is 'UNFOUNDED INCIDENT'.
updated_df[updated_df['SLMPDOffense'] == 'UNFOUNDED INCIDENT']

In [None]:
# Run the same file through clean_data again; it deletes rows with matching
# IncidentNum before inserting, so this replaces rather than duplicates them.
jan_loc = 'uploads/Crime_01_2024.csv'
cleaned = clean_data(jan_loc)
clean_df, supp_df, unfound_df, new_df, updated_df = cleaned
updated_df

In [None]:
# Read every column of crime_data (clean_data's own SELECT lists only the
# columns it writes) and display the result.
df = pd.read_sql_query("SELECT * FROM crime_data", conn)
df

In [None]:
# Keep only rows with a non-null 'Id', rebinding `df` to the filtered frame.
# NOTE(review): 'Id' is not one of the columns clean_data inserts; presumably
# it is an extra column of crime_data returned by SELECT * -- confirm it exists.
df = df.dropna(subset=['Id'])
df

In [None]:
# Print column dtypes and non-null counts for the current `df`.
df.info()

In [None]:
# Close the SQLite connection; code that uses `conn` will fail after this
# until a new connection is opened.
conn.close()

In [6]:
# Load the Crime2021-2023 CSV and display it for inspection.
dd = pd.read_csv('uploads/Crime2021-2023.csv')
dd

Unnamed: 0,IncidentNum,IncidentDate,TimeOccurred,SLMPDOffense,NIBRSCode,NIBRSCat,NIBRSOffenseType,UCR_SRS,CrimeGrade,PrimaryLocation,SecondaryLocation,District,Neighborhood,NeighborhoodNum,Latitude,Longitude,Supplemented,SupplementDate,VictimNum,FirearmUsed,IncidentNature
0,21000002,2021-01-01,02:00.0,MISCELLANEOUS WEAPON VIOLATION,520,,,,,1700 CHESTNUT ST,,4.0,36,,38.629573,-90.204995,,,,,
1,21000003,2021-01-01,09:00.0,MISCELLANEOUS WEAPON VIOLATION,520,,,,,4961 LACLEDE AVE,210,5.0,38,,38.641221,-90.264226,,,,,
2,21000003,2021-01-01,09:00.0,PROPERTY DAMAGE,290,,,,,4961 LACLEDE AVE,210,5.0,38,,38.641221,-90.264226,,,,,
3,21000007,2021-01-01,10:00.0,PROPERTY DAMAGE - 2ND DEGREE,290,,,,,6963 PERNOD AVE,,2.0,9,,38.598725,-90.310722,,,,,
4,21000008,2021-01-01,20:00.0,MISCELLANEOUS WEAPON VIOLATION,520,,,,,3500 KINGSLAND CT,,1.0,16,,38.579494,-90.243210,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173359,24000551,2023-12-31,00:00.0,DESTRUCTION OF PRIVATE PROPERTY (CITY CHARGE),290,,,,,3920 DUNNICA AVE,,1.0,16,,38.587915,-90.252870,,,,,
173360,24000557,2023-12-31,00:00.0,DESTRUCTION OF PRIVATE PROPERTY (CITY CHARGE),290,,,,,2007 E FAIR AVE,,6.0,68,,38.677708,-90.217160,,,,,
173361,24000591,2023-12-31,00:00.0,DISCHARGING FIREARM WITHIN CITY,520,,,,,5516 GRACE AVE,,1.0,3,,38.569796,-90.254490,,,,,
173362,24000629,2023-12-31,00:00.0,STEALING UNDER $150 (PETTY LARCENY-MOTOR VEH P...,23G,,,,,1500 OLIVE ST,,4.0,36,,38.630687,-90.201630,,,,,
