In [1]:
import pandas as pd
import sqlite3
# Show every column when a DataFrame is rendered instead of eliding the middle ones.
pd.options.display.max_columns = None

In [None]:
# Preview the raw January 2024 export exactly as it arrives, before any cleaning.
jan = pd.read_csv(filepath_or_buffer='uploads/Crime_01_2024.csv')
jan

In [None]:
# add_query = """INSERT INTO crime_data (IncidentNum,IncidentDate,TimeOccurred,SLMPDOffense,
#                                        NIBRSCode,NIBRSCat,NIBRSOffenseType,UCR_SRS,CrimeGrade,
#                                        PrimaryLocation,SecondaryLocation,District,Neighborhood,
#                                        NeighborhoodNum,Latitude,Longitude,Supplemented,
#                                        SupplementDate,VictimNum,FirearmUsed,IncidentNature) 
#                 VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"""

In [5]:
# Column layout shared by crime_data and unfounded_data, in INSERT order.
_ORDERED_COLS = ['IncidentNum', 'IncidentDate', 'TimeOccurred', 'SLMPDOffense',
                 'NIBRSCode', 'NIBRSCat', 'NIBRSOffenseType', 'UCR_SRS', 'CrimeGrade',
                 'PrimaryLocation', 'SecondaryLocation', 'District', 'Neighborhood',
                 'NeighborhoodNum', 'Latitude', 'Longitude', 'Supplemented',
                 'SupplementDate', 'VictimNum', 'FirearmUsed', 'IncidentNature']

# Raw CSV header -> database column name.
_COLUMN_RENAMES = {'CrimeAgainst': 'NIBRSCat',
                   'NIBRS': 'NIBRSCode',
                   'NIBRSCategory': 'NIBRSOffenseType',
                   'SRS_UCR': 'UCR_SRS',
                   'OccurredFromTime': 'TimeOccurred',
                   'Offense': 'SLMPDOffense',
                   'FelMisdCit': 'CrimeGrade',
                   'IncidentLocation': 'PrimaryLocation',
                   'IntersectionOtherLoc': 'SecondaryLocation',
                   'NbhdNum': 'NeighborhoodNum',
                   'IncidentSupplemented': 'Supplemented',
                   'LastSuppDate': 'SupplementDate'}


def _stage_incident_nums(db, staging_table, frame):
    """Write the IncidentNum column of ``frame`` to ``staging_table``, replacing it if present."""
    frame[['IncidentNum']].to_sql(staging_table, db, if_exists='replace', index=False)


def _delete_staged(db, table, staging_table):
    """Delete every row of ``table`` whose IncidentNum appears in ``staging_table``."""
    db.execute(f"DELETE FROM {table} "
               f"WHERE IncidentNum IN (SELECT IncidentNum FROM {staging_table})")


def _insert_rows(db, table, frame):
    """Append all rows of ``frame`` (columns in _ORDERED_COLS order) to ``table``."""
    columns = ','.join(_ORDERED_COLS)
    placeholders = ','.join('?' * len(_ORDERED_COLS))
    # Iterating a DataFrame row-wise yields plain Python scalars, which sqlite3 can bind.
    rows = list(frame.itertuples(index=False, name=None))
    db.executemany(f"INSERT INTO {table} ({columns}) VALUES ({placeholders})", rows)


def clean_data(csv_loc, connection=None):
    """Clean a crime CSV export and merge it into the SQLite tables.

    The CSV is renamed to the database column names, reduced to the 21 stored
    columns, and rows dated before 2021-01-01 are removed. The remaining rows
    are split into three groups and written to the database:

    * ``Supplemented == 'Yes'``: existing ``crime_data`` rows with the same
      IncidentNum are deleted, then these rows are inserted.
    * ``Supplemented`` missing and ``SLMPDOffense == 'UNFOUNDED INCIDENT'``:
      matching rows are deleted from both ``crime_data`` and
      ``unfounded_data``, then these rows are inserted into ``unfounded_data``.
    * ``Supplemented == 'No'``: inserted into ``crime_data`` as-is.

    NOTE(review): the 'No' rows are inserted without deleting existing rows
    first, so loading the same file twice presumably duplicates them; confirm
    whether that is intended.

    Parameters
    ----------
    csv_loc : str or path-like
        Location of the CSV export, passed straight to ``pandas.read_csv``.
    connection : sqlite3.Connection, optional
        Database connection to use. When omitted, the module-level ``conn``
        is used, which is what the function always did before.

    Returns
    -------
    list
        ``[df, supp_df, unfound_df, new_df, updated_df]``: the cleaned frame,
        its three groups, and the resulting ``crime_data`` table sorted by
        IncidentNum.

    Raises
    ------
    KeyError
        If the CSV lacks any of the expected columns.
    Exception
        Any database error is re-raised after the open transaction has been
        rolled back.
    """
    db = conn if connection is None else connection

    # Selecting _ORDERED_COLS also discards IncidentTopSRS_UCR and any other
    # extra columns; .copy() gives an independent frame so the assignments
    # below do not raise SettingWithCopyWarning.
    df = pd.read_csv(csv_loc).rename(columns=_COLUMN_RENAMES)[_ORDERED_COLS].copy()

    # remove incidents prior to 2021-01-01 (rows with unparseable/missing dates
    # compare False and are therefore kept)
    df['IncidentDate'] = pd.to_datetime(df['IncidentDate'])
    df = df[~(df['IncidentDate'] < '2021-01-01')].copy()
    # revert to string column so sqlite3 can store it
    df['IncidentDate'] = df['IncidentDate'].astype('str')
    df = df.reset_index(drop=True)

    supp_df = df[df['Supplemented'] == 'Yes']
    unfound_df = df[df['Supplemented'].isna() & (df['SLMPDOffense'] == 'UNFOUNDED INCIDENT')]
    new_df = df[df['Supplemented'] == 'No']

    # Rows that fall in none of the three groups are not written anywhere.
    if len(df) != len(supp_df) + len(unfound_df) + len(new_df):
        print("Something doesn't add up")

    try:
        # Stage the keys first: to_sql may commit on its own, so doing it
        # before any DELETE/INSERT keeps the real changes in one transaction.
        _stage_incident_nums(db, 'supp_temp', supp_df)
        _stage_incident_nums(db, 'temp_unfounded', unfound_df)

        # Supplemented incidents replace their earlier versions.
        _delete_staged(db, 'crime_data', 'supp_temp')
        _insert_rows(db, 'crime_data', supp_df)

        # Unfounded incidents move out of crime_data into unfounded_data.
        _delete_staged(db, 'crime_data', 'temp_unfounded')
        _delete_staged(db, 'unfounded_data', 'temp_unfounded')
        _insert_rows(db, 'unfounded_data', unfound_df)

        # Brand-new incidents are appended.
        _insert_rows(db, 'crime_data', new_df)

        db.execute('DROP TABLE IF EXISTS supp_temp')
        db.execute('DROP TABLE IF EXISTS temp_unfounded')
        db.commit()
    except Exception:
        # Do not leave a half-applied transaction open on a shared connection.
        db.rollback()
        raise

    # Return updated table. Id is not selected, so there is nothing to drop.
    updated_df = pd.read_sql_query(
        f"SELECT {','.join(_ORDERED_COLS)} FROM crime_data", db
    ).sort_values('IncidentNum')

    return [df, supp_df, unfound_df, new_df, updated_df]

In [3]:
# Shared SQLite connection; clean_data() and the cells below read this module-level `conn`.
conn = sqlite3.connect(database='test_db.db')

In [None]:
# len = conn.execute('SELECT MAX(Id) FROM crime_data')
# table_length = len.fetchone()[0]
# print(table_length)

In [6]:
# Load the January 2024 export into the database and unpack the five frames clean_data returns.
jan_loc = 'uploads/Crime_01_2024.csv'
cleaned = clean_data(csv_loc=jan_loc)
(clean_df, supp_df, unfound_df, new_df, updated_df) = cleaned

In [19]:
# Display the rows of the load whose Supplemented flag is 'No'.
new_df

Unnamed: 0,IncidentNum,IncidentDate,TimeOccurred,SLMPDOffense,NIBRSCode,NIBRSCat,NIBRSOffenseType,UCR_SRS,CrimeGrade,PrimaryLocation,SecondaryLocation,District,Neighborhood,NeighborhoodNum,Latitude,Longitude,Supplemented,SupplementDate,VictimNum,FirearmUsed,IncidentNature
1781,24000005,2024-01-01,00:30:00,LEAVING SCENE OF ACCIDENT - PHYSICAL INJURY,90Z,Unspecified,All Other Offenses,,F,1023 SPRUCE ST,,4.0,Downtown,35,38.624142,-90.197622,No,,,No,Aggravated Assault - Other Weapon - Criminal
1782,24000005,2024-01-01,00:30:00,LEAVING THE SCENE OF A MOTOR VEHICLE ACCIDENT,90Z,Unspecified,All Other Offenses,,,1023 SPRUCE ST,,4.0,Downtown,35,38.624142,-90.197622,No,,,No,Aggravated Assault - Other Weapon - Criminal
1783,24000006,2024-01-01,01:10:00,LEAVING THE SCENE OF A MOTOR VEHICLE ACCIDENT,90Z,Unspecified,All Other Offenses,,,HAMPTON AVE,CHIPPEWA ST,2.0,Southampton,7,38.593514,-90.294240,No,,,No,Offenses - All Other - Criminal
1784,24000007,2024-01-01,01:00:00,DISCHARGING FIREARM WITHIN CITY,520,Society,Weapons Law Violations,,,SPRUCE ST,S 10TH ST,4.0,Downtown,35,38.623730,-90.196540,No,,,Yes,Weapon Law Violation - Criminal
1795,24000021,2024-01-01,02:25:00,PROPERTY DAMAGE 1ST DEGREE,290,Property,Destruction/Damage/Vandalism of Property,,F,5079 CATES AVE,,5.0,Academy,51,38.655696,-90.265849,No,,,Yes,Destruction of Property - Criminal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5870,24007184,2024-01-25,12:00:00,STEALING UNDER $150 (PETTY LARCENY - OTHER),23H,Property,All Other Larceny,6.0,,5118 WATERMAN BLVD,,5.0,Central West End,38,38.648258,-90.268370,No,,,No,Larceny - Criminal
5871,24007205,2024-01-31,00:01:00,DESTRUCTION OF PRIVATE PROPERTY (CITY CHARGE),290,Property,Destruction/Damage/Vandalism of Property,,,7208 N BROADWAY,,6.0,North Riverfront,79,38.631053,-90.187720,No,,,No,Destruction of Property - Criminal
5872,24007385,2024-01-26,11:00:00,STEALING,23D,Property,Theft From Building,6.0,F,4021 LACLEDE AVENUE,,5.0,Central West End,38,38.637080,-90.244946,No,,,No,Larceny - Criminal
5873,24007544,2024-01-22,12:00:00,LEAVING THE SCENE OF A MOTOR VEHICLE ACCIDENT,90Z,Unspecified,All Other Offenses,,,6014 MARMADUKE AVE,,2.0,Clifton Heights,11,38.609736,-90.290430,No,,,No,Offenses - All Other - Criminal


In [None]:
# Summarize the crime_data snapshot returned by clean_data.
updated_df.info()

In [None]:
# NOTE(review): this repeats the load from the earlier cell on the same file. clean_data
# inserts the Supplemented == 'No' rows without deleting existing ones first, so a second
# run presumably duplicates them in crime_data (unless the table enforces uniqueness).
# Confirm this re-run is intended.
jan_loc = 'uploads/Crime_01_2024.csv'
cleaned = clean_data(csv_loc=jan_loc)
(clean_df, supp_df, unfound_df, new_df, updated_df) = cleaned
updated_df

In [None]:
# Pull every column of crime_data straight from the database for inspection.
df = pd.read_sql_query(sql="SELECT * FROM crime_data", con=conn)
df

In [None]:
# Keep only the rows that actually carry an Id value.
has_id = df['Id'].notna()
df = df[has_id]
df

In [None]:
# Summarize the frame left after rows with a missing Id were dropped.
df.info()

In [None]:
# Release the SQLite connection; cells that use `conn` cannot run after this until it is reopened.
conn.close()

In [None]:
# Load the combined 2021-2023 export; note this rebinds `df` to a different dataset.
df = pd.read_csv(filepath_or_buffer='uploads/Crime2021-2023.csv')
df