### Project Data Integration

In [122]:
import pandas as pd 
import numpy as np

In [123]:
# open the cleaned crime data csv file 
la_crime_data = pd.read_csv("cleaned_crime_data.csv", encoding="utf-8", sep=",", low_memory=False)

la_crime_data.head()

Unnamed: 0,DR_NO,DATE_RPTD,DATE_OCC,AREA,AREA_NAME,RPT_DIST_NO,CRM_CD,CRM_CD_DESC,MOCODES,VICT_AGE,VICT_SEX,VICT_DESCENT,PREMIS_CD,PREMIS_DESC,WEAPON_USED_CD,WEAPON_DESC,STATUS,STATUS_DESC,LOCATION
0,250504239,2025-03-13,2025-03-13,5,Harbor,516,510,VEHICLE - STOLEN,Unknown,X,X,X,108.0,PARKING LOT,Unknown / None Used,Unknown / None Used,IC,Invest Cont,1300 N FRIES AV
1,250504238,2025-03-13,2025-03-13,5,Harbor,569,440,THEFT PLAIN - PETTY ($950 & UNDER),Unknown,X,X,X,101.0,STREET,Unknown / None Used,Unknown / None Used,IC,Invest Cont,BERTH 73
2,250304275,2025-03-13,2025-03-13,3,Southwest,338,510,VEHICLE - STOLEN,Unknown,X,X,X,101.0,STREET,Unknown / None Used,Unknown / None Used,IC,Invest Cont,3100 MCCLINTOCK AV
3,251000503,2025-03-13,2025-03-13,10,West Valley,1035,624,BATTERY - SIMPLE ASSAULT,0913 1815 1251 0400 0416,16,M,H,721.0,HIGH SCHOOL,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,18200 KITTRIDGE ST
4,251700501,2025-03-13,2025-03-13,17,Devonshire,1717,930,CRIMINAL THREATS - NO WEAPON DISPLAYED,0340 0359 1822 1259,16,M,H,101.0,STREET,511.0,VERBAL THREAT,IC,Invest Cont,11200 GOTHIC AV


In [124]:
la_crime_data.shape

(127826, 19)

In [125]:
la_crime_data.dtypes

DR_NO              int64
DATE_RPTD         object
DATE_OCC          object
AREA               int64
AREA_NAME         object
RPT_DIST_NO        int64
CRM_CD             int64
CRM_CD_DESC       object
MOCODES           object
VICT_AGE          object
VICT_SEX          object
VICT_DESCENT      object
PREMIS_CD         object
PREMIS_DESC       object
WEAPON_USED_CD    object
WEAPON_DESC       object
STATUS            object
STATUS_DESC       object
LOCATION          object
dtype: object

In [126]:
# open the la population csv 
la_population = pd.read_csv("total_la_population.csv", encoding="utf-8", sep=",")

la_population.head(15)

Unnamed: 0,Neighborhood,American Indian and Alaska Native,Asian,African American,Native Hawaiian and Other Pacific Islander,White,Some Other Race,Two or More Races,Hispanic or Latino
0,City of L.A.,44777,463960,328445,5627,1092687,967895,607324,1822163
1,Adams-Normandie,129,394,783,63,663,2673,944,4783
2,Arleta,452,4430,181,46,2958,12733,7266,25861
3,Arlington Heights,121,1919,2293,0,744,5048,1755,8195
4,Arlington Park,68,32,1169,0,122,929,95,1508
5,Atwater Village,41,2129,113,0,3953,2075,1278,3238
6,Baldwin Hills,216,535,13592,67,2122,4224,3546,8433
7,Bel Air,3,972,242,0,5382,109,576,370
8,Beverly Crest,0,301,135,0,4668,36,405,258
9,Beverly Glen,0,431,45,0,2741,84,559,447


In [140]:
total_sum = [
    'American Indian and Alaska Native',
    'Asian',
    'African American',
    'Native Hawaiian and Other Pacific Islander',
    'White',
    'Some Other Race',
    'Two or More Races',
    'Hispanic or Latino'
]

# Remove commas and convert to integers
la_population[total_sum] = la_population[total_sum].replace({',': ''}, regex=True).astype(int)

# Sum across the specified columns row-wise
la_population['Total Population'] = la_population[total_sum].sum(axis=1)

In [128]:
la_population.shape

(152, 9)

In [129]:
la_population.dtypes

Neighborhood                                  object
American Indian and Alaska Native             object
Asian                                         object
African American                              object
Native Hawaiian and Other Pacific Islander    object
White                                         object
Some Other Race                               object
Two or More Races                             object
Hispanic or Latino                            object
dtype: object

In [130]:
la_crime_data_cols = la_crime_data.columns
la_population_cols = la_population.columns

print(la_crime_data_cols)
print(la_population_cols)

Index(['DR_NO', 'DATE_RPTD', 'DATE_OCC', 'AREA', 'AREA_NAME', 'RPT_DIST_NO',
       'CRM_CD', 'CRM_CD_DESC', 'MOCODES', 'VICT_AGE', 'VICT_SEX',
       'VICT_DESCENT', 'PREMIS_CD', 'PREMIS_DESC', 'WEAPON_USED_CD',
       'WEAPON_DESC', 'STATUS', 'STATUS_DESC', 'LOCATION'],
      dtype='object')
Index(['Neighborhood', 'American Indian and Alaska Native', 'Asian',
       'African American', 'Native Hawaiian and Other Pacific Islander',
       'White', 'Some Other Race', 'Two or More Races', 'Hispanic or Latino'],
      dtype='object')


In [142]:
# rename la_population["Total Population"] to all lower case to match la_crime_data column name
la_population = la_population.rename(columns={"Neighborhood" : "AREA_NAME"})

In [143]:
la_crime_data_unique = la_crime_data['AREA_NAME'].unique()

la_crime_data_unique

array(['Harbor', 'Southwest', 'West Valley', 'Devonshire', 'Mission',
       'Topanga', 'Hollenbeck', 'Southeast', 'Foothill', 'West LA',
       'Northeast', 'Olympic', 'Newton', '77th Street', 'Central',
       'Van Nuys', 'N Hollywood', 'Pacific', 'Rampart', 'Wilshire',
       'Hollywood'], dtype=object)

In [144]:
la_population_unique = la_population["AREA_NAME"].unique()

la_population_unique

array(['City of L.A.', 'Adams-Normandie', 'Arleta', 'Arlington Heights',
       'Arlington Park', 'Atwater Village', 'Baldwin Hills', 'Bel Air',
       'Beverly Crest', 'Beverly Glen', 'Beverlywood', 'Boyle Heights',
       'Brentwood', 'Broadway-Manchester', 'Byzantine-Latino Quarter',
       'Cahuenga Pass', 'Canoga Park', 'Carthay Circle',
       'Central-Alameda', 'Century City', 'Century Cove', 'Century Palms',
       'Chatsworth', 'Cheviot Hills', 'Chinatown & Historic L.A.',
       'Country Club Park', 'Crenshaw', 'Cypress Park', 'Del Rey',
       'Downtown', 'Eagle Rock', 'East Hollywood', 'Echo Park',
       'El Sereno', 'Elysian Park', 'Elysian Valley', 'Encino',
       'Exposition Park', 'Fairfax', 'Figueroa Park Square',
       'Florence (LA)', 'Glassell Park', 'Gramercy Park', 'Granada Hills',
       'Green Meadows', 'Griffith Park/Los Feliz', 'Hancock Park',
       'Hansen Dam Rec Area', 'Harbor City', 'Harbor Gateway',
       'Harvard Heights', 'Hermon', 'Highland Park',

In [145]:
la_population['AREA_NAME'] = la_population['AREA_NAME'].apply(lambda x: 'Harbor' if 'Harbor' in x else x)

In [146]:
la_population['AREA_NAME'] = la_population['AREA_NAME'].replace('North Hollywood', 'N Hollywood')

In [147]:
# southwest 

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Adams-Normandie','West Adams','West Adams Terrace/Kinney Heights/Berkely Square', 'University Expo Park West', 
                                                                'Vermont Square','Vermont-Slauson/Harvard Park/Chesterfield Square', 'Baldwin Hills', 'Crenshaw', 'Broadway-Manchester',
                                                                'Central-Alameda', 'Exposition Park', 'Florence (LA)', 'Historic South Central', 'Hyde Park', 'Leimert Park','Gramercy Park',
                                                                'Manchester Square', 'South Park', 'Vermont Vista', 'Watts', 'Vermont Knolls', 'Historic West Adams', 'South Robertson'], 'Southwest')


In [148]:
# west valley

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['West Hills', 'Canoga Park','Woodland Hills','Reseda',
                                                                'Arleta','Encino', 'Tarzana','Valley Village',
                                                                 'Pacoima','Sherman Oaks','Sunland','Sepulveda Dam Recreation Area',
                                                                 'Studio City', 'Winnetka','Valley Glen'], 'West Valley')

In [149]:
# southeast 

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Green Meadows','Toluca Lake', 'Toluca Woods', 'West Toluca Lake'], 'Southeast')

In [150]:
# northeast

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Atwater Village','Cypress Park','Eagle Rock','Glassell Park','Highland Park', 'Montecito Heights',
                                                                 'Mt. Washington','Hermon', 'Sycamore Grove', 'Monterey Hills'], 'Northeast')

In [151]:
# central 

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Koreatown','City of L.A.','Chinatown & Historic L.A.','Mid-Wilshire (Brookside)', 'Mid-Wilshire (Fremont Place-Windsor Village)',
                                                                 'Mid-Wilshire (Wilshire Park)','Hancock Park','Hollwood Hills', 'Elysian Park','Elysian Valley','Windsor Square', 'Fairfax','Picfair Village',
                                                                 'Downtown', 'South Carthay', 'Arlington Heights','Arlington Park', "Larchmont-St. Andrew's Square", 'Virgil Village', 'Country Club Park','Figueroa Park Square',
                                                                ], 'Central')

In [152]:
# topanga

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace('Topanga State Park', 'Topanga')

In [153]:
# wilshire

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Wilshire Center', 'Wilshire Vista','Wilshire Vista Heights','Wilshire', 'Mid-City', 
                                                                 'Mid-City West', 'Maplewood-St. Andrews', 'Miracle Mile', 'Western Wilton', 'Carthay Circle'], 'Wilshire')

In [154]:
# pacific 

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Redondo Sycamore', 'Pacific Palisades','Playa Vista','Bel Air',
                                                                'Brentwood','Century City', 'Century Cove', 'Century Palms','Cheviot Hills','Marina',
                                                                'Mar Vista','Palms','Rancho Park', 'Playa del Rey', 'Westchester', 'Del Rey'], 'Pacific')

In [155]:
# harbor

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Harbor','San Pedro','Wilmington', 'Terminal Island'], 'Harbor')

In [156]:
# devonshire

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Granada Hills','Chatsworth', 'Porter Ranch', 'Northridge'], 'Devonshire')

In [157]:
# mission 

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Mission Hills','North Hills','Panorama City','Sylmar'], 'Mission')

In [158]:
# hollenbeck 

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Boyle Heights','University Hills','Lincoln Heights','El Sereno'], 'Hollenbeck')

In [159]:
# foothill 


la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Lakeview Terrace','Shadow Hills','Tujunga', 'Sun Valley', 'La Tuna Canyon', 'Hansen Dam Rec Area'], 'Foothill')

In [160]:
# rampart

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Silver Lake','Echo Park','Pico-Union','Pico Park','Westlake & Historic Filipinotown', 'Byzantine-Latino Quarter', 'Rampart Village'], 'Rampart')

In [161]:
# west LA
la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Westwood','Venice', 'Beverly Crest','Beverly Glen', 'Beverlywood',
                                                                'Westside Village', 'South Robertson'],'West LA')

In [162]:
# olympic

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['University Park','Harvard Heights','Jefferson Park', 'Olympic Park',], 'Olympic')

In [163]:
# newton


la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Western Heights/Sugar Hill/West Adams Heights', 'View Heights'], 'Newton')

In [164]:
# hollywood 

la_population['AREA_NAME'] = la_population['AREA_NAME'].replace(['Hollywood', 'Melrose', 'Griffith Park/Los Feliz', 
                                                                 'King Estates','Larchmont Village', 'Cahuenga Pass', 'East Hollywood'], 'Hollywood')

In [165]:
# Van Nuys
la_population['AREA_NAME'] = la_population['AREA_NAME'].replace('Lake Balboa', 'Van Nuys')

In [166]:
# 77th street
# this will be dropped due to the road going through multiple divisions, 
# and the location of the crime reported is not the exact location due to confidentiality. 

In [167]:
la_population_unique = la_population['AREA_NAME'].unique()

la_population_unique

array(['Central', 'Southwest', 'West Valley', 'Northeast', 'Pacific',
       'West LA', 'Hollenbeck', 'Rampart', 'Hollywood', 'Wilshire',
       'Devonshire', 'Southeast', 'Foothill', 'Harbor', 'Olympic',
       'Van Nuys', 'Mission', 'N Hollywood', 'Topanga', 'Newton'],
      dtype=object)

In [168]:
print(len(la_population_unique))

20


In [169]:
matching_areas = la_crime_data[la_crime_data['AREA_NAME'].isin(la_population['AREA_NAME'])]


matching_areas

Unnamed: 0,DR_NO,DATE_RPTD,DATE_OCC,AREA,AREA_NAME,RPT_DIST_NO,CRM_CD,CRM_CD_DESC,MOCODES,VICT_AGE,VICT_SEX,VICT_DESCENT,PREMIS_CD,PREMIS_DESC,WEAPON_USED_CD,WEAPON_DESC,STATUS,STATUS_DESC,LOCATION
0,250504239,2025-03-13,2025-03-13,5,Harbor,516,510,VEHICLE - STOLEN,Unknown,X,X,X,108.0,PARKING LOT,Unknown / None Used,Unknown / None Used,IC,Invest Cont,1300 N FRIES AV
1,250504238,2025-03-13,2025-03-13,5,Harbor,569,440,THEFT PLAIN - PETTY ($950 & UNDER),Unknown,X,X,X,101.0,STREET,Unknown / None Used,Unknown / None Used,IC,Invest Cont,BERTH 73
2,250304275,2025-03-13,2025-03-13,3,Southwest,338,510,VEHICLE - STOLEN,Unknown,X,X,X,101.0,STREET,Unknown / None Used,Unknown / None Used,IC,Invest Cont,3100 MCCLINTOCK AV
3,251000503,2025-03-13,2025-03-13,10,West Valley,1035,624,BATTERY - SIMPLE ASSAULT,0913 1815 1251 0400 0416,16,M,H,721.0,HIGH SCHOOL,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,18200 KITTRIDGE ST
4,251700501,2025-03-13,2025-03-13,17,Devonshire,1717,930,CRIMINAL THREATS - NO WEAPON DISPLAYED,0340 0359 1822 1259,16,M,H,101.0,STREET,511.0,VERBAL THREAT,IC,Invest Cont,11200 GOTHIC AV
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127821,241304040,2024-01-01,2024-01-01,13,Newton,1385,626,INTIMATE PARTNER - SIMPLE ASSAULT,0400 0913 1813 2000,43,F,H,501.0,SINGLE FAMILY DWELLING,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,400 E 60TH ST
127822,240709471,2024-01-01,2024-01-01,7,Wilshire,721,815,SEXUAL PENETRATION W/FOREIGN OBJECT,1822 0515,69,F,W,701.0,HOSPITAL,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,8700 W BEVERLY BL
127823,241104022,2024-01-01,2024-01-01,11,Northeast,1184,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",1234 1822 0408 0429 0416 0444,43,M,H,207.0,BAR/COCKTAIL/NIGHTCLUB,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,1400 W SUNSET BL
127824,240504085,2024-01-01,2024-01-01,5,Harbor,569,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",0329 0311,X,X,X,506.0,ABANDONED BUILDING ABANDONED HOUSE,Unknown / None Used,Unknown / None Used,IC,Invest Cont,2500 SIGNAL ST


In [170]:
la_population['AREA_NAME'].unique()

array(['Central', 'Southwest', 'West Valley', 'Northeast', 'Pacific',
       'West LA', 'Hollenbeck', 'Rampart', 'Hollywood', 'Wilshire',
       'Devonshire', 'Southeast', 'Foothill', 'Harbor', 'Olympic',
       'Van Nuys', 'Mission', 'N Hollywood', 'Topanga', 'Newton'],
      dtype=object)

In [171]:
print(len(la_population['AREA_NAME'].unique()))

20


In [172]:
# Find the unique values in la_poulation that are not in la_crime_data
unique_in_la_population = la_population[~la_population['AREA_NAME'].isin(la_crime_data['AREA_NAME'])]

unique_in_la_population

Unnamed: 0,AREA_NAME,American Indian and Alaska Native,Asian,African American,Native Hawaiian and Other Pacific Islander,White,Some Other Race,Two or More Races,Hispanic or Latino,Total Population


In [173]:
unique_in_la_crime_data = la_crime_data[~la_crime_data['AREA_NAME'].isin(la_population['AREA_NAME'])]

unique_in_la_crime_data

Unnamed: 0,DR_NO,DATE_RPTD,DATE_OCC,AREA,AREA_NAME,RPT_DIST_NO,CRM_CD,CRM_CD_DESC,MOCODES,VICT_AGE,VICT_SEX,VICT_DESCENT,PREMIS_CD,PREMIS_DESC,WEAPON_USED_CD,WEAPON_DESC,STATUS,STATUS_DESC,LOCATION
83,251204190,2025-02-14,2025-02-14,12,77th Street,1239,930,CRIMINAL THREATS - NO WEAPON DISPLAYED,1258 1822 0305 0444 0319 0421 1251,13,M,B,124.0,BUS STOP,511.0,VERBAL THREAT,IC,Invest Cont,6000 S VERMONT AV
162,251204137,2025-01-31,2025-01-31,12,77th Street,1207,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",1258 0302 0216 0213 0202 0443 1251 1822,10,M,B,704.0,ELEMENTARY SCHOOL,104.0,SHOTGUN,IC,Invest Cont,800 W 51ST ST
210,251204103,2025-01-22,2025-01-22,12,77th Street,1259,510,VEHICLE - STOLEN,Unknown,X,X,X,101.0,STREET,Unknown / None Used,Unknown / None Used,IC,Invest Cont,7800 MCKINLEY AV
226,251204086,2025-01-16,2025-01-16,12,77th Street,1269,510,VEHICLE - STOLEN,Unknown,X,X,X,108.0,PARKING LOT,Unknown / None Used,Unknown / None Used,IC,Invest Cont,8500 TOWNE AV
253,251204039,2025-01-01,2025-01-01,12,77th Street,1258,946,OTHER MISCELLANEOUS CRIME,Unknown,X,X,X,726.0,POLICE FACILITY,Unknown / None Used,Unknown / None Used,IC,Invest Cont,7600 S BROADWAY
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127734,241204065,2024-01-01,2024-01-01,12,77th Street,1268,236,INTIMATE PARTNER - AGGRAVATED ASSAULT,0400 0416 2000 1814 0913,19,F,B,501.0,SINGLE FAMILY DWELLING,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",AA,Adult Arrest,100 W 82ND ST
127764,241204043,2024-01-01,2024-01-01,12,77th Street,1209,210,ROBBERY,0344 1822 0216 0443 0341 0370 1218,43,M,H,101.0,STREET,500.0,UNKNOWN WEAPON/OTHER WEAPON,IC,Invest Cont,FIGUEROA
127803,241204086,2024-01-01,2024-01-01,12,77th Street,1239,761,BRANDISH WEAPON,0334 0329 0361 0913 0421 0432 0445,56,M,H,501.0,SINGLE FAMILY DWELLING,310.0,CONCRETE BLOCK/BRICK,AA,Adult Arrest,600 W 61ST ST
127806,241204067,2024-01-01,2024-01-01,12,77th Street,1268,624,BATTERY - SIMPLE ASSAULT,0400 0416 0913 0561,41,F,B,501.0,SINGLE FAMILY DWELLING,400.0,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",IC,Invest Cont,100 W 82ND ST


In [174]:
la_crime_data = la_crime_data.drop(la_crime_data[la_crime_data['AREA_NAME'].str.lower() == '77th street'].index)

In [175]:
la_crime_data.shape

(121046, 19)

In [176]:
la_crime_data.columns

Index(['DR_NO', 'DATE_RPTD', 'DATE_OCC', 'AREA', 'AREA_NAME', 'RPT_DIST_NO',
       'CRM_CD', 'CRM_CD_DESC', 'MOCODES', 'VICT_AGE', 'VICT_SEX',
       'VICT_DESCENT', 'PREMIS_CD', 'PREMIS_DESC', 'WEAPON_USED_CD',
       'WEAPON_DESC', 'STATUS', 'STATUS_DESC', 'LOCATION'],
      dtype='object')

In [177]:
la_population.shape

(152, 10)

In [178]:
la_population.columns

Index(['AREA_NAME', 'American Indian and Alaska Native', 'Asian',
       'African American', 'Native Hawaiian and Other Pacific Islander',
       'White', 'Some Other Race', 'Two or More Races', 'Hispanic or Latino',
       'Total Population'],
      dtype='object')

In [179]:
print(la_population['AREA_NAME'].duplicated().sum()) 

132


In [180]:
la_population

Unnamed: 0,AREA_NAME,American Indian and Alaska Native,Asian,African American,Native Hawaiian and Other Pacific Islander,White,Some Other Race,Two or More Races,Hispanic or Latino,Total Population
0,Central,44777,463960,328445,5627,1092687,967895,607324,1822163,5332878
1,Southwest,129,394,783,63,663,2673,944,4783,10432
2,West Valley,452,4430,181,46,2958,12733,7266,25861,53927
3,Central,121,1919,2293,0,744,5048,1755,8195,20075
4,Central,68,32,1169,0,122,929,95,1508,3923
...,...,...,...,...,...,...,...,...,...,...
147,Wilshire,9,109,278,0,831,308,576,407,2518
148,Wilshire,0,243,717,0,937,459,263,672,3291
149,Central,0,1246,109,0,794,100,481,362,3092
150,West Valley,694,13114,2999,46,23594,20575,9824,31335,102181


In [181]:

la_population = la_population.groupby('AREA_NAME').sum().reset_index()

In [182]:
la_population.shape

(20, 10)

In [183]:
population_crime_df = pd.merge(la_crime_data, la_population, on="AREA_NAME", how="left")

In [184]:
display(population_crime_df)

Unnamed: 0,DR_NO,DATE_RPTD,DATE_OCC,AREA,AREA_NAME,RPT_DIST_NO,CRM_CD,CRM_CD_DESC,MOCODES,VICT_AGE,...,LOCATION,American Indian and Alaska Native,Asian,African American,Native Hawaiian and Other Pacific Islander,White,Some Other Race,Two or More Races,Hispanic or Latino,Total Population
0,250504239,2025-03-13,2025-03-13,5,Harbor,516,510,VEHICLE - STOLEN,Unknown,X,...,1300 N FRIES AV,1689,19777,14606,669,35988,42779,35622,123570,274700
1,250504238,2025-03-13,2025-03-13,5,Harbor,569,440,THEFT PLAIN - PETTY ($950 & UNDER),Unknown,X,...,BERTH 73,1689,19777,14606,669,35988,42779,35622,123570,274700
2,250304275,2025-03-13,2025-03-13,3,Southwest,338,510,VEHICLE - STOLEN,Unknown,X,...,3100 MCCLINTOCK AV,8949,13176,135099,755,38986,212898,103916,371954,885733
3,251000503,2025-03-13,2025-03-13,10,West Valley,1035,624,BATTERY - SIMPLE ASSAULT,0913 1815 1251 0400 0416,16,...,18200 KITTRIDGE ST,4482,68900,30270,729,285533,128171,103957,250663,872705
4,251700501,2025-03-13,2025-03-13,17,Devonshire,1717,930,CRIMINAL THREATS - NO WEAPON DISPLAYED,0340 0359 1822 1259,16,...,11200 GOTHIC AV,1938,41598,10827,321,80569,28667,26983,60021,250924
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121041,241304040,2024-01-01,2024-01-01,13,Newton,1385,626,INTIMATE PARTNER - SIMPLE ASSAULT,0400 0913 1813 2000,43,...,400 E 60TH ST,36,797,3271,0,673,1456,505,1710,8448
121042,240709471,2024-01-01,2024-01-01,7,Wilshire,721,815,SEXUAL PENETRATION W/FOREIGN OBJECT,1822 0515,69,...,8700 W BEVERLY BL,1669,32237,13757,117,34523,31395,16778,51830,182306
121043,241104022,2024-01-01,2024-01-01,11,Northeast,1184,230,"ASSAULT WITH DEADLY WEAPON, AGGRAVATED ASSAULT",1234 1822 0408 0429 0416 0444,43,...,1400 W SUNSET BL,3766,26109,2644,33,40412,36598,29174,73905,212641
121044,240504085,2024-01-01,2024-01-01,5,Harbor,569,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",0329 0311,X,...,2500 SIGNAL ST,1689,19777,14606,669,35988,42779,35622,123570,274700


#AREA_NAMES repeat due to the multiple neighborhoods within the same division. Need to leave them as be to count all of the instances a particular crime that is committed against a certain demographic within each division, and compare the amount and type of crimes that occured within the first 3 months of 2024 and 2025. Wanted to start the integration of my 2 datasets for the proposal and further analysis will be completed later on.

In [185]:
population_crime_df.shape

(121046, 28)

In [186]:
merged_dataframe = population_crime_df.to_csv('crime_population_merged.csv', index=False)