In [53]:
# Dependencies and Setup
import pandas as pd
import numpy as np
import requests
from pprint import pprint
from statistics import mean
from sodapy import Socrata
import os

# Import API key
from api_keys import LA_API_key, app_token, password

# Define endpoint, API key, headers, and parameters

token = app_token

client = Socrata("data.lacity.org", token, username=username, password = password)

# Set limit to the number of rows to get all data

results = client.get("63jg-8b9z", limit = 2049807)

# Display results

results[:5]


[{'dr_no': '001307355',
  'date_rptd': '2010-02-20T00:00:00.000',
  'date_occ': '2010-02-20T00:00:00.000',
  'time_occ': '1350',
  'area': '13',
  'area_name': 'Newton',
  'rpt_dist_no': '1385',
  'part_1_2': '2',
  'crm_cd': '900',
  'crm_cd_desc': 'VIOLATION OF COURT ORDER',
  'mocodes': '0913 1814 2000',
  'vict_age': '48',
  'vict_sex': 'M',
  'vict_descent': 'H',
  'premis_cd': '501',
  'premis_desc': 'SINGLE FAMILY DWELLING',
  'status': 'AA',
  'status_desc': 'Adult Arrest',
  'crm_cd_1': '900',
  'location': '300 E  GAGE                         AV',
  'lat': '33.9825',
  'lon': '-118.2695'},
 {'dr_no': '011401303',
  'date_rptd': '2010-09-13T00:00:00.000',
  'date_occ': '2010-09-12T00:00:00.000',
  'time_occ': '0045',
  'area': '14',
  'area_name': 'Pacific',
  'rpt_dist_no': '1485',
  'part_1_2': '2',
  'crm_cd': '740',
  'crm_cd_desc': 'VANDALISM - FELONY ($400 & OVER, ALL CHURCH VANDALISMS)',
  'mocodes': '0329',
  'vict_age': '0',
  'vict_sex': 'M',
  'vict_descent': 'W',
 

In [54]:
# Check to see if all rows were collected

len(results)

2049807

In [55]:
# Convert to Data Frame

crime_df = pd.DataFrame(results)

# Display Data Frame

crime_df.head()

Unnamed: 0,area,area_name,crm_cd,crm_cd_1,crm_cd_2,crm_cd_3,crm_cd_4,crm_cd_desc,cross_street,date_occ,...,premis_desc,rpt_dist_no,status,status_desc,time_occ,vict_age,vict_descent,vict_sex,weapon_desc,weapon_used_cd
0,13,Newton,900,900,,,,VIOLATION OF COURT ORDER,,2010-02-20T00:00:00.000,...,SINGLE FAMILY DWELLING,1385,AA,Adult Arrest,1350,48,H,M,,
1,14,Pacific,740,740,,,,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",MANCHESTER AV,2010-09-12T00:00:00.000,...,STREET,1485,IC,Invest Cont,45,0,W,M,,
2,13,Newton,946,946,,,,OTHER MISCELLANEOUS CRIME,,2010-08-09T00:00:00.000,...,ALLEY,1324,IC,Invest Cont,1515,0,H,M,,
3,6,Hollywood,900,900,998.0,,,VIOLATION OF COURT ORDER,HOLLYWOOD BL,2010-01-05T00:00:00.000,...,STREET,646,IC,Invest Cont,150,47,W,F,HAND GUN,102.0
4,1,Central,122,122,,,,"RAPE, ATTEMPTED",SAN PEDRO ST,2010-01-02T00:00:00.000,...,ALLEY,176,IC,Invest Cont,2100,47,H,F,"STRONG-ARM (HANDS, FIST, FEET OR BODILY FORCE)",400.0


In [60]:
# Drop unnecessary columns

df = crime_df.drop(columns = ["mocodes", "part_1_2", "area", "crm_cd", "premis_cd", "crm_cd_1", "crm_cd_2", "crm_cd_3", "crm_cd_4", "status", "vict_age", "vict_descent", "vict_sex", "weapon_desc", "weapon_used_cd", "cross_street"])

# Display Data Frame

df.head()

Unnamed: 0,area_name,crm_cd_desc,date_occ,date_rptd,dr_no,lat,location,lon,premis_desc,rpt_dist_no,status_desc,time_occ
0,Newton,VIOLATION OF COURT ORDER,2010-02-20T00:00:00.000,2010-02-20T00:00:00.000,1307355,33.9825,300 E GAGE AV,-118.2695,SINGLE FAMILY DWELLING,1385,Adult Arrest,1350
1,Pacific,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",2010-09-12T00:00:00.000,2010-09-13T00:00:00.000,11401303,33.9599,SEPULVEDA BL,-118.3962,STREET,1485,Invest Cont,45
2,Newton,OTHER MISCELLANEOUS CRIME,2010-08-09T00:00:00.000,2010-08-09T00:00:00.000,70309629,34.0224,1300 E 21ST ST,-118.2524,ALLEY,1324,Invest Cont,1515
3,Hollywood,VIOLATION OF COURT ORDER,2010-01-05T00:00:00.000,2010-01-05T00:00:00.000,90631215,34.1016,CAHUENGA BL,-118.3295,STREET,646,Invest Cont,150
4,Central,"RAPE, ATTEMPTED",2010-01-02T00:00:00.000,2010-01-03T00:00:00.000,100100501,34.0387,8TH ST,-118.2488,ALLEY,176,Invest Cont,2100


In [61]:
# Get rid of NA data

crime_clean_df = df.dropna()

In [64]:
# Create new column "Year" that has the year only

crime_2018_df["Year"] = crime_clean_df["date_occ"].str[0:4]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [65]:
# Filter Data Frame for 2018 data

la_crime_data_2018_df = crime_2018_df[crime_2018_df["Year"]=="2018"]

# Display Data Frame

la_crime_data_2018_df.head()

Unnamed: 0,area_name,crm_cd_desc,date_occ,date_rptd,dr_no,lat,location,lon,premis_desc,rpt_dist_no,status_desc,time_occ,Year
1615293,Wilshire,"THEFT, PERSON",2018-07-30T00:00:00.000,2018-07-30T00:00:00.000,180714273,34.0653,5TH,-118.3273,SIDEWALK,739,Invest Cont,1840,2018
1616315,Pacific,BIKE - STOLEN,2018-11-15T00:00:00.000,2018-11-15T00:00:00.000,181424227,33.9884,ABBOT KINNEY,-118.4532,STREET,1443,Invest Cont,719,2018
1616625,Hollywood,VEHICLE - STOLEN,2018-06-22T00:00:00.000,2018-07-02T00:00:00.000,180615514,34.0896,WESTERN,-118.3092,STREET,678,Invest Cont,930,2018
1616708,Olympic,VEHICLE - STOLEN,2018-12-18T00:00:00.000,2018-12-19T00:00:00.000,182022256,34.0509,1100 S BERENDO ST,-118.2937,STREET,2056,Invest Cont,1900,2018
1616737,Mission,VEHICLE - STOLEN,2018-03-18T00:00:00.000,2018-03-19T00:00:00.000,181907969,34.2355,NORDHOFF,-118.4569,STREET,1974,Invest Cont,2100,2018


In [67]:
# Define path for CSV file to be saved

path = r"C:/Users/Carolyn/Desktop/projectOne/main/resources"

# Save Data Frame as CSV file

la_crime_data_2018_df.to_csv(os.path.join(path,r'la_crime_data_2018_df.csv'))

In [68]:
# Check that all rows have the same count

la_crime_data_2018_df.count()

area_name      227321
crm_cd_desc    227321
date_occ       227321
date_rptd      227321
dr_no          227321
lat            227321
location       227321
lon            227321
premis_desc    227321
rpt_dist_no    227321
status_desc    227321
time_occ       227321
Year           227321
dtype: int64