In [6]:
# Import Dependencies
import pandas as pd
# Let pandas show up to 100 rows before it switches to a truncated view
pd.options.display.max_rows = 100

In [10]:
# Inspect the display.max_rows option. describe_option prints the option's
# description followed by its default and current values, so the current
# setting is visible without a separate get_option call (a bare get_option
# expression that is not the last line of the cell is never displayed).
# The full option name is used so that only this one option is described.
pd.describe_option("display.max_rows")

display.max_rows : int
    If max_rows is exceeded, switch to truncate view. Depending on
    `large_repr`, objects are either centrally truncated or printed as
    a summary view. 'None' value means unlimited.

    In case python/IPython is running in a terminal and `large_repr`
    equals 'truncate' this can be set to 0 and pandas will auto-detect
    the height of the terminal and print a truncated object which fits
    the screen height. The IPython notebook, IPython qtconsole, or
    IDLE do not run in a terminal and hence it is not possible to do
    correct auto-detection.
    [default: 60] [currently: 100]


In [8]:
# Relative path to the crime incident CSV file
crime_csv_path = "Resources/crime_incident_data2017.csv"

# Read the CSV into a DataFrame, then show it as the cell's output
crime_df = pd.read_csv(filepath_or_buffer=crime_csv_path)
crime_df

Unnamed: 0,Address,Case Number,Crime Against,Neighborhood,Number of Records,Occur Date,Occur Month Year,Occur Time,Offense Category,Offense Count,Offense Type,Open Data Lat,Open Data Lon,Open Data X,Open Data Y,Report Date,Report Month Year
0,,17-X4762181,Person,,1,1/1/96,1/1/96,800,Sex Offenses,1,Rape,,,,,1/26/17,1/1/17
1,,17-X4757824,Property,Centennial,1,1/20/00,1/1/00,1615,Fraud Offenses,1,Identity Theft,,,,,1/20/17,1/1/17
2,200 BLOCK OF SE 78TH AVE,17-900367,Property,Montavilla,1,12/1/03,12/1/03,800,Fraud Offenses,1,False Pretenses/Swindle/Confidence Game,45.5207,-122.583,7668150.0,682825.0,1/9/17,1/1/17
3,,17-X4748982,Property,Southwest Hills,1,1/1/10,1/1/10,0,Fraud Offenses,1,Identity Theft,,,,,1/5/17,1/1/17
4,,17-X4748982,Property,Southwest Hills,1,1/1/10,1/1/10,0,Larceny Offenses,1,All Other Larceny,,,,,1/5/17,1/1/17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41027,8800 BLOCK OF NE SANDY BLVD,17-285386,Society,Sumner,1,8/31/17,8/1/17,535,Drug/Narcotic Offenses,1,Drug/Narcotic Violations,45.5554,-122.571,7671560.0,695399.0,8/31/17,8/1/17
41028,9700 BLOCK OF SE STARK ST,17-286082,Society,Hazelwood,1,8/31/17,8/1/17,1619,Prostitution Offenses,1,Prostitution,45.5191,-122.563,7673292.0,682101.0,8/31/17,8/1/17
41029,9700 BLOCK OF SE STARK ST,17-286413,Society,Hazelwood,1,8/31/17,8/1/17,1959,Prostitution Offenses,1,Prostitution,45.5191,-122.563,7673292.0,682101.0,8/31/17,8/1/17
41030,UNKNOWN ADDRESS,17-286445,Society,,1,8/31/17,8/1/17,2021,Prostitution Offenses,1,Prostitution,,,,,8/31/17,8/1/17


In [11]:
# Count the non-missing entries in each column; columns whose total is lower
# than the others contain missing values
crime_df.notna().sum()

Address              37365
Case Number          41032
Crime Against        41032
Neighborhood         39712
Number of Records    41032
Occur Date           41032
Occur Month Year     41032
Occur Time           41032
Offense Category     41032
Offense Count        41032
Offense Type         41032
Open Data Lat        36712
Open Data Lon        36712
Open Data X          36712
Open Data Y          36712
Report Date          41032
Report Month Year    41032
dtype: int64

In [12]:
# Keep only the rows that have a value in every column
no_null_crime_df = crime_df.dropna(axis="index", how="any")

In [13]:
# Confirm the drop worked: every column should now report the same count
no_null_crime_df.count(axis="index")

Address              36146
Case Number          36146
Crime Against        36146
Neighborhood         36146
Number of Records    36146
Occur Date           36146
Occur Month Year     36146
Occur Time           36146
Offense Category     36146
Offense Count        36146
Offense Type         36146
Open Data Lat        36146
Open Data Lon        36146
Open Data X          36146
Open Data Y          36146
Report Date          36146
Report Month Year    36146
dtype: int64

In [15]:
# Tally each distinct "Offense Type" value to spot misspelled or
# near-duplicate labels
no_null_crime_df.loc[:, "Offense Type"].value_counts()

Theft From Motor Vehicle                       6947
Motor Vehicle Theft                            4689
All Other Larceny                              4558
Vandalism                                      3863
Burglary                                       2824
Shoplifting                                    2259
Identity Theft                                 1794
Simple Assault                                 1216
Drug/Narcotic Violations                       1095
Theft of Motor Vehicle Parts or Accessories    1073
Intimidation                                    900
Theft From Building                             895
False Pretenses/Swindle/Confidence Game         870
Aggravated Assault                              839
Robbery                                         608
Counterfeiting/Forgery                          448
Weapons Law Violations                          266
Credit Card/ATM Fraud                           226
Arson                                           200
Prostitution

In [16]:
# Combine similar offenses under a single "Prostitution" label.
# The mapping is nested under the "Offense Type" key so that only that column
# is searched; a flat mapping would be applied to every column of the frame.
no_null_crime_df = no_null_crime_df.replace(
    {
        "Offense Type": {
            "Commercial Sex Acts": "Prostitution",
            "Assisting or Promoting Prostitution": "Prostitution",
        }
    }
)
no_null_crime_df

Unnamed: 0,Address,Case Number,Crime Against,Neighborhood,Number of Records,Occur Date,Occur Month Year,Occur Time,Offense Category,Offense Count,Offense Type,Open Data Lat,Open Data Lon,Open Data X,Open Data Y,Report Date,Report Month Year
2,200 BLOCK OF SE 78TH AVE,17-900367,Property,Montavilla,1,12/1/03,12/1/03,800,Fraud Offenses,1,False Pretenses/Swindle/Confidence Game,45.5207,-122.583,7668150.0,682825.0,1/9/17,1/1/17
5,5400 BLOCK OF NE MALLORY AVE,17-900129,Property,King,1,11/28/10,11/1/10,1612,Fraud Offenses,1,Identity Theft,45.5625,-122.664,7647987.0,698581.0,1/3/17,1/1/17
6,5000 BLOCK OF NE 19TH AVE,17-901079,Property,Vernon,1,11/8/13,11/1/13,1200,Fraud Offenses,1,False Pretenses/Swindle/Confidence Game,45.5594,-122.646,7652567.0,697337.0,1/26/17,1/1/17
7,5000 BLOCK OF NE 19TH AVE,17-901079,Property,Vernon,1,11/8/13,11/1/13,1200,Fraud Offenses,1,Identity Theft,45.5594,-122.646,7652567.0,697337.0,1/26/17,1/1/17
8,12000 BLOCK OF SE PINE ST,17-900253,Property,Hazelwood,1,1/6/14,1/1/14,805,Fraud Offenses,1,Credit Card/ATM Fraud,45.5204,-122.539,7679522.0,682404.0,1/6/17,1/1/17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41026,10200 BLOCK OF NE SANDY BLVD,17-286419,Society,Parkrose,1,8/31/17,8/1/17,2005,Drug/Narcotic Offenses,1,Drug/Narcotic Violations,45.5591,-122.557,7675224.0,696649.0,8/31/17,8/1/17
41027,8800 BLOCK OF NE SANDY BLVD,17-285386,Society,Sumner,1,8/31/17,8/1/17,535,Drug/Narcotic Offenses,1,Drug/Narcotic Violations,45.5554,-122.571,7671560.0,695399.0,8/31/17,8/1/17
41028,9700 BLOCK OF SE STARK ST,17-286082,Society,Hazelwood,1,8/31/17,8/1/17,1619,Prostitution Offenses,1,Prostitution,45.5191,-122.563,7673292.0,682101.0,8/31/17,8/1/17
41029,9700 BLOCK OF SE STARK ST,17-286413,Society,Hazelwood,1,8/31/17,8/1/17,1959,Prostitution Offenses,1,Prostitution,45.5191,-122.563,7673292.0,682101.0,8/31/17,8/1/17


In [17]:
# Re-tally "Offense Type" to confirm the similar offenses were combined
no_null_crime_df["Offense Type"].value_counts(sort=True, ascending=False)

Theft From Motor Vehicle                       6947
Motor Vehicle Theft                            4689
All Other Larceny                              4558
Vandalism                                      3863
Burglary                                       2824
Shoplifting                                    2259
Identity Theft                                 1794
Simple Assault                                 1216
Drug/Narcotic Violations                       1095
Theft of Motor Vehicle Parts or Accessories    1073
Intimidation                                    900
Theft From Building                             895
False Pretenses/Swindle/Confidence Game         870
Aggravated Assault                              839
Robbery                                         608
Counterfeiting/Forgery                          448
Weapons Law Violations                          266
Credit Card/ATM Fraud                           226
Arson                                           200
Prostitution

In [18]:
# Build a DataFrame holding only the rows whose "Neighborhood" is "Vernon"
vernon_crime_df = no_null_crime_df.loc[
    lambda frame: frame["Neighborhood"].eq("Vernon")
]
vernon_crime_df

Unnamed: 0,Address,Case Number,Crime Against,Neighborhood,Number of Records,Occur Date,Occur Month Year,Occur Time,Offense Category,Offense Count,Offense Type,Open Data Lat,Open Data Lon,Open Data X,Open Data Y,Report Date,Report Month Year
6,5000 BLOCK OF NE 19TH AVE,17-901079,Property,Vernon,1,11/8/13,11/1/13,1200,Fraud Offenses,1,False Pretenses/Swindle/Confidence Game,45.5594,-122.646,7652567.0,697337.0,1/26/17,1/1/17
7,5000 BLOCK OF NE 19TH AVE,17-901079,Property,Vernon,1,11/8/13,11/1/13,1200,Fraud Offenses,1,Identity Theft,45.5594,-122.646,7652567.0,697337.0,1/26/17,1/1/17
147,1000 BLOCK OF NE EMERSON ST,17-901190,Property,Vernon,1,11/26/16,11/1/16,2040,Fraud Offenses,1,Identity Theft,45.5619,-122.655,7650320.0,698297.0,1/29/17,1/1/17
148,1000 BLOCK OF NE EMERSON ST,17-901190,Property,Vernon,1,11/26/16,11/1/16,2040,Larceny Offenses,1,All Other Larceny,45.5619,-122.655,7650320.0,698297.0,1/29/17,1/1/17
271,5300 BLOCK OF NE 14TH PL,17-2593,Property,Vernon,1,12/19/16,12/1/16,900,Larceny Offenses,1,All Other Larceny,45.5618,-122.651,7651314.0,698264.0,1/3/17,1/1/17
572,5400 BLOCK OF NE 13TH AVE,17-900012,Property,Vernon,1,1/1/17,1/1/17,725,Larceny Offenses,1,Theft From Motor Vehicle,45.5625,-122.652,7650993.0,698515.0,1/1/17,1/1/17
742,5700 BLOCK OF NE 12TH AVE,17-2395,Property,Vernon,1,1/2/17,1/1/17,2000,Vandalism,1,Vandalism,45.5643,-122.653,7650716.0,699162.0,1/3/17,1/1/17
862,5100 BLOCK OF NE 17TH AVE,17-900165,Property,Vernon,1,1/3/17,1/1/17,2230,Larceny Offenses,1,Theft From Motor Vehicle,45.56,-122.648,7652075.0,697582.0,1/4/17,1/1/17
1243,1800 BLOCK OF NE ALBERTA ST,17-900302,Property,Vernon,1,1/6/17,1/1/17,1300,Larceny Offenses,1,Theft From Building,45.559,-122.646,7652422.0,697213.0,1/7/17,1/1/17
1531,1500 BLOCK OF NE KILLINGSWORTH ST,17-8822,Property,Vernon,1,1/9/17,1/1/17,2345,Larceny Offenses,1,Shoplifting,45.5627,-122.649,7651670.0,698560.0,1/9/17,1/1/17
