In [1]:
#Import dependencies
import pandas as pd
import os

In [2]:
#Read raw population date into dataframe
arrests_csv = os.path.join("Resources","arrests-by-state-2017.csv")
arrests_raw = pd.read_csv(arrests_csv)

In [3]:
arrests_raw.head()

Unnamed: 0,State,Age Range,Total\nall \nclasses1,Violent\ncrime2,Property\ncrime2,Murder and\nnonnegligent\nmanslaughter,Rape3,Robbery,Aggravated\nassault,Burglary,...,Driving\nunder the\ninfluence,Liquor\nlaws,Drunkenness4,Disorderly\nconduct,Vagrancy,All other\noffenses\n(except\ntraffic),Suspicion,Curfew\nand\nloitering\nlaw\nviolations,Number\nof\nagencies,2017\nestimated \npopulation
0,ALABAMA,Under 18,4707,365,1264,18,34,116,197,323,...,42,212,34,338,0,1053,0,0,,
1,ALABAMA,Total all ages,153285,6005,18366,317,344,1140,4204,3223,...,7194,2064,6165,2018,56,71853,0,0,223.0,3737635.0
2,ALASKA,Under 18,1570,164,459,1,26,20,117,117,...,23,80,10,14,0,172,0,0,,
3,ALASKA,Total all ages,29152,2374,3695,46,126,289,1913,623,...,3102,653,43,757,2,10737,0,0,32.0,736205.0
4,ARIZONA,Under 18,22613,1228,4528,30,52,450,696,761,...,198,1461,78,1424,57,3062,3,1126,,


In [4]:
arrests_raw["State"].unique()

array(['ALABAMA  ', 'ALASKA', 'ARIZONA', 'ARKANSAS', 'CALIFORNIA ',
       'COLORADO', 'CONNECTICUT', 'DELAWARE', 'DISTRICT OF COLUMBIA',
       'FLORIDA', 'GEORGIA', 'HAWAII', 'IDAHO', 'ILLINOIS6', 'INDIANA',
       'IOWA', 'KANSAS', 'KENTUCKY', 'LOUISIANA', 'MAINE', 'MARYLAND',
       'MASSACHUSETTS', 'MICHIGAN', 'MINNESOTA', 'MISSISSIPPI',
       'MISSOURI', 'MONTANA', 'NEBRASKA', 'NEVADA', 'NEW HAMPSHIRE',
       'NEW JERSEY', 'NEW MEXICO', 'NEW YORK', 'NORTH CAROLINA',
       'NORTH DAKOTA', 'OHIO', 'OKLAHOMA', 'OREGON', 'PENNSYLVANIA',
       'RHODE ISLAND', 'SOUTH CAROLINA', 'SOUTH DAKOTA', 'TENNESSEE',
       'TEXAS', 'UTAH', 'VERMONT', 'VIRGINIA', 'WASHINGTON',
       'WEST VIRGINIA', 'WISCONSIN', 'WYOMING'], dtype=object)

In [5]:
#Choose and rename columns to include in dataframe
arrests_2017 = arrests_raw[["State", "Age Range", "Drug \nabuse\nviolations"]]
arrests_2017 = arrests_2017.rename(columns={"Drug \nabuse\nviolations":"Drug Violations"})
arrests_2017["State"] = arrests_2017["State"].replace({"ILLINOIS6": "ILLINOIS","ALABAMA  ":"ALABAMA", "CALIFORNIA ":"CALIFORNIA"})

In [6]:
#Groupby State Name and create state arrests dataframe
state_drug_arrests_group = arrests_2017.groupby(["State"])["Drug Violations"].sum()

state_drug_arrests = pd.DataFrame({"Drug Violations": state_drug_arrests_group})
state_drug_arrests.reset_index()


Unnamed: 0,State,Drug Violations
0,ALABAMA,11463
1,ALASKA,1184
2,ARIZONA,36326
3,ARKANSAS,18391
4,CALIFORNIA,215806
5,COLORADO,19241
6,CONNECTICUT,9759
7,DELAWARE,4350
8,DISTRICT OF COLUMBIA,254
9,FLORIDA,129984


In [7]:
#Check total drug arrest count
total_drug_arrests = state_drug_arrests.sum(axis = 0, skipna = True) 
total_drug_arrests

Drug Violations    1479595
dtype: int64

In [8]:
# Group by regions
region1_states = (["CONNECTICUT", "MAINE", "MASSACHUSETTS", "NEW HAMPSHIRE", 
                   "NEW JERSEY", "NEW YORK", "PENNSYLVANIA", "RHODE ISLAND", "VERMONT"])
region_1 = []
for a in region1_states:
    region_1.append(state_drug_arrests.loc[a, "Drug Violations"])

print(region_1)

[9759, 3759, 10006, 8163, 65524, 73248, 66830, 1903, 1186]


In [9]:
region2_states = (["ILLINOIS", "INDIANA", "IOWA", "KANSAS", "MICHIGAN", "MINNESOTA", 
                   "MISSOURI", "NEBRASKA", "NORTH DAKOTA", "OHIO", "SOUTH DAKOTA"])
region_2 = []
for b in region2_states:
    region_2.append(state_drug_arrests.loc[b, "Drug Violations"])
print(region_2)

[11760, 27755, 10547, 10295, 35216, 21174, 42245, 930, 6122, 38995, 9879]


In [11]:
region3_states = (["ALABAMA","ARKANSAS", "DELAWARE", "DISTRICT OF COLUMBIA", "FLORIDA",
                   "GEORGIA", "KENTUCKY", "LOUISIANA", "MARYLAND", "MISSISSIPPI",
                   "NORTH CAROLINA", "OKLAHOMA", "SOUTH CAROLINA", "TENNESSEE", "TEXAS"])
region_3 = []
for c in region3_states:
    region_3.append(state_drug_arrests.loc[c, "Drug Violations"])
print(region_3)

[11463, 18391, 4350, 254, 129984, 42735, 26930, 27131, 30940, 10553, 27421, 21304, 34356, 50417, 146793]


In [12]:
region4_states = (["ALASKA", "ARIZONA", "CALIFORNIA", "COLORADO", "HAWAII", "IDAHO",
                   "MONTANA", "NEVADA", "NEW MEXICO", "OREGON", "UTAH"])
region_4 = []
for d in region4_states:
    region_4.append(state_drug_arrests.loc[d, "Drug Violations"])
print(region_4)

[1184, 36326, 215806, 19241, 3160, 9401, 3257, 9664, 2476, 17478, 19252]


In [18]:
region_1_total = sum(region_1)
region_2_total = sum(region_2)
region_3_total = sum(region_3)
region_4_total = sum(region_4)

region_drug_data = {"Region": [1,2,3,4],
                    "2017 Arrests": [region_1_total,region_2_total,region_3_total,region_4_total]}
region_drug_arrests = pd.DataFrame(region_drug_data)
region_drug_arrests.head()

Unnamed: 0,Region,2017 Arrests
0,1,240378
1,2,214918
2,3,583022
3,4,337245


In [20]:
#Save State Drug Arrests as CSV
state_drug_arrests.to_csv(r'Resources/drug-arrests_state.csv')

In [21]:
#Save Region Drug Arrests as CSV
region_drug_arrests.to_csv(r'Resources/drug-arrests_region.csv')