# Import Dependencies

In [2]:
import pandas as pd
from sodapy import Socrata
import os
import re
import requests
import json
import folium
from folium import plugins
from folium.plugins import HeatMap
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud, STOPWORDS
import numpy as np
from scipy import stats
import statsmodels.api as sm
import credentials

pd.set_option("display.max_columns", None)

# Datasets

## Residential Permit Dataset

In [None]:
#authenticate client
client = Socrata('data.montgomerycountymd.gov',credentials.data_moco_token,
                 credentials.data_moco_username,credentials.data_moco_password)

#API Call for all residential permits
Res_permits = client.get_all("m88u-pqki")

#convert response to dataframe
Res_permits_df = pd.DataFrame.from_records(Res_permits)

In [None]:
Res_permits_df.columns

In [None]:
len(set(Res_permits_df.city))

## Clean Residential Permit Dataset

In [None]:
filtered_Res_permits_df['addeddate'] = pd.to_datetime(filtered_Res_permits_df['addeddate'])
filtered_Res_permits_df['issueddate'] = pd.to_datetime(filtered_Res_permits_df['issueddate'])
filtered_Res_permits_df['finaleddate'] = pd.to_datetime(filtered_Res_permits_df['finaleddate'])

## Crime Dataset

In [None]:
#authenticate client
client = Socrata('data.montgomerycountymd.gov',credentials.data_moco_token,
                 credentials.data_moco_username,credentials.data_moco_password)

#API Call for all reported crimes
reported_crime = client.get_all("icn6-v9z3")

#convert response to dataframe
crime_df = pd.DataFrame.from_records(reported_crime)

In [None]:
crime_df.head()

## Clean Crime Dataset

In [None]:
#change column type from string to datetime
crime_df['date'] = pd.to_datetime(crime_df['date'])

#drop duplicate cases
crime_df_2 = crime_df.drop(['geolocation'],axis='columns', inplace = False)
crime_df_no_dups = crime_df_2.drop_duplicates(inplace=False)

#drop undesired columns
trimmed_crime_df = crime_df_no_dups.drop([':@computed_region_tx5f_5em3', ':@computed_region_kbsp_ykn9',
       ':@computed_region_d7bw_bq6x', ':@computed_region_rbt8_3x7n',
       ':@computed_region_a9cs_3ed7', ':@computed_region_r648_kzwt',
       ':@computed_region_d9ke_fpxt', ':@computed_region_vu5j_pcmz', 'sector',
       'beat', 'pra', 'agency', 'case_number', 'incident_id', 'nibrs_code',
                                 'offence_code','start_date','end_date','police_district_number'
                                         ],axis='columns', inplace = False)
#create clean dataframe
clean_crime_df = trimmed_crime_df.copy()

In [None]:
clean_crime_df.head()

## Food Inspection Dataset

In [None]:
#authenticate client
client = Socrata('data.montgomerycountymd.gov',credentials.data_moco_token,
                 credentials.data_moco_username,credentials.data_moco_password)

#API Call for all food inspections
food_inspection = client.get_all("5pue-gfbe")

#convert response to dataframe
food_df = pd.DataFrame.from_records(food_inspection)

In [None]:
food_df.head()

## Clean Food Inspection Dataset

In [None]:
#drop undesired columns
trimmed_food_df = food_df.drop([ ':@computed_region_tx5f_5em3', ':@computed_region_kbsp_ykn9',
       ':@computed_region_d7bw_bq6x', ':@computed_region_rbt8_3x7n',
       ':@computed_region_d9ke_fpxt',':@computed_region_vu5j_pcmz','violation1', 'violation2', 'violation3',
       'violation4', 'violation5', 'violation6a', 'violation6b', 'violation7a',
       'violation7b', 'violation8', 'violation9', 'violation20', 'violation22',
       'violationmenu', 'violationtransfat', 'violationsmoking','inspectiontype',
                                'inspectionresults','inspectiondate', 'location'],axis='columns', inplace = False)

#list for establishment categories of interest
store_categories = ['Restaurant','Market', 'Farmers Market', 'Carry Out']

#filter dataframe for desired location categories and create new dataframe
filtered_food_df = trimmed_food_df.loc[trimmed_food_df.category.isin(store_categories)]

#drop duplicate stores to get a list of unique establishments
clean_food_df = filtered_food_df.drop_duplicates(inplace=False)

In [None]:
clean_food_df.head()

## SDAT Dataset

In [None]:
#create a list of desired fields
fields = ['premise_address_number_mdp_field_premsnum_sdat_field_20','premise_address_name_mdp_field_premsnam_sdat_field_23',
          'premise_address_type_mdp_field_premstyp_sdat_field_24','premise_address_city_mdp_field_premcity_sdat_field_25',
          'premise_address_zip_code_mdp_field_premzip_sdat_field_26',
          'sales_segment_1_transfer_date_yyyy_mm_dd_mdp_field_tradate_sdat_field_89',
          'sales_segment_1_consideration_mdp_field_considr1_sdat_field_90',
          'land_use_code_mdp_field_lu_desclu_sdat_field_50']

#convert list of desired fields into a string to be set as an arguement in API Call
select_statement = ','.join(fields)

#authenticate client 
client = Socrata("opendata.maryland.gov",credentials.data_moco_token,
                 credentials.data_moco_username,credentials.data_moco_password)

#API Call for all properties in Montgomery County
sdat_results = client.get_all("ed4q-f8tm", where='county_name_mdp_field_cntyname = "Montgomery County"',
                               select = select_statement)
                            

# Convert to pandas DataFrame
sdat_df = pd.DataFrame.from_records(sdat_results)

## Clean SDAT Data

In [None]:
#Rename columns
sdat_df= sdat_df.rename(columns={"premise_address_number_mdp_field_premsnum_sdat_field_20":"stno",
                     'premise_address_name_mdp_field_premsnam_sdat_field_23':"stname",
                     'premise_address_type_mdp_field_premstyp_sdat_field_24':"suffix",
                     'premise_address_city_mdp_field_premcity_sdat_field_25': 'city', 
                     'premise_address_zip_code_mdp_field_premzip_sdat_field_26': 'zip',
                     'sales_segment_1_transfer_date_yyyy_mm_dd_mdp_field_tradate_sdat_field_89': 'sales_date',
                     'sales_segment_1_consideration_mdp_field_considr1_sdat_field_90': 'sales_price',
                     'land_use_code_mdp_field_lu_desclu_sdat_field_50': 'land_use_code'})

#drop duplicates
sdat_df_no_dups = sdat_df.drop_duplicates(inplace=False)

#change date from '0000.00.00' to '1800.01.01' for processing purposes 
sdat_df_adjust = sdat_df_no_dups.replace('0000.00.00','1800.01.01')

#create dict to fill null values in sales price column
values = {"sales_price": 0}

#fill null values in sales price column
sdat_df_adjust_2 = sdat_df_adjust.fillna(value=values, inplace=False)

#convert sales date from a string to datetime format
sdat_df_adjust_2['sales_date'] = pd.to_datetime(sdat_df_adjust_2['sales_date'])

#convert street number from string to interger to remove zeros
sdat_df_adjust_2['sales_price'] = sdat_df_adjust_2['sales_price'].astype('float')

#convert street number from string to interger to remove zeros
sdat_df_adjust_2['stno'] = sdat_df_adjust_2['stno'].astype('int')

#convert street number from interger to string after leading zeros were removed
sdat_df_adjust_2['stno'] = sdat_df_adjust_2['stno'].astype('str')

#filtered_sdat_df = filtered_sdat_df.loc[filtered_sdat_df.stno !='0']

#filter for properties sold after 1/1/2016
filtered_sdat_df = sdat_df_adjust_2.loc[sdat_df_adjust_2['sales_date'] >= datetime.datetime(2016, 1, 1)]

#filter for only residential properties
filtered_sdat_df_2 = filtered_sdat_df.loc[filtered_sdat_df.sales_price > 0]

#filter for only residential properties
residential_sdat_df = filtered_sdat_df_2.loc[filtered_sdat_df.land_use_code =='Residential (R)']

In [None]:
residential_sdat_df.head()

## Great Schools Dataset

## Census Dataset

In [3]:
#ACS5 median income by state and place
url = f"https://api.census.gov/data/2020/acs/acs5?get=NAME,B19013_001E&for=place:*&in=state:24&key={credentials.census_key}"

#API Call 
response = requests.request("GET", url)

#convert json to list
result = json.loads(response.text)

#create dataframe from list of results
df = pd.DataFrame(result, columns =['place_name', 'median_income', 'state', 'place_code'])
income_df = df.iloc [1:]

## Clean Census Dataset

In [4]:
def clean_place_name(dataframe):
    output = [] 
    for i,row in dataframe.iterrows():
        clean_place= re.sub('\sCDP', '',  row.place_name)
        output.append(clean_place)
    return output

In [5]:
income_df['clean_place_name'] = clean_place_name(income_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  income_df['clean_place_name'] = clean_place_name(income_df)


In [6]:
income_df

Unnamed: 0,place_name,median_income,state,place_code,clean_place_name
1,"Rising Sun town, Maryland",72021,24,66275,"Rising Sun town, Maryland"
2,"Riva CDP, Maryland",126792,24,66400,"Riva, Maryland"
3,"Riverdale Park town, Maryland",84695,24,66635,"Riverdale Park town, Maryland"
4,"Riverside CDP, Maryland",79620,24,66762,"Riverside, Maryland"
5,"Riviera Beach CDP, Maryland",94773,24,66850,"Riviera Beach, Maryland"
...,...,...,...,...,...
532,"Rawlings CDP, Maryland",-666666666,24,65150,"Rawlings, Maryland"
533,"Redland CDP, Maryland",123005,24,65312,"Redland, Maryland"
534,"Reid CDP, Maryland",-666666666,24,65550,"Reid, Maryland"
535,"Reisterstown CDP, Maryland",71851,24,65600,"Reisterstown, Maryland"


In [None]:
t = income_df.loc[income_df.place_name == 'Fairland CDP, Maryland']

for i,row in t.iterrows():
    #pattern = re.compile('^Maryland')
    clean_address = re.sub('\sCDP,\sMaryland', '',  row.place_name)
    print(clean_address)

In [None]:
from credentials import google_key

In [9]:
address = 'Ridgely town, Maryland'
api_response = requests.get('https://maps.googleapis.com/maps/api/geocode/json?address={0}&key={1}'.format(address, credentials.google_key))
api_response_dict = api_response.json()

In [8]:
api_response_dict

{'results': [{'address_components': [{'long_name': 'Reisterstown',
     'short_name': 'Reisterstown',
     'types': ['locality', 'political']},
    {'long_name': 'Baltimore County',
     'short_name': 'Baltimore County',
     'types': ['administrative_area_level_2', 'political']},
    {'long_name': 'Maryland',
     'short_name': 'MD',
     'types': ['administrative_area_level_1', 'political']},
    {'long_name': 'United States',
     'short_name': 'US',
     'types': ['country', 'political']}],
   'formatted_address': 'Reisterstown, MD, USA',
   'geometry': {'bounds': {'northeast': {'lat': 39.4842559,
      'lng': -76.78330509999999},
     'southwest': {'lat': 39.4343069, 'lng': -76.839055}},
    'location': {'lat': 39.4695489, 'lng': -76.82942129999999},
    'location_type': 'APPROXIMATE',
    'viewport': {'northeast': {'lat': 39.4842559, 'lng': -76.78330509999999},
     'southwest': {'lat': 39.4343069, 'lng': -76.839055}}},
   'place_id': 'ChIJGywN1xAWyIkRIEJyjFUZEgc',
   'types': ['

## Vacancy Dataset

In [None]:
#maybe pop density

## Zillow Dataset

In [None]:
#Function to call API for property types
def zillow_api(home_type):
    """Acquire Zillow data from API for properties on the market by home type"""
    url = "https://zillow-com1.p.rapidapi.com/propertyExtendedSearch"

    querystring = {"location":"montgomery county, md","page":"1","status_type":"ForSale","home_type":home_type,
                   "sort":"Newest","maxPrice":"587000"}

    headers = {
        "X-RapidAPI-Host": "zillow-com1.p.rapidapi.com",
        "X-RapidAPI-Key": credentials.rapid_key}

    #API Call 
    response = requests.request("GET", url, headers=headers, params=querystring)

    #convert response to dictionary
    result = json.loads(response.text)

    #convert dictionary to dataframe
    zillow_df = pd.DataFrame.from_dict(result.get('props')) 

    if result.get('totalPages', 0) > 1:
        count = 2
        for x in range(result.get('totalPages')-1):
            if x == 0:
                querystring['page'] = x + count 
                response = requests.request("GET", url, headers=headers, params=querystring)
                result = json.loads(response.text)
                zillow_temp_df = pd.DataFrame.from_dict(result.get('props')) 
                full_zillow_df = pd.concat([zillow_df, zillow_temp_df])
            else:
                querystring['page'] = x + count 
                response = requests.request("GET", url, headers=headers, params=querystring)
                result = json.loads(response.text)
                zillow_temp_df = pd.DataFrame.from_dict(result.get('props')) 
                full_zillow_df = pd.concat([full_zillow_df, zillow_temp_df]) 
        return full_zillow_df
    else:
        return zillow_df

In [None]:
#upload properties for sale from zillow api
zillow_house = zillow_api('Houses')
zillow_townhomes = zillow_api('Townhomes')
zillow_multi_family = zillow_api('Multi-family')
zillow_condos = zillow_api('Condos')

#merge the tables of different property types to form one table
complete_zillow_df = pd.concat([zillow_house,zillow_townhomes, zillow_multi_family , zillow_condos]) 

## Clean Zillow Dataset

In [None]:
#convert the address column to lowercase
complete_zillow_df['address'] = complete_zillow_df['address'].str.lower()

#drop rows with missing longitude
complete_zillow_df.dropna(subset=['longitude'], inplace = True)

# Maps

## Sources

- https://www.census.gov/data/developers/data-sets/acs-5year.html
- https://www.data.montgomerycountymd.gov 