In [1]:
import pandas as pd

In [2]:
# Raise the column display limit so wide frames are shown in full
pd.options.display.max_columns = 999

In [3]:
# City datasets: each frame is loaded, its columns are prefixed with the
# dataset label, and the label is also stored on the frame as `.name`.
police = pd.read_csv("./data/police_sta.csv").add_prefix("police_")
police.name = "police"

fire = pd.read_csv("./data/fire_dept.csv").add_prefix("fire_")
fire.name = "fire"

community = pd.read_csv("./data/community_centers.csv").add_prefix("community_")
community.name = "community"

parks = pd.read_csv("./data/parks.csv").add_prefix("parks_")
parks.name = "parks"

# low_memory=False makes pandas read the file in one pass rather than in chunks
crime = pd.read_csv("./data/crime.csv", low_memory=False).add_prefix("crime_")
crime.name = "crime"

In [4]:
# Weather datasets: loaded as-is (no column prefix); each frame is tagged
# with a `.name` label right after it is read.
humidity = pd.read_csv("./data/weather/humidity.csv")
humidity.name = "humidity"

temp = pd.read_csv("./data/weather/temperature.csv")
temp.name = "temperature"

pressure = pd.read_csv("./data/weather/pressure.csv")
pressure.name = "pressure"

weather_descript = pd.read_csv("./data/weather/weather_description.csv")
weather_descript.name = "weather_description"

wind_dir = pd.read_csv("./data/weather/wind_direction.csv")
wind_dir.name = "wind_direction"

wind_speed = pd.read_csv("./data/weather/wind_speed.csv")
wind_speed.name = "wind_speed"

In [5]:
# Every loaded frame, collected so shared clean-up steps can loop over them.
# The list holds references to the same objects as the individual variables.
datasets = [
    crime,
    police,
    fire,
    community,
    parks,
    humidity,
    temp,
    pressure,
    weather_descript,
    wind_dir,
    wind_speed,
]

In [6]:
# Normalise column names on every dataset. The frames are modified in place
# because `datasets` holds the same objects as the individual variables.
for df in datasets:
    # Lower-case first: makes typing easier and makes the matches below
    # independent of the casing used in the source files.
    normalized = df.columns.str.lower()

    # Some of the datasets have Lat/Long as Y/X, which can be confusing.
    # Several frames had a "<dataset>_" prefix added to their columns when they
    # were loaded, so a pattern anchored only at the start ("^[X]") never
    # matches e.g. "police_X". Allow one optional prefix and anchor at both ends
    # so that only a column that is exactly x / y is renamed; names such as
    # "police_xcoord" or "police_point_x" are left alone (no partial matching).
    normalized = normalized.str.replace(r"^([^_]+_)?x$", r"\1long", regex=True)
    normalized = normalized.str.replace(r"^([^_]+_)?y$", r"\1lat", regex=True)
    df.columns = normalized

    # The datasets also carry "...coord" columns; per the original author these
    # are unsigned, so they are dropped.
    coord_columns = [col for col in df.columns if "coord" in col]
    df.drop(columns=coord_columns, inplace=True)

In [7]:
# Renaming for consistency.
# Done in place on purpose: `crime = crime.rename(...)` would bind `crime` to a
# new DataFrame, which does not carry the ad-hoc `.name` attribute set when the
# data was loaded and would leave the frame stored in `datasets` with the old
# column name.
crime.rename(columns={"crime_occurred_on_date": "datetime"}, inplace=True)