In [1]:
# Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load Data: read the FBI homicide CSV (one column per month) and inspect it
file = "../data/FBI - Homicide Reported by Population.csv"
homicide_df = pd.read_csv(filepath_or_buffer=file)

# Summarise shape, dtypes and memory footprint of the raw table
homicide_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Columns: 155 entries, series to 10-2025
dtypes: int64(154), object(1)
memory usage: 2.6+ KB


In [3]:
# Melt the Data: reshape from one column per month to one row per
# (series, month), then give the new columns proper dtypes.
id_column = "series"
value_columns = list(homicide_df.columns.drop(id_column))

homicide_clean_df = (
    homicide_df
    .melt(
        id_vars=[id_column],
        value_vars=value_columns,
        var_name="Date",
        value_name="Value",
    )
    .assign(
        # Former column headers are month-year labels such as "10-2025"
        Date=lambda long_df: pd.to_datetime(long_df["Date"], format="%m-%Y"),
        # Make sure the counts are numeric
        Value=lambda long_df: pd.to_numeric(long_df["Value"]),
    )
)

homicide_clean_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 308 entries, 0 to 307
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   series  308 non-null    object        
 1   Date    308 non-null    datetime64[ns]
 2   Value   308 non-null    int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 7.3+ KB


In [4]:
# Filter Data: keep only the rows of the "United States" series
homicide_is_us = homicide_clean_df["series"] == "United States"
homicide_clean_df = homicide_clean_df.loc[homicide_is_us]

In [5]:
# Modify Columns: trim surrounding whitespace and lower-case every column name
homicide_normalized_names = homicide_clean_df.columns.str.strip().str.lower()
homicide_clean_df.columns = homicide_normalized_names

In [6]:
# Drop Column: remove the series label from the frame
columns_to_drop = ["series"]
homicide_clean_df = homicide_clean_df.drop(columns=columns_to_drop)

In [7]:
# Rename Column: call the measurement column "count" instead of "value"
columns_to_rename = dict(value="count")
homicide_clean_df = homicide_clean_df.rename(columns=columns_to_rename)

In [8]:
# Preview Data: print the column names, dtypes and non-null counts of the cleaned frame
homicide_clean_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 154 entries, 0 to 306
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    154 non-null    datetime64[ns]
 1   count   154 non-null    int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 3.6 KB


In [9]:
# Date Time: re-apply datetime parsing to the date column
homicide_clean_df["date"] = pd.to_datetime(homicide_clean_df["date"])

# Group By: total the monthly counts within each calendar year.
# NOTE(review): the source columns end at 10-2025, so the last year's total
# appears to cover fewer months than a full year - confirm before comparing years.
homicide_year_key = homicide_clean_df["date"].dt.year
yearly = homicide_clean_df.groupby(homicide_year_key)["count"].sum()

# The year values sit in the index (named "date"); expose them as a "year" column
homicide_yearly = (
    yearly
    .reset_index(name="count")
    .rename(columns={"date": "year"})
)

homicide_yearly.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   year    13 non-null     int32
 1   count   13 non-null     int64
dtypes: int32(1), int64(1)
memory usage: 288.0 bytes


In [20]:
# Output Data: write the yearly homicide totals to CSV without the row index
homicide_export_path = "../data/exports/cleaned_homicide_data.csv"
homicide_yearly.to_csv(homicide_export_path, index=False)

In [11]:
# Load Data: read the FBI robbery CSV (one column per month) and inspect it
file = "../data/FBI - Robbery Reported by Population.csv"
robbery = pd.read_csv(filepath_or_buffer=file)

# Summarise shape, dtypes and memory footprint of the raw table
robbery.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Columns: 122 entries, series to 10-2025
dtypes: int64(121), object(1)
memory usage: 2.0+ KB


In [12]:
# Melt the Data: reshape the robbery table from one column per month to one
# row per (series, month).
id_column = "series"
value_columns = robbery.columns.drop(id_column).tolist()

# Melt `robbery` itself. This call previously received `homicide_df`; that
# raised no error (the robbery month labels evidently also exist in the
# homicide table) but silently filled `robbery_clean` - and everything derived
# from it, including the exported CSV - with homicide counts.
robbery_clean = pd.melt(
    robbery,
    id_vars=[id_column],
    value_vars=value_columns,
    var_name="Date",
    value_name="Value"
)

# Former column headers are month-year labels such as "10-2025"
robbery_clean["Date"] = pd.to_datetime(robbery_clean["Date"],
                                       format="%m-%Y")

# Make sure the counts are numeric (to_numeric raises on unparseable values)
robbery_clean["Value"] = pd.to_numeric(robbery_clean["Value"])

robbery_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 242 entries, 0 to 241
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   series  242 non-null    object        
 1   Date    242 non-null    datetime64[ns]
 2   Value   242 non-null    int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 5.8+ KB


In [13]:
# Filter Data: keep only the rows of the "United States" series
robbery_is_us = robbery_clean["series"] == "United States"
robbery_clean = robbery_clean.loc[robbery_is_us]

In [14]:
# Modify Columns: trim surrounding whitespace and lower-case every column name
robbery_normalized_names = robbery_clean.columns.str.strip().str.lower()
robbery_clean.columns = robbery_normalized_names

In [15]:
# Drop Column: remove the series label from the frame
columns_to_drop = ["series"]
robbery_clean = robbery_clean.drop(columns=columns_to_drop)

In [16]:
# Rename Column: call the measurement column "count" instead of "value"
columns_to_rename = dict(value="count")
robbery_clean = robbery_clean.rename(columns=columns_to_rename)

In [17]:
# Preview Data: print the column names, dtypes and non-null counts of the cleaned frame
robbery_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 121 entries, 0 to 240
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    121 non-null    datetime64[ns]
 1   count   121 non-null    int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 2.8 KB


In [18]:
# Date Time: re-apply datetime parsing to the date column
robbery_clean["date"] = pd.to_datetime(robbery_clean["date"])

# Group By: total the monthly counts within each calendar year.
# NOTE(review): the source columns end at 10-2025, so the boundary years may
# cover fewer months than a full year - confirm before comparing years.
robbery_year_key = robbery_clean["date"].dt.year
yearly = robbery_clean.groupby(robbery_year_key)["count"].sum()

# The year values sit in the index (named "date"); expose them as a "year" column
robbery_yearly = (
    yearly
    .reset_index(name="count")
    .rename(columns={"date": "year"})
)

robbery_yearly.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11 entries, 0 to 10
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   year    11 non-null     int32
 1   count   11 non-null     int64
dtypes: int32(1), int64(1)
memory usage: 264.0 bytes


In [19]:
# Output Data: write the yearly robbery totals to CSV without the row index
robbery_export_path = "../data/exports/cleaned_robbery_data.csv"
robbery_yearly.to_csv(robbery_export_path, index=False)