In [1]:
import pandas as pd
import numpy as np

# Read the cleaned stolen-animals extract into the working frame
source_csv = 'Data/MPS Stolen Animals Clean.csv'
df = pd.read_csv(source_csv)

# Preview the first three rows
display(df.head(3))

Unnamed: 0,borough,animal_recovered,year_and_month
0,lambeth,not recovered,jan-2010
1,islington,not recovered,jan-2010
2,croydon,not recovered,jan-2010


#### TARGET - RECOVERY SUCCESS

In [2]:
# Binary target: 1 where the outcome is exactly 'recovered', 0 for any other value
is_recovered = df['animal_recovered'].eq('recovered')
df['recovery_success'] = is_recovered.astype(int)

# Preview the first three rows
display(df.head(3))

Unnamed: 0,borough,animal_recovered,year_and_month,recovery_success
0,lambeth,not recovered,jan-2010,0
1,islington,not recovered,jan-2010,0
2,croydon,not recovered,jan-2010,0


#### DATES

In [3]:
# Parse the 'mon-YYYY' strings (e.g. 'jan-2010') into timestamps
df['year_and_month'] = pd.to_datetime(df['year_and_month'], format='%b-%Y')

# Derive the calendar features from the parsed column
ym = df['year_and_month']
df['month'] = ym.dt.month_name().str.lower()   # lower-cased month name, e.g. 'january'
df['month_num'] = ym.dt.month                  # month number
df['year'] = ym.dt.year                        # four-digit year

# NOTE: quarter extraction (`.dt.quarter`) is currently disabled.

# Preview the first three rows
display(df.head(3))

Unnamed: 0,borough,animal_recovered,year_and_month,recovery_success,month,month_num,year
0,lambeth,not recovered,2010-01-01,0,january,1,2010
1,islington,not recovered,2010-01-01,0,january,1,2010
2,croydon,not recovered,2010-01-01,0,january,1,2010


#### SEASON

In [4]:
# Months grouped by season; winter is listed December-first because it wraps
# around the year boundary
months_by_season = {
    'winter': (12, 1, 2),
    'spring': (3, 4, 5),
    'summer': (6, 7, 8),
    'autumn': (9, 10, 11),
}

# Flatten the grouping into a month-number -> season lookup
season_map = {
    month_number: season_name
    for season_name, month_numbers in months_by_season.items()
    for month_number in month_numbers
}

# Label every row with the season of its month
month_numbers_col = df['year_and_month'].dt.month
df['season'] = month_numbers_col.map(season_map)

# Preview the first three rows
display(df.head(3))

Unnamed: 0,borough,animal_recovered,year_and_month,recovery_success,month,month_num,year,season
0,lambeth,not recovered,2010-01-01,0,january,1,2010,winter
1,islington,not recovered,2010-01-01,0,january,1,2010,winter
2,croydon,not recovered,2010-01-01,0,january,1,2010,winter


#### PANDEMIC PERIOD

In [5]:
# Rows dated strictly before the cutoff are labelled 'pre-pandemic';
# every other row (cutoff month onward) is labelled 'post-pandemic'
pandemic_cutoff = '2020-03-01'
before_cutoff = df['year_and_month'] < pandemic_cutoff
df['pandemic_period'] = np.where(before_cutoff, 'pre-pandemic', 'post-pandemic')

# Preview the first three rows
display(df.head(3))

Unnamed: 0,borough,animal_recovered,year_and_month,recovery_success,month,month_num,year,season,pandemic_period
0,lambeth,not recovered,2010-01-01,0,january,1,2010,winter,pre-pandemic
1,islington,not recovered,2010-01-01,0,january,1,2010,winter,pre-pandemic
2,croydon,not recovered,2010-01-01,0,january,1,2010,winter,pre-pandemic


#### WRITE CLEAN DATAFRAME II

In [6]:
# Keep only the columns to be written, in their output order.
# Columns not listed here (e.g. 'animal_recovered', 'month_num') are dropped.
output_columns = [
    'borough',
    'year_and_month',
    'month',
    'year',
    'season',
    'pandemic_period',
    'recovery_success',
]
df = df[output_columns].copy()

# Persist the result without the row index
df.to_csv('Data/MPS Stolen Animals Clean II.csv', index=False)

# Preview the first three rows
display(df.head(3))

Unnamed: 0,borough,year_and_month,month,year,season,pandemic_period,recovery_success
0,lambeth,2010-01-01,january,2010,winter,pre-pandemic,0
1,islington,2010-01-01,january,2010,winter,pre-pandemic,0
2,croydon,2010-01-01,january,2010,winter,pre-pandemic,0
