In [101]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import prophet
import warnings
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose

warnings.filterwarnings('ignore')

### 01 - Prepare Dataset

In [102]:
# Map the raw CSV headers to the `ds` / `y` column names used by later cells.
column_dict = {
    'Date': 'ds',
    'Total Individuals in Shelter': 'y',
}

# Read the file, rename the columns and parse `ds` (month/day/year text) into
# datetimes in one chain, so no intermediate frame is mutated in place.
shelter_df = (
    pd.read_csv('DHS_weekly.csv')
    .rename(columns=column_dict)
    .assign(ds=lambda frame: pd.to_datetime(frame['ds'], format='%m/%d/%Y'))
)

In [None]:
# Preview the last five rows of the prepared frame.
shelter_df.tail(n=5)

### Exploratory Data Analysis

#### Time-Series Visualization

In [None]:
# Plot the shelter population over time.
# `data=` is passed by keyword so the call does not depend on seaborn's
# positional-argument order, which differs between seaborn releases.
ax = sns.lineplot(data=shelter_df, x='ds', y='y')
# Readable labels (taken from the original CSV headers) so the figure stands
# alone; the trailing `;` hides the repr, matching the other plotting cells.
ax.set(title='Individuals in shelter over time',
       xlabel='Date',
       ylabel='Total Individuals in Shelter');

The observation for 2021-01-03 seems to be wrong, so we'll remove it from our analysis.

In [26]:
# Drop the suspicious 2021-01-03 observation by date rather than by position.
# The previous `shelter_df[:-1]` removed one more row each time the cell was
# re-run; this filter is idempotent, so re-running the cell is safe.
# NOTE(review): assumes the row that `[:-1]` dropped is the 2021-01-03
# observation, as the accompanying note states — confirm against the data.
SUSPECT_DATE = pd.Timestamp('2021-01-03')
shelter_df = shelter_df[shelter_df['ds'] != SUSPECT_DATE]

In [None]:
# Re-plot the shelter population over time using the current `shelter_df`.
# `data=` is passed by keyword so the call does not depend on seaborn's
# positional-argument order, which differs between seaborn releases.
ax = sns.lineplot(data=shelter_df, x='ds', y='y')
# Readable labels (taken from the original CSV headers) so the figure stands
# alone; the trailing `;` hides the repr, matching the other plotting cells.
ax.set(title='Individuals in shelter over time',
       xlabel='Date',
       ylabel='Total Individuals in Shelter');

#### Distribution of individuals in shelter

In [None]:
# Histogram of the shelter counts (`y`) with a kernel density estimate overlaid.
sns.histplot(data=shelter_df, x='y', kde=True);

#### Autocorrelation and Partial Autocorrelation

In [None]:
# Build the date-indexed, single-column frame once and reuse it for both the
# autocorrelation and the partial autocorrelation plots.
y_by_date = shelter_df.set_index('ds')[['y']]
plot_acf(y_by_date);
plot_pacf(y_by_date);


The correlation with the day before (lag 1) is statistically significant; the correlations at other lags are not.

#### Distribution of temperature

In [None]:
# Histogram of the `Temperature` column with a kernel density estimate overlaid.
sns.histplot(data=shelter_df, x='Temperature', kde=True);

#### Relationship between temperature and people seeking shelter

In [None]:
# Scatter plot of shelter counts against temperature with a fitted regression line.
# `data=` is passed by keyword so the call does not depend on seaborn's
# positional-argument order, which differs between seaborn releases.
ax = sns.regplot(data=shelter_df, x='Temperature', y='y')
# Readable y-label (taken from the original CSV header); the trailing `;`
# hides the repr, matching the trimmed regression plot in this notebook.
ax.set(title='Individuals in shelter vs. temperature',
       xlabel='Temperature',
       ylabel='Total Individuals in Shelter');

At first glance, there does not seem to be a correlation between temperature and the number of individuals in shelter. However, removing the first and last 30 observations (the first and last months of data) reveals an underlying correlation between them.

In [None]:
# Same regression plot, excluding the first 30 and last 30 rows by position.
sns.regplot(data=shelter_df.iloc[30:-30], x='Temperature', y='y');

In [103]:
# Pearson (linear) correlation between temperature and shelter counts,
# computed over every row currently in `shelter_df`.
pearsonr(x=shelter_df['Temperature'], y=shelter_df['y'])

PearsonRResult(statistic=-0.10902963738892518, pvalue=0.037074389603055195)

In [104]:
# Spearman (rank) correlation between temperature and shelter counts,
# computed over every row currently in `shelter_df`.
spearmanr(a=shelter_df['Temperature'], b=shelter_df['y'])

SignificanceResult(statistic=-0.26172256099799834, pvalue=3.799944210975311e-07)

There is a statistically significant, though weak, negative correlation.

#### Seasonal Decomposition

In [None]:
# Decompose `y`, indexed by date, using statsmodels' additive model (its default).
# No `period` is passed here.
# NOTE(review): presumably the period is inferred from the date index — confirm.
seasonal_decomposition = seasonal_decompose(
    x=shelter_df.set_index('ds')['y'],
    model='additive',
)

In [None]:
# Plot only the first 110 points of the seasonal component.
seasonal_decomposition.seasonal.iloc[:110].plot();