# Time Series Analysis: Population and Migration Trends


This notebook performs a time series analysis of population and net-migration trends for Afghanistan, using the rows for that country from a multi-country dataset (`world_pop_mig_186_countries.csv`).
The analysis proceeds through the following steps:
1. **Data Exploration and Visualization**
2. **Time Series Decomposition**
3. **Trend Estimation**
4. **Exponential Smoothing**
5. **ARIMA Forecasting**
6. **Model Evaluation**

Each step includes detailed code, plots, and insights.
    

## 1. Data Loading and Exploration

In [None]:

# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt

# Load the dataset
file_path = 'world_pop_mig_186_countries.csv'
data = pd.read_csv(file_path)

# Filter data for one country. The name is kept in a variable so the filter
# and the plot title cannot drift apart.
country_name = 'Afghanistan'
country_data = (
    data.loc[data['country'] == country_name]
    .sort_values('year')  # line plots connect points in row order
    .copy()               # independent frame for later cells to modify
)
if country_data.empty:
    # Fail here with a clear message instead of producing empty plots and
    # obscure model-fitting errors further down the notebook.
    raise ValueError(f"No rows found for country {country_name!r} in {file_path}")

# Plot population and net migration trends.
# The two series get separate y-axes: they are presumably on very different
# scales (total head-count vs. a yearly net flow), so on a shared axis the
# smaller one would be flattened. The x-axis is left in its default ascending
# direction so time reads left to right.
fig, ax_population = plt.subplots(figsize=(14, 6))
ax_migration = ax_population.twinx()

population_line, = ax_population.plot(
    country_data['year'], country_data['population'],
    label='Population', marker='o', color='tab:blue',
)
migration_line, = ax_migration.plot(
    country_data['year'], country_data['netMigration'],
    label='Net Migration', marker='o', linestyle='--', color='tab:orange',
)

ax_population.set_title(f'Population and Net Migration Over Time ({country_name})')
ax_population.set_xlabel('Year')
ax_population.set_ylabel('Population')
ax_migration.set_ylabel('Net Migration')
# A single legend covering the lines from both axes.
ax_population.legend(handles=[population_line, migration_line])
ax_population.grid()
plt.show()
    

## 2. Time Series Decomposition

In [None]:

from statsmodels.tsa.seasonal import seasonal_decompose

# Index the data by year, in ascending order.
# This is done without in-place mutation and behind a guard so the cell can be
# re-run safely: once 'year' has become the index it is no longer a column,
# and an unconditional set_index('year') would raise KeyError.
if 'year' in country_data.columns:
    country_data = country_data.set_index('year')
country_data = country_data.sort_index()

# Decompose the population series.
# NOTE(review): with period=1 there is no seasonal cycle to estimate, so the
# trend panel is expected to reproduce the series itself while the seasonal
# and residual panels stay flat at zero. The data is yearly; pick a period
# >= 2 only if there is a substantive reason to expect a multi-year cycle.
decomposition = seasonal_decompose(country_data['population'], model='additive', period=1)

# Plot decomposition components
fig = decomposition.plot()
fig.set_size_inches(14, 10)
plt.show()
    

## 3. Trend Estimation (SMA and WMA)

In [None]:

# Simple Moving Average (SMA): unweighted mean of the last 5 observations.
country_data['SMA_5'] = country_data['population'].rolling(window=5).mean()

# Weighted Moving Average (WMA)
# The frame is sorted by ascending year, so each rolling window arrives
# oldest-first. Listing the weights in ascending order therefore gives the
# most recent year the largest weight (0.4) and the oldest the smallest (0.1).
# The weights sum to 1, so the result stays on the scale of the data.
weights = [0.1, 0.2, 0.3, 0.4]  # oldest -> newest
country_data['WMA_4'] = country_data['population'].rolling(window=len(weights)).apply(
    # Incomplete leading windows yield NaN without calling this function
    # (min_periods defaults to the window size), so no length check is needed.
    lambda window: sum(w * val for w, val in zip(weights, window)),
    raw=True,  # pass plain arrays; avoids building a Series per window
)

# Plot SMA and WMA against the original series.
# The x-axis keeps its default ascending direction so time reads left to right.
plt.figure(figsize=(14, 6))
plt.plot(country_data.index, country_data['population'], label='Original Population', marker='o')
plt.plot(country_data.index, country_data['SMA_5'], label='SMA (5 years)', linestyle='--')
plt.plot(country_data.index, country_data['WMA_4'], label='WMA (4 years)', linestyle='-.')
plt.title('Simple and Weighted Moving Averages (Population)')
plt.xlabel('Year')
plt.ylabel('Population')
plt.legend()
plt.grid()
plt.show()
    

## 4. Exponential Smoothing

In [None]:

from statsmodels.tsa.holtwinters import SimpleExpSmoothing, ExponentialSmoothing

population = country_data['population']

# Single Exponential Smoothing (SES): level only, with a fixed smoothing
# level of 0.2 (optimized=False keeps statsmodels from re-estimating it).
ses_model = SimpleExpSmoothing(population).fit(smoothing_level=0.2, optimized=False)
country_data['SES'] = ses_model.fittedvalues

# Holt’s Linear Trend: level + additive trend, with both smoothing
# parameters supplied explicitly.
holt_model = ExponentialSmoothing(population, trend='additive', seasonal=None).fit(
    smoothing_level=0.8, smoothing_trend=0.2
)
country_data['Holt'] = holt_model.fittedvalues

# Holt-Winters (Triple Exponential Smoothing): level + trend + seasonality,
# with all parameters left to the optimizer.
# NOTE(review): seasonal_periods=5 assumes a 5-year cycle in yearly data;
# confirm there is a reason for that choice before interpreting this fit.
holt_winters_model = ExponentialSmoothing(
    population, trend='additive', seasonal='additive', seasonal_periods=5
).fit()
country_data['Holt_Winters'] = holt_winters_model.fittedvalues

# Plot Exponential Smoothing Results.
# The x-axis keeps its default ascending direction so time reads left to right.
plt.figure(figsize=(14, 6))
plt.plot(country_data.index, population, label='Original Population', marker='o')
smoothed_series = [
    ('SES', 'SES', '--'),
    ('Holt', 'Holt’s Linear Trend', '-.'),
    ('Holt_Winters', 'Holt-Winters', ':'),
]
for column, label, linestyle in smoothed_series:
    plt.plot(country_data.index, country_data[column], label=label, linestyle=linestyle)
plt.title('Exponential Smoothing Methods')
plt.xlabel('Year')
plt.ylabel('Population')
plt.legend()
plt.grid()
plt.show()
    

## 5. ARIMA Forecasting

In [None]:

from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

population = country_data['population']

# Plot ACF and PACF side by side.
# The partial autocorrelation can only be computed for lags below half the
# sample size, so the requested 20 lags are capped for short series instead
# of letting plot_pacf raise.
max_lags = min(20, len(population) // 2 - 1)
fig, (ax_acf, ax_pacf) = plt.subplots(1, 2, figsize=(14, 6))
plot_acf(population, lags=max_lags, ax=ax_acf)
plot_pacf(population, lags=max_lags, ax=ax_pacf)
fig.tight_layout()
plt.show()

# Fit ARIMA model (order = (1, 1, 1)): one AR term, first differencing,
# one MA term.
arima_model = ARIMA(population, order=(1, 1, 1)).fit()

# Forecast future values
forecast_steps = 10
forecast = arima_model.forecast(steps=forecast_steps)

# The forecast is plotted against explicit calendar years that continue from
# the last observed year (the index is the year, sorted ascending).
last_year = country_data.index[-1]
forecast_years = range(last_year + 1, last_year + 1 + forecast_steps)

# Plot forecast.
# The x-axis keeps its default ascending direction so the forecast appears to
# the right of the history.
plt.figure(figsize=(14, 6))
plt.plot(country_data.index, population, label='Original Population', marker='o')
plt.plot(forecast_years, forecast, label='ARIMA Forecast', marker='x')
plt.title('ARIMA Forecasting')
plt.xlabel('Year')
plt.ylabel('Population')
plt.legend()
plt.grid()
plt.show()
    

## 6. Model Evaluation

In [None]:

from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Evaluation metrics for all models.
# These are in-sample errors: each model's fitted values are compared with
# the same data it was fitted on, so they measure goodness of fit, not
# out-of-sample forecast accuracy.

# The first ARIMA fitted value is excluded, as in the original alignment
# step: with d=1 it has no earlier observation to build on. The same
# observation is dropped for every model so all rows of the table are
# computed over an identical evaluation window.
skip_initial = 1
# .iloc makes the slice explicitly positional; the Series are labelled by
# integer years, where a bare [1:] is easy to misread as a label slice.
actual = country_data['population'].iloc[skip_initial:]


def _in_sample_errors(predicted):
    """Return MAE and RMSE of `predicted` against the shared `actual` window."""
    return {
        'MAE': mean_absolute_error(actual, predicted),
        'RMSE': np.sqrt(mean_squared_error(actual, predicted)),
    }


models = ['SES', 'Holt', 'Holt_Winters']
metrics = {
    model: _in_sample_errors(country_data[model].iloc[skip_initial:])
    for model in models
}

# ARIMA metrics
arima_predictions = arima_model.fittedvalues.iloc[skip_initial:]
metrics['ARIMA'] = _in_sample_errors(arima_predictions)

# Display metrics (one row per model)
metrics_df = pd.DataFrame(metrics).T
metrics_df
    