In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Description 
In this notebook I will try to forecast the next 12 months of the unemployment rate using Prophet (formerly fbprophet).
A very simple approach to a simple data set.

In [None]:
# Load the monthly series; the DATE column is parsed to datetimes while reading.
unemployment_csv = '../input/unemployment-rate-aged-1564japan/LRUN64TTJPM156S (1).csv'
df = pd.read_csv(unemployment_csv, parse_dates=["DATE"])

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Select seaborn's "darkgrid" theme for the plots that follow.
sns.set_theme(style="darkgrid")


# PREPARING DATA FOR FB PROPHET

In [None]:
# Count missing values per column; all zeros means there is nothing to impute.
df.isna().sum()

In [None]:
# Print the frame summary produced by DataFrame.info() (columns, dtypes, non-null counts).
df.info()

In [None]:
# Look at the first DATE entry to see what type a single element has.
df["DATE"].iloc[0]

# DATA ANOMALY CHECKING
1. CHECK IF WE HAVE ALL MONTHLY DATA FROM 2011 TILL NOW
2. CHECKING FOR MISSING MONTHS
3. CHECKING IF ANY YEAR-MONTH PAIR REPEATS

In [None]:
# Compare the row count with the number of calendar months spanned by the data,
# counted from the first row's date to the last row's date (both inclusive).

print("\nDATA CHECKING : checking if total data count is equal to the number of months between starting and ending date.")

first_date = df["DATE"].iloc[0]
last_date = df["DATE"].iloc[-1]

starting_year, starting_month = first_date.year, first_date.month
ending_year, ending_month = last_date.year, last_date.month

# Inclusive span: year difference in months, plus the month offset, plus one.
totalmonths = (ending_year - starting_year) * 12 + (ending_month - starting_month) + 1

print(f" Data starting year = {starting_year}\n Data starting month ={starting_month} \n Data ending year ={ending_year}\n Data ending month = {ending_month}\n")
print(" Total months = ", totalmonths)
print(" Total data count", len(df))

if len(df) != totalmonths:
    print(" Data has missing monthly data, total data count should be = ", totalmonths)
else:
    print(" Total data count is equal to total month count.")



In [None]:
# Check whether any year-month pair occurs more than once in the data.

print("\nDATA CHECKING : checking if there are same year-month data exists for multiple times")

# Build an unpadded "YYYY-M" key per row (e.g. "2011-3"). Iterating the DATE column
# directly avoids the slow row-by-row df.iterrows().
df["YEAR-MONTH"] = [str(date.year)+"-"+str(date.month) for date in df["DATE"]]

print("Data format after adding YEAR-MONTH column :\n\n",df.head())



# COUNTING DISTINCT YEAR-MONTH KEYS
# Summing the per-group row counts always equals len(df), so that comparison can
# never fail. A pair repeats exactly when there are fewer distinct keys than rows.
unique_year_month_count = df["YEAR-MONTH"].nunique()

if len(df) == unique_year_month_count:
    print("\n\nData has no missing months or repeating year-month pair(s).")
else:
    print("\n\nData has missing months or repeating year-month pair(s)")
    # List the repeated keys so the offending rows are easy to locate.
    print(df.loc[df["YEAR-MONTH"].duplicated(keep=False), "YEAR-MONTH"].unique())

In [None]:
# Prophet reads the time column as "ds" and the target as "y", so add both under those names.
df["ds"] = df["DATE"]
df["y"] = df["LRUN64TTJPM156S"]

# Kept as the last expression of the cell so the frame (with the new columns) is
# actually displayed; as the first statement its result was silently discarded.
df.head()


In [None]:
# Drop the original columns; their values live on in "ds" and "y".
# errors="ignore" keeps the cell safe to re-run (already-dropped columns are skipped)
# without a broad `except Exception` that would also hide unrelated failures and
# skip the second drop whenever the first one raised.
df = df.drop(columns=["DATE", "LRUN64TTJPM156S"], errors="ignore")

# PLOTTING DATA 
1. x-axis defining dates [ monthly frequency :format yyyy-mm-dd]
2. y-axis defining the unemployment rate : [ floating point]

In [None]:
# Full series plus a flat reference line at the overall mean rate.
dates = df["ds"]
rates = df["y"]
mean_rate_line = [rates.mean()] * len(df)

fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(dates, rates, label='Unemployment Rate Line')
ax.plot(dates, mean_rate_line, label='Unemployment Rate Average Line')

ax.set_xlabel('Date (monthly)')
ax.set_ylabel('Unemployment Rate')
ax.set_title('Monthly Unemployment Rate (2011-03-01 to 2021-03-01)\n SHOWING YEARLY X-LABEL')
ax.grid(True)
ax.legend()

plt.show()



In [None]:
# Same series, but with one x tick per row so every month is labelled.
dates = df["ds"]

fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(dates, df["y"])
plt.xticks(dates, rotation=90)

ax.set_xlabel('Date (monthly)')
ax.set_ylabel('Unemployment Rate')
ax.set_title('Monthly Unemployment Rate (2011-03-01 to 2021-03-01)\nSHOWING ALL X-LABEL')
ax.grid(True)
plt.show()

In [None]:
# Zoom in on the first 20 rows so each monthly tick label stays readable.
df20 = df[:20]
fig, ax = plt.subplots(figsize=(20,10))

ax.plot(df20["ds"],df20["y"],label='Unemployment Rate Line')
# Size the mean line from the slice itself: a literal 20 raises a length-mismatch
# error whenever df holds fewer than 20 rows.
ax.plot(df20["ds"],[df20["y"].mean()]*len(df20),label='Unemployment Rate Average Line')
plt.xticks(df20["ds"],rotation=90)


ax.set(xlabel='Date (monthly)', ylabel='Unemployment Rate',
       title='Monthly Unemployment Rate (2011-03-01 to 2021-03-01)\nSHOWING FIRST 20 DATA FOR X-LABEL')
ax.grid(True)
plt.legend()
plt.show()

# INSTALLING FB PROPHET
We have to install fb prophet to use it.

In [None]:
#INSTALLING FB PROPHET
!pip install Prophet

In [None]:
from prophet import Prophet

In [None]:
# Report how many monthly rows are available before splitting.
print(f"TOTAL MONTHS OF DATA: {len(df)}")

# TRAIN-TEST SPLIT 
1. keeping first (121-12)=109 months of data for training purpose
2. keeping 12 months of data for testing purpose 

In [None]:
# Hold out the final 12 rows for testing; everything before them is training data.
testdf = df.iloc[-12:]
traindf = df.iloc[:-12]

In [None]:
# Fit on the training rows only, then forecast 12 further month-start periods.
m = Prophet(seasonality_mode='multiplicative')
m.fit(traindf)

# The future frame holds the training dates followed by the 12 new ones.
future = m.make_future_dataframe(periods=12, freq='MS')
fcst = m.predict(future)

fig = m.plot(fcst)

In [None]:
# Show the point forecast (yhat) and date for the last 12 forecast rows.
fcst[["yhat", "ds"]].tail(12)

# ERROR CHECKING AND VISUAL REPRESENTATION
1. Creating result dataframe
2. Check MAE
3. Plot comparison of real data and forecast data

In [None]:
# creating result dataframe
# .copy() makes an independent frame, so adding columns below does not write into
# a slice of fcst (which may emit pandas' SettingWithCopyWarning).
resultdf = fcst[["yhat", "ds"]].tail(12).copy()

# Assign by position rather than by index label: the last 12 forecast rows pair
# with the 12 held-out rows in order. Label alignment would silently produce NaN
# if the two frames ever stopped sharing index labels; a length mismatch now
# raises instead.
resultdf["real"] = testdf["y"].to_numpy()
print("RESULT DATAFRAME :",resultdf)

In [None]:
# calculating mae: the mean of the absolute differences between real and forecast values
resultdf["error"] = (resultdf["real"] - resultdf["yhat"]).abs()
mae = resultdf["error"].mean()
print("MAE = ", mae)

In [None]:
# Overlay the held-out real values and the forecast for the same dates.
forecast_dates = resultdf["ds"]

fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(forecast_dates, resultdf["real"], label='Unemployment Rate Line Real')
ax.plot(forecast_dates, resultdf["yhat"], label='Unemployment Rate Line Prediction')
plt.xticks(forecast_dates, rotation=90)

ax.set_xlabel('Date (monthly)')
ax.set_ylabel('Unemployment Rate')
ax.set_title('Monthly Unemployment Rate Forecast')
ax.grid(True)
ax.legend()
plt.show()

# Unknown future Forecast

In [None]:
# Refit on every available row, then forecast 12 month-start periods past the data.
m = Prophet(seasonality_mode='multiplicative')
m.fit(df)

future = m.make_future_dataframe(periods=12, freq='MS')
fcst = m.predict(future)

fig = m.plot(fcst)


## Plotting last 24 months 
The last 12 months are the prediction result.

In [None]:
# Plot the last 24 forecast rows; the final 12 of them are the prediction result.
# The last 12 observed values are drawn alongside for comparison.

resultdf = fcst[["ds", "yhat"]].iloc[-24:]
resultdf2 = df.iloc[-12:]

fig, ax = plt.subplots(figsize=(20, 10))

ax.plot(resultdf2["ds"], resultdf2["y"], label='Unemployment Rate Line Real')
ax.plot(resultdf["ds"], resultdf["yhat"], label='Unemployment Rate Line Prediction')

ax.set_xlabel('Date (monthly)')
ax.set_ylabel('Unemployment Rate')
ax.set_title('Monthly Unemployment Rate Forecast')
ax.grid(True)
ax.legend()
plt.show()

# Conclusion

1. Although this does not look like a promising solution, an MAE of 0.798753962937098 is not unexpected for such a simple approach: Prophet alone, without any regressors or other feature engineering. We can't expect a miracle here.
2. The forecast shows an upward trend that started at (approximately) the end of 2018.
3. It seems the model was able to capture this upward trend.