## Importing Libraries

In [1]:
import os
import random
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import pmdarima as pm
import tensorflow as tf
import plotly.express as px
import matplotlib.pyplot as plt
from keras.models import Sequential
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.stattools import adfuller
from pandas.plotting import autocorrelation_plot
from sklearn.model_selection import train_test_split
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense, Conv1D, InputLayer
warnings.filterwarnings("ignore")

## Loading the data

In [2]:
def loadData():
    """Load every CSV file found in the ``datasets`` directory.

    Each file is read with pandas and its ``index`` column is dropped. A
    ``Code`` column is removed when present and a ``Country`` column is
    renamed to ``Entity``, so all frames share the same leading layout.

    The file ``Mental health Depression disorder Data.csv`` gets extra
    treatment: only rows up to label 6467 are kept and every column except
    ``Entity``/``Code`` is cast to float (``Year`` then to int).

    Returns:
        list[pd.DataFrame]: one frame per CSV file, ordered by file name
        (case-insensitive) so the order is reproducible across platforms.
    """
    depressionFile = 'Mental health Depression disorder Data.csv'
    # os.listdir returns entries in arbitrary order; sort so that anything
    # pairing the frames positionally sees a stable order.
    csvFiles = sorted(
        (
            os.path.join("datasets", filename)
            for filename in os.listdir("datasets")
            if filename.endswith('.csv')
        ),
        key=str.lower,
    )
    data = []
    for file in csvFiles:
        frame = pd.read_csv(file, low_memory=False).drop(['index'], axis=1)
        # Compare on the base name: a hard-coded 'datasets\...' path only
        # matches on Windows and contains an invalid '\M' escape sequence.
        if os.path.basename(file) == depressionFile:
            # NOTE(review): rows after label 6467 are discarded; presumably they
            # are not part of the same table -- confirm against the raw file.
            frame = frame.loc[:6467].copy()
            for column in frame.columns:
                if column not in ('Entity', 'Code'):
                    frame[column] = frame[column].astype(float)
            frame['Year'] = frame['Year'].astype(int)
        # Both operations are no-ops when the column is absent.
        frame = frame.drop(columns='Code', errors='ignore')
        frame = frame.rename(columns={'Country': 'Entity'})
        data.append(frame)
    return data

## Data PreProcessing

In [3]:
def globalAverage(data, frame, disorder):
    """Add the per-year mean of ``disorder`` to ``frame`` and return it.

    Args:
        data: DataFrame with a ``Year`` column and a ``disorder`` column.
        frame: DataFrame that receives a new ``disorder`` column; it must
            have one row per unique year of ``data``. Modified in place.
        disorder: name of the column to average.

    Returns:
        The same ``frame`` object, with the means listed in the order in
        which the years first appear in ``data``.
    """
    yearColumn = data['Year']
    frame[disorder] = [
        data.loc[yearColumn == year, disorder].mean()
        for year in yearColumn.unique()
    ]
    return frame

In [4]:
def yearlyAverage(data):
    """Collapse each DataFrame to one row per year.

    For every input frame a new frame is built with an integer ``Year``
    column (unique years, in order of first appearance) plus, for every
    column from the third onwards, the mean of that column within each year
    as computed by ``globalAverage``.

    Args:
        data: iterable of DataFrames, each with a ``Year`` column.

    Returns:
        list[pd.DataFrame]: one aggregated frame per input frame.
    """
    def summarise(dataframe):
        # Build the year axis first, then append one averaged column at a time.
        summary = pd.DataFrame()
        summary['Year'] = pd.Series(dataframe['Year'].unique()).astype(int)
        for feature in dataframe.columns[2:]:
            summary = globalAverage(dataframe, summary, feature)
        return summary

    return [summarise(dataframe) for dataframe in data]

## Plotting the data

In [5]:
def plotting(df, sort_by, color_by, dpi=200):
    """Show a horizontal Plotly bar chart of ``sort_by`` per ``Entity``.

    Args:
        df: DataFrame with an ``Entity`` column plus the ``sort_by`` and
            ``color_by`` columns. It is sorted in place by ``sort_by``.
        sort_by: column used both for ordering and as the bar length.
        color_by: column used to colour the bars.
        dpi: kept for backward compatibility only. DPI is a Matplotlib
            setting and has no effect on a Plotly figure.
    """
    # The former plt.figure(dpi=dpi) call only opened an empty Matplotlib
    # figure that was never drawn on, so it has been dropped.
    df.sort_values(by=sort_by, inplace=True)
    fig = px.bar(df, x=sort_by, y="Entity", orientation='h', color=color_by)
    fig.show()

In [6]:
def plotHistogram(df, column, title, xlabel, ylabel, figsize=(10, 6), kde=True):
    """Draw and show a seaborn histogram of one DataFrame column.

    Args:
        df: DataFrame holding the data.
        column: name of the column to plot.
        title: figure title.
        xlabel: x-axis label.
        ylabel: y-axis label.
        figsize: Matplotlib figure size.
        kde: forwarded to ``sns.histplot`` as its ``kde`` argument.
    """
    plt.figure(figsize=figsize)
    values = df[column]
    sns.histplot(values, kde=kde)
    decorations = ((plt.title, title), (plt.xlabel, xlabel), (plt.ylabel, ylabel))
    for applyText, text in decorations:
        applyText(text)
    plt.show()

# Example usage
# plotHistogram(df, 'Schizophrenia disorders (share of population) - Sex: Both - Age: Age-standardized',
#               'Distribution of Schizophrenia Disorder Prevalence', 'Prevalence (Age-standardized)', 'Frequency')


In [7]:

def plotLosses(trainLosses, valLosses, title='Training and Validation Losses', xlabel='Epoch', ylabel='Loss'):
    """Plot training and validation loss curves on the same axes and show them.

    Args:
        trainLosses: sequence of training-loss values.
        valLosses: sequence of validation-loss values.
        title: figure title.
        xlabel: x-axis label.
        ylabel: y-axis label.
    """
    curves = (('Train Loss', trainLosses), ('Val Loss', valLosses))
    for curveLabel, losses in curves:
        plt.plot(losses, label=curveLabel)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()

# Example usage
# plotLosses(trainLosses, valLosses)


## Model Architecture

In [8]:
# Function to plot predicted vs actual data
def plotPredictionsVSActual(xTest, yTest, outputs, interval=(0, 39)):
    """Plot one randomly chosen test window with its predicted and actual next value.

    Args:
        xTest: array indexed as ``xTest[i, :, 0]`` to obtain the history of
            window ``i`` (assumes a 3-D array -- TODO confirm layout).
        yTest: actual target for each window, indexed by ``i``.
        outputs: model prediction for each window, indexed by ``i``.
        interval: inclusive ``(low, high)`` range the random window index is
            drawn from. ``high`` is clamped to the last available window so
            the default no longer fails on test sets smaller than 40.

    Raises:
        ValueError: if ``xTest`` is empty or the clamped interval is empty.
    """
    sampleCount = len(xTest)
    if sampleCount == 0:
        raise ValueError("xTest is empty; there is no window to plot")
    low, high = interval
    high = min(high, sampleCount - 1)
    if low > high:
        raise ValueError(
            f"interval {interval} selects no window out of the {sampleCount} available"
        )
    i = random.randint(low, high)

    # Extend the history of the chosen window with the predicted / actual outcome
    predicted = np.append(xTest[i, :, 0], outputs[i])
    actual = np.append(xTest[i, :, 0], yTest[i])

    # Time points for plotting
    x = np.arange(len(predicted), dtype=float)

    # Plotting the series
    plt.figure(figsize=(10, 6))
    plt.plot(x[:-1], actual[:-1], 'r-', label='Actual (History)')
    plt.plot(x[-1:], actual[-1:], 'ro', label='Actual (Latest)', markersize=10)
    plt.plot(x[:-1], predicted[:-1], 'b-', label='Predicted (History)')
    plt.plot(x[-1:], predicted[-1:], 'bo', label='Predicted (Latest)', markersize=10)

    # Highlight the points where prediction and actual coincide. The previous
    # mask (both values > 0) marked every positive point, not the overlap.
    overlap = np.isclose(predicted, actual)
    plt.plot(x[overlap], actual[overlap], 'k', label='Overlap')

    # Adding plot decorations
    plt.title('Depression Prediction and Actual Values for a Random Index')
    plt.xlabel('Time Step')
    plt.ylabel('Value')
    plt.legend()
    plt.show()

# Example usage (assuming xTest, yTest, and outputs are defined properly)
# plotPredictionsVSActual(xTest, yTest, outputs)


In [9]:
def visualizePattern(data, feature):
    """Show a line plot (with point markers) of ``feature`` against ``Year``.

    Args:
        data: DataFrame with a ``Year`` column and a ``feature`` column.
        feature: name of the column to plot; also used as the y-axis label
            and in the title.
    """
    years, values = data['Year'], data[feature]
    plt.figure(figsize=(10, 6))
    plt.plot(years, values, marker='o')
    plt.title(f'Year vs {feature}')
    plt.xlabel("Year")
    plt.ylabel(feature)
    plt.grid(True)
    plt.show()


# for dataframe in data:
#     dataframe.info()
#     for feature in dataframe.columns[1:]:
#         visualizePattern(dataframe, feature)


In [10]:
# for dataframe in data:
#     autocorrelation_plot(dataframe)
#     plt.show()

In [11]:
def findDifferencingDegree(feature, alpha=0.05, maxD=None):
    """Count how many times a series must be differenced to pass the ADF test.

    The series is differenced repeatedly until the p-value returned by
    ``adfuller`` (element 1 of its result) is no longer above ``alpha``.

    Args:
        feature: pandas Series to test. It is not modified.
        alpha: significance level the p-value is compared with.
        maxD: optional upper bound on the number of differencing steps.
            ``None`` (default) keeps the original unbounded behaviour.

    Returns:
        int: number of differencing steps applied.
    """
    series = feature
    d = 0
    while adfuller(series)[1] > alpha:
        if maxD is not None and d >= maxD:
            # Stop here rather than shrinking the series any further.
            break
        # diff() returns a new object, so the caller's series is untouched.
        series = series.diff().dropna()
        d += 1
    return d

## Forecasting

In [12]:
def movingAverage(feature, window):
    """Return the rolling mean of ``feature`` over ``window`` observations.

    The result has the same length and index as ``feature``; the first
    ``window - 1`` entries are NaN because the window is not yet full.
    """
    roller = feature.rolling(window=window)
    return roller.mean()

In [13]:
def exponentialSmoothing(feature, steps=10, smoothingLevel=0.2):
    """Forecast ``steps`` values with additive-trend exponential smoothing.

    Args:
        feature: series to fit the model on.
        steps: number of future periods to forecast.
        smoothingLevel: value passed to ``fit`` as ``smoothing_level``.

    Returns:
        np.ndarray: float array holding the ``steps`` forecast values.
    """
    model = ExponentialSmoothing(feature, trend='add', seasonal=None).fit(
        smoothing_level=smoothingLevel
    )
    forecast = model.forecast(steps=steps)
    # Direct conversion; the previous join/split through strings produced the
    # same numbers with an unnecessary text round trip.
    return np.asarray(forecast, dtype=float)

In [14]:
def autoRegressive(feature, lag, steps=10):
    """Forecast the next ``steps`` values with an autoregressive model.

    Args:
        feature: series to fit the model on.
        lag: value passed to ``AutoReg`` as ``lags``.
        steps: number of future periods to forecast.

    Returns:
        np.ndarray: float array holding the ``steps`` forecast values, the
        first being the period directly after the last observation.
    """
    model = AutoReg(feature, lags=lag).fit()
    # Observations are numbered from 0, so the first out-of-sample position is
    # len(feature). Starting at len(feature) + 1 skipped that first period and
    # shifted every forecast by one step.
    firstStep = len(feature)
    forecast = model.predict(start=firstStep, end=firstStep + steps - 1)
    return np.asarray(forecast, dtype=float)

In [15]:
def integratedAutoRegressiveMovingAverage(feature, steps=10):
    """Forecast ``steps`` values with a non-seasonal ARIMA chosen by ``pm.auto_arima``.

    Args:
        feature: series to fit the model on.
        steps: number of future periods to forecast.

    Returns:
        np.ndarray: float array holding the ``steps`` forecast values.
    """
    model = pm.auto_arima(feature, seasonal=False, stepwise=True)
    forecast = model.predict(n_periods=steps)
    # Direct conversion instead of formatting the values as text and parsing
    # them back.
    return np.asarray(forecast, dtype=float)
   

In [16]:
# Layer classes selectable by short architecture name.
neuralNetworks = {
    'RNN': SimpleRNN,
    'LSTM': LSTM,
    'FFNN': Dense,
    'CNN': Conv1D
}


def multiLayerNeuralNetwork(neuralNetwork, data, feature):
    """Build and compile a two-hidden-layer regression model.

    Args:
        neuralNetwork: Keras layer class used for the two hidden layers
            (one of the values of ``neuralNetworks``).
        data: input array; ``data.shape[1]`` and ``data.shape[2]`` define the
            per-sample input shape.
        feature: target array; ``feature.shape[1]`` is the number of outputs.

    Returns:
        A compiled ``Sequential`` model (Adam optimiser, MSE loss).
    """
    # Flatten is not in the notebook's import cell, so import it locally.
    from tensorflow.keras.layers import Flatten

    model = Sequential()
    model.add(InputLayer(input_shape=(data.shape[1], data.shape[2])))

    if neuralNetwork is Dense:
        # Dense does not accept return_sequences; flatten the window so the
        # network emits one vector per sample.
        model.add(Flatten())
        model.add(Dense(64, activation="relu"))
        model.add(Dense(50, activation="relu"))
    elif neuralNetwork is Conv1D:
        # Conv1D needs a kernel size and does not accept return_sequences;
        # 'same' padding keeps short windows valid.
        model.add(Conv1D(64, kernel_size=3, padding="same", activation="relu"))
        model.add(Conv1D(50, kernel_size=3, padding="same", activation="relu"))
        model.add(Flatten())
    else:
        # Recurrent layers: the first returns the full sequence so the second
        # can consume it.
        model.add(neuralNetwork(64, activation="relu", return_sequences=True))
        model.add(neuralNetwork(50, activation="relu"))

    # Linear output for regression. Softmax forced the outputs to sum to 1
    # (a constant 1.0 for a single target), which cannot fit an MSE objective.
    model.add(Dense(feature.shape[1], activation="linear"))
    model.compile(optimizer='adam', loss='mse')
    return model

## Data Splitting

In [17]:
# Load the raw CSV frames and reduce each of them to one averaged row per year.
data = yearlyAverage(loadData())

In [18]:
# Classical forecasting methods, keyed by the label used in the output column names.
classicalModel = {
    'Moving Average': movingAverage,
    'Exponential Smoothing': exponentialSmoothing,
    'Auto Regressive': autoRegressive,
    'Auto Regressive Integrated Moving Average': integratedAutoRegressiveMovingAverage
}
# One autoregressive lag order per dataset, paired positionally with `data`.
lags = [14,13,13]
window = 3
# Forecast horizon; must equal the number of values the forecasting helpers return.
horizon = 10
predictions = []


def _movingAverageForecast(series, window, steps):
    """Forecast `steps` values, each the mean of the latest `window` values (earlier forecasts included)."""
    history = pd.Series(np.asarray(series, dtype=float))
    forecast = []
    for _ in range(steps):
        nextValue = classicalModel['Moving Average'](history, window).iloc[-1]
        forecast.append(nextValue)
        history = pd.concat([history, pd.Series([nextValue])], ignore_index=True)
    return np.array(forecast)


for frame, lag in zip(data,lags):
    # The frames in `data` are sorted and converted in place, as before.
    frame.sort_values(by='Year', ascending=True, inplace=True)
    frame['Year'] = pd.to_datetime(frame['Year'], format='%Y')
    disorders = frame.iloc[:,1:]
    years = frame['Year']
    # 'YS' (year start) matches the 1 January dates of the historical 'Year'
    # column and avoids the deprecated 'Y' offset alias.
    predicted = pd.DataFrame({'year': pd.date_range(start=years.iloc[-1] + pd.DateOffset(years=1), periods=horizon, freq='YS')})
    for disorder in disorders:
        feature = pd.Series(disorders[disorder].values, index=pd.DatetimeIndex(years))
        # Annual PeriodIndex ('Y' replaces the deprecated 'A' period alias).
        feature.index = feature.index.to_period('Y')
        for model, forecaster in classicalModel.items():
            column = f"{disorder} using {model}"
            if model == 'Moving Average':
                # Assigning the raw rolling mean aligned on index labels 0..9
                # stored smoothed *historical* values (with leading NaNs)
                # instead of a forecast.
                predicted[column] = _movingAverageForecast(disorders[disorder], window, horizon)
            elif model == 'Auto Regressive':
                predicted[column] = forecaster(feature, lag)
            else:
                predicted[column] = forecaster(feature)
    predictions.append(predicted)