## Importing Libraries

In [1]:
import os
import random
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import plotly.express as px
import matplotlib.pyplot as plt
from keras.models import Sequential
from statsmodels.tsa.ar_model import AutoReg
from statsmodels.tsa.arima_model import ARIMA
from sklearn.preprocessing import StandardScaler
from statsmodels.tsa.statespace.sarimax import SARIMAX
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense
from statsmodels.tsa.holtwinters import ExponentialSmoothing


## Loading the data

In [2]:
def loadData():
    """Load every CSV file in the ``datasets`` directory into a list of DataFrames.

    Each file is read with pandas and brought to a common layout:

    * an ``index`` column, if present, is removed;
    * a ``Code`` column, if present, is removed;
    * a ``Country`` column, if present, is renamed to ``Entity``.

    ``Mental health Depression disorder Data.csv`` gets extra cleaning: only the
    rows with index labels up to 6467 are kept, every column other than
    ``Entity`` and ``Code`` is cast to float, and ``Year`` is then cast to int.

    Returns:
        list[pandas.DataFrame]: one frame per CSV file, ordered by file name.
    """
    datasetDir = "datasets"
    depressionFile = "Mental health Depression disorder Data.csv"
    lastDepressionRow = 6467

    # os.listdir returns entries in arbitrary order; sorting keeps the position
    # of each frame in the returned list reproducible between runs and machines.
    csvFiles = [
        os.path.join(datasetDir, filename)
        for filename in sorted(os.listdir(datasetDir))
        if filename.endswith('.csv')
    ]

    data = []
    for file in csvFiles:
        # errors='ignore' keeps files without an 'index' column loadable.
        frame = pd.read_csv(file).drop(columns=['index'], errors='ignore')

        # Compare the bare file name: the joined path uses the platform's
        # separator, so a literal 'datasets\...' string only matches on Windows.
        if os.path.basename(file) == depressionFile:
            # .copy() so the casts below write to an independent frame rather
            # than to a slice of the frame returned by read_csv.
            frame = frame.loc[:lastDepressionRow].copy()
            for column in frame.columns:
                if column != 'Entity' and column != 'Code':
                    frame[column] = frame[column].astype(float)
            frame['Year'] = frame['Year'].astype(int)

        frame = frame.drop(columns=['Code'], errors='ignore')
        frame = frame.rename(columns={'Country': 'Entity'})
        data.append(frame)
    return data

## Scaling the data

In [3]:
def scale(data, feature, scaler):
    """Scale one column of ``data`` in place using ``scaler``.

    The scaler is fitted on the column and the column is replaced by the
    transformed values exactly once. Re-applying ``transform`` to the already
    scaled column (as the previous version did twice) would push the values
    through the fitted parameters again and distort them.

    Args:
        data: DataFrame holding the column; it is modified in place.
        feature: name of the column to scale.
        scaler: object exposing ``fit_transform`` for a 2-D ``(n, 1)`` array,
            for example the ``StandardScaler`` imported by this notebook. It is
            left fitted on the original column values.

    Returns:
        The same ``data`` object, with ``feature`` replaced by its scaled values.
    """
    columnValues = data[feature].values.reshape(-1, 1)
    scaled = scaler.fit_transform(columnValues)
    # Flatten the (n, 1) result so it is assigned as a plain 1-D column.
    data[feature] = np.asarray(scaled).ravel()
    return data

## Handling Categorical data

In [4]:
def encode(data, feature):
    """One-hot encode ``feature`` and return a new frame without the original column.

    Args:
        data: DataFrame containing the categorical column; it is not modified.
        feature: name of the column to encode. The generated indicator columns
            are prefixed with this name.

    Returns:
        A new DataFrame holding the remaining columns of ``data`` followed by
        the indicator columns produced by ``pd.get_dummies``.
    """
    dummies = pd.get_dummies(data[feature], prefix=feature)
    widened = pd.concat((data, dummies), axis=1)
    return widened.drop(columns=[feature])

## Handling Outliers Using the Interquartile Range (IQR)

In [5]:
def detectOutliersIqr(feature, threshold=1.5):
    """Replace IQR outliers in a Series with the median of the remaining values.

    A value is treated as an outlier when it lies below
    ``Q1 - threshold * IQR`` or above ``Q3 + threshold * IQR``. Outliers are
    overwritten in place with the median of the values inside those bounds.

    Args:
        feature: numeric pandas Series; it is modified in place.
        threshold: multiplier applied to the IQR to build the bounds
            (1.5 by default).

    Returns:
        dict with ``'featureName'`` (the Series name) and ``'outlierCount'``
        (number of values that were replaced).
    """
    # nanpercentile ignores missing values; with plain percentile a single NaN
    # turns both quartiles into NaN and no outlier would ever be detected.
    values = feature.to_numpy(dtype=float)
    Q1, Q3 = np.nanpercentile(values, [25, 75])
    IQR = Q3 - Q1

    lowerBound = Q1 - threshold * IQR
    upperBound = Q3 + threshold * IQR

    # Comparisons with NaN are False, so missing values are neither flagged as
    # outliers nor used for the replacement value.
    outlierIndices = feature[(feature < lowerBound) | (feature > upperBound)].index
    replacement = feature[(feature >= lowerBound) & (feature <= upperBound)].median()
    if len(outlierIndices) > 0:
        feature.loc[outlierIndices] = replacement
    outlierCount = len(outlierIndices)

    return {
        'featureName': feature.name,
        'outlierCount': outlierCount
    }

## Declaring the variables

In [13]:
# Build the working dataset in one step: read the raw CSV frames and reduce
# each of them to per-year averages.
# NOTE: yearlyAverage is defined in a later cell of this notebook, so that cell
# has to be executed before this one.
data = yearlyAverage(loadData())

  frame = pd.read_csv(file).drop(['index'], axis=1)


In [10]:
def globalAverage(data, frame, disorder):
    """Store the per-year mean of ``disorder`` as a new column of ``frame``.

    Args:
        data: DataFrame with a ``'Year'`` column and a ``disorder`` column.
        frame: DataFrame that receives the result; it is modified in place and
            must have one row per distinct year of ``data``, in order of first
            appearance, because the means are assigned by position.
        disorder: name of the column to average.

    Returns:
        The same ``frame`` object with the ``disorder`` column added.
    """
    years = data['Year']
    frame[disorder] = [
        data.loc[years == year, disorder].mean()
        for year in years.unique()
    ]
    return frame
  

In [12]:
def yearlyAverage(data):
    """Reduce each DataFrame in ``data`` to one row per year.

    For every input frame a new frame is built with a ``'Year'`` column (the
    distinct years in order of first appearance) plus, for every column from
    the third one onward, the per-year mean computed by ``globalAverage``.
    The first two columns are skipped (presumably ``Entity`` and ``Year`` --
    confirm against the frames produced by the loader).

    Args:
        data: iterable of DataFrames, each with a ``'Year'`` column.

    Returns:
        list of the per-year summary frames, in the same order as ``data``.
    """
    def summarise(dataframe):
        # Start from the year axis, then attach one averaged column per feature.
        summary = pd.DataFrame()
        summary['Year'] = dataframe['Year'].unique()
        for feature in dataframe.columns[2:]:
            summary = globalAverage(dataframe, summary, feature)
        return summary

    return [summarise(dataframe) for dataframe in data]

## Plotting the data

In [None]:
def plotting(df, sort_by, color_by, dpi=200):
    """Show a horizontal Plotly bar chart of ``sort_by`` for each ``Entity``.

    The rows are drawn in ascending order of ``sort_by``. The caller's frame is
    left untouched; sorting happens on a copy.

    Args:
        df: DataFrame with an ``'Entity'`` column plus the ``sort_by`` and
            ``color_by`` columns.
        sort_by: column used both for ordering the bars and as the bar length.
        color_by: column used to colour the bars.
        dpi: kept only so existing calls keep working. It is not used: the
            chart is rendered by Plotly, and the matplotlib figure this value
            used to configure was never drawn on.
    """
    ordered = df.sort_values(by=sort_by)
    fig = px.bar(ordered, x=sort_by, y="Entity", orientation='h', color=color_by)
    fig.show()

In [None]:
def plotHistogram(df, column, title, xlabel, ylabel, figsize=(10, 6), kde=True):
    """Draw and show a seaborn histogram of one DataFrame column.

    Args:
        df: DataFrame holding the data.
        column: name of the column whose distribution is plotted.
        title: figure title.
        xlabel: label for the x axis.
        ylabel: label for the y axis.
        figsize: matplotlib figure size, ``(10, 6)`` by default.
        kde: forwarded to ``sns.histplot`` as its ``kde`` argument.
    """
    _, ax = plt.subplots(figsize=figsize)
    sns.histplot(df[column], kde=kde, ax=ax)
    # Set after histplot so these labels override the ones seaborn derives.
    ax.set(title=title, xlabel=xlabel, ylabel=ylabel)
    plt.show()

# Example usage
# plotHistogram(df, 'Schizophrenia disorders (share of population) - Sex: Both - Age: Age-standardized',
#               'Distribution of Schizophrenia Disorder Prevalence', 'Prevalence (Age-standardized)', 'Frequency')


In [None]:

def plotLosses(trainLosses, valLosses, title='Training and Validation Losses', xlabel='Epoch', ylabel='Loss'):
    """Plot training and validation loss curves on the current axes and show them.

    Args:
        trainLosses: sequence of training-loss values, one per plotted point.
        valLosses: sequence of validation-loss values, one per plotted point.
        title: plot title.
        xlabel: label for the x axis.
        ylabel: label for the y axis.
    """
    ax = plt.gca()
    curves = ((trainLosses, 'Train Loss'), (valLosses, 'Val Loss'))
    for losses, label in curves:
        ax.plot(losses, label=label)
    ax.set(xlabel=xlabel, ylabel=ylabel, title=title)
    ax.legend()
    plt.show()

# Example usage
# plotLosses(trainLosses, valLosses)


### Model Architecture

In [None]:
# Function to plot predicted vs actual data
def plotPredictionsVSActual(xTest, yTest, outputs, interval=(0, 39)):
    """Plot one randomly chosen test sequence with its predicted and actual next value.

    A sample index is drawn with ``random.randint`` from ``interval`` (both ends
    inclusive). The upper end is capped at the last index available in
    ``xTest``, ``yTest`` and ``outputs`` so the default interval also works for
    test sets with fewer than 40 samples.

    Args:
        xTest: array indexed as ``xTest[i, :, 0]`` to obtain the history of
            sample ``i`` (assumes a (samples, time steps, features) layout --
            TODO confirm against the caller).
        yTest: per-sample actual next values.
        outputs: per-sample predicted next values.
        interval: ``(low, high)`` range of candidate sample indices.

    Raises:
        ValueError: if no sample index is available inside ``interval``.
    """
    low, high = interval
    lastIndex = min(len(xTest), len(yTest), len(outputs)) - 1
    high = min(high, lastIndex)
    if low > high:
        raise ValueError(
            f"interval {interval} contains no valid sample index (last index is {lastIndex})"
        )
    i = random.randint(low, high)

    # Extend the test sequence with the predicted and the actual outcome
    history = xTest[i, :, 0]
    predicted = np.append(history, outputs[i])
    actual = np.append(history, yTest[i])

    # Time points for plotting
    x = np.linspace(0, len(predicted) - 1, len(predicted))

    # Plotting the series: shared history as lines, the final point as markers
    plt.figure(figsize=(10, 6))
    plt.plot(x[:-1], actual[:-1], 'r-', label='Actual (History)')
    plt.plot(x[-1:], actual[-1:], 'ro', label='Actual (Latest)', markersize=10)
    plt.plot(x[:-1], predicted[:-1], 'b-', label='Predicted (History)')
    plt.plot(x[-1:], predicted[-1:], 'bo', label='Predicted (Latest)', markersize=10)

    # Highlight the points drawn as "Overlap".
    # NOTE(review): this mask selects points where both series are positive,
    # not points where the two series coincide -- confirm this is intended.
    overlap = np.logical_and(predicted > 0, actual > 0)
    plt.plot(x[overlap], actual[overlap], 'k', label='Overlap')

    # Adding plot decorations
    plt.title('Depression Prediction and Actual Values for a Random Index')
    plt.xlabel('Time Step')
    plt.ylabel('Value')
    plt.legend()
    plt.show()

# Example usage (assuming xTest, yTest, and outputs are defined properly)
# plotPredictionsVSActual(xTest, yTest, outputs)


In [None]:
def visualizePattern(data, feature):
    """Plot the values of ``feature`` against ``Year`` as a line chart with markers.

    Args:
        data: DataFrame with a ``'Year'`` column and a ``feature`` column.
        feature: name of the column to plot; also used for the y label and title.
    """
    # Pair the two columns by position. Building pd.Series(data[feature],
    # index=data['Year']) would instead reindex the column by the year labels,
    # which are absent from its existing index, leaving every value NaN.
    years = data['Year'].to_numpy()
    values = data[feature].to_numpy()

    plt.figure(figsize=(10, 6))
    plt.plot(years, values, marker='o', linestyle='-')
    plt.xlabel('Year')
    plt.ylabel(feature)
    plt.title(f'{feature} over Years')
    plt.grid(True)
    plt.show()

# Plot every feature of every dataset. Feature columns are selected by name
# rather than by position: the frames built by yearlyAverage start with 'Year'
# followed directly by the features, so a positional [2:] slice silently skips
# the first feature of each frame.
for dataframe in data:
    for feature in dataframe.columns:
        if feature in ('Entity', 'Year'):
            continue
        visualizePattern(dataframe, feature)


## Forecasting

In [None]:
def movingAverage(data, window):
    """Return the rolling mean of ``data`` over ``window`` observations.

    Args:
        data: pandas Series or DataFrame to smooth.
        window: passed to ``rolling`` as its ``window`` argument.

    Returns:
        Object of the same type as ``data`` holding the rolling means.
    """
    rollingWindow = data.rolling(window=window)
    return rollingWindow.mean()

In [None]:
def exponentialSmoothing(data,)