# Predicting Mental Health Disorders for future years

In [1]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as seabornInstance 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
%matplotlib inline

In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd

# Retrieving the Data

In [3]:
# Importing the data
df_disorders = pd.read_csv("Clean_data/cleaned_diffdisorders.csv")
df_disorders.head()


Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
0,Afghanistan,1990,0.228979,0.721207,0.131001,4.835127,0.454202,5.125291,0.444036,33.768006,66.238514
1,Afghanistan,1991,0.22812,0.719952,0.126395,4.821765,0.447112,5.116306,0.44425,33.768006,66.238514
2,Afghanistan,1992,0.227328,0.718418,0.121832,4.801434,0.44119,5.106558,0.445501,33.768006,66.238514
3,Afghanistan,1993,0.226468,0.717452,0.117942,4.789363,0.435581,5.100328,0.445958,33.768006,66.238514
4,Afghanistan,1994,0.225567,0.717012,0.114547,4.784923,0.431822,5.099424,0.445779,33.768006,66.238514


In [4]:
df_disorders.dtypes

Entity               object
Year                  int64
Schizophrenia       float64
Bipolar_disorder    float64
Eating_disorder     float64
Anxiety             float64
Drug_usage          float64
Depression          float64
Alcohol             float64
latitude            float64
longitude           float64
dtype: object

In [5]:
df_disorders.isnull().sum()

Entity              0
Year                0
Schizophrenia       0
Bipolar_disorder    0
Eating_disorder     0
Anxiety             0
Drug_usage          0
Depression          0
Alcohol             0
latitude            0
longitude           0
dtype: int64

In [6]:
def split_data_by_country(data, country_col):
    """Partition ``data`` into one sub-frame per distinct value of ``country_col``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table that contains the column named by ``country_col``.
    country_col : str
        Name of the column whose distinct values define the partitions.

    Returns
    -------
    dict
        Maps ``"df_<value>"`` to the rows of ``data`` whose ``country_col``
        equals that value. Keys follow the order in which the values first
        appear, and each sub-frame keeps the row index it had in ``data``.
    """
    labels = data[country_col]
    return {f"df_{label}": data[labels == label] for label in labels.unique()}

In [7]:
dfs = split_data_by_country(df_disorders, 'Entity')

In [8]:
dfs

{'df_Afghanistan':          Entity  Year  Schizophrenia  Bipolar_disorder  Eating_disorder  \
 0   Afghanistan  1990       0.228979          0.721207         0.131001   
 1   Afghanistan  1991       0.228120          0.719952         0.126395   
 2   Afghanistan  1992       0.227328          0.718418         0.121832   
 3   Afghanistan  1993       0.226468          0.717452         0.117942   
 4   Afghanistan  1994       0.225567          0.717012         0.114547   
 5   Afghanistan  1995       0.224713          0.716686         0.111129   
 6   Afghanistan  1996       0.223690          0.716388         0.107786   
 7   Afghanistan  1997       0.222424          0.716143         0.103931   
 8   Afghanistan  1998       0.221129          0.716139         0.100343   
 9   Afghanistan  1999       0.220065          0.716323         0.097946   
 10  Afghanistan  2000       0.219501          0.716534         0.097080   
 11  Afghanistan  2001       0.219364          0.716627         0.0967

In [9]:
df_disorders['Entity'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra',
       'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina',
       'Botswana', 'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire",
       'Croatia', 'Cuba', 'Cyprus', 'Czechia',
       'Democratic Republic of Congo', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'England',
       'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia',
       'Fiji', 'Finland', 'France', 'G20', 'Gabon', 'Gambia', 'Georgia',
       'Germany', 'Ghana', 'Greece

In [10]:
dfs['df_Spain'].head()

Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
5280,Spain,1990,0.290367,1.006807,0.506776,5.044806,1.462729,4.469053,1.878107,39.326068,-4.837979
5281,Spain,1991,0.290775,1.008519,0.514091,5.066216,1.531355,4.391617,1.867819,39.326068,-4.837979
5282,Spain,1992,0.291166,1.010115,0.521585,5.079766,1.599079,4.322678,1.859803,39.326068,-4.837979
5283,Spain,1993,0.291521,1.011505,0.528746,5.086631,1.660895,4.262075,1.854322,39.326068,-4.837979
5284,Spain,1994,0.291848,1.012672,0.536276,5.088292,1.710753,4.217638,1.851661,39.326068,-4.837979


In [11]:
# Reuse the per-country split built in the earlier cell instead of
# re-partitioning the whole frame just to read its keys.
df_names = list(dfs)

df_names


['df_Afghanistan',
 'df_Albania',
 'df_Algeria',
 'df_American Samoa',
 'df_Andorra',
 'df_Angola',
 'df_Antigua and Barbuda',
 'df_Argentina',
 'df_Armenia',
 'df_Australia',
 'df_Austria',
 'df_Azerbaijan',
 'df_Bahamas',
 'df_Bahrain',
 'df_Bangladesh',
 'df_Barbados',
 'df_Belarus',
 'df_Belgium',
 'df_Belize',
 'df_Benin',
 'df_Bermuda',
 'df_Bhutan',
 'df_Bolivia',
 'df_Bosnia and Herzegovina',
 'df_Botswana',
 'df_Brazil',
 'df_Brunei',
 'df_Bulgaria',
 'df_Burkina Faso',
 'df_Burundi',
 'df_Cambodia',
 'df_Cameroon',
 'df_Canada',
 'df_Cape Verde',
 'df_Central African Republic',
 'df_Chad',
 'df_Chile',
 'df_China',
 'df_Colombia',
 'df_Comoros',
 'df_Congo',
 'df_Cook Islands',
 'df_Costa Rica',
 "df_Cote d'Ivoire",
 'df_Croatia',
 'df_Cuba',
 'df_Cyprus',
 'df_Czechia',
 'df_Democratic Republic of Congo',
 'df_Denmark',
 'df_Djibouti',
 'df_Dominica',
 'df_Dominican Republic',
 'df_Ecuador',
 'df_Egypt',
 'df_El Salvador',
 'df_England',
 'df_Equatorial Guinea',
 'df_Eritrea

In [12]:
# Rebuild the per-country dictionary (same call as the earlier split cell).
dfs = split_data_by_country(df_disorders, 'Entity')


# Print each dictionary key followed by its frame.
# NOTE(review): this prints every per-country frame in full, which produces
# very long output — consider printing df.shape or df.head() instead.
for df_name, df in dfs.items():
    
    print(df_name, df)

   

df_Afghanistan          Entity  Year  Schizophrenia  Bipolar_disorder  Eating_disorder  \
0   Afghanistan  1990       0.228979          0.721207         0.131001   
1   Afghanistan  1991       0.228120          0.719952         0.126395   
2   Afghanistan  1992       0.227328          0.718418         0.121832   
3   Afghanistan  1993       0.226468          0.717452         0.117942   
4   Afghanistan  1994       0.225567          0.717012         0.114547   
5   Afghanistan  1995       0.224713          0.716686         0.111129   
6   Afghanistan  1996       0.223690          0.716388         0.107786   
7   Afghanistan  1997       0.222424          0.716143         0.103931   
8   Afghanistan  1998       0.221129          0.716139         0.100343   
9   Afghanistan  1999       0.220065          0.716323         0.097946   
10  Afghanistan  2000       0.219501          0.716534         0.097080   
11  Afghanistan  2001       0.219364          0.716627         0.096772   
12  Afghan

df_Guinea       Entity  Year  Schizophrenia  Bipolar_disorder  Eating_disorder  \
2280  Guinea  1990       0.209952          0.540905         0.090024   
2281  Guinea  1991       0.210405          0.540940         0.089949   
2282  Guinea  1992       0.210753          0.540977         0.089732   
2283  Guinea  1993       0.211011          0.541020         0.089359   
2284  Guinea  1994       0.211178          0.541067         0.089330   
2285  Guinea  1995       0.211234          0.541110         0.089184   
2286  Guinea  1996       0.211127          0.541179         0.088718   
2287  Guinea  1997       0.210876          0.541292         0.088424   
2288  Guinea  1998       0.210565          0.541432         0.087977   
2289  Guinea  1999       0.210295          0.541577         0.087805   
2290  Guinea  2000       0.210189          0.541720         0.087807   
2291  Guinea  2001       0.210330          0.541914         0.087834   
2292  Guinea  2002       0.210647          0.542197   

df_Portugal         Entity  Year  Schizophrenia  Bipolar_disorder  Eating_disorder  \
4470  Portugal  1990       0.291521          0.953822         0.366474   
4471  Portugal  1991       0.291240          0.953564         0.368715   
4472  Portugal  1992       0.291069          0.953353         0.372045   
4473  Portugal  1993       0.291012          0.953235         0.375730   
4474  Portugal  1994       0.291056          0.953223         0.379026   
4475  Portugal  1995       0.291192          0.953345         0.382344   
4476  Portugal  1996       0.291599          0.953682         0.388335   
4477  Portugal  1997       0.292330          0.954263         0.396043   
4478  Portugal  1998       0.293161          0.954932         0.404288   
4479  Portugal  1999       0.293874          0.955560         0.411782   
4480  Portugal  2000       0.294260          0.956048         0.417311   
4481  Portugal  2001       0.294300          0.955949         0.420976   
4482  Portugal  2002      

In [13]:
# create a list of years to predict
future_years = [2030, 2040, 2050, 2080]

In [14]:
# Schizophrenia

def predict_schizoprenia(data, years):
    """Extrapolate a straight-line trend of ``Schizophrenia`` against ``Year``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the ``Year`` and ``Schizophrenia`` columns.
    years : list
        Years to predict for; wrapped into a single-column ``Year`` frame.

    Returns
    -------
    Predictions of the fitted model, one per entry of ``years``.

    Side effect: prints the mean squared error measured on the held-out
    20% of rows.
    """
    year_frame = data[['Year']]
    observed = data['Schizophrenia']

    # Fixed seed: the 80/20 split is identical on every run.
    X_train, X_test, y_train, y_test = train_test_split(
        year_frame, observed, test_size=0.2, random_state=42
    )

    trend = LinearRegression().fit(X_train, y_train)

    # Forecast the requested years before scoring the held-out rows.
    forecast = trend.predict(pd.DataFrame(years, columns=['Year']))

    holdout_mse = mean_squared_error(y_test, trend.predict(X_test))
    print("Mean squared error Schizophrenia:", holdout_mse)

    return forecast

In [15]:
# Bipolar_disorder

def predict_Bipolar_disorder(data, years):
    """Extrapolate a straight-line trend of ``Bipolar_disorder`` against ``Year``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the ``Year`` and ``Bipolar_disorder`` columns.
    years : list
        Years to predict for; wrapped into a single-column ``Year`` frame.

    Returns
    -------
    Predictions of the fitted model, one per entry of ``years``.

    Side effect: prints the mean squared error measured on the held-out
    20% of rows.
    """
    year_frame = data[['Year']]
    observed = data['Bipolar_disorder']

    # Fixed seed: the 80/20 split is identical on every run.
    X_train, X_test, y_train, y_test = train_test_split(
        year_frame, observed, test_size=0.2, random_state=42
    )

    trend = LinearRegression().fit(X_train, y_train)

    # Forecast the requested years before scoring the held-out rows.
    forecast = trend.predict(pd.DataFrame(years, columns=['Year']))

    holdout_mse = mean_squared_error(y_test, trend.predict(X_test))
    print("Mean squared error Bipolar_disorder:", holdout_mse)

    return forecast

In [16]:
# Eating_disorder

def predict_Eating_disorder(data, years):
    """Extrapolate a straight-line trend of ``Eating_disorder`` against ``Year``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the ``Year`` and ``Eating_disorder`` columns.
    years : list
        Years to predict for; wrapped into a single-column ``Year`` frame.

    Returns
    -------
    Predictions of the fitted model, one per entry of ``years``.

    Side effect: prints the mean squared error measured on the held-out
    20% of rows.
    """
    year_frame = data[['Year']]
    observed = data['Eating_disorder']

    # Fixed seed: the 80/20 split is identical on every run.
    X_train, X_test, y_train, y_test = train_test_split(
        year_frame, observed, test_size=0.2, random_state=42
    )

    trend = LinearRegression().fit(X_train, y_train)

    # Forecast the requested years before scoring the held-out rows.
    forecast = trend.predict(pd.DataFrame(years, columns=['Year']))

    holdout_mse = mean_squared_error(y_test, trend.predict(X_test))
    print("Mean squared error Eating_disorder:", holdout_mse)

    return forecast

In [17]:
# Anxiety

def predict_Anxiety(data, years):
    """Extrapolate a straight-line trend of ``Anxiety`` against ``Year``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the ``Year`` and ``Anxiety`` columns.
    years : list
        Years to predict for; wrapped into a single-column ``Year`` frame.

    Returns
    -------
    Predictions of the fitted model, one per entry of ``years``.

    Side effect: prints the mean squared error measured on the held-out
    20% of rows.
    """
    year_frame = data[['Year']]
    observed = data['Anxiety']

    # Fixed seed: the 80/20 split is identical on every run.
    X_train, X_test, y_train, y_test = train_test_split(
        year_frame, observed, test_size=0.2, random_state=42
    )

    trend = LinearRegression().fit(X_train, y_train)

    # Forecast the requested years before scoring the held-out rows.
    forecast = trend.predict(pd.DataFrame(years, columns=['Year']))

    holdout_mse = mean_squared_error(y_test, trend.predict(X_test))
    print("Mean squared error Anxiety:", holdout_mse)

    return forecast

In [18]:
# Drug_usage

def predict_Drug_usage(data, years):
    """Extrapolate a straight-line trend of ``Drug_usage`` against ``Year``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the ``Year`` and ``Drug_usage`` columns.
    years : list
        Years to predict for; wrapped into a single-column ``Year`` frame.

    Returns
    -------
    Predictions of the fitted model, one per entry of ``years``.

    Side effect: prints the mean squared error measured on the held-out
    20% of rows.
    """
    year_frame = data[['Year']]
    observed = data['Drug_usage']

    # Fixed seed: the 80/20 split is identical on every run.
    X_train, X_test, y_train, y_test = train_test_split(
        year_frame, observed, test_size=0.2, random_state=42
    )

    trend = LinearRegression().fit(X_train, y_train)

    # Forecast the requested years before scoring the held-out rows.
    forecast = trend.predict(pd.DataFrame(years, columns=['Year']))

    holdout_mse = mean_squared_error(y_test, trend.predict(X_test))
    print("Mean squared error Drug_usage:", holdout_mse)

    return forecast


In [19]:
# Depression

def predict_Depression(data, years):
    """Extrapolate a straight-line trend of ``Depression`` against ``Year``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the ``Year`` and ``Depression`` columns.
    years : list
        Years to predict for; wrapped into a single-column ``Year`` frame.

    Returns
    -------
    Predictions of the fitted model, one per entry of ``years``.

    Side effect: prints the mean squared error measured on the held-out
    20% of rows.
    """
    year_frame = data[['Year']]
    observed = data['Depression']

    # Fixed seed: the 80/20 split is identical on every run.
    X_train, X_test, y_train, y_test = train_test_split(
        year_frame, observed, test_size=0.2, random_state=42
    )

    trend = LinearRegression().fit(X_train, y_train)

    # Forecast the requested years before scoring the held-out rows.
    forecast = trend.predict(pd.DataFrame(years, columns=['Year']))

    holdout_mse = mean_squared_error(y_test, trend.predict(X_test))
    print("Mean squared error Depression:", holdout_mse)

    return forecast


In [20]:
# Alcohol

def predict_Alcohol(data, years):
    """Extrapolate a straight-line trend of ``Alcohol`` against ``Year``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame providing the ``Year`` and ``Alcohol`` columns.
    years : list
        Years to predict for; wrapped into a single-column ``Year`` frame.

    Returns
    -------
    Predictions of the fitted model, one per entry of ``years``.

    Side effect: prints the mean squared error measured on the held-out
    20% of rows.
    """
    year_frame = data[['Year']]
    observed = data['Alcohol']

    # Fixed seed: the 80/20 split is identical on every run.
    X_train, X_test, y_train, y_test = train_test_split(
        year_frame, observed, test_size=0.2, random_state=42
    )

    trend = LinearRegression().fit(X_train, y_train)

    # Forecast the requested years before scoring the held-out rows.
    forecast = trend.predict(pd.DataFrame(years, columns=['Year']))

    holdout_mse = mean_squared_error(y_test, trend.predict(X_test))
    print("Mean squared error Alcohol:", holdout_mse)

    return forecast



In [21]:
predict_schizoprenia(dfs['df_Spain'], future_years)

Mean squared error Schizophrenia: 1.6963381490291262e-07


array([0.29397625, 0.29460482, 0.29523339, 0.2971191 ])

In [22]:
predict_Bipolar_disorder(dfs['df_Spain'], future_years)

Mean squared error Bipolar_disorder: 2.528285453288641e-06


array([1.02437063, 1.0279927 , 1.03161476, 1.04248096])

In [23]:
predict_Eating_disorder(dfs['df_Spain'], future_years)

Mean squared error Eating_disorder: 6.19988119081436e-05


array([0.72657457, 0.77934778, 0.83212099, 0.99044064])

In [24]:
predict_Anxiety(dfs['df_Spain'], future_years)

Mean squared error Anxiety: 0.08786222035918567


array([6.1505668 , 6.49381548, 6.83706416, 7.86681019])

In [25]:
predict_Drug_usage(dfs['df_Spain'], future_years)

Mean squared error Drug_usage: 0.03453121030184602


array([1.71703062, 1.7096168 , 1.70220299, 1.67996155])

In [26]:
predict_Depression(dfs['df_Spain'], future_years)

Mean squared error Depression: 0.04625803510319646


array([5.40399782, 5.74575945, 6.08752108, 7.11280597])

In [27]:
predict_Alcohol(dfs['df_Spain'], future_years)

Mean squared error Alcohol: 0.0025554168964014544


array([1.78897249, 1.76153884, 1.73410519, 1.65180423])

In [28]:
def get_latitude(data):
    """Return the mean of the ``latitude`` column of ``data``."""
    return data["latitude"].mean()


In [29]:
def get_longitude(data):
    """Return the mean of the ``longitude`` column of ``data``."""
    return data["longitude"].mean()


In [30]:
def get_country(data):
    """Return the first distinct value found in the ``Entity`` column of ``data``."""
    return data["Entity"].unique()[0]

In [31]:
get_country(dfs['df_Spain'])

'Spain'

In [32]:
def get_future_years(data, years):
    """Assemble a frame of predicted disorder values for ``years``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame for one entity; it is passed unchanged to ``get_country``,
        to each ``predict_*`` function, and to ``get_latitude`` /
        ``get_longitude``.
    years : list
        Years to predict for; also becomes the ``Year`` column.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``years`` with columns ``Entity``, ``Year``,
        the seven disorder columns, ``latitude`` and ``longitude``.

    Each ``predict_*`` call prints its own mean squared error.
    """
    country = get_country(data)

    # Output column name -> predictor; the tuple order fixes both the call
    # order (and therefore the print order) and the column order.
    predictors = (
        ('Schizophrenia', predict_schizoprenia),
        ('Bipolar_disorder', predict_Bipolar_disorder),
        ('Eating_disorder', predict_Eating_disorder),
        ('Anxiety', predict_Anxiety),
        ('Drug_usage', predict_Drug_usage),
        ('Depression', predict_Depression),
        ('Alcohol', predict_Alcohol),
    )

    columns = {'Year': years}
    for disorder, predictor in predictors:
        columns[disorder] = predictor(data, years)

    lat = get_latitude(data)
    lon = get_longitude(data)

    # The scalar coordinates are broadcast to every predicted row.
    forecast = pd.DataFrame(columns).assign(latitude=lat, longitude=lon)
    forecast.insert(0, 'Entity', country)

    return forecast

In [33]:
get_future_years(dfs['df_Spain'], future_years)

Mean squared error Schizophrenia: 1.6963381490291262e-07
Mean squared error Bipolar_disorder: 2.528285453288641e-06
Mean squared error Eating_disorder: 6.19988119081436e-05
Mean squared error Anxiety: 0.08786222035918567
Mean squared error Drug_usage: 0.03453121030184602
Mean squared error Depression: 0.04625803510319646
Mean squared error Alcohol: 0.0025554168964014544


Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
0,Spain,2030,0.293976,1.024371,0.726575,6.150567,1.717031,5.403998,1.788972,39.326069,-4.837979
1,Spain,2040,0.294605,1.027993,0.779348,6.493815,1.709617,5.745759,1.761539,39.326069,-4.837979
2,Spain,2050,0.295233,1.031615,0.832121,6.837064,1.702203,6.087521,1.734105,39.326069,-4.837979
3,Spain,2080,0.297119,1.042481,0.990441,7.86681,1.679962,7.112806,1.651804,39.326069,-4.837979


In [34]:
# Stack the historical rows and the predicted rows. pd.concat replaces
# DataFrame.append, which is deprecated and removed in pandas 2.0; the
# original row labels of both frames are kept, as append did.
df_Spain_copy = pd.concat([dfs['df_Spain'], get_future_years(dfs['df_Spain'], future_years)])

Mean squared error Schizophrenia: 1.6963381490291262e-07
Mean squared error Bipolar_disorder: 2.528285453288641e-06
Mean squared error Eating_disorder: 6.19988119081436e-05
Mean squared error Anxiety: 0.08786222035918567
Mean squared error Drug_usage: 0.03453121030184602
Mean squared error Depression: 0.04625803510319646
Mean squared error Alcohol: 0.0025554168964014544


  df_Spain_copy = dfs['df_Spain'].append(get_future_years(dfs['df_Spain'], future_years))


In [35]:
df_Spain_copy.head()

Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
5280,Spain,1990,0.290367,1.006807,0.506776,5.044806,1.462729,4.469053,1.878107,39.326068,-4.837979
5281,Spain,1991,0.290775,1.008519,0.514091,5.066216,1.531355,4.391617,1.867819,39.326068,-4.837979
5282,Spain,1992,0.291166,1.010115,0.521585,5.079766,1.599079,4.322678,1.859803,39.326068,-4.837979
5283,Spain,1993,0.291521,1.011505,0.528746,5.086631,1.660895,4.262075,1.854322,39.326068,-4.837979
5284,Spain,1994,0.291848,1.012672,0.536276,5.088292,1.710753,4.217638,1.851661,39.326068,-4.837979


# Predicting disorders in future years

The countries most affected by mental health disorders are Australia, Iran and Palestine.
The World and G20 aggregates are analysed as well.

In [36]:
# Years to predict

predict_years = [2025,2030,2050,2075,2090]

## Australia

In [38]:
get_future_years(dfs['df_Australia'], predict_years)

Mean squared error Schizophrenia: 6.2763415947409635e-06
Mean squared error Bipolar_disorder: 8.359534455981087e-05
Mean squared error Eating_disorder: 0.002039241774395441
Mean squared error Anxiety: 0.039698574463813405
Mean squared error Drug_usage: 0.0014066039494411693
Mean squared error Depression: 0.04183602400402079
Mean squared error Alcohol: 0.0051752203537392604


Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
0,Australia,2025,0.425511,1.237506,1.27015,6.243975,2.290881,5.057451,2.017779,-24.776109,134.755
1,Australia,2030,0.425681,1.240325,1.342824,6.293776,2.267673,5.072203,2.039869,-24.776109,134.755
2,Australia,2050,0.426361,1.2516,1.633522,6.492978,2.174842,5.13121,2.128227,-24.776109,134.755
3,Australia,2075,0.427211,1.265693,1.996894,6.741981,2.058804,5.20497,2.238674,-24.776109,134.755
4,Australia,2090,0.427721,1.274149,2.214917,6.891382,1.98918,5.249225,2.304943,-24.776109,134.755


In [39]:
# Stack the historical rows and the predicted rows. pd.concat replaces
# DataFrame.append, which is deprecated and removed in pandas 2.0; the
# original row labels of both frames are kept, as append did.
df_Australia_copy = pd.concat([dfs['df_Australia'], get_future_years(dfs['df_Australia'], predict_years)])

Mean squared error Schizophrenia: 6.2763415947409635e-06
Mean squared error Bipolar_disorder: 8.359534455981087e-05
Mean squared error Eating_disorder: 0.002039241774395441
Mean squared error Anxiety: 0.039698574463813405
Mean squared error Drug_usage: 0.0014066039494411693
Mean squared error Depression: 0.04183602400402079
Mean squared error Alcohol: 0.0051752203537392604


  df_Australia_copy = dfs['df_Australia'].append(get_future_years(dfs['df_Australia'], predict_years))


In [40]:
df_Australia_copy.to_csv('Data_Tableau/Australia_prediction.csv', index=False)

In [41]:
df_Australia_copy.head()

Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
270,Australia,1990,0.422418,1.211666,0.78032,6.238203,2.481159,4.775071,1.783328,-24.776109,134.755
271,Australia,1991,0.423661,1.216009,0.786834,6.161256,2.481941,4.823211,1.825198,-24.776109,134.755
272,Australia,1992,0.424717,1.219705,0.792016,6.085746,2.478852,4.866148,1.863898,-24.776109,134.755
273,Australia,1993,0.425557,1.222647,0.798384,6.015229,2.472036,4.901506,1.897901,-24.776109,134.755
274,Australia,1994,0.426133,1.224705,0.802795,5.953205,2.468251,4.932319,1.925561,-24.776109,134.755


## Iran

In [46]:
get_future_years(dfs['df_Iran'], predict_years)

Mean squared error Schizophrenia: 7.530067569157545e-08
Mean squared error Bipolar_disorder: 2.3046336541635546e-07
Mean squared error Eating_disorder: 2.143812699482556e-05
Mean squared error Anxiety: 0.0126067093593763
Mean squared error Drug_usage: 0.0007941839022386365
Mean squared error Depression: 0.015938971796451953
Mean squared error Alcohol: 0.0001429171063080223


Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
0,Iran,2025,0.267248,0.858919,0.280205,7.876307,0.85266,5.276488,0.297758,32.647531,54.564352
1,Iran,2030,0.267922,0.860297,0.291075,7.962309,0.88899,5.367088,0.281408,32.647531,54.564352
2,Iran,2050,0.270615,0.865806,0.334555,8.306319,1.034307,5.729489,0.216009,32.647531,54.564352
3,Iran,2075,0.273981,0.872693,0.388905,8.736331,1.215955,6.182489,0.13426,32.647531,54.564352
4,Iran,2090,0.276,0.876825,0.421515,8.994339,1.324943,6.45429,0.08521,32.647531,54.564352


In [47]:
# Stack the historical rows and the predicted rows. pd.concat replaces
# DataFrame.append, which is deprecated and removed in pandas 2.0; the
# original row labels of both frames are kept, as append did.
df_Iran_copy = pd.concat([dfs['df_Iran'], get_future_years(dfs['df_Iran'], predict_years)])

Mean squared error Schizophrenia: 7.530067569157545e-08
Mean squared error Bipolar_disorder: 2.3046336541635546e-07
Mean squared error Eating_disorder: 2.143812699482556e-05
Mean squared error Anxiety: 0.0126067093593763
Mean squared error Drug_usage: 0.0007941839022386365
Mean squared error Depression: 0.015938971796451953
Mean squared error Alcohol: 0.0001429171063080223


  df_Iran_copy = dfs['df_Iran'].append(get_future_years(dfs['df_Iran'], predict_years))


In [48]:
df_Iran_copy.to_csv('Data_Tableau/Iran_prediction.csv', index=False)

In [49]:
df_Iran_copy.head()

Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
2550,Iran,1990,0.264141,0.846347,0.212144,7.367739,0.578207,4.881391,0.38971,32.647531,54.564352
2551,Iran,1991,0.263571,0.848033,0.212615,7.361624,0.583561,4.836484,0.392762,32.647531,54.564352
2552,Iran,1992,0.263092,0.849754,0.2132,7.35904,0.589382,4.795795,0.395005,32.647531,54.564352
2553,Iran,1993,0.262759,0.851071,0.213582,7.354528,0.596429,4.758876,0.396952,32.647531,54.564352
2554,Iran,1994,0.262579,0.851933,0.213997,7.350175,0.604462,4.72986,0.398356,32.647531,54.564352


## Palestine

In [50]:
get_future_years(dfs['df_Palestine'], predict_years)

Mean squared error Schizophrenia: 6.099798346512058e-07
Mean squared error Bipolar_disorder: 3.002231309154103e-07
Mean squared error Eating_disorder: 5.010179829974406e-06
Mean squared error Anxiety: 0.0001751756438539994
Mean squared error Drug_usage: 5.734483951155974e-06
Mean squared error Depression: 0.13039706560670422
Mean squared error Alcohol: 2.5506400140473686e-05


Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
0,Palestine,2025,0.263094,0.838777,0.172895,5.90152,0.460871,7.227485,0.414838,31.462421,34.262717
1,Palestine,2030,0.2637,0.840927,0.178184,5.939995,0.469243,7.290981,0.415212,31.462421,34.262717
2,Palestine,2050,0.266123,0.849528,0.199341,6.093897,0.50273,7.544965,0.416706,31.462421,34.262717
3,Palestine,2075,0.269153,0.860278,0.225786,6.286275,0.544588,7.862445,0.418574,31.462421,34.262717
4,Palestine,2090,0.27097,0.866729,0.241653,6.401701,0.569703,8.052932,0.419695,31.462421,34.262717


In [51]:
# Stack the historical rows and the predicted rows. pd.concat replaces
# DataFrame.append, which is deprecated and removed in pandas 2.0; the
# original row labels of both frames are kept, as append did.
# NOTE(review): the variable name is misspelled ("Plaestine"); it is kept
# because later cells refer to it by this name.
df_Plaestine_copy = pd.concat([dfs['df_Palestine'], get_future_years(dfs['df_Palestine'], predict_years)])

Mean squared error Schizophrenia: 6.099798346512058e-07
Mean squared error Bipolar_disorder: 3.002231309154103e-07
Mean squared error Eating_disorder: 5.010179829974406e-06
Mean squared error Anxiety: 0.0001751756438539994
Mean squared error Drug_usage: 5.734483951155974e-06
Mean squared error Depression: 0.13039706560670422
Mean squared error Alcohol: 2.5506400140473686e-05


  df_Plaestine_copy = dfs['df_Palestine'].append(get_future_years(dfs['df_Palestine'], predict_years))


In [52]:
df_Plaestine_copy.to_csv('Data_Tableau/Palestine_prediction.csv', index=False)

In [53]:
df_Plaestine_copy.head()

Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
4260,Palestine,1990,0.25796,0.825297,0.13554,5.663072,0.38708,6.55768,0.406774,31.462421,34.262717
4261,Palestine,1991,0.258258,0.825157,0.136207,5.662769,0.396908,6.539081,0.407426,31.462421,34.262717
4262,Palestine,1992,0.258569,0.82507,0.137123,5.663231,0.405145,6.529932,0.408173,31.462421,34.262717
4263,Palestine,1993,0.258893,0.825053,0.138195,5.663666,0.412727,6.52673,0.409031,31.462421,34.262717
4264,Palestine,1994,0.259202,0.825088,0.139378,5.663907,0.417382,6.528955,0.409984,31.462421,34.262717


## World

In [54]:
get_future_years(dfs['df_World'], predict_years)

Mean squared error Schizophrenia: 2.1672989722360707e-07
Mean squared error Bipolar_disorder: 5.835720189571359e-07
Mean squared error Eating_disorder: 8.654430586978334e-06
Mean squared error Anxiety: 0.0005086696859279736
Mean squared error Drug_usage: 0.0001269798747785357
Mean squared error Depression: 0.0020984618994329602
Mean squared error Alcohol: 0.00039270078832229883


Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
0,World,2025,0.300881,0.511069,0.183505,3.962475,0.732647,3.541999,1.307951,-36.853571,174.745681
1,World,2030,0.30107,0.511998,0.188114,3.968629,0.734795,3.517337,1.257909,-36.853571,174.745681
2,World,2050,0.301829,0.515714,0.206551,3.993245,0.743389,3.418686,1.057739,-36.853571,174.745681
3,World,2075,0.302777,0.520359,0.229597,4.024014,0.75413,3.295373,0.807527,-36.853571,174.745681
4,World,2090,0.303346,0.523146,0.243424,4.042476,0.760575,3.221386,0.657399,-36.853571,174.745681


In [55]:
# Stack the historical rows and the predicted rows. pd.concat replaces
# DataFrame.append, which is deprecated and removed in pandas 2.0; the
# original row labels of both frames are kept, as append did.
df_World_copy = pd.concat([dfs['df_World'], get_future_years(dfs['df_World'], predict_years)])

Mean squared error Schizophrenia: 2.1672989722360707e-07
Mean squared error Bipolar_disorder: 5.835720189571359e-07
Mean squared error Eating_disorder: 8.654430586978334e-06
Mean squared error Anxiety: 0.0005086696859279736
Mean squared error Drug_usage: 0.0001269798747785357
Mean squared error Depression: 0.0020984618994329602
Mean squared error Alcohol: 0.00039270078832229883


  df_World_copy = dfs['df_World'].append(get_future_years(dfs['df_World'], predict_years))


In [56]:
df_World_copy.to_csv('Data_Tableau/World_prediction.csv', index=False)

In [57]:
df_World_copy.head()

Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
6240,World,1990,0.299383,0.506084,0.155447,3.915345,0.714182,3.59997,1.61995,-36.853571,174.745681
6241,World,1991,0.299667,0.505912,0.155935,3.911624,0.717821,3.6378,1.624584,-36.853571,174.745681
6242,World,1992,0.299902,0.505864,0.156357,3.908025,0.72089,3.67013,1.626981,-36.853571,174.745681
6243,World,1993,0.300065,0.505837,0.156603,3.904543,0.723035,3.695832,1.627014,-36.853571,174.745681
6244,World,1994,0.300132,0.505773,0.156693,3.900894,0.724076,3.713974,1.625008,-36.853571,174.745681


In [65]:
# Reshape the World frame from wide to long: one row per (Year, Disorder)
# pair, with the disorder's value in 'Percentage of Population'.
df_world_melted = df_World_copy.melt(
    id_vars=['Year'],
    value_vars=[
        'Schizophrenia',
        'Bipolar_disorder',
        'Eating_disorder',
        'Anxiety',
        'Drug_usage',
        'Depression',
        'Alcohol',
    ],
    var_name='Disorder',
    value_name='Percentage of Population',
)



In [66]:
df_world_melted

Unnamed: 0,Year,Disorder,Percentage of Population
0,1990,Schizophrenia,0.299383
1,1991,Schizophrenia,0.299667
2,1992,Schizophrenia,0.299902
3,1993,Schizophrenia,0.300065
4,1994,Schizophrenia,0.300132
...,...,...,...
240,2025,Alcohol,1.307951
241,2030,Alcohol,1.257909
242,2050,Alcohol,1.057739
243,2075,Alcohol,0.807527


In [67]:
df_world_melted.to_csv('Data_Tableau/World_melted_prediction.csv', index=False)

## G20

In [58]:
get_future_years(dfs['df_G20'], predict_years)

Mean squared error Schizophrenia: 3.3226862717244425e-07
Mean squared error Bipolar_disorder: 5.614333141560019e-07
Mean squared error Eating_disorder: 1.0565412307974984e-05
Mean squared error Anxiety: 0.0012483162355215397
Mean squared error Drug_usage: 0.0003083066224771167
Mean squared error Depression: 0.0038351512607681863
Mean squared error Alcohol: 0.0006565575515663978


Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
0,G20,2025,0.319205,0.474552,0.207519,3.901706,0.87755,3.35704,1.423124,36.832497,117.438495
1,G20,2030,0.320171,0.473497,0.213759,3.896421,0.887511,3.319704,1.366712,36.832497,117.438495
2,G20,2050,0.324038,0.469274,0.238721,3.875283,0.927356,3.170358,1.141066,36.832497,117.438495
3,G20,2075,0.328872,0.463996,0.269924,3.84886,0.977161,2.983676,0.859009,36.832497,117.438495
4,G20,2090,0.331772,0.46083,0.288646,3.833006,1.007044,2.871667,0.689775,36.832497,117.438495


In [59]:
# Stack the historical rows and the predicted rows. pd.concat replaces
# DataFrame.append, which is deprecated and removed in pandas 2.0; the
# original row labels of both frames are kept, as append did.
df_G20_copy = pd.concat([dfs['df_G20'], get_future_years(dfs['df_G20'], predict_years)])

Mean squared error Schizophrenia: 3.3226862717244425e-07
Mean squared error Bipolar_disorder: 5.614333141560019e-07
Mean squared error Eating_disorder: 1.0565412307974984e-05
Mean squared error Anxiety: 0.0012483162355215397
Mean squared error Drug_usage: 0.0003083066224771167
Mean squared error Depression: 0.0038351512607681863
Mean squared error Alcohol: 0.0006565575515663978


  df_G20_copy = dfs['df_G20'].append(get_future_years(dfs['df_G20'], predict_years))


In [60]:
df_G20_copy.to_csv('Data_Tableau/G20_prediction.csv', index=False)

In [61]:
df_G20_copy.head()

Unnamed: 0,Entity,Year,Schizophrenia,Bipolar_disorder,Eating_disorder,Anxiety,Drug_usage,Depression,Alcohol,latitude,longitude
1950,G20,1990,0.3121,0.483174,0.167977,3.918146,0.805568,3.4567,1.761262,36.832497,117.438495
1951,G20,1991,0.312619,0.482707,0.168985,3.913578,0.811828,3.509092,1.770626,36.832497,117.438495
1952,G20,1992,0.313076,0.482404,0.169904,3.909058,0.817476,3.554178,1.777256,36.832497,117.438495
1953,G20,1993,0.313442,0.482099,0.170584,3.904027,0.821859,3.590222,1.780759,36.832497,117.438495
1954,G20,1994,0.313698,0.481735,0.171033,3.898306,0.824745,3.615649,1.781201,36.832497,117.438495


In [69]:
# Reshape the G20 frame from wide to long: one row per (Year, Disorder)
# pair, with the disorder's value in 'Percentage of Population'.
df_G20_melted = df_G20_copy.melt(
    id_vars=['Year'],
    value_vars=[
        'Schizophrenia',
        'Bipolar_disorder',
        'Eating_disorder',
        'Anxiety',
        'Drug_usage',
        'Depression',
        'Alcohol',
    ],
    var_name='Disorder',
    value_name='Percentage of Population',
)


In [70]:
df_G20_melted

Unnamed: 0,Year,Disorder,Percentage of Population
0,1990,Schizophrenia,0.312100
1,1991,Schizophrenia,0.312619
2,1992,Schizophrenia,0.313076
3,1993,Schizophrenia,0.313442
4,1994,Schizophrenia,0.313698
...,...,...,...
240,2025,Alcohol,1.423124
241,2030,Alcohol,1.366712
242,2050,Alcohol,1.141066
243,2075,Alcohol,0.859009


In [71]:
df_G20_melted.to_csv('Data_Tableau/G20_melted_prediction.csv', index=False)