In [1]:
import pandas as pd
import numpy as np
import datetime
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the anxiety/depression indicator CSV and preview the first rows.
mental_df = pd.read_csv(
    "Resources/Indicators_of_Anxiety_or_Depression_Based_on_Reported_Frequency_of_Symptoms_During_Last_7_Days.csv"
)
mental_df.head()

Unnamed: 0,Indicator,Group,State,Subgroup,Phase,Time Period,Time Period Label,Time Period Start Date,Time Period End Date,Value,Low CI,High CI,Confidence Interval,Quartile Range
0,Symptoms of Depressive Disorder,National Estimate,United States,United States,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,23.5,22.7,24.3,22.7 - 24.3,
1,Symptoms of Depressive Disorder,By Age,United States,18 - 29 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,32.7,30.2,35.2,30.2 - 35.2,
2,Symptoms of Depressive Disorder,By Age,United States,30 - 39 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,25.7,24.1,27.3,24.1 - 27.3,
3,Symptoms of Depressive Disorder,By Age,United States,40 - 49 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,24.8,23.3,26.2,23.3 - 26.2,
4,Symptoms of Depressive Disorder,By Age,United States,50 - 59 years,1,1,"Apr 23 - May 5, 2020",04/23/2020,05/05/2020,23.2,21.5,25.0,21.5 - 25.0,


In [3]:
# Show column dtypes and non-null counts to see which columns contain missing data.
mental_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8901 entries, 0 to 8900
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Indicator               8901 non-null   object 
 1   Group                   8901 non-null   object 
 2   State                   8901 non-null   object 
 3   Subgroup                8901 non-null   object 
 4   Phase                   8901 non-null   object 
 5   Time Period             8901 non-null   int64  
 6   Time Period Label       8901 non-null   object 
 7   Time Period Start Date  8901 non-null   object 
 8   Time Period End Date    8901 non-null   object 
 9   Value                   8604 non-null   float64
 10  Low CI                  8604 non-null   float64
 11  High CI                 8604 non-null   float64
 12  Confidence Interval     8604 non-null   object 
 13  Quartile Range          6120 non-null   object 
dtypes: float64(3), int64(1), object(10)
memory usage: 973.7+ KB

In [4]:
#Clean out unused columns
unused_columns = [
    "Low CI",
    "High CI",
    "Confidence Interval",
    "Quartile Range",
    "Phase",
    "Time Period",
    "Time Period End Date",
    "Time Period Label",
    "State",
]
mental_df = mental_df.drop(columns=unused_columns)

In [5]:
# Summary statistics for the Value column (count excludes missing entries).
# NOTE(review): presumably these quartiles informed the risk cut-offs used in the next cell; confirm.
mental_df['Value'].describe()

count    8604.000000
mean       30.146897
std         7.763409
min         6.400000
25%        24.700000
50%        29.700000
75%        35.100000
max        71.900000
Name: Value, dtype: float64

In [6]:
#Change Values into risk categories for ML
# Conditions are evaluated in order and the first match wins, giving the bands
# <=30 Mild, (30, 35] Moderate, (35, 40] High, >40 Extreme.
# Rows whose Value is missing match nothing and keep the empty-string default.
risk_conditions = [
    mental_df["Value"] <= 30,
    mental_df["Value"] <= 35,
    mental_df["Value"] <= 40,
    mental_df["Value"] > 40,
]
risk_labels = ["Mild", "Moderate", "High", "Extreme"]
mental_df["Risk"] = np.select(risk_conditions, risk_labels, default="")

In [7]:
# Checkpoint the trimmed frame, including the Risk column, to disk.
# index=False is not passed, so the DataFrame index is written as an unnamed first column.
mental_df.to_csv("Resources/Almost_Clean.csv")

In [8]:
# Preview the remaining columns together with the derived Risk label.
mental_df.head()

Unnamed: 0,Indicator,Group,Subgroup,Time Period Start Date,Value,Risk
0,Symptoms of Depressive Disorder,National Estimate,United States,04/23/2020,23.5,Mild
1,Symptoms of Depressive Disorder,By Age,18 - 29 years,04/23/2020,32.7,Moderate
2,Symptoms of Depressive Disorder,By Age,30 - 39 years,04/23/2020,25.7,Mild
3,Symptoms of Depressive Disorder,By Age,40 - 49 years,04/23/2020,24.8,Mild
4,Symptoms of Depressive Disorder,By Age,50 - 59 years,04/23/2020,23.2,Mild


In [9]:
#Create year column for further table manipulation
# The raw dates are month/day/year strings (e.g. "04/23/2020"), so parse them once
# with an explicit format instead of relying on inference. A second conversion with
# a non-matching '%Y%m%d' format had no effect on the already-parsed column.
mental_df['Time Period Start Date'] = pd.to_datetime(
    mental_df['Time Period Start Date'], format='%m/%d/%Y'
)
# The .dt accessor reads the year straight from the datetime column.
mental_df['year'] = mental_df['Time Period Start Date'].dt.year
mental_df = mental_df.rename(columns={"Time Period Start Date": "Time_Period"})
mental_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8901 entries, 0 to 8900
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Indicator    8901 non-null   object        
 1   Group        8901 non-null   object        
 2   Subgroup     8901 non-null   object        
 3   Time_Period  8901 non-null   datetime64[ns]
 4   Value        8604 non-null   float64       
 5   Risk         8901 non-null   object        
 6   year         8901 non-null   int64         
dtypes: datetime64[ns](1), float64(1), int64(1), object(4)
memory usage: 486.9+ KB


In [10]:
# List the distinct Group values present in the data.
mental_df['Group'].unique()

array(['National Estimate', 'By Age', 'By Sex',
       'By Race/Hispanic ethnicity', 'By Education', 'By State',
       'By Disability status', 'By Gender identity',
       'By Sexual orientation'], dtype=object)

In [11]:
# Drop every row whose Group is one of the groupings not kept for the rest of the notebook.
excluded_groups = [
    "By Gender identity",
    "By Sexual orientation",
    "National Estimate",
    "By Disability status",
]
rows_to_drop = mental_df.index[mental_df['Group'].isin(excluded_groups)]
mental_df = mental_df.drop(index=rows_to_drop)

In [13]:
# Discard rows containing any missing value, then derive the 2020 and 2021 subsets.
# The helper 'year' column is only needed for this split, so it is removed from all three frames.
complete_rows = mental_df.dropna()

mental_2020_df = complete_rows[complete_rows['year'] == 2020].drop(columns='year')
mental_2021_df = complete_rows[complete_rows['year'] == 2021].drop(columns='year')
mental_df = complete_rows.drop(columns='year')


In [14]:
# Sanity check: select any remaining 'By Disability status' rows (expected to be none).
disability_mask = mental_df['Group'].eq('By Disability status')
mental_df[disability_mask]

Unnamed: 0,Indicator,Group,Subgroup,Time_Period,Value,Risk


In [15]:
# Preview the first rows of the 2020 subset.
mental_2020_df.head()

Unnamed: 0,Indicator,Group,Subgroup,Time_Period,Value,Risk
1,Symptoms of Depressive Disorder,By Age,18 - 29 years,2020-04-23,32.7,Moderate
2,Symptoms of Depressive Disorder,By Age,30 - 39 years,2020-04-23,25.7,Mild
3,Symptoms of Depressive Disorder,By Age,40 - 49 years,2020-04-23,24.8,Mild
4,Symptoms of Depressive Disorder,By Age,50 - 59 years,2020-04-23,23.2,Mild
5,Symptoms of Depressive Disorder,By Age,60 - 69 years,2020-04-23,18.4,Mild


In [16]:
# Preview the first rows of the 2021 subset.
mental_2021_df.head()

Unnamed: 0,Indicator,Group,Subgroup,Time_Period,Value,Risk
4487,Symptoms of Depressive Disorder,By Age,18 - 29 years,2021-01-06,41.1,Extreme
4488,Symptoms of Depressive Disorder,By Age,30 - 39 years,2021-01-06,31.9,Moderate
4489,Symptoms of Depressive Disorder,By Age,40 - 49 years,2021-01-06,29.1,Mild
4490,Symptoms of Depressive Disorder,By Age,50 - 59 years,2021-01-06,27.1,Mild
4491,Symptoms of Depressive Disorder,By Age,60 - 69 years,2021-01-06,22.3,Mild


In [17]:
# Save the per-year frames; index=False is not passed, so the index is written as well.
mental_2020_df.to_csv("Resources/Almost_Clean_2020.csv")
mental_2021_df.to_csv("Resources/Almost_Clean_2021.csv")
# NOTE(review): the two deletions below are commented out, so 'Value' stays in both
# frames for the following cells. Remove these lines if they are no longer needed.
#del mental_2020_df['Value']
#del mental_2021_df['Value']

In [18]:
#Depression All
# Keep only the depressive-disorder rows; Indicator is constant afterwards, so drop it.
depression_rows = mental_df['Indicator'] == 'Symptoms of Depressive Disorder'
mental_Depression_df = mental_df[depression_rows].drop(columns='Indicator')
mental_Depression_df.head()

Unnamed: 0,Group,Subgroup,Time_Period,Value,Risk
1,By Age,18 - 29 years,2020-04-23,32.7,Moderate
2,By Age,30 - 39 years,2020-04-23,25.7,Mild
3,By Age,40 - 49 years,2020-04-23,24.8,Mild
4,By Age,50 - 59 years,2020-04-23,23.2,Mild
5,By Age,60 - 69 years,2020-04-23,18.4,Mild


In [19]:
#Anxiety All
# Keep only the anxiety-disorder rows; Indicator is constant afterwards, so drop it.
anxiety_rows = mental_df['Indicator'] == 'Symptoms of Anxiety Disorder'
mental_Anxiety_df = mental_df[anxiety_rows].drop(columns='Indicator')
mental_Anxiety_df.head()

Unnamed: 0,Group,Subgroup,Time_Period,Value,Risk
71,By Age,18 - 29 years,2020-04-23,40.2,Extreme
72,By Age,30 - 39 years,2020-04-23,34.4,Moderate
73,By Age,40 - 49 years,2020-04-23,34.1,Moderate
74,By Age,50 - 59 years,2020-04-23,31.0,Moderate
75,By Age,60 - 69 years,2020-04-23,24.9,Mild


In [20]:
#Either All
# Keep only the combined anxiety-or-depression rows; Indicator is constant afterwards, so drop it.
either_rows = mental_df['Indicator'] == 'Symptoms of Anxiety Disorder or Depressive Disorder'
mental_AnxOrDPRS_df = mental_df[either_rows].drop(columns='Indicator')
mental_AnxOrDPRS_df.head()

Unnamed: 0,Group,Subgroup,Time_Period,Value,Risk
141,By Age,18 - 29 years,2020-04-23,46.8,Extreme
142,By Age,30 - 39 years,2020-04-23,39.6,High
143,By Age,40 - 49 years,2020-04-23,38.9,High
144,By Age,50 - 59 years,2020-04-23,35.8,High
145,By Age,60 - 69 years,2020-04-23,28.9,Mild


In [21]:
#Depression 2020
# Build the mask from mental_2020_df itself. A mask taken from mental_df only
# works through implicit index alignment between two different frames.
mental_2020_Depression_df = (
    mental_2020_df
    .loc[mental_2020_df['Indicator'] == 'Symptoms of Depressive Disorder']
    .drop(columns='Indicator')  # constant after filtering
)
mental_2020_Depression_df.head()

Unnamed: 0,Group,Subgroup,Time_Period,Value,Risk
1,By Age,18 - 29 years,2020-04-23,32.7,Moderate
2,By Age,30 - 39 years,2020-04-23,25.7,Mild
3,By Age,40 - 49 years,2020-04-23,24.8,Mild
4,By Age,50 - 59 years,2020-04-23,23.2,Mild
5,By Age,60 - 69 years,2020-04-23,18.4,Mild


In [22]:
#Anxiety 2020
# Build the mask from mental_2020_df itself. A mask taken from mental_df only
# works through implicit index alignment between two different frames.
mental_2020_Anxiety_df = (
    mental_2020_df
    .loc[mental_2020_df['Indicator'] == 'Symptoms of Anxiety Disorder']
    .drop(columns='Indicator')  # constant after filtering
)
mental_2020_Anxiety_df.head()

Unnamed: 0,Group,Subgroup,Time_Period,Value,Risk
71,By Age,18 - 29 years,2020-04-23,40.2,Extreme
72,By Age,30 - 39 years,2020-04-23,34.4,Moderate
73,By Age,40 - 49 years,2020-04-23,34.1,Moderate
74,By Age,50 - 59 years,2020-04-23,31.0,Moderate
75,By Age,60 - 69 years,2020-04-23,24.9,Mild


In [23]:
#Either 2020
# Build the mask from mental_2020_df itself. A mask taken from mental_df only
# works through implicit index alignment between two different frames.
mental_2020_AnxOrDPRS_df = (
    mental_2020_df
    .loc[mental_2020_df['Indicator'] == 'Symptoms of Anxiety Disorder or Depressive Disorder']
    .drop(columns='Indicator')  # constant after filtering
)
mental_2020_AnxOrDPRS_df.head()

Unnamed: 0,Group,Subgroup,Time_Period,Value,Risk
141,By Age,18 - 29 years,2020-04-23,46.8,Extreme
142,By Age,30 - 39 years,2020-04-23,39.6,High
143,By Age,40 - 49 years,2020-04-23,38.9,High
144,By Age,50 - 59 years,2020-04-23,35.8,High
145,By Age,60 - 69 years,2020-04-23,28.9,Mild


In [24]:
#Depression 2021
# Build the mask from mental_2021_df itself. A mask taken from mental_df only
# works through implicit index alignment between two different frames.
mental_2021_Depression_df = (
    mental_2021_df
    .loc[mental_2021_df['Indicator'] == 'Symptoms of Depressive Disorder']
    .drop(columns='Indicator')  # constant after filtering
)
mental_2021_Depression_df.head()

Unnamed: 0,Group,Subgroup,Time_Period,Value,Risk
4487,By Age,18 - 29 years,2021-01-06,41.1,Extreme
4488,By Age,30 - 39 years,2021-01-06,31.9,Moderate
4489,By Age,40 - 49 years,2021-01-06,29.1,Mild
4490,By Age,50 - 59 years,2021-01-06,27.1,Mild
4491,By Age,60 - 69 years,2021-01-06,22.3,Mild


In [25]:
#Anxiety 2021
# Build the mask from mental_2021_df itself. A mask taken from mental_df only
# works through implicit index alignment between two different frames.
mental_2021_Anxiety_df = (
    mental_2021_df
    .loc[mental_2021_df['Indicator'] == 'Symptoms of Anxiety Disorder']
    .drop(columns='Indicator')  # constant after filtering
)
mental_2021_Anxiety_df.head()

Unnamed: 0,Group,Subgroup,Time_Period,Value,Risk
4576,By Age,18 - 29 years,2021-01-06,46.3,Extreme
4577,By Age,30 - 39 years,2021-01-06,40.0,High
4578,By Age,40 - 49 years,2021-01-06,40.0,High
4579,By Age,50 - 59 years,2021-01-06,36.0,High
4580,By Age,60 - 69 years,2021-01-06,29.6,Mild


In [26]:
#Either 2021
# Build the mask from mental_2021_df itself. A mask taken from mental_df only
# works through implicit index alignment between two different frames.
mental_2021_AnxOrDPRS_df = (
    mental_2021_df
    .loc[mental_2021_df['Indicator'] == 'Symptoms of Anxiety Disorder or Depressive Disorder']
    .drop(columns='Indicator')  # constant after filtering
)
mental_2021_AnxOrDPRS_df.head()

Unnamed: 0,Group,Subgroup,Time_Period,Value,Risk
4665,By Age,18 - 29 years,2021-01-06,53.5,Extreme
4666,By Age,30 - 39 years,2021-01-06,45.8,Extreme
4667,By Age,40 - 49 years,2021-01-06,43.9,Extreme
4668,By Age,50 - 59 years,2021-01-06,40.6,Extreme
4669,By Age,60 - 69 years,2021-01-06,34.1,Moderate


In [27]:
# Count missing values per column in the 2020 anxiety subset.
mental_2020_Anxiety_df.isna().sum()

Group          0
Subgroup       0
Time_Period    0
Value          0
Risk           0
dtype: int64

In [28]:
# Write every indicator subset to its own CSV, omitting the DataFrame index.
# Per-year subsets are written first, then the all-years subsets.
exports = [
    (mental_2020_Depression_df, "Resources/mental_2020_Depression_df.csv"),
    (mental_2020_Anxiety_df, "Resources/mental_2020_Anxiety_df.csv"),
    (mental_2020_AnxOrDPRS_df, "Resources/mental_2020_AnxOrDPRS_df.csv"),
    (mental_2021_Depression_df, "Resources/mental_2021_Depression_df.csv"),
    (mental_2021_Anxiety_df, "Resources/mental_2021_Anxiety_df.csv"),
    (mental_2021_AnxOrDPRS_df, "Resources/mental_2021_AnxOrDPRS_df.csv"),
    (mental_Anxiety_df, "Resources/mental_Anxiety_df.csv"),
    (mental_AnxOrDPRS_df, "Resources/mental_AnxOrDPRS_df.csv"),
    (mental_Depression_df, "Resources/mental_Depression_df.csv"),
]

for frame, csv_path in exports:
    frame.to_csv(csv_path, index=False)