In [None]:
# Install the third-party packages this notebook needs.
# `%pip` (rather than `!pip3`) installs into the environment of the running
# kernel, so the imports in the next cell see the packages just installed.
%pip install statsmodels
%pip install vaderSentiment
%pip install nbconvert
%pip install tabulate
%pip install --upgrade scipy

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.
Defaulting to user installation because normal site-packages is not writeable


In [None]:
import numpy as np
import pandas as pd

from IPython.display import display
from statistics import mean

from datetime import date, timedelta
import datetime

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.lines import Line2D

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.gof import chisquare as chisquare
import vaderSentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from statsmodels.stats.anova import anova_lm

import csv

from tabulate import tabulate

import scipy


In [None]:
# Fixed reference day; calculate_recency() measures elapsed days back from it.
current_date = datetime.date(2020, 8, 22)

def convertToTime(row, columnName):
    """Parse the ``YYYY-MM-DD`` string in ``row[columnName]`` and return it as a ``datetime.date``."""
    parsed = datetime.datetime.strptime(row[columnName], "%Y-%m-%d")
    return parsed.date()

def convertToDate(row, columnName):
    """Parse the ``YYYY-MM-DD HH:MM:SS`` timestamp string in ``row[columnName]`` and return only its date part."""
    timestamp = datetime.datetime.strptime(row[columnName], "%Y-%m-%d %H:%M:%S")
    return timestamp.date()
    
def update_end_date(row, columnName, latestDate):
    """Return ``row[columnName]``, or ``latestDate`` when that cell is missing.

    A missing cell is detected by the self-comparison trick: NaN-like values
    compare unequal to themselves.
    """
    value = row[columnName]
    return latestDate if value != value else value

def getDays(row, beginColumnName, endColumnName):
    """Return the number of days from the begin date to the end date of ``row``.

    The end value is passed through ``str()`` before parsing, so it may be a
    ``YYYY-MM-DD`` string or a ``datetime.date``; the begin value must already
    be a ``YYYY-MM-DD`` string.
    """
    day_format = "%Y-%m-%d"
    end_day = datetime.datetime.strptime(str(row[endColumnName]), day_format).date()
    begin_day = datetime.datetime.strptime(row[beginColumnName], day_format).date()
    return (end_day - begin_day).days

def calculate_recency(row, columnName):
    """Return how many days lie between ``row[columnName]`` and the module-level ``current_date``.

    The cell is passed through ``str()`` before being parsed as ``YYYY-MM-DD``.
    """
    event_day = datetime.datetime.strptime(str(row[columnName]), "%Y-%m-%d").date()
    elapsed = current_date - event_day
    return elapsed.days

def lookup_index(row, columnName, array):
    """Return the 1-based position of ``row[columnName]`` in ``array``, or -1 when it is absent."""
    needle = row[columnName]
    if needle in array:
        return array.index(needle) + 1
    return -1

def colour_life_events(row):
    """Return the plot colour name assigned to ``row['life_event_type']``.

    Raises ``KeyError`` for an event type that has no colour assigned.
    """
    palette = dict(
        personal='lightcoral',
        health='orange',
        work='lightgreen',
        financial='teal',
        weather='blueviolet',
        societal='navy',
        other='skyblue',
    )
    event_type = row['life_event_type']
    return palette[event_type]

def remove_rows(base_df, other_df):
    """Return the rows of ``other_df`` whose ``snapshot_id`` also occurs in ``base_df``.

    Parameters
    ----------
    base_df : pandas.DataFrame
        Frame whose ``snapshot_id`` column defines the ids to keep.
    other_df : pandas.DataFrame
        Frame to filter; it is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows of ``other_df`` with their original index labels.
    """
    # `Series not in ndarray` is not an element-wise membership test (it raises
    # or collapses to a single bool); `isin` yields the per-row mask we need.
    keep_mask = other_df['snapshot_id'].isin(base_df['snapshot_id'].values)
    modified_df = other_df.loc[keep_mask].copy()
    return modified_df

def fix_signficance(row):
    """Return the significance answer of ``row``.

    When the ``valence`` cell contains the word 'significance', that cell is
    returned; otherwise the ``significance`` cell is returned unchanged.
    """
    valence_text = row['valence']
    return valence_text if 'significance' in valence_text else row['significance']

def fix_valence(row):
    """Return the valence answer of ``row``.

    When the ``significance`` cell contains the word 'significance', the
    ``valence`` cell is returned; otherwise the ``significance`` cell itself
    is returned.
    """
    significance_text = row['significance']
    if 'significance' not in significance_text:
        return significance_text
    return row['valence']

def get_broad_category(row, categories, column_name):
    """Look up ``row[column_name]`` in ``categories``; return ``"UNKNOWN"`` when it is not a member."""
    key = row[column_name]
    if key not in categories:
        return "UNKNOWN"
    return categories[key]
    
# Lazily created VADER analyzer shared by every compute_sentiment() call.
_VADER_ANALYZER = None


def compute_sentiment(row):
    """Classify ``row['Text']`` as positive (1), neutral (0) or negative (-1) with VADER.

    Rules, applied in order to the VADER ``polarity_scores`` proportions:

    * ``neu`` above 0.8, or ``pos`` equal to ``neg``  -> 0
    * ``pos`` greater than ``neg``                    -> 1
    * ``neg`` greater than ``neu``                    -> -1
    * anything else                                   -> 0

    The analyzer is created once and reused: this function is applied row by
    row, and building a new ``SentimentIntensityAnalyzer`` per call repeats its
    set-up work for every post.
    """
    global _VADER_ANALYZER
    if _VADER_ANALYZER is None:
        _VADER_ANALYZER = SentimentIntensityAnalyzer()

    vs = _VADER_ANALYZER.polarity_scores(row['Text'])

    if vs["neu"] > 0.8 or vs["pos"] == vs["neg"]:
        return 0
    if vs["pos"] > vs["neg"]:
        return 1
    # NOTE(review): this compares neg with *neu*, not pos, so a post with
    # neg > pos but neg <= neu is labelled neutral. Possibly a typo for
    # vs["pos"]; left unchanged because it affects analysis results — confirm.
    if vs["neg"] > vs["neu"]:
        return -1
    return 0

def convert_valence_to_sentiment(row):
    """Map the textual ``row['valence']`` answer onto -1 / 0 / 1.

    The exact answer 'Neither Positive or Negative' is 0; any other answer
    containing 'Positive' is 1; any containing 'Negative' is -1; everything
    else is 0.
    """
    label = row['valence']
    if label == 'Neither Positive or Negative':
        return 0
    if "Positive" in label:
        return 1
    if "Negative" in label:
        return -1
    return 0

def update_status(row, columnName):
    """Normalise the ended/ongoing status stored in ``row[columnName]``.

    A missing (NaN) cell becomes "Ended"; any text containing "ongoing"
    (case-insensitive) becomes "Ongoing"; other text is returned unchanged.
    """
    status = row[columnName]
    if status != status:  # NaN never equals itself
        return "Ended"
    if "ongoing" in status.lower():
        return "Ongoing"
    return status

def update_education_level(row, columnName):
    """Collapse the free-text education answer in ``row[columnName]`` into a coarse level.

    Keywords are matched case-insensitively as substrings, in the order listed
    below; the first match wins. Text matching no keyword is returned as-is.
    """
    raw_answer = row[columnName]
    lowered = raw_answer.lower()
    keyword_groups = (
        (('college',), "College Degree"),
        (('doctoral',), "Doctoral Degree"),
        (('master', 'grad'), "Graduate Degree"),
        (('hs', 'high school'), "High School"),
    )
    for keywords, level in keyword_groups:
        if any(keyword in lowered for keyword in keywords):
            return level
    return raw_answer

def add_avg_date_based_dependent_variable(row, weekly_data):
    """Return the weekly value recorded for the ISO week containing ``row['created_date']``.

    ``weekly_data`` is indexed first by ``row['snapshot_id']`` and then by an
    ``(iso_year, iso_week)`` tuple. Returns -1 when that week has no entry.
    """
    created = datetime.datetime.strptime(str(row['created_date']), "%Y-%m-%d").date()
    iso = created.isocalendar()
    week_key = (iso[0], iso[1])

    per_week = weekly_data[row['snapshot_id']]
    return per_week[week_key] if week_key in per_week else -1

In [None]:
# Right-hand sides of the regression formulas handed to regression().
# The "all" variants are the control terms followed by the matching
# life-event terms, so they are composed here rather than typed out again.
control_variables = ' + '.join([
    'shipley_vocab', 'shipley_abs',
    'openness', 'conscientiousness', 'extraversion', 'agreeableness', 'neuroticism',
    'pos_affect', 'neg_affect', 'stai_trait',
    'education_level', 'psqi', 'age', 'gender',
])

sm_life_event_variables = 'Anticipation + LifeEventFamily + valence + recency + status + Intimacy + Scope'
sr_life_event_variables = sm_life_event_variables + ' + significance_label'
combined_life_event_variables = sm_life_event_variables + ' + data_type'

sr_all_variables = control_variables + ' + ' + sr_life_event_variables
sm_all_variables = control_variables + ' + ' + sm_life_event_variables
combined_all_variables = control_variables + ' + ' + combined_life_event_variables

In [None]:
def load_demographics_data():
    """Read the demographics CSV and return the control-variable columns.

    Keeps a fixed subset of columns, replaces ``educ`` with an
    ``education_level`` column derived by ``update_education_level``, and
    renames the dotted column names to underscore form.
    """
    wanted_columns = ['age','gender','snapshot_id', 'shipley.vocab', 'shipley.abs', 'openness', 'conscientiousness', 'extraversion', 'agreeableness', 'neuroticism','pos.affect','neg.affect','stai.trait','psqi','educ']
    dotted_to_underscore = {
        'shipley.vocab': 'shipley_vocab',
        'shipley.abs': 'shipley_abs',
        'pos.affect': 'pos_affect',
        'neg.affect': 'neg_affect',
        'stai.trait': 'stai_trait'
    }

    raw = pd.read_csv('data/igtbs_demographics_complete.csv', parse_dates=True)
    return (
        raw[wanted_columns]
        .assign(education_level=lambda frame: frame.apply(update_education_level, columnName='educ', axis=1))
        .drop(columns=['educ'])
        .rename(columns=dotted_to_underscore)
    )

In [None]:
def load_survey_categories():
    """Read the self-reported life-event category mapping CSV into a DataFrame."""
    return pd.read_csv('data/Life Events Categories Mapping - Self-Reported Categories.csv')

def load_survey_data_without_categories():
    """Load the curated self-reported life events and derive their regression features.

    Steps:

    * read the CSV and keep the columns of interest;
    * drop events without a begin date and give open-ended events the latest
      date found anywhere in the begin/end columns as their end date;
    * compute ``recency`` (days between the end date and ``current_date``);
    * untangle the ``valence`` / ``significance`` answers with
      ``fix_valence`` / ``fix_signficance`` and turn valence into -1/0/1;
    * normalise ``ended_or_ongoing``;
    * label-encode ``significance`` into ``significance_label``.

    Returns
    -------
    pandas.DataFrame
        Columns: snapshot_id, description, UpdatedBeginDate, UpdatedEndDate,
        valence, ended_or_ongoing, recency, significance_label.
    """
    def _parse_day_or_min(value):
        # NaN cells compare unequal to themselves; map them to date.min so
        # they can never be picked by max() below.
        if value != value:
            return datetime.date.min
        return datetime.datetime.strptime(str(value), "%Y-%m-%d").date()

    df_survey = pd.read_csv('data/Superimposed/LifeEvents_Curated_non_blinded.csv', parse_dates=True)
    df_survey = df_survey[['snapshot_id', 'description','UpdatedBeginDate', 'UpdatedEndDate','life_event_type', 'work_perf_impact', 'significance','valence', 'ended_or_ongoing']]

    # Date manipulation. The latest date is taken over ALL rows, i.e. before
    # rows without a begin date are dropped.
    latest_date = max(
        max(_parse_day_or_min(x) for x in df_survey['UpdatedEndDate']),
        max(_parse_day_or_min(x) for x in df_survey['UpdatedBeginDate']),
    )
    df_survey = df_survey.drop(df_survey[df_survey['UpdatedBeginDate'].isnull()].index)
    df_survey['UpdatedEndDate'] = df_survey.apply(update_end_date, columnName='UpdatedEndDate', latestDate=latest_date, axis=1)
    # num_of_days and UpdatedBeginDate_time are not part of the returned frame
    # (the column selection below drops them); they are still computed so that
    # an unparseable begin date fails here, as it always has.
    df_survey['num_of_days'] = df_survey.apply(getDays, endColumnName='UpdatedEndDate', beginColumnName='UpdatedBeginDate', axis=1)
    df_survey['UpdatedBeginDate_time'] = df_survey.apply(convertToTime, columnName='UpdatedBeginDate', axis=1)
    df_survey['recency'] = df_survey.apply(calculate_recency, columnName='UpdatedEndDate', axis=1)

    # Update values for valence and significance
    df_survey = df_survey.replace({'valence': {np.nan: 'Neither Positive or Negative'}, 'significance': {np.nan: 'Neither Positive or Negative'}})
    df_survey['fixed_signficance'] = df_survey.apply(fix_signficance, axis = 1)
    df_survey['fixed_valence'] = df_survey.apply(fix_valence, axis = 1)
    df_survey = df_survey.drop(columns = ['valence', 'significance'])
    df_survey = df_survey.rename(columns={"fixed_signficance": "significance", "fixed_valence": "valence"})
    df_survey['valence'] = df_survey.apply(convert_valence_to_sentiment, axis=1)
    df_survey['ended_or_ongoing'] = df_survey.apply(update_status, columnName='ended_or_ongoing', axis=1)

    # Select columns we are interested in (explicit copy: a column is added below)
    df_survey = df_survey[['snapshot_id', 'description', 'UpdatedBeginDate', 'UpdatedEndDate', 'significance', 'valence', 'ended_or_ongoing', 'recency']].copy()

    # Label encoding for significance: one vectorised fit_transform gives the
    # same codes as fitting once and calling transform() on every single row.
    df_survey['significance_label'] = LabelEncoder().fit_transform(df_survey['significance'].values)
    df_survey = df_survey.drop(columns=['significance'])

    return df_survey

def load_survey_data():
    """Return the survey life events joined with their category mapping.

    Events are inner-joined to the mapping on ``description`` ==
    ``SR_LifeEvent``; the join keys and two mapping columns are dropped and
    ``ended_or_ongoing`` is renamed to ``status``.
    """
    events = load_survey_data_without_categories()
    category_map = load_survey_categories()

    joined = events.merge(category_map, how="inner", left_on="description", right_on="SR_LifeEvent")
    unneeded = ['description', 'SR_LifeEvent', 'LifeEventFinal', 'LifeEventFamily2']
    return joined.drop(columns=unneeded).rename(columns={'ended_or_ongoing':'status'})

In [None]:
def load_social_media_categories():
    """Read the social-media life-event category mapping CSV into a DataFrame."""
    return pd.read_csv('data/Life Events Categories Mapping - Social Media Categories-2.csv')

def load_social_media_data_without_categories():
    """Load the annotated Facebook posts and derive their regression features.

    Category typos are normalised, posts without a category are dropped, and
    the columns ``created_date``, ``valence`` (VADER sentiment of the text),
    ``recency`` and a normalised ``ended/ongoing`` status are derived. The raw
    ``created_time`` and ``Text`` columns are not returned.
    """
    kept_columns = ['snapshot_id', 'created_time', 'Text', 'final_life_event_category_2', 'ended/ongoing']
    label_typos = {'PostiveMove':'PositiveMove', 'Negative Move':'NegativeMove'}

    posts = pd.read_csv('data/Superimposed/Facebook Data For Life Events-Combined - FB Data.csv')
    posts = posts[kept_columns].replace(label_typos)

    uncategorised = posts['final_life_event_category_2'].isnull()
    posts = posts.drop(posts[uncategorised].index)

    return (
        posts
        .assign(
            created_date=lambda frame: frame.apply(convertToDate, columnName='created_time', axis=1),
            valence=lambda frame: frame.apply(compute_sentiment, axis=1),
        )
        .drop(columns=['created_time','Text'])
        .assign(recency=lambda frame: frame.apply(calculate_recency, columnName='created_date', axis=1))
        .assign(**{'ended/ongoing': lambda frame: frame.apply(update_status, columnName='ended/ongoing', axis=1)})
    )

def load_social_media_data():
    """Return the social-media life events joined with their category mapping.

    Posts are inner-joined to the mapping on ``final_life_event_category_2`` ==
    ``SM_LifeEvent``; the join keys and bookkeeping columns of the mapping are
    dropped and ``ended/ongoing`` is renamed to ``status``.
    """
    posts = load_social_media_data_without_categories()
    category_map = load_social_media_categories()

    joined = posts.merge(category_map, how="inner", left_on='final_life_event_category_2', right_on='SM_LifeEvent')
    unneeded = ['final_life_event_category_2','SM_LifeEvent','LifeEventFamily2','Comments','SignificanceRank']
    return joined.drop(columns=unneeded).rename(columns={'ended/ongoing':'status'})

In [None]:
def load_dailies_data():
    """Read the daily scores CSV and return the columns used as dependent variables.

    Adds ``day_time`` (``day`` parsed to a date), strips the ``.d`` suffix /
    dots from the measure names, and shifts ``sleep`` up by one.
    """
    kept_columns = ['snapshot_id','day', 'alc_status', 'alc.quantity.d', 'anxiety.d', 'pos.affect.d', 'neg.affect.d','sleep.d', 'stress.d']
    short_names = {
        'alc.quantity.d': 'alc_quantity',
        'anxiety.d': 'anxiety',
        'pos.affect.d': 'pos_affect',
        'neg.affect.d': 'neg_affect',
        'sleep.d': 'sleep',
        'stress.d': 'stress',
    }

    raw = pd.read_csv('data/Superimposed/dailies_scores.csv', low_memory=False)
    return (
        raw[kept_columns]
        .assign(day_time=lambda frame: frame.apply(convertToTime, columnName='day', axis=1))
        .rename(columns=short_names)
        .assign(sleep=lambda frame: frame['sleep'] + 1)
    )

In [None]:
def weekly_average_by_snapshot_id(df_dailies, dependent_variable):
    """Average ``dependent_variable`` per participant and ISO week.

    Parameters
    ----------
    df_dailies : pandas.DataFrame
        Needs the columns ``snapshot_id``, ``day_time`` (objects offering
        ``isocalendar()``) and ``dependent_variable``.
    dependent_variable : str
        Name of the column to average.

    Returns
    -------
    dict
        ``{snapshot_id: {(iso_year, iso_week): mean rounded to 3 decimals}}``.
        NaN readings are ignored; a week with only NaN readings has no entry.
    """
    readings_by_participant = {}
    for _, record in df_dailies.iterrows():
        readings_by_participant.setdefault(record['snapshot_id'], []).append(
            (record['day_time'], record[dependent_variable])
        )

    weekly_means_by_participant = {}
    for participant, readings in readings_by_participant.items():
        readings_per_week = {}
        for day, reading in sorted(readings):
            iso = day.isocalendar()
            week_key = (iso[0], iso[1])
            if reading != reading:  # skip NaN readings
                continue
            readings_per_week.setdefault(week_key, []).append(reading)

        weekly_means_by_participant[participant] = {
            week_key: round(mean(week_readings), 3)
            for week_key, week_readings in readings_per_week.items()
        }

    return weekly_means_by_participant

In [None]:
def build_df_for_regression_baseline(dependent_variable):
    """Build (or load from cache) the frame for the control-variables-only regression.

    Each dailies row is joined with the participant's demographics and
    ``dependent_variable`` is replaced by that participant's mean for the ISO
    week of the row; rows whose week has no mean are left out. The result is
    cached as a CSV under ``Linear Regression/FixedWeek/`` and re-read from
    there on later calls.

    Parameters
    ----------
    dependent_variable : str
        Dailies column to model (for example ``'stress'``).

    Returns
    -------
    pandas.DataFrame
        The frame without ``day`` / ``snapshot_id`` and without rows that
        contain missing values.
    """
    cache_path = 'Linear Regression/FixedWeek/baseline_regression_'+dependent_variable+'.csv'
    try:
        merged_data = pd.read_csv(cache_path)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # Only a missing or unreadable cache triggers a rebuild. A bare
        # `except:` here would also swallow KeyboardInterrupt and real bugs.
        df_demographics = load_demographics_data()
        df_dailies = load_dailies_data()
        weekly_data_by_snapshot_id = weekly_average_by_snapshot_id(df_dailies, dependent_variable)

        df_dailies = df_dailies[[dependent_variable, 'day', 'snapshot_id']]
        merged_data = pd.merge(df_dailies, df_demographics, how="inner", on=["snapshot_id"])

        data_per_week = []
        # Placeholder; every row that is kept gets its weekly mean below.
        merged_data[dependent_variable] = -1

        for _, row in merged_data.iterrows():
            start_date = datetime.datetime.strptime(str(row['day']), "%Y-%m-%d").date()
            year, week = start_date.isocalendar()[0], start_date.isocalendar()[1]
            if (year, week) in weekly_data_by_snapshot_id[row['snapshot_id']]:
                v = weekly_data_by_snapshot_id[row['snapshot_id']][(year, week)]
                row[dependent_variable] = v
                data_per_week.append(row.copy(deep=True))

        merged_data = pd.DataFrame(data_per_week, columns=merged_data.columns)
        merged_data.reset_index(drop=True, inplace=True)
        merged_data.to_csv(cache_path, index=False)

    merged_data = merged_data.drop(columns=['day', 'snapshot_id'])
    merged_data = merged_data.dropna()
    return merged_data

In [None]:
def build_df_for_regression_survey_average_weekly(dependent_variable):
    """Build (or load from cache) the regression frame for self-reported life events.

    Every survey event (joined with demographics) is expanded into one row per
    sampled week between its begin and end date, carrying the participant's
    weekly mean of ``dependent_variable``; sampled weeks without a mean are
    skipped. The result is cached as a CSV under ``Linear Regression/FixedWeek/``.

    Parameters
    ----------
    dependent_variable : str
        Dailies column to model (for example ``'stress'``).

    Returns
    -------
    pandas.DataFrame
        The frame without the date, id, ``year`` and ``week`` columns and
        without rows that contain missing values.
    """
    cache_path = 'Linear Regression/FixedWeek/linear_regression_survey_weekly_'+dependent_variable+'.csv'
    try:
        merged_data = pd.read_csv(cache_path)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # Only a missing or unreadable cache triggers a rebuild. A bare
        # `except:` here would also swallow KeyboardInterrupt and real bugs.
        df_survey = load_survey_data()
        df_demographics = load_demographics_data()
        df_dailies = load_dailies_data()
        merged_data = pd.merge(df_survey, df_demographics, how="inner", on=["snapshot_id"])

        weekly_data_by_snapshot_id = weekly_average_by_snapshot_id(df_dailies, dependent_variable)
        data_per_week = []
        one_week = timedelta(days=7)

        # Placeholders; every emitted row overwrites them.
        merged_data[dependent_variable] = -1
        merged_data['year'] = -1
        merged_data['week'] = -1
        for _, row in merged_data.iterrows():
            start_date = datetime.datetime.strptime(str(row['UpdatedBeginDate']), "%Y-%m-%d").date()
            end_date = datetime.datetime.strptime(str(row['UpdatedEndDate']), "%Y-%m-%d").date()

            # NOTE(review): stepping 7 days from the begin date can skip the
            # ISO week that contains end_date (e.g. Wed -> following Mon only
            # samples the first week). Kept as-is because changing it alters
            # the regression input — confirm whether that is intended.
            while start_date <= end_date:
                year, week = start_date.isocalendar()[0], start_date.isocalendar()[1]
                if (year, week) in weekly_data_by_snapshot_id[row['snapshot_id']]:
                    v = weekly_data_by_snapshot_id[row['snapshot_id']][(year, week)]
                    row[dependent_variable] = v
                    row['year'] = year
                    row['week'] = week

                    data_per_week.append(row.copy(deep=True))

                start_date += one_week
        merged_data = pd.DataFrame(data_per_week, columns=merged_data.columns)
        merged_data.reset_index(drop=True, inplace=True)
        merged_data.to_csv(cache_path, index=False)

    merged_data = merged_data.drop(columns=[ 'UpdatedBeginDate', 'UpdatedEndDate', 'snapshot_id', 'year', 'week'])
    merged_data = merged_data.dropna()

    return merged_data

In [None]:
def build_df_for_regression_social_media_average_weekly(dependent_variable):
    """Build (or load from cache) the regression frame for social-media life events.

    Every post (joined with demographics) receives the participant's weekly
    mean of ``dependent_variable`` for the ISO week of the post; posts whose
    week has no mean are dropped. The result is cached as a CSV under
    ``Linear Regression/FixedWeek/``.

    Parameters
    ----------
    dependent_variable : str
        Dailies column to model (for example ``'stress'``).

    Returns
    -------
    pandas.DataFrame
        The frame without ``snapshot_id`` / ``created_date`` and without rows
        that contain missing values.
    """
    cache_path = 'Linear Regression/FixedWeek/linear_regression_social_media_weekly_'+dependent_variable+'.csv'
    try:
        merged_data = pd.read_csv(cache_path)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # Only a missing or unreadable cache triggers a rebuild. A bare
        # `except:` here would also swallow KeyboardInterrupt and real bugs.
        df_social_media = load_social_media_data()
        df_demographics = load_demographics_data()
        df_dailies = load_dailies_data()
        merged_data = pd.merge(df_social_media, df_demographics, how="inner", on=["snapshot_id"])

        weekly_data_by_snapshot_id = weekly_average_by_snapshot_id(df_dailies, dependent_variable)

        merged_data[dependent_variable] = merged_data.apply(add_avg_date_based_dependent_variable, weekly_data=weekly_data_by_snapshot_id, axis=1)
        # -1 is the "no weekly mean available" marker returned by
        # add_avg_date_based_dependent_variable.
        # NOTE(review): assumes a genuine weekly mean is never exactly -1 — confirm.
        merged_data = merged_data[merged_data[dependent_variable] != -1]
        merged_data.to_csv(cache_path, index=False)
    merged_data = merged_data.drop(columns=['snapshot_id', 'created_date'])
    merged_data = merged_data.dropna()

    return merged_data

In [None]:
def build_df_for_regression_combined_survey_social_media(dependent_variable):
    """Stack the survey and social-media regression frames into one frame.

    Each source is tagged in a ``data_type`` column ('Survey' / 'Social Media').
    ``significance_label`` is removed from the survey rows, and rows with
    missing values are dropped from the stacked result.
    """
    social_media_rows = build_df_for_regression_social_media_average_weekly(dependent_variable).assign(data_type='Social Media')

    survey_rows = (
        build_df_for_regression_survey_average_weekly(dependent_variable)
        .assign(data_type='Survey')
        .drop(columns=['significance_label'])
    )

    return pd.concat([survey_rows, social_media_rows]).dropna()

In [None]:
def regression(X_train, dependent_variable, formula):
    """Fit an OLS model ``dependent_variable ~ formula`` on ``X_train`` and return the fitted results."""
    full_formula = dependent_variable+'~'+formula
    return smf.ols(formula=full_formula, data=X_train).fit()

## Run Regression  

In [None]:
def baseline_control_variables(dependent_variable, split=0):
    """Regress ``dependent_variable`` on the control variables using the baseline (dailies) frame.

    The input frame is also written to a CSV under ``Linear Regression/``.

    Returns the model fitted on all rows when ``split`` is 0; otherwise a
    ``(X_test, model)`` pair where ``split`` is the held-out fraction and the
    model is fitted on the remaining rows.
    """
    frame = build_df_for_regression_baseline(dependent_variable)
    frame.to_csv('Linear Regression/baseline_control__variables_for_'+dependent_variable+'.csv', index=False)

    if split != 0:
        train_rows, held_out_rows = train_test_split(frame, test_size=split, random_state=85)
        return held_out_rows, regression(train_rows, dependent_variable, control_variables)
    return regression(frame, dependent_variable, control_variables)

In [None]:
def life_event_variables_all_data(dependent_variable, split=0):
    """Regress ``dependent_variable`` on the life-event variables using survey + social-media rows.

    The input frame is also written to a CSV under ``Linear Regression/``.

    Returns the model fitted on all rows when ``split`` is 0; otherwise a
    ``(X_test, model)`` pair where ``split`` is the held-out fraction and the
    model is fitted on the remaining rows.
    """
    frame = build_df_for_regression_combined_survey_social_media(dependent_variable)
    frame.to_csv('Linear Regression/life_event_variables_all_data_for_'+dependent_variable+'.csv', index=False)

    if split != 0:
        train_rows, held_out_rows = train_test_split(frame, test_size=split, random_state=85)
        return held_out_rows, regression(train_rows, dependent_variable, combined_life_event_variables)
    return regression(frame, dependent_variable, combined_life_event_variables)

In [None]:
def life_event_variables_sr(dependent_variable, split=0):
    """Regress ``dependent_variable`` on the life-event variables using the self-reported (survey) rows.

    The input frame is also written to a CSV under ``Linear Regression/``.

    Returns the model fitted on all rows when ``split`` is 0; otherwise a
    ``(X_test, model)`` pair where ``split`` is the held-out fraction and the
    model is fitted on the remaining rows.
    """
    frame = build_df_for_regression_survey_average_weekly(dependent_variable)
    frame.to_csv('Linear Regression/life_event_variables_sr_for_'+dependent_variable+'.csv', index=False)

    if split != 0:
        train_rows, held_out_rows = train_test_split(frame, test_size=split, random_state=85)
        return held_out_rows, regression(train_rows, dependent_variable, sr_life_event_variables)
    return regression(frame, dependent_variable, sr_life_event_variables)

In [None]:
def life_event_variables_sm(dependent_variable, split=0):
    """Regress ``dependent_variable`` on the life-event variables using the social-media rows.

    The input frame is also written to a CSV under ``Linear Regression/``.

    Returns the model fitted on all rows when ``split`` is 0; otherwise a
    ``(X_test, model)`` pair where ``split`` is the held-out fraction and the
    model is fitted on the remaining rows.
    """
    frame = build_df_for_regression_social_media_average_weekly(dependent_variable)
    frame.to_csv('Linear Regression/life_event_variables_sm_for_'+dependent_variable+'.csv', index=False)

    if split != 0:
        train_rows, held_out_rows = train_test_split(frame, test_size=split, random_state=85)
        return held_out_rows, regression(train_rows, dependent_variable, sm_life_event_variables)
    return regression(frame, dependent_variable, sm_life_event_variables)

In [None]:
def all_variables_all_data(dependent_variable, split=0):
    """Regress ``dependent_variable`` on control + life-event variables using survey + social-media rows.

    The input frame is also written to a CSV under ``Linear Regression/``.

    Returns the model fitted on all rows when ``split`` is 0; otherwise a
    ``(X_test, model)`` pair where ``split`` is the held-out fraction and the
    model is fitted on the remaining rows.
    """
    frame = build_df_for_regression_combined_survey_social_media(dependent_variable)
    frame.to_csv('Linear Regression/all_variables_all_data_for_'+dependent_variable+'.csv', index=False)

    if split != 0:
        train_rows, held_out_rows = train_test_split(frame, test_size=split, random_state=85)
        return held_out_rows, regression(train_rows, dependent_variable, combined_all_variables)
    return regression(frame, dependent_variable, combined_all_variables)

In [None]:
def all_variables_sr(dependent_variable, split=0):
    """Regress ``dependent_variable`` on control + life-event variables using the self-reported (survey) rows.

    The input frame is also written to a CSV under ``Linear Regression/``.

    Returns the model fitted on all rows when ``split`` is 0; otherwise a
    ``(X_test, model)`` pair where ``split`` is the held-out fraction and the
    model is fitted on the remaining rows.
    """
    frame = build_df_for_regression_survey_average_weekly(dependent_variable)
    frame.to_csv('Linear Regression/all_variables_sr_for_'+dependent_variable+'.csv', index=False)

    if split != 0:
        train_rows, held_out_rows = train_test_split(frame, test_size=split, random_state=85)
        return held_out_rows, regression(train_rows, dependent_variable, sr_all_variables)
    return regression(frame, dependent_variable, sr_all_variables)

In [None]:
def all_variables_sm(dependent_variable, split=0):
    """Regress ``dependent_variable`` on control + life-event variables using the social-media rows.

    The input frame is also written to a CSV under ``Linear Regression/``.

    Returns the model fitted on all rows when ``split`` is 0; otherwise a
    ``(X_test, model)`` pair where ``split`` is the held-out fraction and the
    model is fitted on the remaining rows.
    """
    frame = build_df_for_regression_social_media_average_weekly(dependent_variable)
    frame.to_csv('Linear Regression/all_variables_sm_for_'+dependent_variable+'.csv', index=False)

    if split != 0:
        train_rows, held_out_rows = train_test_split(frame, test_size=split, random_state=85)
        return held_out_rows, regression(train_rows, dependent_variable, sm_all_variables)
    return regression(frame, dependent_variable, sm_all_variables)

# Regression Results

In [None]:
def run_baseline_fn(fn, dependent_variable):
    """Fit the baseline model and score it.

    ``fn`` is called twice: once on all rows (that model is returned and
    supplies the adjusted R-squared) and once with a 20% hold-out, whose
    predictions are correlated (Pearson r) with the held-out observations.

    Returns ``(model, [adjusted R-squared, Pearson r, "-", "-"])`` with both
    numbers rounded to 3 decimals; the two dashes fill the F-statistic and
    p-value slots, which do not apply to the baseline itself.
    """
    full_model = fn(dependent_variable)
    held_out, split_model = fn(dependent_variable, 0.20)

    predicted = split_model.predict(held_out).values
    observed = held_out[dependent_variable].values
    correlation = scipy.stats.pearsonr(predicted, observed)[0]

    scores = [round(full_model.rsquared_adj, 3), round(correlation, 3), "-", "-"]
    return full_model, scores

def run_regression_fn(baseline, fn, dependent_variable):
    """Fit a model with ``fn``, score it, and compare it with ``baseline`` via ``anova_lm``.

    ``fn`` is called twice: once on all rows (adjusted R-squared and ANOVA)
    and once with a 20% hold-out, whose predictions are correlated
    (Pearson r) with the held-out observations.

    Returns ``[adjusted R-squared, Pearson r, F-statistic, p-value]``, each
    rounded to 3 decimals; F and p are taken from the last row of the
    ANOVA table.
    """
    full_model = fn(dependent_variable)
    held_out, split_model = fn(dependent_variable, 0.20)

    predicted = split_model.predict(held_out).values
    observed = held_out[dependent_variable].values
    correlation = scipy.stats.pearsonr(predicted, observed)[0]

    # NOTE(review): run_all_regression passes a baseline fitted on the dailies
    # frame while `full_model` is fitted on a life-event frame; an ANOVA model
    # comparison normally presumes both models use the same observations —
    # confirm that these F / p values are meaningful.
    comparison = anova_lm(baseline, full_model)
    f_statistic = comparison.F.values[-1]
    p_value = comparison['Pr(>F)'].values[-1]

    return [round(full_model.rsquared_adj, 3), round(correlation, 3), round(f_statistic, 3), round(p_value, 3)]


def run_all_regression(dependent_variable):
    """Run the baseline regression plus all six life-event regressions for ``dependent_variable``.

    Returns a list of rows ``[dataset, variables, adjusted R-squared,
    Pearson r, F-statistic, p-value]``: the baseline row first, then the
    combined, social-media and survey data sets, each with life-event-only
    and control + life-event variables.
    """
    baseline_model, baseline_scores = run_baseline_fn(baseline_control_variables, dependent_variable)
    rows = [["Dailies", "Control"] + baseline_scores]

    comparisons = (
        ("Social Media + Survey", "Life Event", life_event_variables_all_data),
        ("Social Media + Survey", "Control + Life Event", all_variables_all_data),
        ("Social Media", "Life Event", life_event_variables_sm),
        ("Social Media", "Control + Life Event", all_variables_sm),
        ("Survey", "Life Event", life_event_variables_sr),
        ("Survey", "Control + Life Event", all_variables_sr),
    )
    for dataset_label, variables_label, fit_fn in comparisons:
        scores = run_regression_fn(baseline_model, fit_fn, dependent_variable)
        rows.append([dataset_label, variables_label] + scores)

    return rows

### STRESS

In [None]:
# Regression summary table for the 'stress' dailies measure.
d = run_all_regression('stress')
regression_results = pd.DataFrame(
    data=d,
    columns=[
        "Dataset",
        "Variables",
        "Adjusted R-square",
        "Pearson Correlation",
        "F-statistic",
        "P-value",
    ],
)
regression_results

### SLEEP 

In [None]:
# Regression summary table for the 'sleep' dailies measure.
d = run_all_regression('sleep')
regression_results = pd.DataFrame(
    data=d,
    columns=[
        "Dataset",
        "Variables",
        "Adjusted R-square",
        "Pearson Correlation",
        "F-statistic",
        "P-value",
    ],
)
regression_results

### ANXIETY 

In [None]:
# Regression summary table for the 'anxiety' dailies measure.
d = run_all_regression('anxiety')
regression_results = pd.DataFrame(
    data=d,
    columns=[
        "Dataset",
        "Variables",
        "Adjusted R-square",
        "Pearson Correlation",
        "F-statistic",
        "P-value",
    ],
)
regression_results