In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso, LinearRegression
from sklearn.metrics import mean_squared_error

import seaborn as sns
import matplotlib.pyplot as plt



# Extremity

In [7]:
prob_df = pd.read_csv("z_score_table.csv")

In [8]:
prob_df

Unnamed: 0,outlet,Extremity,label,Drives_zscore,Cognition_zscore,emo_pos_zscore,emo_neg_zscore,emo_anx_zscore,emo_anger_zscore,emo_sad_zscore,Social_zscore,Lifestyle_zscore,Physical_zscore,focuspast_zscore,focuspresent_zscore,focusfuture_zscore,Moral_zscore,Affect_zscore,Moral-Emotional_zscore
0,Daily Caller,0.499892,0,-0.953551,0.005567,-0.506355,0.535480,-0.462869,0.648593,-0.260377,0.256576,-0.298635,-0.217865,0.292218,-0.565834,-0.748367,0.023332,-1.582358,0.359187
1,NYTimes,0.499977,1,-1.620571,-0.469588,-0.506355,-0.377186,-0.462869,-0.556865,1.533608,-1.588470,-0.558101,-0.554234,-0.683851,-0.074194,-0.558605,-0.169902,0.312998,1.225537
2,HuffPost,0.499961,1,0.991925,0.718300,-0.506355,-0.911429,-0.462869,-0.556865,-0.260377,0.743354,0.716862,-0.751133,-0.889910,-0.008153,1.022750,-0.069715,-0.472217,-0.381973
3,FoxNews,0.499971,0,-0.167949,1.145940,-0.506355,0.713561,0.452696,-0.556865,1.533608,0.337706,-1.510968,-0.357335,0.129540,0.131267,-0.862225,-0.487775,0.617208,0.986738
4,CNN,0.499972,1,-0.167949,0.623269,-0.506355,0.713561,1.368261,0.319832,-0.260377,0.403133,-1.077033,-0.152232,-0.526595,1.907039,-0.558605,-0.725825,1.675490,-0.685518
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,HuffPost,0.499933,1,-0.986902,0.683744,0.275267,0.112537,0.414548,-0.556865,-0.260377,-0.101965,0.609497,-0.562438,-0.504905,0.512838,0.567320,0.056417,1.450810,-0.204633
176,HuffPost,0.495781,1,0.006217,-0.987939,2.144361,-0.332665,-0.462869,-0.556865,1.683107,-0.630616,-0.781779,-0.111211,-0.542863,-0.485117,-0.836923,3.101734,-0.799911,3.534732
177,HuffPost,0.499833,1,-1.335235,0.830610,0.309250,-0.377186,-0.462869,0.319832,-0.260377,1.224898,-0.535733,0.438465,2.493796,-1.651844,-0.862225,-1.916075,1.557903,1.822866
178,HuffPost,0.499985,1,0.954868,-1.186640,-0.506355,1.225544,-0.462869,0.319832,1.533608,0.418836,0.515552,2.391047,-1.584003,1.679565,-0.862225,0.894964,0.324366,0.956734


In [9]:
prob_df.columns

Index(['outlet', 'Extremity', 'label', 'Drives_zscore', 'Cognition_zscore',
       'emo_pos_zscore', 'emo_neg_zscore', 'emo_anx_zscore',
       'emo_anger_zscore', 'emo_sad_zscore', 'Social_zscore',
       'Lifestyle_zscore', 'Physical_zscore', 'focuspast_zscore',
       'focuspresent_zscore', 'focusfuture_zscore', 'Moral_zscore',
       'Affect_zscore', 'Moral-Emotional_zscore'],
      dtype='object')

In [10]:
columns_to_drop = ['outlet','label', 'Drives_zscore', 'Cognition_zscore',
       'emo_pos_zscore', 'emo_neg_zscore', 'emo_anx_zscore',
       'emo_anger_zscore', 'emo_sad_zscore', 'Social_zscore',
       'Lifestyle_zscore', 'Physical_zscore', 'focuspast_zscore',
       'focuspresent_zscore', 'focusfuture_zscore']

In [11]:
reg_df = prob_df.drop(columns=columns_to_drop)

In [12]:
X = reg_df.drop('Extremity', axis=1)
y = reg_df['Extremity']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
reg_df.columns

Index(['Extremity', 'Moral_zscore', 'Affect_zscore', 'Moral-Emotional_zscore'], dtype='object')

In [14]:
reg_col = ['Moral_zscore', 'Affect_zscore', 'Moral-Emotional_zscore']

In [15]:
# Define the list of alpha values
alpha_values = [0.00001, 0.0001, 0.001, 0.01, 0.1, 0.5, 0.75, 1]

In [16]:
# Create an empty dataframe to store the results
results_df = pd.DataFrame(columns=['Model','Alpha', 'MSE', 'Moral_Coefficient',
                                   'Affect_Coefficient', 'ME_Coefficient'])

## Lasso

In [17]:
for alpha in alpha_values:
    # Lasso Regression
    lasso_model = Lasso(alpha=alpha)
    lasso_model.fit(X_train, y_train)
    y_pred_lasso = lasso_model.predict(X_test)
    lasso_mse = mean_squared_error(y_test, y_pred_lasso)
    lasso_coefficients = lasso_model.coef_

    # Append the Lasso regression results to the dataframe
    results_df = results_df.append({'Model': 'Lasso Regression','Alpha': alpha, 'MSE': lasso_mse, 
                                    'Moral_Coefficient': lasso_coefficients[0], 
                                    'Affect_Coefficient': lasso_coefficients[1], 
                                    'ME_Coefficient': lasso_coefficients[2]}, ignore_index=True)

## Ridge

In [18]:
for alpha in alpha_values:
    # Ridge Regression
    ridge_model = Ridge(alpha=alpha)
    ridge_model.fit(X_train, y_train)
    y_pred_ridge = ridge_model.predict(X_test)
    ridge_mse = mean_squared_error(y_test, y_pred_ridge)
    ridge_coefficients = ridge_model.coef_

    # Append the Ridge regression results to the dataframe
    results_df = results_df.append({'Model': 'Ridge Regression', 'Alpha': alpha, 'MSE': ridge_mse, 
                                    'Moral_Coefficient': ridge_coefficients[0], 
                                    'Affect_Coefficient': ridge_coefficients[1], 
                                    'ME_Coefficient': ridge_coefficients[2]}, ignore_index=True)

## Linear

In [19]:
# Linear Regression
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)
linear_mse = mean_squared_error(y_test, y_pred_linear)
linear_coefficients = linear_model.coef_

# Append the linear regression results to the dataframe
results_df = results_df.append({'Model': 'Linear Regression', 'Alpha': 'N/A', 'Model': 'Linear Regression', 'MSE': linear_mse, 
                                'Moral_Coefficient': linear_coefficients[0], 
                                'Affect_Coefficient': linear_coefficients[1], 
                                'ME_Coefficient': linear_coefficients[2]}, ignore_index=True)

## Linear without Split

In [20]:
# Linear Regression
linear_model = LinearRegression()
linear_model.fit(X, y)
linear_coefficients = linear_model.coef_

# Append the linear regression results to the dataframe
results_df = results_df.append({'Model': 'Linear Regression (no split)', 'Alpha': 'N/A', 'Model': 'Linear Regression', 'MSE': 'N/A', 
                                'Moral_Coefficient': linear_coefficients[0], 
                                'Affect_Coefficient': linear_coefficients[1], 
                                'ME_Coefficient': linear_coefficients[2]}, ignore_index=True)

In [21]:
results_df

Unnamed: 0,Model,Alpha,MSE,Moral_Coefficient,Affect_Coefficient,ME_Coefficient
0,Lasso Regression,1e-05,0.002317,-0.006039,0.002649,0.005246
1,Lasso Regression,0.0001,0.002315,-0.005957,0.002553,0.005159
2,Lasso Regression,0.001,0.0023,-0.005138,0.001597,0.004284
3,Lasso Regression,0.01,0.002233,-0.0,0.0,0.0
4,Lasso Regression,0.1,0.002233,-0.0,0.0,0.0
5,Lasso Regression,0.5,0.002233,-0.0,0.0,0.0
6,Lasso Regression,0.75,0.002233,-0.0,0.0,0.0
7,Lasso Regression,1.0,0.002233,-0.0,0.0,0.0
8,Ridge Regression,1e-05,0.002317,-0.006048,0.00266,0.005256
9,Ridge Regression,0.0001,0.002317,-0.006048,0.00266,0.005256


# Left - Right Prediction

In [17]:
prob2_df = pd.read_csv("regression2_z.csv")
reg2_df = prob2_df.drop(columns=columns_to_drop)
reg2_df = reg2_df.drop(columns='probability_1')

In [18]:
reg2_df.columns

Index(['probability_0', 'Extremity', 'Moral_Percentage_zscore',
       'Affect_Percentage_zscore', 'ME_Percentage_zscore'],
      dtype='object')

In [19]:
X2 = reg2_df.drop(['probability_0'], axis=1)
y2 = reg2_df['probability_0']

# Split the data into training and testing sets
X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y2, test_size=0.2, random_state=42)

In [20]:
results2_df = pd.DataFrame(columns=['Model', 'Alpha', 'MSE', 'Moral_Coefficient', 'Affect_Coefficient', 'ME_Coefficient'])

## Lasso

In [21]:
for alpha in alpha_values:
    # Lasso Regression
    lasso_model2 = Lasso(alpha=alpha)
    lasso_model2.fit(X2_train, y2_train)
    y2_pred_lasso = lasso_model2.predict(X2_test)
    lasso_mse2 = mean_squared_error(y2_test, y2_pred_lasso)
    lasso_coefficients2 = lasso_model2.coef_

    # Append the Lasso regression results to the dataframe
    results2_df = results2_df.append({'Alpha': alpha, 'Model': 'Lasso Regression', 'MSE': lasso_mse2, 
                                    'Moral_Coefficient': lasso_coefficients2[0], 
                                    'Affect_Coefficient': lasso_coefficients2[1], 
                                    'ME_Coefficient': lasso_coefficients2[2]}, ignore_index=True)

## Ridge

In [22]:
for alpha in alpha_values:
    # Ridge Regression
    ridge_model2 = Ridge(alpha=alpha)
    ridge_model2.fit(X2_train, y2_train)
    y2_pred_ridge = ridge_model2.predict(X2_test)
    ridge_mse2 = mean_squared_error(y2_test, y2_pred_ridge)
    ridge_coefficients2 = ridge_model2.coef_

    # Append the Ridge regression results to the dataframe
    results2_df = results2_df.append({'Alpha': alpha, 'Model': 'Ridge Regression', 'MSE': ridge_mse2, 
                                    'Moral_Coefficient': ridge_coefficients2[0], 
                                    'Affect_Coefficient': ridge_coefficients2[1], 
                                    'ME_Coefficient': ridge_coefficients2[2]}, ignore_index=True)

## Linear

In [23]:
# Linear Regression
linear_model2 = LinearRegression()
linear_model2.fit(X2_train, y2_train)
y2_pred_linear = linear_model2.predict(X2_test)
linear_mse2 = mean_squared_error(y2_test, y2_pred_linear)
linear_coefficients2 = linear_model2.coef_

# Append the linear regression results to the dataframe
results2_df = results2_df.append({'Alpha': 'N/A', 'Model': 'Linear Regression', 'MSE': linear_mse2, 
                                'Moral_Coefficient': linear_coefficients2[0], 
                                'Affect_Coefficient': linear_coefficients2[1], 
                                'ME_Coefficient': linear_coefficients2[2]}, ignore_index=True)

## Linear without Split

In [24]:
# Linear Regression
linear_model2 = LinearRegression()
linear_model2.fit(X2, y2)
linear_coefficients2 = linear_model2.coef_

# Append the linear regression results to the dataframe
results2_df = results2_df.append({'Alpha': 'N/A', 'Model': 'Linear Regression (no split)', 'MSE': 'N/A', 
                                'Moral_Coefficient': linear_coefficients2[0], 
                                'Affect_Coefficient': linear_coefficients2[1], 
                                'ME_Coefficient': linear_coefficients2[2]}, ignore_index=True)

In [25]:
results2_df

Unnamed: 0,Alpha,MSE,Moral_Coefficient,Affect_Coefficient,ME_Coefficient,Model
0,1e-05,0.280019,-0.486303,0.02519,-0.108598,Lasso Regression
1,0.0001,0.280128,-0.453358,0.025308,-0.10859,Lasso Regression
2,0.001,0.281496,-0.123903,0.026482,-0.10851,Lasso Regression
3,0.01,0.278304,-0.0,0.018867,-0.099527,Lasso Regression
4,0.1,0.265565,-0.0,0.0,-0.007724,Lasso Regression
5,0.5,0.265265,-0.0,0.0,-0.0,Lasso Regression
6,0.75,0.265265,-0.0,0.0,-0.0,Lasso Regression
7,1.0,0.265265,-0.0,0.0,-0.0,Lasso Regression
8,1e-05,0.280007,-0.489951,0.025177,-0.108599,Ridge Regression
9,0.0001,0.280007,-0.489838,0.025178,-0.1086,Ridge Regression


## Fitting 1-1 Linear Models

In [29]:
prob3_df = pd.read_csv("regression2_z.csv")
reg3_df = prob3_df.drop(columns=columns_to_drop)

In [30]:
reg3_df = reg3_df.drop(columns=['probability_1', 'Extremity'])

In [32]:
reg3_df.columns

Index(['probability_0', 'Moral_Percentage_zscore', 'Affect_Percentage_zscore',
       'ME_Percentage_zscore'],
      dtype='object')

In [35]:
target_column = 'probability_0'
predictor_columns = ['Moral_Percentage_zscore', 'Affect_Percentage_zscore', 'ME_Percentage_zscore']

results3_df = pd.DataFrame(columns=['Column', 'Model', 'Alpha', 'MSE', 'Coefficients'])

# Iterate over each predictor column
for column in predictor_columns:
    X = reg3_df[[column]].values
    y = reg3_df[target_column].values

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Linear Regression
    linear_model = LinearRegression()
    linear_model.fit(X_train, y_train)
    linear_predictions = linear_model.predict(X_test)
    linear_mse = mean_squared_error(y_test, linear_predictions)

    results3_df = results3_df.append({
        'Column': column,
        'Model': 'Linear Regression',
        'Alpha': 'N/A',
        'MSE': linear_mse,
        'Coefficients': linear_model.coef_[0]
    }, ignore_index=True)

    # Ridge Regression
    for alpha in alpha_values:
        ridge_model = Ridge(alpha=alpha)
        ridge_model.fit(X_train, y_train)
        ridge_predictions = ridge_model.predict(X_test)
        ridge_mse = mean_squared_error(y_test, ridge_predictions)

        results3_df = results3_df.append({
            'Column': column,
            'Model': 'Ridge Regression',
            'Alpha': alpha,
            'MSE': ridge_mse,
            'Coefficients': ridge_model.coef_[0]
        }, ignore_index=True)

    # Lasso Regression
    for alpha in alpha_values:
        lasso_model = Lasso(alpha=alpha)
        lasso_model.fit(X_train, y_train)
        lasso_predictions = lasso_model.predict(X_test)
        lasso_mse = mean_squared_error(y_test, lasso_predictions)

        results3_df = results3_df.append({
            'Column': column,
            'Model': 'Lasso Regression',
            'Alpha': alpha,
            'MSE': lasso_mse,
            'Coefficients': lasso_model.coef_[0]
        }, ignore_index=True)

In [36]:
results3_df

Unnamed: 0,Column,Model,Alpha,MSE,Coefficients
0,Moral_Percentage_zscore,Linear Regression,,0.263359,0.03078
1,Moral_Percentage_zscore,Ridge Regression,1e-05,0.263359,0.03078
2,Moral_Percentage_zscore,Ridge Regression,0.0001,0.263359,0.03078
3,Moral_Percentage_zscore,Ridge Regression,0.001,0.263359,0.03078
4,Moral_Percentage_zscore,Ridge Regression,0.01,0.26336,0.030778
5,Moral_Percentage_zscore,Ridge Regression,0.1,0.26336,0.030759
6,Moral_Percentage_zscore,Ridge Regression,0.5,0.263363,0.030676
7,Moral_Percentage_zscore,Ridge Regression,0.75,0.263365,0.030625
8,Moral_Percentage_zscore,Ridge Regression,1.0,0.263367,0.030573
9,Moral_Percentage_zscore,Lasso Regression,1e-05,0.26336,0.03077
