In [19]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.ticker import MultipleLocator
from scipy.stats.stats import pearsonr
from pandas import DataFrame

import os

# Project root directory. Set the SN_SPECTRAL_EVOLUTION_PATH environment variable
# to run the notebook on another machine; the default is the original location.
# Kept as a plain str because other cells build file names with `path + '/results/...'`.
path = os.environ.get(
    'SN_SPECTRAL_EVOLUTION_PATH',
    '/Users/emilysf098/Desktop/Research/SN-Spectral-Evolution',
)

# Making Pearson Tables

In [20]:
# Measurement table used by every cell below; the notebook reads its
# 'obj_id', 'feat_name', 'branch_type', 'pew' and 'vel' columns.
sdss_pearson = pd.read_csv(path + '/results/sdss_combined.csv')

# Feature labels pW1 ... pW8: rows and column prefixes of the correlation tables.
features = [f'pW{i}' for i in range(1, 9)]

# Unique object ids in first-appearance order, so the list is the same on every
# run (iterating a set gives no ordering guarantee).
CIDs = sdss_pearson['obj_id'].unique().tolist()

# Same labels without pW8.
# NOTE(review): not referenced by any cell visible in this section -- confirm it is still needed.
feats = ['pW1', 'pW2', 'pW3', 'pW4', 'pW5', 'pW6', 'pW7']

In [21]:
def pearson_val(xcol, ycol):
    """Pearson statistics for two paired samples, or NaNs when the sample is too small.

    Used by pearson_table_make.

    Args:
        xcol (Sequence[float]): First sample; only its length is checked here.
        ycol (Sequence[float]): Second sample, paired element-wise with xcol.

    Returns:
        Tuple[float, float]: ``(p_val, c_val)`` -- the p-value first, then the
        correlation coefficient, both taken from ``pearsonr``. Both are
        ``np.nan`` when xcol holds fewer than 3 values.
    """
    # Guard clause: no statistic is reported for fewer than three points.
    if len(xcol) < 3:
        return np.nan, np.nan

    # pearsonr yields (coefficient, p-value); callers expect the reverse order.
    c_val, p_val = pearsonr(xcol, ycol)
    return p_val, c_val
        

In [22]:
def two_feat_table2(feat1, feat2, data):
    """Select the rows of two features, restricted to objects that have both.

    The object ids are taken from ``data`` itself, so the function gives the
    right answer for any frame passed in (e.g. a per-branch subset) and does
    not rely on a module-level id list.

    Args:
        feat1 (str): First feature id, matched against the 'feat_name' column.
        feat2 (str): Second feature id, matched against the 'feat_name' column.
        data (DataFrame): Table with at least 'obj_id' and 'feat_name' columns.

    Returns:
        Tuple[DataFrame, DataFrame]: The feat1 rows and the feat2 rows, both
        indexed by 'obj_id' and limited to objects in which both features
        occur. Objects appear in the same order in both frames.
    """
    # Two vectorized masks replace one full-frame scan per object id.
    ids_with_feat1 = data.loc[data['feat_name'] == feat1, 'obj_id'].dropna().unique().tolist()
    ids_with_feat2 = set(data.loc[data['feat_name'] == feat2, 'obj_id'].dropna())
    valid_obj = [cid for cid in ids_with_feat1 if cid in ids_with_feat2]

    # Label-based selection with a list groups the rows object by object in
    # valid_obj order; this is what keeps the two returned frames aligned.
    indexed_df = data.set_index('obj_id')
    new_df = indexed_df.loc[valid_obj, :]
    return new_df[new_df['feat_name'] == feat1], new_df[new_df['feat_name'] == feat2]

In [23]:
def pearson_table_make(df, features, qual):
    """Build the feature-by-feature Pearson table for one measured column.

    For every ordered pair (feat1, feat2) the rows of both features are fetched
    with two_feat_table2, rows where either value is NaN are dropped, and the
    remaining values are passed to pearson_val.

    Args:
        df (DataFrame): Measurements handed to two_feat_table2; must contain
            the ``qual`` column.
        features (List[str]): Feature ids; they label the rows and prefix the
            column names.
        qual (str): Name of the column to correlate (the notebook passes
            'pew' or 'vel').

    Returns:
        DataFrame: Indexed by 'features'. For each feature there is a
        '<feat>_c_<qual>' column (correlation coefficient) followed by a
        '<feat>_p_<qual>' column (p-value); row i holds the statistic against
        features[i].
    """
    columns = {'features': features}

    for feat1 in features:
        p_vals, c_vals = [], []

        for feat2 in features:
            rows1, rows2 = two_feat_table2(feat1, feat2, df)

            # Values are paired by position after the index reset.
            # NOTE(review): this assumes both selections have the same number
            # of rows per object -- confirm against the input data.
            vals1 = rows1.reset_index()[qual]
            vals2 = rows2.reset_index()[qual]

            # True where both measurements are present.
            both_present = ~(np.isnan(vals1) | np.isnan(vals2))

            p_val, c_val = pearson_val(
                list(vals1[both_present]), list(vals2[both_present])
            )
            p_vals.append(p_val)
            c_vals.append(c_val)

        columns[f'{feat1}_c_{qual}'] = c_vals
        columns[f'{feat1}_p_{qual}'] = p_vals

    return pd.DataFrame(columns).set_index('features')

In [24]:
# Pearson table of the 'vel' column over the full sample: compute, save, display.
vel_comparison = pearson_table_make(sdss_pearson, features, 'vel')
vel_comparison_csv = path + '/results/vel_pearson_comparison.csv'
vel_comparison.to_csv(vel_comparison_csv)
vel_comparison

Unnamed: 0_level_0,pW1_c_vel,pW1_p_vel,pW2_c_vel,pW2_p_vel,pW3_c_vel,pW3_p_vel,pW4_c_vel,pW4_p_vel,pW5_c_vel,pW5_p_vel,pW6_c_vel,pW6_p_vel,pW7_c_vel,pW7_p_vel,pW8_c_vel,pW8_p_vel
features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
pW1,1.0,0.0,0.178367,0.1003609,0.300869,0.001266773,0.338087,0.0004197353,0.249356,0.01428543,0.003963,0.971457,0.278635,0.009382206,,
pW2,0.178367,0.100361,1.0,0.0,0.514196,9.826132e-08,0.356056,0.0006159414,0.400572,0.0001325206,0.286105,0.011653,0.60938,1.990438e-09,,
pW3,0.300869,0.001267,0.514196,9.826132e-08,1.0,0.0,0.219601,0.01641102,0.487373,7.677899e-08,-0.015474,0.881055,0.36811,0.0001777875,0.751169,0.458981
pW4,0.338087,0.00042,0.356056,0.0006159414,0.219601,0.01641102,1.0,0.0,0.230695,0.01735154,0.213592,0.039808,0.487613,4.686202e-07,,
pW5,0.249356,0.014285,0.400572,0.0001325206,0.487373,7.677899e-08,0.230695,0.01735154,1.0,0.0,0.082896,0.429546,0.454137,3.791004e-06,0.625168,0.570061
pW6,0.003963,0.971457,0.286105,0.01165349,-0.015474,0.8810548,0.213592,0.03980829,0.082896,0.4295456,1.0,0.0,0.317917,0.002693575,0.336043,0.781822
pW7,0.278635,0.009382,0.60938,1.990438e-09,0.36811,0.0001777875,0.487613,4.686202e-07,0.454137,3.791004e-06,0.317917,0.002694,1.0,0.0,0.730247,0.47881
pW8,,,,,0.751169,0.4589806,,,0.625168,0.5700612,0.336043,0.781822,0.730247,0.4788099,1.0,0.0


In [25]:
# Pearson table of the 'pew' column over the full sample: compute, save, display.
pew_comparison = pearson_table_make(sdss_pearson, features, 'pew')
pew_comparison_csv = path + '/results/pew_pearson_comparison.csv'
pew_comparison.to_csv(pew_comparison_csv)
pew_comparison

Unnamed: 0_level_0,pW1_c_pew,pW1_p_pew,pW2_c_pew,pW2_p_pew,pW3_c_pew,pW3_p_pew,pW4_c_pew,pW4_p_pew,pW5_c_pew,pW5_p_pew,pW6_c_pew,pW6_p_pew,pW7_c_pew,pW7_p_pew,pW8_c_pew,pW8_p_pew
features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
pW1,1.0,0.0,0.226784,0.01772144,0.298746,0.001243767,0.299942,0.001696487,0.289843,0.003281962,0.13432,0.1968033,0.338061,0.001275873,,
pW2,0.226784,0.017721,1.0,0.0,0.392986,1.072958e-05,0.633236,8.815394e-14,0.333754,0.0004717998,0.482519,4.244932e-07,0.625309,2.678319e-11,0.426697,0.719354
pW3,0.298746,0.001244,0.392986,1.072958e-05,1.0,0.0,0.634684,7.03678e-15,0.585424,1.207484e-11,0.523252,1.023355e-08,0.606919,2.755251e-11,-0.586617,0.600919
pW4,0.299942,0.001696,0.633236,8.815394e-14,0.634684,7.03678e-15,1.0,0.0,0.552665,3.850954e-10,0.581451,1.193413e-10,0.708146,4.986467e-16,0.722177,0.486283
pW5,0.289843,0.003282,0.333754,0.0004717998,0.585424,1.207484e-11,0.552665,3.850954e-10,1.0,0.0,0.405036,1.820256e-05,0.546833,5.696975e-09,0.981608,0.122285
pW6,0.13432,0.196803,0.482519,4.244932e-07,0.523252,1.023355e-08,0.581451,1.193413e-10,0.405036,1.820256e-05,1.0,0.0,0.670336,1.470127e-13,-0.995779,0.058516
pW7,0.338061,0.001276,0.625309,2.678319e-11,0.606919,2.755251e-11,0.708146,4.986467e-16,0.546833,5.696975e-09,0.670336,1.470127e-13,1.0,0.0,0.998364,0.036425
pW8,,,0.426697,0.7193543,-0.586617,0.6009188,0.722177,0.4862835,0.981608,0.122285,-0.995779,0.05851629,0.998364,0.03642466,1.0,0.0


In [26]:
# Split the sample by the value of its 'branch_type' column, one frame per label.
sdss_cl, sdss_bl, sdss_ss, sdss_cn = (
    sdss_pearson[sdss_pearson['branch_type'] == label]
    for label in ('CL', 'BL', 'SS', 'CN')
)

In [27]:
# 'pew' Pearson table for each branch_type subset, each written to its own CSV.
# A single loop replaces four copy-pasted stanzas so the table call and the
# file-name pattern are written once.
pew_branch_tables = {}
for tag, subset in (('cl', sdss_cl), ('bl', sdss_bl), ('ss', sdss_ss), ('cn', sdss_cn)):
    pew_branch_tables[tag] = pearson_table_make(subset, features, 'pew')
    pew_branch_tables[tag].to_csv(path + f'/results/pew_pearson_comparison_{tag}.csv')

# Keep the per-branch variable names so other cells can still refer to them.
pew_comparison_cl = pew_branch_tables['cl']
pew_comparison_bl = pew_branch_tables['bl']
pew_comparison_ss = pew_branch_tables['ss']
pew_comparison_cn = pew_branch_tables['cn']

In [28]:
# 'vel' Pearson table for each branch_type subset, each written to its own CSV.
# A single loop replaces four copy-pasted stanzas so the table call and the
# file-name pattern are written once.
vel_branch_tables = {}
for tag, subset in (('cl', sdss_cl), ('bl', sdss_bl), ('ss', sdss_ss), ('cn', sdss_cn)):
    vel_branch_tables[tag] = pearson_table_make(subset, features, 'vel')
    vel_branch_tables[tag].to_csv(path + f'/results/vel_pearson_comparison_{tag}.csv')

# Keep the per-branch variable names so other cells can still refer to them.
vel_comparison_cl = vel_branch_tables['cl']
vel_comparison_bl = vel_branch_tables['bl']
vel_comparison_ss = vel_branch_tables['ss']
vel_comparison_cn = vel_branch_tables['cn']