# My Functions

In [12]:
def matrix_unique_indices(parameter, header_letter):
    """
    Group the positions of a repeated grid variable by its unique values.

    :param parameter: 1-D sequence (list, numpy array or pandas Series) of floats or ints in which every distinct
                      value is repeated the same number of times (e.g. 50 unique values in a grid of 2500).
    :param header_letter: string used as prefix of each column header; column k is named "<header_letter>_<k>".
    :return: a matrix of strings with one column per unique value (in ascending order). Row 0 is the header, row 1
             the unique value itself, and the remaining rows are the positions in `parameter` where it occurs.
             An empty `parameter` gives an empty matrix of shape (2, 0).
    :raises ValueError: if the unique values do not all occur the same number of times; the columns would then have
                        different lengths and could not be stacked into a rectangular matrix.
    """

    import numpy as np

    # Work on a plain array so that positions are always positional, whatever index a pandas Series carries.
    values = np.asarray(parameter).ravel()
    unique_params = np.unique(values)
    if unique_params.size == 0:
        return np.array([]).reshape(0, 2).T

    # One vectorised comparison per unique value instead of a Python loop over the whole grid.
    positions = [np.flatnonzero(values == unique_value) for unique_value in unique_params]

    repeats = {where.size for where in positions}
    if len(repeats) != 1:
        raise ValueError("every unique value must occur the same number of times to build the index matrix; "
                         "found occurrence counts %s" % sorted(repeats))
    n_repeats = repeats.pop()

    indices_unique = []
    for i, where in enumerate(positions):
        indices_unique.append(header_letter + "_%d" % i)    # Column header
        indices_unique.append(unique_params[i])             # Actual value of the parameter
        indices_unique.extend(where.tolist())               # Positions at which this value appears

    # The flat list holds one (header, value, positions...) record per unique value; mixing text and numbers makes
    # numpy store everything as strings. Reshape to one record per row, then transpose to one record per column.
    new_matrix = np.array(indices_unique).reshape(unique_params.size, n_repeats + 2).T
    return new_matrix

In [13]:
def rearranging_other_features(new_matrix, mean, percentile02, percentile25, percentile50, percentile75, percentile97,
                               standard_deviation, parameter_name):
    """
    Average each fit statistic over the rows that share the same unique parameter value.

    :param new_matrix: matrix with one column per unique parameter value: row 0 is a header, row 1 the unique value
                       and rows 2 onwards the positions of the rows that carry that value.
    :param mean: array of all means of the original fit (say 2500)
    :param percentile02: array of all 02.5th percentile of the original fit file (say 2500)
    :param percentile25: idem for the 25.0th percentile
    :param percentile50: idem for the 50.0th percentile
    :param percentile75: idem for the 75.0th percentile
    :param percentile97: idem for the 97.5th percentile
    :param standard_deviation: idem for the standard deviation.
    :param parameter_name: name given to the DataFrame column that holds the unique parameter values.
    :return: a pandas DataFrame with one row per column of new_matrix. The column `parameter_name` holds the unique
             value exactly as stored in new_matrix; the columns 'mean', 'perc_02.5%', 'perc_25.0%', 'perc_50.0%',
             'perc_75.0%', 'perc_97.5%' and 'std' hold, as floats, the average of the corresponding input array
             over the positions listed in that column.
    """

    import numpy as np
    import pandas as pd

    new_matrix = np.asarray(new_matrix)

    # Plain float arrays, so that the stored positions are used positionally for every kind of input sequence.
    statistics = [
        ('mean', mean),
        ('perc_02.5%', percentile02),
        ('perc_25.0%', percentile25),
        ('perc_50.0%', percentile50),
        ('perc_75.0%', percentile75),
        ('perc_97.5%', percentile97),
        ('std', standard_deviation),
    ]
    statistics = [(label, np.asarray(values).astype(float)) for label, values in statistics]

    remodled_data = {parameter_name: []}
    for label, _ in statistics:
        remodled_data[label] = []

    for k in range(new_matrix.shape[1]):            # One pass per unique parameter value (column of new_matrix)
        column = new_matrix[:, k]
        indices_col = column[2:].astype(int)        # Positions are stored as text inside the matrix
        remodled_data[parameter_name].append(column[1])
        for label, values in statistics:
            # Each average only covers the positions of this column; nothing is carried over between columns.
            remodled_data[label].append(np.mean(values[indices_col]))

    remodled_data_df = pd.DataFrame(remodled_data)

    return remodled_data_df

# Libraries

In [14]:
import numpy             as np
import pandas            as pd
import matplotlib.pyplot as plt
import seaborn           as sns
from mpl_toolkits.mplot3d import Axes3D

# Dataset

In [15]:
# Read the comma-separated fit results as text (dtype=str); the first row is used as the column names below.
results = np.loadtxt('./Results/fit_results_3d_small_teste.csv', delimiter=',', dtype=str)

In [16]:
# Row 0 of the raw table supplies the column names; every following row is data.
column_names = results[0, :]
data_rows = results[1:, :]
results_df = pd.DataFrame(data_rows, columns=column_names)

In [17]:
# Every column was read as text, so cast each one to the type it is used with.
parameter = results_df['parameter'].astype(str)
whan_class = results_df['WHAN_CLASS'].astype(int)
redshift, mean, sd = (results_df[label].astype(float) for label in ('Z', 'mean', 'sd'))
perc_2_5, perc_25, perc_50, perc_75, perc_97_5 = (
    results_df[label].astype(float) for label in ('2.5%', '25%', '50%', '75%', '97.5%')
)

In [18]:
# Bare expression: shows the table as the output of this cell.
results_df

Unnamed: 0,parameter,mean,se_mean,sd,2.5%,25%,50%,75%,97.5%,n_eff,Rhat,Z,WHAN_CLASS
0,pnew[0],0.17,0.001,0.0497,0.0883,0.1342,0.1649,0.2009,0.2802,2624,1.0002,0.06794,0
1,pnew[1],0.1824,0.0009,0.0483,0.1012,0.1477,0.1782,0.2131,0.288,2699,1.0002,0.0736616326531,0
2,pnew[2],0.1953,0.0009,0.0468,0.1148,0.1619,0.1919,0.2252,0.2964,2795,1.0002,0.0793832653061,0
3,pnew[3],0.2085,0.0008,0.0452,0.1292,0.176,0.2058,0.2374,0.305,2915,1.0002,0.0851048979592,0
4,pnew[4],0.2221,0.0008,0.0436,0.1441,0.1911,0.2201,0.2502,0.3142,3064,1.0002,0.0908265306122,0
5,pnew[5],0.236,0.0007,0.0419,0.1598,0.2062,0.2344,0.2634,0.3228,3253,1.0002,0.0965481632653,0
6,pnew[6],0.25,0.0007,0.0404,0.1758,0.2212,0.2491,0.2763,0.3315,3489,1.0002,0.102269795918,0
7,pnew[7],0.2641,0.0006,0.0389,0.1918,0.2365,0.2634,0.2898,0.3423,3777,1.0002,0.107991428571,0
8,pnew[8],0.2783,0.0006,0.0377,0.207,0.2517,0.2774,0.3031,0.3549,4116,1.0001,0.113713061224,0
9,pnew[9],0.2924,0.0005,0.0368,0.2223,0.2669,0.2919,0.3166,0.3661,4488,1.0001,0.119434693878,0


# Regrouping the dataset based on unique values of each parameter

### For the first parameter: redshift

In [19]:
# Build the index matrix for redshift; its columns are headed z_0, z_1, ...
unique_z_idxs = matrix_unique_indices(redshift, 'z')

In [20]:
# Parenthesised form prints the same text on Python 2 and Python 3.
print(unique_z_idxs)

[['z_0' 'z_1' 'z_2' ..., 'z_47' 'z_48' 'z_49']
 ['0.06794' '0.0736616326531' '0.0793832653061' ..., '0.336856734694'
  '0.342578367347' '0.3483']
 ['0' '1' '2' ..., '47' '48' '49']
 ..., 
 ['2350' '2351' '2352' ..., '2397' '2398' '2399']
 ['2400' '2401' '2402' ..., '2447' '2448' '2449']
 ['2450' '2451' '2452' ..., '2497' '2498' '2499']]


In [22]:
# Collapse the fit statistics into a DataFrame keyed by the unique redshifts in unique_z_idxs.
fit_mean_params_z = rearranging_other_features(unique_z_idxs, mean, perc_2_5, perc_25, perc_50, perc_75, perc_97_5, 
                                               sd, 'redshift')

In [23]:
# Sanity check: rebuild the redshift index matrix with every stored row position replaced by the WHAN class found
# at that position, keeping the two header rows (column name and redshift value) untouched.
n_rows, n_cols = unique_z_idxs.shape
check = []
for k in range(n_cols):                     # One pass per unique redshift (column of the matrix)
    column = unique_z_idxs[:, k]
    check.append(column[0])
    check.append(column[1])
    for stored_position in column[2:]:      # Positions are stored as text inside the matrix
        check.append(whan_class[int(stored_position)])
# The flat list holds one record per column, so reshape with the matrix's own dimensions instead of fixed sizes.
print(np.array(check).reshape(n_cols, n_rows).T)

[['z_0' 'z_1' 'z_2' ..., 'z_47' 'z_48' 'z_49']
 ['0.06794' '0.0736616326531' '0.0793832653061' ..., '0.336856734694'
  '0.342578367347' '0.3483']
 ['0' '0' '0' ..., '0' '0' '0']
 ..., 
 ['3' '3' '3' ..., '3' '3' '3']
 ['3' '3' '3' ..., '3' '3' '3']
 ['4' '4' '4' ..., '4' '4' '4']]


### For the second parameter: whan_class

In [None]:
# unique_z_idxs stores the row positions as strings, so cast them to int and look them up positionally to get the
# WHAN class of every grid point, arranged like the index rows of the matrix.
print(whan_class.values[unique_z_idxs[2:, :].astype(int)])

In [None]:
# Build the index matrix for the WHAN class; its columns are headed w_class_0, w_class_1, ...
unique_w_idxs = matrix_unique_indices(whan_class, 'w_class')

In [None]:
# Show the WHAN-class index matrix computed in the previous cell of this section.
print(unique_w_idxs)

In [None]:
# NOTE(review): this repeats the redshift call from the previous section (unique_z_idxs, 'redshift') although this
# section is about whan_class and unique_w_idxs has just been computed - confirm which matrix and name are intended.
fit_mean_params_z = rearranging_other_features(unique_z_idxs, mean, perc_2_5, perc_25, perc_50, perc_75, perc_97_5, 
                                               sd, 'redshift')

In [None]:
# rearranging_other_features returns a single DataFrame; unpacking it directly would only yield its column labels,
# so keep the table and pick each column by name.
remodled_data = fit_mean_params_z
u_redshift = remodled_data['redshift']
u_mean = remodled_data['mean']
u_y02 = remodled_data['perc_02.5%']
u_y25 = remodled_data['perc_25.0%']
u_y50 = remodled_data['perc_50.0%']
u_y75 = remodled_data['perc_75.0%']
u_y97 = remodled_data['perc_97.5%']

# Analyzing the dataset

In [None]:
# Three hex colours; in the visible cells they are only referenced by a commented-out sns.boxplot call.
palette = ['#1b9e77','#d95f02','#7570b3']

In [None]:
# 3-D surface of the mean probability over the (redshift, WHAN class) grid.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_trisurf(redshift.values, whan_class.values, mean.values, color='#a6611a', alpha=0.8, linewidth=0, antialiased=False)
ax.set_xlabel("\n z", fontsize=14)
# The y axis carries whan_class (integer codes 0-4), so label and tick it with the class names used for the
# "WHAN Class" axis elsewhere in this notebook rather than with stellar-mass values.
ax.set_ylabel("\n WHAN class", fontsize=14)
ax.set_zlabel("\n Probability of \n UV upturn", fontsize=14)
ax.set_yticks([0, 1, 2, 3, 4])
ax.set_yticklabels(["NA", "RP", "wAGN", "sAGN", "SF"])
# tick_params sets the label size without touching the Tick.label attribute, which newer Matplotlib removed.
ax.tick_params(axis='y', labelsize=10)
ax.view_init(elev=22., azim=315)
# ax.zaxis.set_rotate_label(False)
# plt.savefig('./../../Figs/logit3D_grid.pdf', dpi=100)

# for ii in xrange(180,360,10):
#     ax.view_init(elev=10., azim=ii)
#     plt.savefig("./Results/movie_short/movie_0%d_0.png" % ii)

# plt.savefig("./movie_0%d.png" % 359)

plt.show()

In [None]:
# Scatter of mean probability against redshift for the rows selected by idx_sf.
# NOTE(review): idx_sf is not defined in the visible part of this file - confirm where it comes from.
plt.scatter(redshift.values[idx_sf], mean.values[idx_sf], alpha=0.2)
plt.xlabel("Redshift")
plt.ylabel("Probability")
# plt.savefig("./Results/dependency01.png")
plt.show()

In [None]:
sns.set_style("white")
# NOTE(review): the next statement is incomplete ("plt." with no call), so this cell cannot run as written;
# the intended plotting call has to be filled in before the figure below is saved.
plot01 = plt.
# plot02 = sns.swarmplot(y='PROB', x='WHAN_CLASS', data=bxplt)
# plt.xticks([0, 1, 2, 3, 4], ["NA", "RP", "wAGN", "sAGN", "SF"])
plt.xlabel("WHAN Class", fontsize=13)
plt.ylabel("Probability", fontsize=13)
plt.legend(loc='best')
plt.savefig("./Results/emlines_prob_teste.png")
plt.show()

In [None]:
# Rows of bxplt whose WHAN_CLASS equals 1; the parenthesised print works on Python 2 and Python 3.
# NOTE(review): bxplt is not defined in the visible part of this file - confirm where it comes from.
print(bxplt.loc[bxplt['WHAN_CLASS']==1])

In [None]:
# Overlay three scatters on a WHAN-class axis: bxplt['PROB'], new_q1 and new_q3.
# NOTE(review): bxplt, new_q1 and new_q3 are not defined in the visible part of this file - confirm their origin.
sns.set_style("white")
# plot01 = sns.boxplot(y='PROB', x='WHAN_CLASS', data=bxplt, palette=palette,
#                      fliersize=4, orient="h")
plt.scatter(x=bxplt['WHAN_CLASS'], y=bxplt['PROB'])
plt.scatter(whan_class, new_q1)
plt.scatter(whan_class, new_q3)
# plot02 = sns.swarmplot(y='PROB', x='WHAN_CLASS', data=bxplt)
# Name the integer class codes 0-4 on the x axis.
plt.xticks([0, 1, 2, 3, 4], ["NA", "RP", "wAGN", "sAGN", "SF"])
plt.xlabel("WHAN Class", fontsize=13)
plt.ylabel("Probability", fontsize=13)
# NOTE(review): none of the scatter calls above passes a label, so the legend has no entries to show.
plt.legend(loc='best')
# plt.savefig("./Results/emlines_prob_teste.png")
plt.show()

In [None]:
# Explicit formatting gives the same "a b" output on Python 2 and Python 3.
# NOTE(review): new_mean is not defined in the visible part of this file - confirm where it comes from.
print("%s %s" % (whan_class.size, new_mean.size))
print("%s %s" % (np.unique(whan_class), np.unique(new_mean)))

In [None]:
# Exploratory cell: a box plot and a histogram drawn on the same axes.
# NOTE(review): new_mean, new_q1 and new_q3 are not defined in the visible part of this file - confirm their origin.
sns.set_style("white")
# NOTE(review): x is the array of unique class codes rather than per-class samples, with medians and confidence
# intervals supplied by hand - confirm this is the intended use of plt.boxplot.
plt.boxplot(x=np.unique(whan_class), usermedians=np.unique(new_mean), conf_intervals=np.column_stack((new_q1, new_q3)))
# plt.boxplot(x=new_mean[np.where(whan_class==1)])
# Histogram (100 bins) of new_mean restricted to the rows with WHAN class 0.
plt.hist(new_mean[np.where(whan_class==0)], bins=100)
# plt.xticks([0, 1, 2, 3, 4], ["NA", "RP", "wAGN", "sAGN", "SF"])
# plt.xlabel("WHAN Class", fontsize=13)
# plt.ylabel("Probability", fontsize=13)
plt.legend(loc='best')
# plt.savefig("./Results/emlines_prob_teste.png")
plt.show()