In [9]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.decomposition import PCA
import cPickle as pickle

In [7]:
def convert_rgb(org_color):
    """Turn a dot-separated RGB string into a tuple of fractions of 255.

    org_color -- string such as '174.199.232'; surrounding whitespace is
                 stripped and every dot-separated piece is parsed with float().

    Returns a 3-tuple of floats, each component divided by 255,
    e.g. '255.0.51' -> (1.0, 0.0, 0.2).

    Only the first three components end up in the result. A piece that
    float() cannot parse raises ValueError; fewer than three pieces raises
    IndexError.
    """
    channels = []
    for piece in org_color.strip().split('.'):
        # Parse to float before dividing so the division is never integer
        # division, whatever the interpreter version.
        channels.append(float(piece) / 255)

    # Explicit indexing (rather than slicing) so that a short input fails
    # loudly instead of yielding a truncated tuple.
    red, green, blue = channels[0], channels[1], channels[2]
    return (red, green, blue)
    

In [8]:
def tableau10():
    """Build the named colour palette used by the plotting helpers.

    Returns a dict whose keys are colour names (e.g. 'dark_grey', 'purple')
    and whose values are the tuples produced by convert_rgb() from the
    dot-separated 'R.G.B' strings listed below.

    Colour values taken from:
    http://tableaufriction.blogspot.ro/2012/11/finally-you-can-use-tableau-data-colors.html
    """
    # (name, 'R.G.B') pairs; keeping each name next to its value makes it
    # hard for the two to drift out of step.
    named_colors = (
        # greys
        ('dark_grey', '50.50.50'),
        ('med_grey', '75.75.75'),
        ('med2_grey', '100.100.100'),
        ('med3_grey', '125.125.125'),
        ('med4_grey', '150.150.150'),
        ('light_grey', '200.200.200'),
        # primary data colours
        ('blue', '31.119.180'),
        ('orange', '255.127.14'),
        ('green', '44.160.44'),
        ('red', '214.39.40'),
        ('purple', '148.103.189'),
        ('turq', '23.190.207'),
        ('puke', '188.189.34'),
        ('brown', '140.86.75'),
        ('lav', '227.119.194'),
        # reds
        ('dark_red', '177.3.24'),
        ('bright_red', '240.39.32'),
        ('fushia_red', '189.10.54'),
        ('salmon', '242.108.100'),
        ('blue_red', '177.3.74'),
        # blues
        ('cool_blue', '44.105.176'),
        ('purp_blue', '144.158.206'),
        ('med_blue', '107.163.214'),
        ('med2_blue', '137.183.214'),
        ('med3_blue', '95.158.209'),
        ('lav_blue', '181.200.226'),
        ('sky_blue', '177.213.240'),
    )

    return {name: convert_rgb(rgb_text) for name, rgb_text in named_colors}

In [5]:
def pca_scatter(T,group,xlab,ylab):
    """Scatter columns 0 and 1 of T, split into two labelled groups.

    T          -- array indexed as T[mask, column]; column 0 goes on the
                  x-axis and column 1 on the y-axis
    group      -- array compared element-wise with 0 and 1 to select rows;
                  rows equal to 0 are labelled 'CTRL', rows equal to 1 'PTSD'
    xlab, ylab -- axis label text

    Draws on a fresh axes, styles labels/ticks/legend with the tableau10()
    palette, shows the figure and returns None.
    """
    palette = tableau10()
    marker_edge = palette['med2_grey']

    ax = plt.axes()

    # (group value, face colour name, legend label), plotted in this order.
    series = (
        (0, 'light_grey', 'CTRL'),
        (1, 'purple', 'PTSD'),
    )
    for value, face_name, legend_label in series:
        plt.plot(T[group==value,0], T[group==value,1],
                 color=palette[face_name],
                 marker='D', ms=6, mec=marker_edge,
                 linewidth=0,  # markers only, no connecting line
                 label=legend_label)

    axis_labels = [ax.set_xlabel(xlab), ax.set_ylabel(ylab)]
    plt.setp(axis_labels, fontsize=11, color=palette['dark_grey'])

    tick_lines = [ax.get_xticklines(), ax.get_yticklines()]
    plt.setp(tick_lines, color=palette['light_grey'])

    tick_labels = [ax.get_xticklabels(), ax.get_yticklabels()]
    plt.setp(tick_labels, color=palette['med2_grey'], fontsize=9)

    legend = plt.legend(loc='upper center', fontsize=11, numpoints=1)
    legend.get_frame().set_edgecolor(palette['light_grey'])
    for entry in legend.get_texts():
        plt.setp(entry, color=palette['med3_grey'])

    plt.draw()
    plt.show()
    return None

In [14]:
# Load the pickled inputs for this notebook.
# Each file is opened in a `with` block so the handle is closed as soon as the
# object has been read (the bare `pickle.load(open(...))` form leaves it open
# until garbage collection).
# NOTE(security): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.
with open('gammas.p','rb') as fh:
    gammas = pickle.load(fh)
with open('norm_gammas.p','rb') as fh:
    norm_gammas = pickle.load(fh)
with open('ids_gammas_df.p','rb') as fh:
    ids_gammas_df = pickle.load(fh)
with open('ids_ngammas_df.p','rb') as fh:
    ids_ngammas_df = pickle.load(fh)

# Shapes recorded by the original author:
#ids_gammas_df.shape#(13081, 102)
#ids_ngammas_df.shape#(13081, 102)



In [33]:
# made in Extract_Reviews
# Opened in a `with` block so the file handle is closed right after loading.
# NOTE(security): unpickling can execute arbitrary code -- only load pickle
# files that come from a trusted source.
with open('ortho_review_df.p','rb') as fh:
    reviews_df = pickle.load(fh)

# Pull out the 'stars' column for later use.
stars = reviews_df.loc[:,'stars']

# Only the last expression of a cell is displayed, so the earlier bare
# `reviews_df.head(3)` / `stars.shape` lines never showed anything and have
# been dropped; the cell still echoes the frame's shape.
reviews_df.shape

(7320, 5)

In [None]:
# PCA - instantiate object and fit
# Fits a 2-component PCA, prints the explained variance and the component
# loadings, then projects the data and hands it to pca_scatter().
# NOTE(review): `ngammas` is not defined in any cell visible above (the loaded
# names are `gammas` and `norm_gammas`), and this cell has no execution count,
# so it may never have been run -- confirm where `ngammas` comes from.
# NOTE(review): the `cPickle` import suggests Python 2, where print(a, b)
# prints a tuple unless print_function is imported -- confirm the intended
# output format.
pca = PCA(n_components=2)
pca.fit(ngammas)
print('\nPCA: % Variance Explained: ',np.round(pca.explained_variance_ratio_*100,3))

# components are eigenvectors
# Rounded to 5 decimals and transposed, so EV has one row per input column
# and one column per principal component.
EV = np.round((pca.components_),5).T

# PROJECTED GAMMAS
# Use this d x k eigenvector matrix to transform the original samples onto the component subspace.
# Arg for transform is projected on the principal components extracted from the training set
X_pca = pca.transform(ngammas)# gammas mapped onto new feature space

print('\nPCA Component Loadings (Eigenvectors): \n')
# One printed line per row of EV; the row index is reported as the topic number.
for i in range(0,len(EV)):
    print(EV[i,:],'   ','Topic: ',i)
#print('\nTransformed X: \n', T[:6])
xlabel='PCA 1';ylabel='PCA 2'
    
# NOTE(review): `group` is not defined in any cell visible above; pca_scatter
# compares it with 0 and 1 to split the rows -- confirm its source and that
# its length matches X_pca.
pca_scatter(X_pca, group, xlabel, ylabel)