In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import quandl
import numpy as np
from sklearn.cluster import SpectralClustering 
from sklearn.preprocessing import StandardScaler, normalize 
from sklearn.decomposition import PCA 
from sklearn.metrics import silhouette_score
from math import sqrt

In [None]:
# Load one year of daily prices (one column per ticker, first column is the
# date index) and derive annualised return / volatility features per ticker.
#
# The path is a raw string: the original literal contained backslash
# sequences such as "\k" and "\S", which are invalid escapes and trigger a
# SyntaxWarning on recent Python versions. The resulting path is unchanged.
prices_df = pd.read_csv(
    r"C:\k.v.singh\SJSU\Fall_2019\CMPE257\machine-learning-stock-market\one_year_data.csv",
    index_col=[0],
    parse_dates=[0],
)

# Daily percentage changes, computed once and reused for both statistics.
daily_returns = prices_df.pct_change()

# Annualise using 252 periods: mean scales linearly, std with the square root.
returns = (daily_returns.mean() * 252).to_frame('Returns')
print(returns.columns)
returns['Volatility'] = daily_returns.std() * sqrt(252)

# (n_tickers, 2) array with columns [Returns, Volatility].
data = returns[['Returns', 'Volatility']].to_numpy()

# Removing NaN values, replacing them by 0
cleaned_data = np.where(np.isnan(data), 0, data)

In [None]:
# Standardise each feature column, then rescale every sample (row) with
# sklearn's `normalize` using its default settings.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(cleaned_data)
X_normalized = pd.DataFrame(normalize(X_scaled))

# Project onto two principal components and name the resulting columns.
pca = PCA(n_components=2)
X_principal = pd.DataFrame(pca.fit_transform(X_normalized), columns=['P1', 'P2'])

X_principal.head()

In [None]:
# Building the cluster model.
# random_state is fixed so that re-running the notebook reproduces the same
# cluster labels (and therefore the same colours in the plot below).
spectral_model_rbf = SpectralClustering(n_clusters=2, affinity='rbf', random_state=0)

# Training the model and storing the predicted cluster labels
labels_rbf = spectral_model_rbf.fit_predict(X_principal)

# Label -> colour mapping
colours = {0: 'b', 1: 'y'}

# Per-point colour vector (kept as a notebook-level name; the plot below
# colours points per cluster instead).
cvec = [colours[label] for label in labels_rbf]

# Plotting the clustered scatter plot.
# Each cluster is drawn once, on a single explicit figure, with its own
# legend label. The previous version first drew every point twice on an
# implicit extra figure just to obtain legend handles.
fig, ax = plt.subplots(figsize=(6, 6))
in_label_0 = labels_rbf == 0
in_label_1 = labels_rbf == 1
b = ax.scatter(
    X_principal.loc[in_label_0, 'P1'],
    X_principal.loc[in_label_0, 'P2'],
    color=colours[0],
    label='Label 0',
)
y = ax.scatter(
    X_principal.loc[in_label_1, 'P1'],
    X_principal.loc[in_label_1, 'P2'],
    color=colours[1],
    label='Label 1',
)
ax.set_xlabel('P1')
ax.set_ylabel('P2')
ax.set_title('Spectral clustering (rbf affinity)')
ax.legend()
plt.show()

In [None]:
# Building the clustering model.
# random_state is fixed so that re-running the notebook reproduces the same
# cluster labels (and therefore the same colours in the plot below).
spectral_model_nn = SpectralClustering(
    n_clusters=2, affinity='nearest_neighbors', random_state=0
)

# Training the model and storing the predicted cluster labels
labels_nn = spectral_model_nn.fit_predict(X_principal)

# Label -> colour mapping
colours = {0: 'g', 1: 'r'}

# Per-point colour vector (kept as a notebook-level name; the plot below
# colours points per cluster instead).
cvec = [colours[label] for label in labels_nn]

# Plotting the clustered scatter plot.
# Both legend handles are created here. The previous version reused a `y`
# handle left over from an earlier cell, which fails on a fresh kernel if
# that cell has not run and otherwise shows the wrong colour for 'Label 1'.
fig, ax = plt.subplots(figsize=(6, 6))
in_label_0 = labels_nn == 0
in_label_1 = labels_nn == 1
b = ax.scatter(
    X_principal.loc[in_label_0, 'P1'],
    X_principal.loc[in_label_0, 'P2'],
    color=colours[0],
    label='Label 0',
)
y = ax.scatter(
    X_principal.loc[in_label_1, 'P1'],
    X_principal.loc[in_label_1, 'P2'],
    color=colours[1],
    label='Label 1',
)
ax.set_xlabel('P1')
ax.set_ylabel('P2')
ax.set_title('Spectral clustering (nearest_neighbors affinity)')
ax.legend()
plt.show()

In [None]:
# Display names of the two affinities, in the same order as the scores below.
affinity = ['rbf', 'nearest-neighbours']

# Silhouette score of each labelling, evaluated against cleaned_data.
# NOTE(review): the labels were fitted on X_principal, but the scores are
# measured on cleaned_data -- confirm that scoring in a different feature
# space is intended.
s_scores = [
    silhouette_score(cleaned_data, cluster_labels)
    for cluster_labels in (labels_rbf, labels_nn)
]

print(s_scores)

In [None]:
# Bar chart comparing the silhouette score obtained with each affinity.
fig, ax = plt.subplots()
ax.bar(affinity, s_scores)
ax.set_xlabel('Affinity')
ax.set_ylabel('Silhouette Score')
ax.set_title('Comparison of different Clustering Models')
plt.show()