In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import numpy as np

# 1. Read the price table; the 'Date' column is parsed as dates and becomes the index
prices = pd.read_csv(
    'D:\\Downloads\\prices.csv',
    index_col='Date',
    parse_dates=['Date'],
)

# 2. Row-over-row percentage change for every column (daily, assuming one row per
#    trading day -- confirm against the CSV). dropna() removes every row that
#    contains a NaN, which includes the first row since it has no previous price.
returns = (
    prices
    .pct_change()
    .dropna()
)

# 3. Feature engineering: one row per price column, holding the mean and the
#    standard deviation of that column's returns
features = pd.concat(
    [
        returns.mean().rename('mean_return'),
        returns.std().rename('volatility'),
    ],
    axis=1,
)


In [3]:
# Cluster stocks by risk/return profile
# 1. Scale features. The two engineered columns are selected by name so that
#    re-running this cell does not feed the 'cluster' label column (added to
#    `features` further down) back into the scaler as if it were a third feature.
FEATURE_COLUMNS = ['mean_return', 'volatility']
scaler = StandardScaler()
X = scaler.fit_transform(features[FEATURE_COLUMNS])

# 2. KMeans clustering into two groups. n_init is passed explicitly because its
#    default is not the same in every scikit-learn release; random_state is fixed
#    so the run is repeatable.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)
features['cluster'] = kmeans.fit_predict(X)

print(features)


      mean_return  volatility  cluster
AAPL     0.002407    0.011772        1
MSFT     0.000077    0.014341        0
GOOG     0.004951    0.014620        1
AMZN     0.004396    0.007048        1
FB       0.003408    0.013083        1




In [5]:
# Pairwise similarity via correlation
# 1. Pearson correlation between every pair of return columns
corr_matrix = returns.corr(method='pearson')

# 2. Recommendation function based on correlation
def recommend_by_correlation(ticker, corr_mat, top_n=3):
    """Return the labels whose correlation with ``ticker`` is highest.

    Parameters
    ----------
    ticker : hashable
        Column label of ``corr_mat`` to find peers for.
    corr_mat : pandas.DataFrame
        Correlation matrix. Peers are read from the ``ticker`` column, so the
        row labels of that column are the candidate peers.
    top_n : int, default 3
        Maximum number of peers to return.

    Returns
    -------
    list
        Up to ``top_n`` row labels, ordered from highest to lowest correlation.
        ``ticker`` itself is never included, and neither is any label whose
        correlation with ``ticker`` is NaN. Empty when ``ticker`` is not a
        column of ``corr_mat``.
    """
    if ticker not in corr_mat:
        return []
    sims = (
        corr_mat[ticker]
        # errors='ignore': the ticker may be a column without being a row label
        .drop(index=ticker, errors='ignore')
        # an undefined correlation is not evidence of similarity, so it must not
        # be allowed to fill one of the top_n slots
        .dropna()
        .sort_values(ascending=False)
    )
    return list(sims.head(top_n).index)

# Example: tickers most correlated with AAPL (at most three, the default top_n)
print(
    "Stocks similar to AAPL:",
    recommend_by_correlation(ticker='AAPL', corr_mat=corr_matrix),
)


Stocks similar to AAPL: ['MSFT', 'FB', 'GOOG']


In [6]:
# Putting it all together: cluster peers and correlation peers from one call
def recommend_stocks(ticker, features, corr_mat, top_n=3):
    """Collect cluster-based and correlation-based peers for ``ticker``.

    Parameters
    ----------
    ticker : hashable
        Label to find peers for.
    features : pandas.DataFrame
        Frame indexed by ticker that has a 'cluster' column.
    corr_mat : pandas.DataFrame
        Correlation matrix passed through to ``recommend_by_correlation``.
    top_n : int, default 3
        Passed through to ``recommend_by_correlation``. It does not limit the
        number of cluster peers.

    Returns
    -------
    dict
        ``'cluster_peers'``: every other index label of ``features`` with the
        same 'cluster' value as ``ticker``, or an empty list when ``ticker`` is
        not in ``features.index``.
        ``'correlation_peers'``: the result of ``recommend_by_correlation``.
    """
    # 1. Cluster-based recommendations. An unknown ticker yields no peers rather
    #    than a KeyError, in line with how recommend_by_correlation treats a
    #    ticker it cannot find.
    if ticker in features.index:
        cluster_id = features.loc[ticker, 'cluster']
        members = features[features['cluster'] == cluster_id].index
        cluster_peers = list(members.drop(ticker))
    else:
        cluster_peers = []

    # 2. Correlation-based recommendations
    correlation_peers = recommend_by_correlation(ticker, corr_mat, top_n)

    return {
        'cluster_peers': cluster_peers,
        'correlation_peers': correlation_peers
    }

# Example: both kinds of peers for AAPL, printed one list per line
recs = recommend_stocks(
    ticker='AAPL',
    features=features,
    corr_mat=corr_matrix,
)
print("Cluster peers for AAPL:", recs['cluster_peers'])
print("Correlation peers for AAPL:", recs['correlation_peers'])


Cluster peers for AAPL: ['GOOG', 'AMZN', 'FB']
Correlation peers for AAPL: ['MSFT', 'FB', 'GOOG']
