# In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import yfinance as yf

# Tickers included in the analysis
stocks = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']

# Pull daily price history for every ticker in one request.
# group_by='ticker' lets each ticker's fields be selected with data[ticker];
# with auto_adjust=True the 'Close' field is used as the adjusted close.
data = yf.download(
    stocks,
    start='2023-01-01',
    end='2025-01-01',
    group_by='ticker',
    auto_adjust=True,
)

# One 'Close' column per ticker, in the same order as `stocks`
adj_close = pd.DataFrame({ticker: data[ticker]['Close'] for ticker in stocks})

# Preview the price table
print(adj_close.head())

# Daily simple returns (day-over-day percentage change) for every ticker.
# fill_method=None keeps pct_change from forward-filling missing prices:
# that implicit padding is deprecated in pandas and would turn a price gap
# into a spurious 0% return. dropna() then removes the first row (which has
# no previous price) and any day on which a ticker has no valid return.
returns = adj_close.pct_change(fill_method=None).dropna()

# Preview the returns table
print(returns.head())
# Constants used to annualise the daily statistics
trading_days = 252      # trading days per year
risk_free_rate = 0.03   # example annual risk-free rate (3%)

# Per-ticker daily statistics
daily_mean = returns.mean()
daily_std = returns.std()

# Annualised return: daily mean scaled by the number of trading days
mean_returns = daily_mean * trading_days

# Annualised volatility: daily standard deviation scaled by sqrt(trading days)
volatility = daily_std * np.sqrt(trading_days)

# Sharpe ratio: excess return over the risk-free rate per unit of volatility
sharpe_ratio = (mean_returns - risk_free_rate) / volatility

# Assemble the metrics side by side, one row per ticker
risk_data = pd.concat(
    [mean_returns, volatility, sharpe_ratio],
    axis=1,
    keys=['Return', 'Volatility', 'Sharpe'],
)

# Show the risk metrics
print(risk_data)

from sklearn.cluster import KMeans

# Cluster the tickers on annualised return and volatility only
# (the Sharpe column is derived from those two, so it is left out).
features = risk_data[['Return', 'Volatility']]

# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto', so leaving it implicit gives version-dependent cluster assignments
# (and a FutureWarning on some releases). random_state keeps runs repeatable.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
risk_data['Cluster'] = kmeans.fit_predict(features)

# Display the DataFrame with cluster labels
print(risk_data)


# Scatter plot of the clusters in volatility/return space
_, ax = plt.subplots(figsize=(10, 6))

# One colour per cluster id (indexed by the integer cluster label)
colors = ['red', 'green', 'blue']

# Draw every cluster as its own series so each gets a legend entry
for cluster_id in risk_data['Cluster'].unique():
    members = risk_data.loc[risk_data['Cluster'] == cluster_id]
    ax.scatter(members['Volatility'], members['Return'],
               s=100, c=colors[cluster_id], label=f'Cluster {cluster_id}')

# Annotate each point with its ticker, nudged slightly right of the marker
for ticker, vol, ret in zip(risk_data.index,
                            risk_data['Volatility'],
                            risk_data['Return']):
    ax.text(x=vol + 0.001, y=ret, s=ticker, fontsize=9)

# Axis labels, title, legend and grid
ax.set_xlabel('Annualized Volatility')
ax.set_ylabel('Annualized Return')
ax.set_title('Financial Risk Clustering using K-Means')
ax.legend()
ax.grid(True)
plt.show()


# Map cluster numbers to risk levels.
# KMeans label ids are arbitrary (cluster 0 is not necessarily the calmest
# group), so rank the clusters by their average annualised volatility and
# assign the labels in that order: lowest volatility -> 'Low Risk'.
risk_labels = ['Low Risk', 'Medium Risk', 'High Risk']
clusters_by_volatility = (
    risk_data.groupby('Cluster')['Volatility'].mean().sort_values().index
)
cluster_risk_map = dict(zip(clusters_by_volatility, risk_labels))
risk_data['Risk_Level'] = risk_data['Cluster'].map(cluster_risk_map)

# Display the final DataFrame
print(risk_data)

# Pairwise correlation of the tickers' daily returns
correlation = returns.corr()
tick_positions = range(len(correlation))
tick_labels = correlation.columns

# Render the correlation matrix as a heatmap
fig, ax = plt.subplots(figsize=(8, 6))
heatmap = ax.imshow(correlation, cmap='coolwarm', interpolation='none')
fig.colorbar(heatmap, ax=ax, label='Correlation')

# Label both axes with the ticker symbols
ax.set_xticks(tick_positions)
ax.set_xticklabels(tick_labels, rotation=45)
ax.set_yticks(tick_positions)
ax.set_yticklabels(tick_labels)

ax.set_title('Stock Correlation Matrix')
plt.show()


# Persist the risk metrics, including cluster and risk labels, keeping the
# ticker index as the first CSV column
risk_data.to_csv('risk_analysis.csv', index=True)

# Re-draw the cluster scatter plot so it can be written to disk
fig, ax = plt.subplots(figsize=(10, 6))
colors = ['red', 'green', 'blue']

# One scatter series (and legend entry) per cluster id
for cluster_id in risk_data['Cluster'].unique():
    members = risk_data.loc[risk_data['Cluster'] == cluster_id]
    ax.scatter(members['Volatility'], members['Return'],
               s=100, c=colors[cluster_id], label=f'Cluster {cluster_id}')

# Ticker annotation next to each point
for ticker, vol, ret in zip(risk_data.index,
                            risk_data['Volatility'],
                            risk_data['Return']):
    ax.text(x=vol + 0.001, y=ret, s=ticker, fontsize=9)

ax.set_xlabel('Annualized Volatility')
ax.set_ylabel('Annualized Return')
ax.set_title('Financial Risk Clustering using K-Means')
ax.legend()
ax.grid(True)

# Write the image before show(), while the figure is still populated
fig.savefig('cluster_plot.png')
plt.show()

