In [1]:
pip install --upgrade yfinance



In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns

# ==============================================
# PART 0: INSTALL REQUIRED PACKAGES
# ==============================================

try:
    import kaleido
except ImportError:
    print("Installing Kaleido for image export...")
    !pip install -U kaleido
    import kaleido

# ==============================================
# PART 1: DATA DOWNLOAD AND PREPROCESSING
# ==============================================

# Define assets
assets = {
    "Gold": "GC=F",
    "Oil": "CL=F",
    "Bitcoin": "BTC-USD",
    "ICICI Bank": "ICICIBANK.NS"
}

# Define time periods
covid_period = ("2020-01-01", "2021-12-31")
post_covid_period = ("2022-01-01", "2025-04-30")

# Download data function with error handling
def download_data(assets, start_date, end_date):
    try:
        df = yf.download(list(assets.values()), start=start_date, end=end_date)["Close"]
        df.columns = assets.keys()
        return df.dropna()
    except Exception as e:
        print(f"Error downloading data: {e}")
        return None

# Get data for both periods
print("Downloading COVID period data...")
covid_data = download_data(assets, *covid_period)
print("Downloading post-COVID data...")
post_covid_data = download_data(assets, *post_covid_period)

# Check if data was downloaded successfully
if covid_data is None or post_covid_data is None:
    raise ValueError("Failed to download data. Please check your internet connection and try again.")

# Calculate weekly returns
covid_returns = covid_data.resample('W').last().pct_change().dropna()
post_covid_returns = post_covid_data.resample('W').last().pct_change().dropna()

# ==============================================
# PART 2: CORRELATION ANALYSIS
# ==============================================

def plot_correlation_matrix(returns, title):
    corr = returns.corr()
    fig = px.imshow(
        corr,
        text_auto=".2f",
        color_continuous_scale='RdBu',
        zmin=-1,
        zmax=1,
        title=f"Correlation Matrix: {title} Period"
    )
    fig.update_layout(template="plotly_dark")
    fig.show()
    return corr

# Generate correlation matrices
print("\nGenerating correlation matrices...")
covid_corr = plot_correlation_matrix(covid_returns, "COVID (2020-2021)")
post_covid_corr = plot_correlation_matrix(post_covid_returns, "Post-COVID (2022-2025)")

# ==============================================
# PART 3: COMPARATIVE VISUALIZATIONS
# ==============================================

def plot_comparative_scatter(asset1, asset2):
    fig = make_subplots(rows=1, cols=2, subplot_titles=[
        f"COVID Period: {asset1} vs {asset2}",
        f"Post-COVID Period: {asset1} vs {asset2}"
    ])

    # COVID period
    fig.add_trace(
        go.Scatter(
            x=covid_returns[asset1],
            y=covid_returns[asset2],
            mode='markers',
            name='COVID',
            marker=dict(color='red')
        ),
        row=1, col=1
    )

    # Post-COVID period
    fig.add_trace(
        go.Scatter(
            x=post_covid_returns[asset1],
            y=post_covid_returns[asset2],
            mode='markers',
            name='Post-COVID',
            marker=dict(color='blue')
        ),
        row=1, col=2
    )

    fig.update_xaxes(title_text=f"{asset1} Returns", row=1, col=1)
    fig.update_yaxes(title_text=f"{asset2} Returns", row=1, col=1)
    fig.update_xaxes(title_text=f"{asset1} Returns", row=1, col=2)
    fig.update_yaxes(title_text=f"{asset2} Returns", row=1, col=2)

    fig.update_layout(
        title=f"Comparative Analysis: {asset1} vs {asset2}",
        template="plotly_dark",
        showlegend=False
    )

    fig.show()

# Plot key pairs
print("\nGenerating comparative scatter plots...")
plot_comparative_scatter("Gold", "Oil")
plot_comparative_scatter("Gold", "Bitcoin")
plot_comparative_scatter("Oil", "ICICI Bank")
plot_comparative_scatter("Oil", "Bitcoin")
plot_comparative_scatter("Gold", "ICICI Bank")
plot_comparative_scatter("Bitcoin", "ICICI Bank")



# ==============================================
# PART 4: ROLLING CORRELATION ANALYSIS
# ==============================================

def plot_rolling_correlation(asset1, asset2, window=12):
    covid_rolling = covid_data[asset1].pct_change().rolling(window).corr(covid_data[asset2].pct_change())
    print(covid_data[asset2].pct_change())
    #covid_rolling = covid_data[asset1].rolling(window).corr(covid_data[asset2])
    post_covid_rolling = post_covid_data[asset1].pct_change().rolling(window).corr(post_covid_data[asset2].pct_change())


    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=covid_rolling.index,
        y=covid_rolling,
        name="COVID Period",
        line=dict(color='red')
    ))

    fig.add_trace(go.Scatter(
        x=post_covid_rolling.index,
        y=post_covid_rolling,
        name="Post-COVID Period",
        line=dict(color='blue')
    ))


    fig.update_layout(
        title=f"{window}-Week Rolling Correlation: {asset1} vs {asset2}",
        xaxis_title="Date",
        yaxis_title="Correlation",
        template="plotly_dark",
        hovermode="x unified"
    )


    fig.show()

print("\nGenerating rolling correlations...")
plot_rolling_correlation("Gold", "Oil")
plot_rolling_correlation("Bitcoin", "ICICI Bank")
plot_rolling_correlation("Gold", "Bitcoin")
plot_rolling_correlation("Oil", "ICICI Bank")
plot_rolling_correlation("Oil", "Bitcoin")
plot_rolling_correlation("Gold", "ICICI Bank")



# ==============================================
# PART 5: DIFFERENCE IN CORRELATIONS
# ==============================================

# Calculate correlation differences
corr_diff = post_covid_corr - covid_corr

# Plot the difference
fig = px.imshow(
    corr_diff,
    text_auto=".2f",
    color_continuous_scale='RdBu',
    zmin=-1,
    zmax=1,
    title="Change in Correlations: Post-COVID vs COVID Period"
)
fig.update_layout(template="plotly_dark")
fig.show()

# ==============================================
# PART 6: GENERATE REPORT READY OUTPUTS
# ==============================================

def save_visualizations():
    """Save visualizations with error handling"""
    try:
        print("\nSaving visualizations...")

        # Save correlation matrices
        fig = px.imshow(covid_corr, text_auto=".2f", color_continuous_scale='RdBu')
        fig.write_image("covid_correlation.png", engine="kaleido", scale=2)

        fig = px.imshow(post_covid_corr, text_auto=".2f", color_continuous_scale='RdBu')
        fig.write_image("post_covid_correlation.png", engine="kaleido", scale=2)

        fig = px.imshow(corr_diff, text_auto=".2f", color_continuous_scale='RdBu')
        fig.write_image("correlation_difference.png", engine="kaleido", scale=2)

        # Save sample scatter plot
        fig = make_subplots(rows=1, cols=2)
        fig.add_trace(go.Scatter(x=covid_returns["Gold"], y=covid_returns["Oil"], mode='markers'), row=1, col=1)
        fig.add_trace(go.Scatter(x=post_covid_returns["Gold"], y=post_covid_returns["Oil"], mode='markers'), row=1, col=2)
        fig.write_image("gold_oil_scatter.png", engine="kaleido", scale=2)

        print("Successfully saved visualizations as PNG files")

    except Exception as e:
        print(f"\nError saving images: {e}")
        print("Using matplotlib as fallback...")

        # Fallback using matplotlib
        plt.figure(figsize=(10, 8))
        sns.heatmap(covid_corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
        plt.title("COVID Period Correlation")
        plt.savefig("covid_correlation.png", dpi=300, bbox_inches='tight')
        plt.close()

        plt.figure(figsize=(10, 8))
        sns.heatmap(post_covid_corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
        plt.title("Post-COVID Period Correlation")
        plt.savefig("post_covid_correlation.png", dpi=300, bbox_inches='tight')
        plt.close()

        plt.figure(figsize=(10, 8))
        sns.heatmap(corr_diff, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
        plt.title("Correlation Differences")
        plt.savefig("correlation_difference.png", dpi=300, bbox_inches='tight')
        plt.close()

        print("Saved fallback visualizations using matplotlib")

save_visualizations()

# Generate summary statistics table
summary_table = pd.DataFrame({
    "Asset Pair": [f"{i} vs {j}" for i in assets for j in assets if i != j],
    "COVID Corr": [covid_corr.loc[i, j] for i in assets for j in assets if i != j],
    "Post-COVID Corr": [post_covid_corr.loc[i, j] for i in assets for j in assets if i != j],
    "Difference": [corr_diff.loc[i, j] for i in assets for j in assets if i != j]
})

print("\nCorrelation Summary Table:")
print(summary_table.to_markdown(index=False))

# Save summary table to CSV
summary_table.to_csv("correlation_summary.csv", index=False)

# ==============================================
# PART 7: KEY INSIGHTS
# ==============================================

print("\nKey Insights:")
print(f"1. Gold-Oil correlation changed from {covid_corr.loc['Gold', 'Oil']:.2f} (COVID) to {post_covid_corr.loc['Gold', 'Oil']:.2f} (Post-COVID)")
print(f"2. Bitcoin-Gold correlation increased by {corr_diff.loc['Gold', 'Bitcoin']:.2f}")
print(f"3. ICICI Bank's correlation with Oil changed by {corr_diff.loc['Oil', 'ICICI Bank']:.2f}")

print("\nAnalysis complete! Check your directory for:")
print("- COVID correlation heatmap (covid_correlation.png)")
print("- Post-COVID correlation heatmap (post_covid_correlation.png)")
print("- Correlation differences (correlation_difference.png)")
print("- Gold vs Oil scatter plot (gold_oil_scatter.png)")
print("- Summary table (correlation_summary.csv)")







Downloading COVID period data...
YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  4 of 4 completed


Downloading post-COVID data...


[*********************100%***********************]  4 of 4 completed



Generating correlation matrices...



Generating comparative scatter plots...



Generating rolling correlations...
Date
2020-01-02         NaN
2020-01-03    0.030566
2020-01-06    0.003489
2020-01-07   -0.009009
2020-01-08   -0.049282
                ...   
2021-12-23    0.014156
2021-12-27    0.024122
2021-12-28    0.005425
2021-12-29    0.007634
2021-12-30    0.005617
Name: Oil, Length: 483, dtype: float64


Date
2020-01-02         NaN
2020-01-03   -0.003237
2020-01-06   -0.024404
2020-01-07   -0.005326
2020-01-08    0.005833
                ...   
2021-12-23   -0.002047
2021-12-27    0.006427
2021-12-28   -0.000272
2021-12-29   -0.000136
2021-12-30    0.000000
Name: ICICI Bank, Length: 483, dtype: float64


Date
2020-01-02         NaN
2020-01-03    0.016202
2020-01-06    0.010973
2020-01-07    0.003576
2020-01-08   -0.009161
                ...   
2021-12-23    0.005329
2021-12-27   -0.001712
2021-12-28    0.001161
2021-12-29   -0.002817
2021-12-30    0.004210
Name: Bitcoin, Length: 483, dtype: float64


Date
2020-01-02         NaN
2020-01-03   -0.003237
2020-01-06   -0.024404
2020-01-07   -0.005326
2020-01-08    0.005833
                ...   
2021-12-23   -0.002047
2021-12-27    0.006427
2021-12-28   -0.000272
2021-12-29   -0.000136
2021-12-30    0.000000
Name: ICICI Bank, Length: 483, dtype: float64


Date
2020-01-02         NaN
2020-01-03    0.016202
2020-01-06    0.010973
2020-01-07    0.003576
2020-01-08   -0.009161
                ...   
2021-12-23    0.005329
2021-12-27   -0.001712
2021-12-28    0.001161
2021-12-29   -0.002817
2021-12-30    0.004210
Name: Bitcoin, Length: 483, dtype: float64


Date
2020-01-02         NaN
2020-01-03   -0.003237
2020-01-06   -0.024404
2020-01-07   -0.005326
2020-01-08    0.005833
                ...   
2021-12-23   -0.002047
2021-12-27    0.006427
2021-12-28   -0.000272
2021-12-29   -0.000136
2021-12-30    0.000000
Name: ICICI Bank, Length: 483, dtype: float64



Saving visualizations...
Successfully saved visualizations as PNG files

Correlation Summary Table:
| Asset Pair            |   COVID Corr |   Post-COVID Corr |   Difference |
|:----------------------|-------------:|------------------:|-------------:|
| Gold vs Oil           |    0.217676  |         0.0232389 |   -0.194437  |
| Gold vs Bitcoin       |    0.226872  |         0.137019  |   -0.0898532 |
| Gold vs ICICI Bank    |    0.129619  |         0.247811  |    0.118192  |
| Oil vs Gold           |    0.217676  |         0.0232389 |   -0.194437  |
| Oil vs Bitcoin        |    0.0896907 |         0.265467  |    0.175776  |
| Oil vs ICICI Bank     |    0.123163  |        -0.0779222 |   -0.201085  |
| Bitcoin vs Gold       |    0.226872  |         0.137019  |   -0.0898532 |
| Bitcoin vs Oil        |    0.0896907 |         0.265467  |    0.175776  |
| Bitcoin vs ICICI Bank |    0.163815  |         0.0270913 |   -0.136723  |
| ICICI Bank vs Gold    |    0.129619  |         0.247811  |   

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np

tickers = {
    "Gold": "GC=F",
    "Oil": "CL=F",
    "Bitcoin": "BTC-USD",
    "ICICI Bank": "ICICIBANK.NS"
}

start_date = "2024-04-30"
end_date = "2025-04-30"

print(f"Downloading data from {start_date} to {end_date}...\n")

asset_series = {}

for name, symbol in tickers.items():
    df = yf.download(symbol, start=start_date, end=end_date, progress=False)
    if not df.empty:
        prices = df['Adj Close'] if 'Adj Close' in df.columns else df['Close']
        asset_series[name] = prices.dropna()
        print(f"✓ {name}: {len(prices)} data points")
    else:
        print(f"✗ {name}: No data found")

# Combine all series with proper date alignment
if asset_series:
    asset_df = pd.concat(asset_series.values(), axis=1, join='inner')
    asset_df.columns = asset_series.keys()

    def annualized_volatility(series):
        returns = series.pct_change().dropna()
        return returns.std() * np.sqrt(252)

    vol_dict = {
        asset: annualized_volatility(asset_df[asset])
        for asset in asset_df.columns
    }

    vol_df = pd.DataFrame.from_dict(vol_dict, orient='index', columns=["Annualized Volatility"])
    vol_df.index.name = "Asset"

    print("\n=== Annualized Volatility (Apr 30, 2024 – Apr 30, 2025) ===")
    print(vol_df.applymap(lambda x: f"{x:.2%}"))

    vol_df.to_csv("volatility_2024_2025.csv")
    print("\n✓ Saved to 'volatility_2024_2025.csv'")
else:
    print("✗ No valid data to calculate volatility.")


Downloading data from 2024-04-30 to 2025-04-30...

✓ Gold: 251 data points
✓ Oil: 251 data points
✓ Bitcoin: 365 data points
✓ ICICI Bank: 247 data points

=== Annualized Volatility (Apr 30, 2024 – Apr 30, 2025) ===
           Annualized Volatility
Asset                           
Gold                      18.02%
Oil                       31.73%
Bitcoin                   52.77%
ICICI Bank                20.92%

✓ Saved to 'volatility_2024_2025.csv'



DataFrame.applymap has been deprecated. Use DataFrame.map instead.

