In [None]:
!pip install vaderSentiment

Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline
import re
from wordcloud import WordCloud

Reading News file

In [None]:
df = pd.read_csv('/content/News_Scraped - final_output.csv')
df.head()

Unnamed: 0,text,url,Published Date,Title,Text,Summary
0,"Business StandardMarket Wrap, Dec 31: Here's a...",https://www.business-standard.com/podcast/mark...,31 Dec 2020,,\nReference #18.aefdd417.1726759285.b101bdc\nh...,https reference
1,"The New Indian ExpressSensex, Nifty end flat o...",https://www.newindianexpress.com/business/2020...,31 Dec 2020,"Sensex, Nifty end flat on last trading day of ...",MUMBAI: Key stock indices Sensex and Nifty clo...,per cent sensex nifty closed
2,Business StandardMarkets in 2020: Sensex ends ...,https://www.business-standard.com/article/mark...,31 Dec 2020,,\nReference #18.aefdd417.1726759285.b101cb1\nh...,https reference
3,"mintSensex, Nifty end last day of 2020 on flat...",https://www.livemint.com/market/stock-market-n...,31 Dec 2020,"Sensex, Nifty end last day of 2020 on flat not...",Welcome to the Mint live blog. Track this spac...,said company growth crore thursday
4,"mintNifty hits 14,000 on last trading day of 2...",https://www.livemint.com/market/stock-market-n...,31 Dec 2020,"Nifty hits 14,000 on last trading day of 2020....",Indian markets ended the last day of 2020 on a...,expect 2020 year nifty markets


In [None]:
df.isnull().sum()

Unnamed: 0,0
text,0
url,0
Published Date,172
Title,41
Text,8
Summary,8


In [None]:
df.shape

(1654, 6)

Data Cleaning and Preprocessing

In [None]:
# Convert 'Publication Date' to datetime format
df['Published Date'] = pd.to_datetime(df['Published Date'], errors='coerce')

# Drop rows with null values in 'Publication Date' and 'Text'
df = df.dropna(subset=['Published Date','Title', 'Text'])

# Confirm null values are removed
df.isnull().sum()


Unnamed: 0,0
text,0
url,0
Published Date,0
Title,0
Text,0
Summary,0


In [None]:
df.shape

(1437, 6)

In [None]:
# Text cleaning function
def clean_text(text):
    # Remove special characters and convert to lowercase
    text = re.sub(r'[^a-zA-Z\s]', '', text).lower()
    # Remove stopwords
    stop_words = ENGLISH_STOP_WORDS
    text = " ".join([word for word in text.split() if word not in stop_words])
    return text

# Apply text cleaning to 'Text' column
df['Cleaned_Text'] = df['Text'].apply(clean_text)

# Check cleaned text
df[['Text', 'Cleaned_Text']].head()


Unnamed: 0,Text,Cleaned_Text
1,MUMBAI: Key stock indices Sensex and Nifty clo...,mumbai key stock indices sensex nifty closed f...
3,Welcome to the Mint live blog. Track this spac...,welcome mint live blog track space latest stoc...
4,Indian markets ended the last day of 2020 on a...,indian markets ended day dull note logged stro...
5,Welcome to the Mint live blog. Track this spac...,welcome mint live blog track space latest stoc...
6,Welcome to the Mint live blog. Track this spac...,welcome mint live blog track space latest stoc...


Sentiment Analysis using VADER, BERT and FinBERT

In [None]:
# Initialize VADER sentiment analyzer
vader = SentimentIntensityAnalyzer()

# Apply VADER sentiment analysis on the cleaned text
df['VADER_Sentiment'] = df['Cleaned_Text'].apply(lambda x: vader.polarity_scores(x)['compound'])

# Check VADER sentiment results
df[['Cleaned_Text', 'VADER_Sentiment']].head()


Unnamed: 0,Cleaned_Text,VADER_Sentiment
1,mumbai key stock indices sensex nifty closed f...,0.959
3,welcome mint live blog track space latest stoc...,0.9999
4,indian markets ended day dull note logged stro...,0.9983
5,welcome mint live blog track space latest stoc...,0.9997
6,welcome mint live blog track space latest stoc...,0.9998


In [None]:
# Load pre-trained BERT sentiment analysis pipeline
bert_pipeline = pipeline('sentiment-analysis')

# Apply BERT sentiment analysis
df['BERT_Sentiment'] = df['Cleaned_Text'].apply(lambda x: bert_pipeline(x[:512])[0]['score'] if bert_pipeline(x[:512])[0]['label'] == 'POSITIVE' else -bert_pipeline(x[:512])[0]['score'])

# BERT sentiment results
df[['Cleaned_Text', 'BERT_Sentiment']].head()

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]



Unnamed: 0,Cleaned_Text,BERT_Sentiment
1,mumbai key stock indices sensex nifty closed f...,-0.984474
3,welcome mint live blog track space latest stoc...,-0.997903
4,indian markets ended day dull note logged stro...,-0.991068
5,welcome mint live blog track space latest stoc...,-0.831471
6,welcome mint live blog track space latest stoc...,0.91641


In [None]:
# Load pre-trained FinBERT pipeline (specific for financial news sentiment)
!pip install transformers
from transformers import pipeline
import torch




In [None]:
# Check if CUDA is available and set the device accordingly
if torch.cuda.is_available():
    device = 0 # Use the first CUDA device
else:
    device = -1 # Use the CPU

finbert_pipeline = pipeline('sentiment-analysis', model='yiyanghkust/finbert-tone', device=device) # Added from_pt=True to load the PyTorch model and device to specify the device

# Apply FinBERT sentiment analysis
df['FinBERT_Sentiment'] = df['Cleaned_Text'].apply(lambda x: finbert_pipeline(x[:512], truncation=True)[0]['score'] if finbert_pipeline(x[:512], truncation=True)[0]['label'] == 'positive' else -finbert_pipeline(x[:512], truncation=True)[0]['score'])

# Check FinBERT sentiment results
df[['Cleaned_Text', 'FinBERT_Sentiment']].head()

config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Unnamed: 0,Cleaned_Text,FinBERT_Sentiment
1,mumbai key stock indices sensex nifty closed f...,-0.831084
3,welcome mint live blog track space latest stoc...,-0.99908
4,indian markets ended day dull note logged stro...,-0.999825
5,welcome mint live blog track space latest stoc...,-0.995942
6,welcome mint live blog track space latest stoc...,-0.99998


Reading Stock Data (NIFTY50)

In [None]:
stock_data = pd.read_csv('/content/NIFTY 50_Historical_PR_01012020to31122020.csv')


In [None]:
stock_data['Date'] = pd.to_datetime(stock_data['Date'])

# Sort the data by date
stock_data = stock_data.sort_values(by='Date')

# Calculate daily returns from the 'Close' column
stock_data['Stock_Returns'] = stock_data['Close'].pct_change()

# Drop the first row, as it will have NaN due to lack of previous day data
stock_data = stock_data.dropna(subset=['Stock_Returns'])

# Display the updated stock data with the 'Stock_Returns' column
stock_data.head()


Unnamed: 0,Index Name,Date,Open,High,Low,Close,Stock_Returns
250,NIFTY 50,2020-01-02,12198.55,12289.9,12195.25,12282.2,0.008184
249,NIFTY 50,2020-01-03,12261.1,12265.6,12191.35,12226.65,-0.004523
248,NIFTY 50,2020-01-06,12170.6,12179.1,11974.2,11993.05,-0.019106
247,NIFTY 50,2020-01-07,12079.1,12152.15,12005.35,12052.95,0.004995
246,NIFTY 50,2020-01-08,11939.1,12044.95,11929.6,12025.35,-0.00229


In [None]:
# Merge stock prices with sentiment data
df['Published Date'] = pd.to_datetime(df['Published Date'])
merged_data = pd.merge(df, stock_data, left_on='Published Date', right_on='Date', how='inner')
print(merged_data.shape)

(1216, 17)


Calculating daily returns

In [None]:
# Handle missing values
merged_data.dropna(inplace=True)

merged_data['Daily_Return'] = merged_data['Close'].pct_change()

Additional features

In [None]:
# Create lagged sentiment features
for sentiment in ['VADER_Sentiment', 'BERT_Sentiment', 'FinBERT_Sentiment']:
    for lag in range(1, 6):  # Creating lagged features for 1 to 5 days
        merged_data[f'{sentiment}_lag{lag}'] = merged_data[sentiment].shift(lag)

# Drop rows with NaN values due to lagging
merged_data.dropna(inplace=True)

In [None]:
# Define event dates (example dates, replace with your actual event dates)
event_dates = pd.to_datetime(['2020-03-24', '2020-04-15', '2020-05-01','2020-07-01'])

In [None]:
# Create Event_Occurred feature
merged_data['Event_Occurred'] = merged_data['Published Date'].isin(event_dates).astype(int)


In [None]:
# Create additional features (e.g., moving averages)
merged_data['MA_5'] = merged_data['Close'].rolling(window=5).mean()
merged_data['MA_10'] = merged_data['Close'].rolling(window=10).mean()

In [None]:
# Drop NaN values created by moving averages
merged_data.dropna(inplace=True)

Data Preparation

In [None]:
X = merged_data[['VADER_Sentiment', 'BERT_Sentiment', 'FinBERT_Sentiment', 'Daily_Return', 'MA_5', 'MA_10', 'Event_Occurred'] +
                 [f'VADER_Sentiment_lag{lag}' for lag in range(1, 6)] +
                 [f'BERT_Sentiment_lag{lag}' for lag in range(1, 6)] +
                 [f'FinBERT_Sentiment_lag{lag}' for lag in range(1, 6)]]
y = merged_data['Daily_Return']  # Predicting daily returns


Random Forest Regressor

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [None]:
# Cross-Validation

cv_scores = cross_val_score(model, X, y, cv=5)
print(f'Cross-Validation Scores: {cv_scores}')
print(f'Mean Cross-Validation Score: {np.mean(cv_scores)}')

Cross-Validation Scores: [0.97750799 0.9965451  0.98861131 0.99772415 0.90538977]
Mean Cross-Validation Score: 0.9731556641796051


In [None]:
# Make predictions
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

In [None]:
# Evaluate the model
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

print(f'Random Forest - Training R^2 Score: {train_r2}')
print(f'Random Forest - Testing R^2 Score: {test_r2}')
print(f'Random Forest - Training MSE: {train_mse}')
print(f'Random Forest - Testing MSE: {test_mse}')


Random Forest - Training R^2 Score: 0.9986130138305931
Random Forest - Testing R^2 Score: 0.866756780626613
Random Forest - Training MSE: 2.8767423093823257e-06
Random Forest - Testing MSE: 0.0005109661014155563


Random Forest with Hyperparameter tuning

In [None]:
from sklearn.model_selection import GridSearchCV

# Prepare data for modeling
X = merged_data[['VADER_Sentiment', 'BERT_Sentiment', 'FinBERT_Sentiment', 'Daily_Return', 'MA_5', 'MA_10', 'Event_Occurred'] +
                 [f'VADER_Sentiment_lag{lag}' for lag in range(1, 6)] +
                 [f'BERT_Sentiment_lag{lag}' for lag in range(1, 6)] +
                 [f'FinBERT_Sentiment_lag{lag}' for lag in range(1, 6)]]
y = merged_data['Daily_Return']  # Predicting daily returns

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter tuning using Grid Search
rf = RandomForestRegressor(random_state=42)

# Define the parameters grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Setup Grid Search
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid,
                           cv=5, scoring='r2', verbose=2, n_jobs=-1)

# Fit Grid Search
grid_search.fit(X_train, y_train)

# Best parameters and best score
print(f'Best Parameters: {grid_search.best_params_}')
print(f'Best Cross-Validation Score: {grid_search.best_score_}')

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Best Parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Best Cross-Validation Score: 0.9910718252975338


In [None]:
# Train Random Forest Regressor with best parameters
best_rf = grid_search.best_estimator_
best_rf.fit(X_train, y_train)

# Make predictions
y_train_pred = best_rf.predict(X_train)
y_test_pred = best_rf.predict(X_test)

# Evaluate the model
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

print(f'Random Forest - Training R^2 Score: {train_r2}')
print(f'Random Forest - Testing R^2 Score: {test_r2}')
print(f'Random Forest - Training MSE: {train_mse}')
print(f'Random Forest - Testing MSE: {test_mse}')

Random Forest - Training R^2 Score: 0.9986130138305931
Random Forest - Testing R^2 Score: 0.866756780626613
Random Forest - Training MSE: 2.8767423093823257e-06
Random Forest - Testing MSE: 0.0005109661014155563


Gradient Boosting Regressor

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

gbm = GradientBoostingRegressor(random_state=42)

# Fit the model
gbm.fit(X_train, y_train)

# Make predictions
y_train_pred_gbm = gbm.predict(X_train)
y_test_pred_gbm = gbm.predict(X_test)

# Evaluate the model
train_r2_gbm = r2_score(y_train, y_train_pred_gbm)
test_r2_gbm = r2_score(y_test, y_test_pred_gbm)
train_mse_gbm = mean_squared_error(y_train, y_train_pred_gbm)
test_mse_gbm = mean_squared_error(y_test, y_test_pred_gbm)

print(f'Gradient Boosting - Training R^2 Score: {train_r2_gbm}')
print(f'Gradient Boosting - Testing R^2 Score: {test_r2_gbm}')
print(f'Gradient Boosting - Training MSE: {train_mse_gbm}')
print(f'Gradient Boosting - Testing MSE: {test_mse_gbm}')

Gradient Boosting - Training R^2 Score: 0.999956083796404
Gradient Boosting - Testing R^2 Score: 0.8961299182002616
Gradient Boosting - Training MSE: 9.108641725393842e-08
Gradient Boosting - Testing MSE: 0.000398324890381085


In [None]:
# Ensemble: Average predictions from Random Forest and Gradient Boosting
y_ensemble_pred = (y_test_pred + y_test_pred_gbm) / 2

# Evaluate ensemble model
ensemble_r2 = r2_score(y_test, y_ensemble_pred)
ensemble_mse = mean_squared_error(y_test, y_ensemble_pred)

print(f'Ensemble Model - Testing R^2 Score: {ensemble_r2}')
print(f'Ensemble Model - Testing MSE: {ensemble_mse}')

Ensemble Model - Testing R^2 Score: 0.8821219312147465
Ensemble Model - Testing MSE: 0.0004520432449234708


Gradient Boosting Hyperparamter tuning

In [None]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid_gbm = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [3, 4, 5],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Initialize the grid search
grid_search_gbm = GridSearchCV(GradientBoostingRegressor(random_state=42),
                                param_grid_gbm,
                                cv=5,
                                n_jobs=-1,
                                scoring='r2',
                                verbose=2)

# Fit grid search
grid_search_gbm.fit(X_train, y_train)

# Get the best parameters and score
best_params_gbm = grid_search_gbm.best_params_
best_score_gbm = grid_search_gbm.best_score_

print(f"Best Parameters for Gradient Boosting: {best_params_gbm}")
print(f"Best Cross-Validation Score: {best_score_gbm}")


Fitting 5 folds for each of 243 candidates, totalling 1215 fits
Best Parameters for Gradient Boosting: {'learning_rate': 0.05, 'max_depth': 4, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 200}
Best Cross-Validation Score: 0.9975985757046452


In [None]:
# Refit the model using the best parameters
best_params_gbm = grid_search_gbm.best_params_
tuned_gbm = GradientBoostingRegressor(**best_params_gbm, random_state=42)
tuned_gbm.fit(X_train, y_train)

# Make predictions
y_train_pred_tuned = tuned_gbm.predict(X_train)
y_test_pred_tuned = tuned_gbm.predict(X_test)

# Evaluate the tuned model
train_r2_tuned = r2_score(y_train, y_train_pred_tuned)
test_r2_tuned = r2_score(y_test, y_test_pred_tuned)
train_mse_tuned = mean_squared_error(y_train, y_train_pred_tuned)
test_mse_tuned = mean_squared_error(y_test, y_test_pred_tuned)

print(f'Tuned Gradient Boosting - Training R^2 Score: {train_r2_tuned}')
print(f'Tuned Gradient Boosting - Testing R^2 Score: {test_r2_tuned}')
print(f'Tuned Gradient Boosting - Training MSE: {train_mse_tuned}')
print(f'Tuned Gradient Boosting - Testing MSE: {test_mse_tuned}')

Tuned Gradient Boosting - Training R^2 Score: 0.9999964299408576
Tuned Gradient Boosting - Testing R^2 Score: 0.8949815508887273
Tuned Gradient Boosting - Training MSE: 7.404644983970767e-09
Tuned Gradient Boosting - Testing MSE: 0.00040272869247268264


Extreme Gradient Boosting Regressor

In [None]:
import xgboost as xgb

xgb_model = xgb.XGBRegressor(random_state=42)

# Fit the model
xgb_model.fit(X_train, y_train)

# Make predictions
y_train_pred_xgb = xgb_model.predict(X_train)
y_test_pred_xgb = xgb_model.predict(X_test)

# Evaluate the model
train_r2_xgb = r2_score(y_train, y_train_pred_xgb)
test_r2_xgb = r2_score(y_test, y_test_pred_xgb)
train_mse_xgb = mean_squared_error(y_train, y_train_pred_xgb)
test_mse_xgb = mean_squared_error(y_test, y_test_pred_xgb)

print(f'XGBoost - Training R^2 Score: {train_r2_xgb}')
print(f'XGBoost - Testing R^2 Score: {test_r2_xgb}')
print(f'XGBoost - Training MSE: {train_mse_xgb}')
print(f'XGBoost - Testing MSE: {test_mse_xgb}')

XGBoost - Training R^2 Score: 0.9999146098458377
XGBoost - Testing R^2 Score: 0.901739532816934
XGBoost - Training MSE: 1.771073675438511e-07
XGBoost - Testing MSE: 0.0003768129295878493


XG Boost Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid_xgb = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0],
}

# Initialize GridSearchCV
grid_search_xgb = GridSearchCV(
    estimator=xgb.XGBRegressor(random_state=42),
    param_grid=param_grid_xgb,
    scoring='r2',  # Use R-squared for scoring
    cv=5,          # 5-fold cross-validation
    n_jobs=-1,      # Use all available cores
    verbose=2
)

# Fit the grid search
grid_search_xgb.fit(X_train, y_train)

# Get the best parameters and score
best_params_xgb = grid_search_xgb.best_params_
best_score_xgb = grid_search_xgb.best_score_

print(f"Best Parameters for XGBoost: {best_params_xgb}")
print(f"Best Cross-Validation Score: {best_score_xgb}")

Fitting 5 folds for each of 243 candidates, totalling 1215 fits
Best Parameters for XGBoost: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.8}
Best Cross-Validation Score: 0.9758144941788822


In [None]:
# Train XGBoost with best parameters
best_xgb_model = xgb.XGBRegressor(**best_params_xgb, random_state=42)
best_xgb_model.fit(X_train, y_train)

# Make predictions
y_train_pred_best_xgb = best_xgb_model.predict(X_train)
y_test_pred_best_xgb = best_xgb_model.predict(X_test)

# Evaluate the model
train_r2_best_xgb = r2_score(y_train, y_train_pred_best_xgb)
test_r2_best_xgb = r2_score(y_test, y_test_pred_best_xgb)
train_mse_best_xgb = mean_squared_error(y_train, y_train_pred_best_xgb)
test_mse_best_xgb = mean_squared_error(y_test, y_test_pred_best_xgb)

print(f'Tuned XGBoost - Training R^2 Score: {train_r2_best_xgb}')
print(f'Tuned XGBoost - Testing R^2 Score: {test_r2_best_xgb}')
print(f'Tuned XGBoost - Training MSE: {train_mse_best_xgb}')
print(f'Tuned XGBoost - Testing MSE: {test_mse_best_xgb}')

Tuned XGBoost - Training R^2 Score: 0.9989278787133251
Tuned XGBoost - Testing R^2 Score: 0.9155791916354591
Tuned XGBoost - Training MSE: 2.2236823511267848e-06
Tuned XGBoost - Testing MSE: 0.00032374008622156525


Event Study

In [None]:
def calculate_car(merged_data, event_date, window=5):
    event_window = merged_data[(merged_data['Published Date'] >= event_date - pd.DateOffset(days=window)) &
                                (merged_data['Published Date'] <= event_date + pd.DateOffset(days=window))]
    car = event_window['Daily_Return'].sum()
    return car

car_results = {str(date.date()): calculate_car(merged_data, date) for date in event_dates}
print("Cumulative Abnormal Returns (CAR) for event dates:")
print(car_results)

Cumulative Abnormal Returns (CAR) for event dates:
{'2020-03-24': -0.14643465740796202, '2020-04-15': -0.21941411699243496, '2020-05-01': 0.02489950100259697, '2020-07-01': -0.6657215188385618}
