In [None]:
# Import necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import json
import os
import time
from google.colab import drive
from getpass import getpass

# Set display options for pandas: show every column when a DataFrame is displayed
pd.set_option('display.max_columns', None)

# Mount Google Drive and point at the "Historical XBI Driver Data" Excel workbook
drive.mount('/content/drive')
file_path = '/content/drive/My Drive/INTERNSHIP_ADAR1/Data/Input/Historical XBI Driver Data.xlsx'

# Open the workbook once; the handle is kept in `xls` so sheets can be read from it
xls = pd.ExcelFile(file_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# CLEAN DATA

In [None]:
def _read_driver_sheet(sheet_name, rows_to_skip):
    """Read one sheet of the open workbook `xls`, skipping its first `rows_to_skip` rows."""
    return pd.read_excel(xls, sheet_name=sheet_name, skiprows=rows_to_skip)


# ETF / index sheets (first 6 rows skipped)
df_xbi = _read_driver_sheet("XBI", 6)
df_ibb = _read_driver_sheet("IBB", 6)
df_iwm = _read_driver_sheet("IWM", 6)
df_iwc = _read_driver_sheet("IWC", 6)
df_qqq = _read_driver_sheet("QQQ", 6)
df_xlv = _read_driver_sheet("XLV", 6)

# Treasury sheets (first 5 rows skipped)
df_us2y = _read_driver_sheet("US2Y", 5)
df_us10y = _read_driver_sheet("US10Y", 5)
df_us30y = _read_driver_sheet("US30Y", 5)

# XBI market-cap sheet (first 6 rows skipped)
df_mktcap_xbi = _read_driver_sheet("XBI Market Cap", 6)

# M&A sheets: the "All" sheet skips 6 rows, the Mega and SMID sheets skip 5
df_all_ma = _read_driver_sheet("All M&A (>$500M)", 6)
df_mega_ma = _read_driver_sheet("Mega M&A (>$10B)", 5)
df_smid_ma = _read_driver_sheet("SMID M&A ($1-10B)", 5)

# Rename columns for clarity in df_xbi, df_ibb, df_iwm
def rename_columns(df):
    """Give the raw price/volume columns of `df` readable names, in place.

    Columns that are not listed in the mapping are left untouched.
    The frame is modified directly and nothing is returned.
    """
    readable_names = {
        # price fields
        'PX_LAST': 'Price_Last',
        'Change': 'Price_Change',
        '% Change': 'Price_Percent_Change',
        # volume fields (the '.1' suffix marks the second occurrence of a header)
        'PX_VOLUME': 'Volume',
        'Change.1': 'Volume_Change',
        '% Change.1': 'Volume_Percent_Change',
    }
    df.rename(columns=readable_names, inplace=True)

# Apply the readable column names to the XBI, IBB and IWM frames (modified in place)
for _price_frame in (df_xbi, df_ibb, df_iwm):
    rename_columns(_price_frame)

# Convert dates in dataframes to datetime
def convert_dates(df):
    """Convert the 'Date' column of `df` to datetime, in place, and return `df`.

    Numeric values are interpreted as day counts from 1899-12-30 (the origin
    the original code used for spreadsheet serial dates). Any other dtype,
    including a column that is already datetime64, is passed to a plain
    `pd.to_datetime`, because combining `origin=` with non-numeric input
    raises ValueError.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Date' column.

    Returns
    -------
    pandas.DataFrame
        The same frame object, with 'Date' converted.

    Raises
    ------
    KeyError
        If `df` has no 'Date' column.
    """
    dates = df['Date']
    if pd.api.types.is_numeric_dtype(dates):
        # Day-count serials: offset from the 1899-12-30 origin
        df['Date'] = pd.to_datetime(dates, origin='1899-12-30', unit='D')
    else:
        # Already datetime (or date-like strings): no origin arithmetic applies
        df['Date'] = pd.to_datetime(dates)
    return df

# Convert the Date column of every price, yield and market-cap frame,
# rebinding each name to the frame returned by convert_dates
(
    df_xbi,
    df_ibb,
    df_iwm,
    df_iwc,
    df_qqq,
    df_xlv,
    df_us2y,
    df_us10y,
    df_us30y,
    df_mktcap_xbi,
) = map(
    convert_dates,
    (
        df_xbi,
        df_ibb,
        df_iwm,
        df_iwc,
        df_qqq,
        df_xlv,
        df_us2y,
        df_us10y,
        df_us30y,
        df_mktcap_xbi,
    ),
)

# Clean M&A datasets
def clean_ma_data(df):
    """Return a cleaned copy of an M&A sheet; the input frame is not modified.

    Steps:
      1. drop columns whose header starts with 'Unnamed',
      2. rename the remaining headers to underscore-style names,
      3. drop rows that have no 'Date_Announced' value.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw M&A frame; must contain a 'Date Announced' (or already renamed
        'Date_Announced') column.

    Returns
    -------
    pandas.DataFrame
        New frame with the cleaned columns and rows.

    Raises
    ------
    KeyError
        If no 'Date_Announced' column exists after renaming.
    """
    readable_names = {
        'Date Announced': 'Date_Announced',
        'Deal Value \n($B)': 'Deal_Value_Billion',
        'LTM sales\n($M)': 'LTM_Sales_Million',
        'Peak Sales** \nEst ($M)': 'Peak_Sales_Est_Million',
        'Deal value/ LTM sales': 'Deal_Value_LTM_Sales',
        'Deal value/ Peak sales': 'Deal_Value_Peak_Sales',
        'Takeout Forward P/E': 'Takeout_Forward_PE',
        '90 Day Premium': '90_Day_Premium',
        'Cash / Stock*': 'Cash_Stock',
        'Termination Fees': 'Termination_Fees',
        'Financial Advisor (Target)': 'Financial_Advisor_Target',
        'Financial Advisor (Acquirer)': 'Financial_Advisor_Acquirer',
        'PR on Target\'s Website': 'PR_Target_Website'
    }
    # Compare on the string form of each header so numeric or date headers
    # cannot leave NaN in the mask (which would make `~` fail).
    is_unnamed = df.columns.astype(str).str.contains('^Unnamed', na=False)
    # Non-inplace rename: avoids mutating a slice of the caller's frame.
    cleaned = df.loc[:, ~is_unnamed].rename(columns=readable_names)
    return cleaned.dropna(subset=['Date_Announced'])

# Clean the three M&A frames and rebind each name to its cleaned result
df_all_ma, df_mega_ma, df_smid_ma = map(
    clean_ma_data, (df_all_ma, df_mega_ma, df_smid_ma)
)

# Define function to clean year dataset
def clean_year_dataset(df):
    """Assign the seven standard column names to `df` and parse its 'Date' column.

    The frame is modified in place and also returned. Assigning the names
    fails if `df` does not have exactly seven columns.
    """
    standard_names = [
        'Date',
        'PX_LAST', 'Change', '% Change',
        'PX_BID', 'Change_BID', '% Change_BID',
    ]
    df.columns = standard_names
    parsed_dates = pd.to_datetime(df['Date'])
    df['Date'] = parsed_dates
    return df

# Standardise the three Treasury frames and rebind each name to the returned frame
df_us2y, df_us10y, df_us30y = map(
    clean_year_dataset, (df_us2y, df_us10y, df_us30y)
)

# Additional cleaning for M&A data
def _keep_timestamp_rows(ma_df):
    """Return the rows of `ma_df` whose 'Date_Announced' renders with '00:00:00'.

    The previous loop rebound its loop variable, so the filtered result was
    thrown away and the helper column 'Date_Announced_Str' was left behind on
    the real frames. This helper returns the filtered frame so the caller can
    rebind the name, and it never adds a helper column.

    A column that is already datetime64 is returned unfiltered: its values are
    all timestamps, and their string form may omit the time part, so the text
    match would wrongly discard them.
    """
    announced = ma_df['Date_Announced']
    if pd.api.types.is_datetime64_any_dtype(announced):
        return ma_df
    # Presumably non-date rows (notes, footers) lack the '00:00:00' time part
    # that timestamp objects show when converted to text -- TODO confirm.
    is_timestamp = announced.astype(str).str.contains('00:00:00', na=False)
    return ma_df.loc[is_timestamp].copy()


df_all_ma = _keep_timestamp_rows(df_all_ma)
df_smid_ma = _keep_timestamp_rows(df_smid_ma)
df_mega_ma = _keep_timestamp_rows(df_mega_ma)




ValueError: '0      2024-08-26
1      2024-08-23
2      2024-08-22
3      2024-08-21
4      2024-08-20
          ...    
4665   2006-02-10
4666   2006-02-09
4667   2006-02-08
4668   2006-02-07
4669   2006-02-06
Name: Date, Length: 4670, dtype: datetime64[ns]' is not compatible with origin='1899-12-30'; it must be numeric with a unit specified

In [None]:
#!pip install wrds
import wrds
import pandas as pd

# Connect to WRDS (credentials are requested by the wrds client, not stored here)
conn = wrds.Connection()

# Set the date range
start_date = '2010-01-01'
end_date = '2023-06-30'

# Define the indexes to download
indexes = ['XBI', 'NBI']


def _fetch_index_returns(index):
    """Query the daily `vwretd` series for one index name and tag rows with that name."""
    # NOTE(review): the subquery filters crsp.dsi on indexno / namedt /
    # nameendt / indexname -- confirm these columns exist in that table.
    index_data = conn.raw_sql(f"""
        SELECT date, vwretd AS return
        FROM crsp.dsi
        WHERE date BETWEEN '{start_date}' AND '{end_date}'
        AND indexno IN (
            SELECT indexno
            FROM crsp.dsi
            WHERE namedt <= '{end_date}' AND nameendt >= '{start_date}'
            AND indexname = '{index}'
        )
        ORDER BY date
    """)

    # Add a column to identify the index
    index_data['index'] = index
    return index_data


# Collect one frame per index; DataFrame.append no longer exists in pandas 2.x,
# so the frames are gathered in a list and concatenated once.
index_frames = []
try:
    for index in indexes:
        index_frames.append(_fetch_index_returns(index))
finally:
    # Close the WRDS connection even when a query fails
    conn.close()

data = pd.concat(index_frames, ignore_index=True) if index_frames else pd.DataFrame()

# Print the first few rows of the data
print(data.head())

# Save the data to a CSV file
data.to_csv('index_returns.csv', index=False)

Collecting wrds
  Downloading wrds-3.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting packaging<23.3 (from wrds)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting psycopg2-binary<2.10,>=2.9 (from wrds)
  Downloading psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting scipy<1.13,>=1.12 (from wrds)
  Downloading scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Downloading wrds-3.2.0-py3-none-any.whl (13 kB)
Downloading packaging-23.2-py3-none-any.whl (53 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0

OperationalError: (psycopg2.OperationalError) connection to server at "wrds-pgdata.wharton.upenn.edu" (165.123.60.118), port 9737 failed: SSL connection has been closed unexpectedly

(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [None]:
!pip install --upgrade wrds


Collecting wrds
  Downloading wrds-3.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting packaging<23.3 (from wrds)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting psycopg2-binary<2.10,>=2.9 (from wrds)
  Downloading psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting scipy<1.13,>=1.12 (from wrds)
  Downloading scipy-1.12.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Downloading wrds-3.2.0-py3-none-any.whl (13 kB)
Downloading packaging-23.2-py3-none-any.whl (53 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading psycopg2_binary-2.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0

In [None]:
import wrds
# Open a WRDS session for the given account (the password is prompted for)
db = wrds.Connection(wrds_username='dreamspartan')

# Daily Dow Jones series; the query result is the cell's displayed output
dji_query = 'SELECT date,dji FROM djones.djdaily'
db.raw_sql(dji_query)

Enter your WRDS username [dreamspartan]:
Enter your password:··········


OperationalError: (psycopg2.OperationalError) connection to server at "wrds-pgdata.wharton.upenn.edu" (165.123.60.118), port 9737 failed: SSL connection has been closed unexpectedly

(Background on this error at: https://sqlalche.me/e/20/e3q8)