In [134]:
import requests
import pandas as pd
import json
from datetime import datetime
import time
import os
from dotenv import load_dotenv

# Macro Data

In [135]:
# Read the macro CSV; the 'date' column is parsed to datetimes while loading.
macro_data = pd.read_csv(
    'data/macro_data.csv',
    parse_dates=['date'],
)

In [136]:
# Re-coerce 'date' with an explicit year-month-day format so the column is
# datetime even if the parse on read left it as strings.
macro_data['date'] = pd.to_datetime(
    macro_data['date'],
    format='%Y-%m-%d',
)

In [137]:
# Keep only rows whose 'value' is not the literal "." string
# (presumably a missing-observation marker in the source file -- TODO confirm).
macro_data = macro_data.loc[macro_data['value'].ne(".")]

In [138]:
# Cast 'value' to float; this raises if any remaining entry is not numeric.
macro_data = macro_data.astype({'value': float})

In [139]:
# Replace missing 'maturity' entries with an empty string so the column can be
# concatenated with 'function' without propagating NaN.
macro_data = macro_data.assign(
    maturity=macro_data['maturity'].fillna('')
)

In [140]:
# Build the per-series key 'Merged' as <maturity><function>, with no separator.
macro_data = macro_data.assign(
    Merged=macro_data['maturity'].astype(str) + macro_data['function']
)

In [142]:
# Order rows by series key and then by date so that pct_change compares each
# observation with the previous one of the same series.
macro_data = macro_data.sort_values(by=['Merged', 'date'])

# Fractional change of 'value' within each 'Merged' series; the first row of
# every series has no predecessor and therefore gets NaN.
macro_data['Macro Change%'] = (
    macro_data
    .groupby('Merged')['value']
    .pct_change()
)

In [None]:
## Remove outliers based on IQR for 'Macro Change%'
# Quartiles are taken over the whole column, i.e. all series pooled together.
Q1, Q3 = (macro_data['Macro Change%'].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1

# Accept values within 1.5 * IQR of the quartiles (both ends inclusive).
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# NaN changes never satisfy the range test, so those rows are dropped as well.
within_bounds = macro_data['Macro Change%'].between(lower_bound, upper_bound)
filtered_macro_data = macro_data[within_bounds]


In [144]:
# (rows, columns) before and after the outlier filter.
(macro_data.shape, filtered_macro_data.shape)

((7003, 6), (6051, 6))

# Price Data   

In [145]:
# Read the daily price CSV; the 'Date' column is parsed to datetimes on load.
price_data = pd.read_csv(
    'data/tech_stock_daily_price.csv',
    parse_dates=['Date'],
)

In [146]:
# Order rows by ticker and then by date so that pct_change compares each close
# with the previous row of the same ticker.
price_data = price_data.sort_values(by=['ticker', 'Date'])

# Fractional change of 'Close' within each ticker; the first row of every
# ticker has no predecessor and therefore gets NaN.
price_data['Price Change%'] = (
    price_data
    .groupby('ticker')['Close']
    .pct_change()
)

In [None]:
## Remove outliers based on IQR for 'Price Change%'
# Quartiles are taken over the whole column, i.e. all tickers pooled together.
Q1, Q3 = (price_data['Price Change%'].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1

# Accept values within 1.5 * IQR of the quartiles (both ends inclusive).
lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# NaN changes never satisfy the range test, so those rows are dropped as well.
within_bounds = price_data['Price Change%'].between(lower_bound, upper_bound)
filtered_price_data = price_data[within_bounds]


In [148]:
# (rows, columns) before and after the outlier filter.
(price_data.shape, filtered_price_data.shape)

((55712, 5), (51788, 5))

# Combine data

In [149]:
# Left-join prices onto the macro rows by calendar date: every macro row is
# kept, and macro rows with no price on that date get NaN in the price columns.
results = pd.merge(
    filtered_macro_data,
    filtered_price_data,
    how='left',
    left_on='date',
    right_on='Date',
)

In [150]:
# Persist the joined table, omitting the row index.
results.to_csv(
    'data/macro_data_with_prices.csv',
    index=False,
)

# Correlations

In [151]:
# For every (Merged, ticker) pair, compute the correlation between the macro
# 'value' and the stock 'Close' (Series.corr defaults to Pearson) together with
# the number of joined rows in that pair.
#
# Only the two columns the lambda reads are selected before .apply(): calling
# apply on the full grouped frame also passes the grouping columns, which newer
# pandas releases flag with a DeprecationWarning. The output is unchanged.
#
# NOTE(review): 'Count' is the raw row count of the group; rows with a missing
# 'value' or 'Close' would be counted here but ignored by corr() -- confirm
# this is the intended meaning of the column.
correlations = (
    results.groupby(['Merged', 'ticker'])[['value', 'Close']]
      .apply(lambda group: pd.Series({
          'CorrelationWithPrice': group['value'].corr(group['Close']),
          'Count': group.shape[0]
          }))
      .reset_index()
)


In [152]:
# Persist the per-(Merged, ticker) correlation table, omitting the row index.
correlations.to_csv(
    'data/function_corr_with_price.csv',
    index=False,
)